commit a274d6f4377b2e6de289fcb2c7b2aa0f42d99903
parent 51081b5ac4913f56f762cec8ac5da1b900f66521
Author: gracefu <81774659+gracefuu@users.noreply.github.com>
Date: Fri, 16 May 2025 05:36:40 +0800
v0
Diffstat:
 A  .gitignore          |   1 +
 A  build.zig           |  63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 A  build.zig.zon       |  16 ++++++++++++++++
 A  src/Ast.zig         | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 A  src/AstGen.zig      | 559 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 A  src/AstGen/test.zig | 346 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 A  src/main.zig        |  22 ++++++++++++++++++++++
 A  src/root.zig        |  40 ++++++++++++++++++++++++++++++++++++++++
 A  src/str.zig         |  95 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 A  src/utils.zig       | 126 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10 files changed, 1504 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+zig-out
diff --git a/build.zig b/build.zig
@@ -0,0 +1,63 @@
+const std = @import("std");
+
+pub fn build(b: *std.Build) void {
+ const target = b.standardTargetOptions(.{});
+ const optimize = b.standardOptimizeOption(.{});
+
+ const mymarkdown = b.addModule("mymarkdown", .{
+ .root_source_file = b.path("src/root.zig"),
+ .target = target,
+ .optimize = optimize,
+ });
+ mymarkdown.addImport("ziggy", b.dependency("ziggy", .{}).module("ziggy"));
+    const mymarkdown_cli = b.createModule(.{
+ .root_source_file = b.path("src/main.zig"),
+ .target = target,
+ .optimize = optimize,
+ });
+ mymarkdown_cli.addImport("mymarkdown", mymarkdown);
+
+ const mymarkdown_cli_compile = b.addExecutable(.{
+ .name = "mymarkdown",
+ .root_module = mymarkdown_cli,
+ });
+ b.installArtifact(mymarkdown_cli_compile);
+
+ const check = b.step("check", "Check if the mymarkdown CLI compiles");
+ check.dependOn(&mymarkdown_cli_compile.step);
+
+ setupTestStep(b, target, optimize, mymarkdown, mymarkdown_cli, check);
+ setupRunStep(b, mymarkdown_cli_compile);
+}
+
+fn setupTestStep(
+ b: *std.Build,
+ target: std.Build.ResolvedTarget,
+ optimize: std.builtin.OptimizeMode,
+ mymarkdown: *std.Build.Module,
+ mymarkdown_cli: *std.Build.Module,
+ check: *std.Build.Step,
+) void {
+ const test_step = b.step("test", "Run unit tests");
+ test_step.dependOn(check);
+ test_step.dependOn(&b.addRunArtifact(b.addTest(.{
+ .root_module = mymarkdown,
+ .target = target,
+ .optimize = optimize,
+ })).step);
+ test_step.dependOn(&b.addRunArtifact(b.addTest(.{
+ .root_module = mymarkdown_cli,
+ .target = target,
+ .optimize = optimize,
+ })).step);
+}
+
+fn setupRunStep(
+ b: *std.Build,
+ mymarkdown_cli_compile: *std.Build.Step.Compile,
+) void {
+ const run_exe = b.addRunArtifact(mymarkdown_cli_compile);
+ if (b.args) |args| run_exe.addArgs(args);
+ const run_exe_step = b.step("run", "Run the mymarkdown CLI");
+ run_exe_step.dependOn(&run_exe.step);
+}
diff --git a/build.zig.zon b/build.zig.zon
@@ -0,0 +1,16 @@
+.{
+ .name = .mymarkdown,
+ .version = "0.0.0",
+ .fingerprint = 0x680fc5b268bbdd89, // Changing this has security and trust implications.
+ .minimum_zig_version = "0.14.0",
+ .dependencies = .{
+ .ziggy = .{
+ .path = "../../../manual-software/ziggy",
+ },
+ },
+ .paths = .{
+ "build.zig",
+ "build.zig.zon",
+ "src",
+ },
+}
diff --git a/src/Ast.zig b/src/Ast.zig
@@ -0,0 +1,236 @@
+const std = @import("std");
+const ziggy = @import("ziggy");
+const utils = @import("utils.zig");
+const Allocator = std.mem.Allocator;
+const Ast = @This();
+
+nodes: []const Node,
+errors: []const Error,
+extra: []const u32,
+
+pub const empty: Ast = .{ .nodes = &.{}, .errors = &.{}, .extra = &.{} };
+
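+// The helpers below generate `format` methods that print values in a
+// Zig-literal-like syntax, so printed AST dumps read like the literals used
+// as expected values in the AstGen tests.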
+fn ZiggyFormat(comptime T: type, opts: ziggy.serializer.StringifyOptions) type {
+ return struct {
+ pub fn format(self: T, comptime _: []const u8, _: anytype, writer: anytype) !void {
+ _ = try writer.writeAll(".");
+ try ziggy.stringify(self, opts, writer);
+ }
+ };
+}
+
+fn ziggyFormat(comptime T: type, opts: ziggy.serializer.StringifyOptions) @TypeOf(ZiggyFormat(T, opts).format) {
+ return ZiggyFormat(T, opts).format;
+}
+
+fn UnionFormat(comptime T: type) type {
+ return struct {
+ pub fn format(self: T, comptime _: []const u8, _: anytype, writer: anytype) !void {
+ const info = @typeInfo(T).@"union";
+ if (info.tag_type) |UnionTagType| {
+ try writer.writeAll(".{ .");
+ try writer.writeAll(@tagName(@as(UnionTagType, self)));
+ try writer.writeAll(" = ");
+ inline for (info.fields) |u_field| {
+ if (self == @field(UnionTagType, u_field.name)) {
+ try writer.print("{}", .{@field(self, u_field.name)});
+ }
+ }
+ try writer.writeAll(" }");
+ } else {
+ try writer.print("@{x}", .{@intFromPtr(&self)});
+ }
+ }
+ };
+}
+
+fn unionFormat(comptime T: type) @TypeOf(UnionFormat(T).format) {
+ return UnionFormat(T).format;
+}
+
+fn StructFormat(comptime T: type) type {
+ return struct {
+ pub fn format(value: T, comptime actual_fmt: []const u8, _: anytype, writer: anytype) !void {
+ const info = @typeInfo(T).@"struct";
+ if (actual_fmt.len != 0) std.fmt.invalidFmtError(actual_fmt, value);
+ if (info.is_tuple) {
+ // Skip the type and field names when formatting tuples.
+ try writer.writeAll(".{");
+ inline for (info.fields, 0..) |f, i| {
+ if (i == 0) {
+ try writer.writeAll(" ");
+ } else {
+ try writer.writeAll(", ");
+ }
+ try writer.print("{}", .{@field(value, f.name)});
+ }
+ return writer.writeAll(" }");
+ }
+ try writer.writeAll(".{");
+ inline for (info.fields, 0..) |f, i| {
+ if (i == 0) {
+ try writer.writeAll(" .");
+ } else {
+ try writer.writeAll(", .");
+ }
+ try writer.writeAll(f.name);
+ try writer.writeAll(" = ");
+ try writer.print("{}", .{@field(value, f.name)});
+ }
+ try writer.writeAll(" }");
+ }
+ };
+}
+
+fn structFormat(comptime T: type) @TypeOf(StructFormat(T).format) {
+ return StructFormat(T).format;
+}
+
+pub const StrOffset = u32;
+pub const StrLen = u24;
+
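+// A node is a packed (data, tag) pair built by `utils.Packed`, keeping every
+// node at 64 bits (see the size-tracking tests below). Nodes are stored in
+// preorder: a container's children immediately follow it, which `render`
+// relies on when walking `num_children` entries.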
+pub const Node = utils.Packed(union(enum(u8)) {
+ document: Root,
+ marker: Leaf, // First child of nodes like heading, list items, ...
+
+ thematic_break: Leaf,
+ heading: Container,
+ quote: Container,
+ paragraph: Container,
+ unordered_item: Container,
+ ordered_item: Container,
+ term_item: Container,
+ task_item: Container,
+ elaboration: Container,
+
+ text: Leaf,
+ space_text: Leaf, // text with 1 space added before it
+
+ pub const Idx = utils.NewType(u24, opaque {});
+ pub const Root = packed struct {
+ num_children: u24 = 0,
+ pub const format = structFormat(@This());
+ };
+ pub const Container = packed struct {
+ off: StrOffset,
+ num_children: u24 = 0,
+ pub const format = structFormat(@This());
+ };
+ pub const Leaf = packed struct {
+ off: StrOffset,
+ len: StrLen,
+ const num_children = 0;
+ pub const format = structFormat(@This());
+ };
+
+ pub fn incrementNumChildren(self: *Node) void {
+ switch (self.tag) {
+ inline else => |t| {
+ if (@TypeOf(@field(self.data, @tagName(t))) == Container or @TypeOf(@field(self.data, @tagName(t))) == Root) {
+ @field(self.data, @tagName(t)).num_children += 1;
+ } else unreachable;
+ },
+ }
+ }
+
+ pub const format = unionFormat(@This());
+});
+
+pub const Error = utils.Packed(union(enum(u8)) {
+ marker_too_long: NodeError,
+ invalid_marker: PointError,
+ empty_line_in_inline_block: PointError,
+ inconsistent_indentation: PointError,
+
+ /// Used when the error diagnostic spans the entire node
+ pub const NodeError = packed struct {
+ idx: Node.Idx,
+
+ pub const format = structFormat(@This());
+ };
+
+ /// Used when the error diagnostic should point at a single location
+ pub const PointError = packed struct {
+ idx: Node.Idx,
+ off: StrOffset,
+
+ pub const format = structFormat(@This());
+ };
+ pub const Idx = utils.NewType(u24, opaque {});
+
+ pub const format = unionFormat(@This());
+});
+
+test "Tracking size of Node struct" {
+ try std.testing.expectEqual(24, @bitSizeOf(Node.Idx));
+ try std.testing.expectEqual(4, @sizeOf(Node.Idx));
+ try std.testing.expectEqual(64, @bitSizeOf(Node));
+ try std.testing.expectEqual(8, @sizeOf(Node));
+}
+
+test "Tracking size of Error struct" {
+ try std.testing.expectEqual(24, @bitSizeOf(Error.Idx));
+ try std.testing.expectEqual(4, @sizeOf(Error.Idx));
+ try std.testing.expectEqual(64, @bitSizeOf(Error));
+ try std.testing.expectEqual(8, @sizeOf(Error));
+}
+
+pub const format = ziggyFormat(@This(), .{
+ .whitespace = .space_2,
+ .omit_top_level_curly = false,
+});
+
+pub const Tagged = struct {
+ nodes: []const Node.Tagged,
+ errors: []const Error.Tagged,
+ extra: []const u32,
+
+ pub const empty: Tagged = .{ .nodes = &.{}, .errors = &.{}, .extra = &.{} };
+};
+pub fn toTagged(self: Ast, gpa: Allocator) !Tagged {
+ const nodes = try gpa.alloc(Node.Tagged, self.nodes.len);
+ const errors = try gpa.alloc(Error.Tagged, self.errors.len);
+ const extra = try gpa.dupe(u32, self.extra);
+ for (self.nodes, nodes) |node, *out| out.* = node.toTagged();
+ for (self.errors, errors) |err, *out| out.* = err.toTagged();
+ return .{ .nodes = nodes, .errors = errors, .extra = extra };
+}
+
+pub fn render(self: Ast, writer: anytype, input: []const u8, start_: ?Node.Idx) !?Node.Idx {
+ const start: Node.Idx = start_ orelse @enumFromInt(0);
+ switch (self.nodes[@intFromEnum(start)].tag) {
+ .document => try writer.writeAll("<body>\n"),
+ .paragraph => try writer.writeAll("<p>"),
+ .text => {
+ const data: Node.Leaf = self.nodes[@intFromEnum(start)].data.text;
+ try writer.writeAll(input[data.off .. data.off + data.len]);
+ },
+ .space_text => {
+            const data: Node.Leaf = self.nodes[@intFromEnum(start)].data.space_text;
+ try writer.writeByte(' ');
+ try writer.writeAll(input[data.off .. data.off + data.len]);
+ },
+ else => unreachable,
+ }
+ var cur_idx: ?Node.Idx = start.next();
+ switch (self.nodes[@intFromEnum(start)].tag) {
+ inline .document, .paragraph => |t| {
+ const data = @field(self.nodes[@intFromEnum(start)].data, @tagName(t));
+ for (0..data.num_children) |_| {
+ if (cur_idx) |idx| {
+ cur_idx = try self.render(writer, input, idx);
+ } else {
+ unreachable;
+ }
+ }
+ },
+ else => {},
+ }
+ switch (self.nodes[@intFromEnum(start)].tag) {
+ .document => try writer.writeAll("</body>\n"),
+ .paragraph => try writer.writeAll("</p>\n"),
+ .text, .space_text => {},
+ else => unreachable,
+ }
+ return cur_idx;
+}
diff --git a/src/AstGen.zig b/src/AstGen.zig
@@ -0,0 +1,559 @@
+const std = @import("std");
+const ziggy = @import("ziggy");
+const utils = @import("utils.zig");
+const str = @import("str.zig");
+const ArenaAllocator = std.heap.ArenaAllocator;
+const Allocator = std.mem.Allocator;
+const AstGen = @This();
+const Ast = @import("Ast.zig");
+const Node = Ast.Node;
+const Error = Ast.Error;
+
+input_base: [*]u8,
+input: []u8,
+nodes: std.ArrayListUnmanaged(Node),
+errors: std.ArrayListUnmanaged(Error),
+extra: std.ArrayListUnmanaged(u32),
+
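+// Small index helpers. `@setRuntimeSafety(true)` keeps bounds and overflow
+// checks on even in release builds, since `Node.Idx` is only 24 bits wide.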
+fn getNode(self: AstGen, idx: Node.Idx) *Node {
+ @setRuntimeSafety(true);
+ return &self.nodes.items[@intFromEnum(idx)];
+}
+fn lastNodeIdx(self: AstGen) Node.Idx {
+ @setRuntimeSafety(true);
+ return @enumFromInt(self.nodes.items.len - 1);
+}
+fn nextNodeIdx(self: AstGen) Node.Idx {
+ @setRuntimeSafety(true);
+ return @enumFromInt(self.nodes.items.len);
+}
+fn appendNode(self: *AstGen, gpa: Allocator, node: Node.Tagged) !Node.Idx {
+ {
+ @setRuntimeSafety(true);
+ if (self.nodes.items.len > std.math.maxInt(
+ @typeInfo(Node.Idx).@"enum".tag_type,
+ )) unreachable;
+ }
+ const idx = self.nodes.items.len;
+ try self.nodes.append(gpa, .fromTagged(node));
+ return @enumFromInt(idx);
+}
+
+pub fn deinit(self: *AstGen, gpa: Allocator) void {
+ self.nodes.deinit(gpa);
+ self.errors.deinit(gpa);
+ self.extra.deinit(gpa);
+}
+
+pub fn parse(gpa: Allocator, output_gpa: ?Allocator, input: []const u8) error{ InputTooLarge, OutOfMemory }!Ast {
+ if (input.len > std.math.maxInt(u32) - 1) {
+ return error.InputTooLarge;
+ }
+
+ // const input_copy = input;
+ // const input_copy = try gpa.dupe(u8, input);
+ // defer gpa.free(input_copy);
+ var input_copy_arraylist: std.ArrayListUnmanaged(u8) = .empty;
+ defer input_copy_arraylist.deinit(gpa);
+ try input_copy_arraylist.ensureTotalCapacityPrecise(gpa, input.len + 2);
+
+ var ast: AstGen = .{
+ .input_base = input_copy_arraylist.items.ptr,
+ .input = undefined,
+ .nodes = .empty,
+ .errors = .empty,
+ .extra = .empty,
+ };
+ defer ast.deinit(gpa);
+ const root = try ast.appendNode(gpa, .{ .document = .{} });
+
+ var lines: std.ArrayListUnmanaged(Ast.StrOffset) = .empty;
+ defer lines.deinit(gpa);
+ // var lines: std.ArrayListUnmanaged([]u8) = .empty;
+ // defer lines.deinit(gpa);
+
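+    // Copy the input line by line, recording each line's start offset. A '\n'
+    // is written directly after the last non-whitespace byte of each line (a
+    // whitespace-only line becomes a lone '\n'), so later scans can treat '\n'
+    // as end-of-line and ignore trailing blanks.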
+ var lines_it = std.mem.splitScalar(u8, input, '\n');
+ var maybe_line: ?[]u8 = @constCast(lines_it.first());
+ while (maybe_line) |line| : (maybe_line = @constCast(lines_it.next())) {
+ if (str.lastIndexOfNone(line, " \t\r\n")) |idx| {
+ const old_len = input_copy_arraylist.items.len;
+ try lines.append(gpa, @intCast(old_len));
+ input_copy_arraylist.appendSliceAssumeCapacity(line);
+ input_copy_arraylist.appendAssumeCapacity('\n');
+ input_copy_arraylist.items[old_len + idx + 1] = '\n';
+ // try lines.append(gpa, input_copy_arraylist.items[old_len .. old_len + idx + 1]);
+ } else {
+ try lines.append(gpa, @intCast(input_copy_arraylist.items.len));
+ input_copy_arraylist.appendAssumeCapacity('\n');
+ // try lines.append(gpa, &.{});
+ }
+ }
+ input_copy_arraylist.appendAssumeCapacity('\n');
+ ast.input = input_copy_arraylist.items;
+ // stripTrailingWhitespace(&lines.items);
+
+ try ast.parseColumn(gpa, lines.items, root);
+
+ // std.time.sleep(std.time.ns_per_hour);
+
+ if (output_gpa) |gpa2| {
+ return .{
+ .nodes = try gpa2.dupe(Node, ast.nodes.items),
+ .errors = try gpa2.dupe(Error, ast.errors.items),
+ .extra = try gpa2.dupe(u32, ast.extra.items),
+ };
+ } else {
+ return .{
+ .nodes = try ast.nodes.toOwnedSlice(gpa),
+ .errors = try ast.errors.toOwnedSlice(gpa),
+ .extra = try ast.extra.toOwnedSlice(gpa),
+ };
+ }
+}
+
+fn stripTrailingWhitespace(lines: *[][]u8) void {
+ for (lines.*) |*line| {
+ if (str.lastIndexOfNone(line.*, " \t\r\n")) |idx| {
+ line.* = line.*[0 .. idx + 1];
+ } else {
+ line.* = line.*[0..0];
+ }
+ }
+}
+
+fn calcOffset(self: *AstGen, c: *u8) u32 {
+ return @intCast(c - self.input_base);
+}
+
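+/// Skips leading empty lines, takes the leading whitespace of the first
+/// remaining line as the block's indentation, and strips that prefix from
+/// every following line, reporting `inconsistent_indentation` where it does
+/// not match. Returns the adjusted line offsets; an empty slice means there
+/// is no indented content.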
+fn findIndentedColumn(self: *AstGen, gpa: Allocator, lines_: []u32, node_idx: Node.Idx) ![]u32 {
+ var lines = lines_;
+
+    // Empty lines at the start of the block are fine; just skip them.
+    // Special case: the first line may consist of only whitespace,
+    // because that whitespace may have been introduced by marker replacement.
+ if (lines.len > 0)
+ if (str.indexOfNone(self.input[lines[0]..], " \t\r")) |idx|
+ if (self.input[lines[0] + idx] == '\n') {
+ lines = lines[1..];
+ while (true) : (lines = lines[1..]) {
+ if (lines.len == 0) return &.{};
+ if (self.input[lines[0]] != '\n') break;
+ }
+ };
+ if (lines.len == 0) return &.{};
+
+ // determine indentation
+ const indentation_idx = str.indexOfNone(self.input[lines[0]..], " \t\r") orelse unreachable;
+ if (indentation_idx == 0) return &.{};
+ const indentation = self.input[lines[0] .. lines[0] + indentation_idx];
+
+ // strip all lines of their indentation
+ lines[0] += @truncate(indentation.len);
+ for (lines[1..]) |*line| {
+ if (self.input[line.*] == '\n') continue;
+
+ const diff_idx = std.mem.indexOfDiff(u8, self.input[line.*..], indentation) orelse unreachable;
+ // std.debug.assert(diff_idx != line.len);
+ if (diff_idx != indentation.len) {
+ try self.errors.append(gpa, .fromTagged(.{
+ .inconsistent_indentation = .{ .idx = node_idx, .off = line.* },
+ }));
+ // Recover by stripping all whitespace on this line
+ const recover_indentation_idx = std.mem.indexOfNone(u8, self.input[line.*..], " \t\r") orelse unreachable;
+ line.* += @truncate(recover_indentation_idx);
+ } else {
+ line.* += @truncate(indentation.len);
+ }
+ }
+
+ return lines;
+}
+
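+/// Parses a run of lines as inline content under `parent_idx`: the first line
+/// becomes a `text` node and every continuation line a `space_text` node (so
+/// the lines are joined by single spaces when rendered). Empty lines inside
+/// the block are reported as `empty_line_in_inline_block`, and lines longer
+/// than `Ast.StrLen` allows are split into chunks.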
+fn parseInlineBlock(self: *AstGen, gpa: Allocator, lines_: []Ast.StrOffset, parent_idx: Node.Idx) !void {
+ var lines = lines_;
+ var empty_line_off: ?u32 = null;
+
+ outer: {
+        // Empty lines at the start of the inline block are fine; just skip them.
+        // Special case: the first line may consist of only whitespace,
+        // because that whitespace may have been introduced by marker replacement.
+ if (lines.len > 0)
+ if (str.indexOfNone(self.input[lines[0]..], " \t\r")) |idx|
+ if (self.input[lines[0] + idx] == '\n') {
+ lines = lines[1..];
+ while (true) : (lines = lines[1..]) {
+ if (lines.len == 0) break :outer;
+ if (self.input[lines[0]] != '\n') break;
+ }
+ };
+ if (lines.len == 0) break :outer;
+
+ self.getNode(parent_idx).incrementNumChildren();
+
+ // determine indentation
+ const indentation_idx = str.indexOfNone(self.input[lines[0]..], " \t\r") orelse unreachable;
+ const indentation = self.input[lines[0] .. lines[0] + indentation_idx];
+
+ lines[0] += @truncate(indentation.len);
+ // lines[0] = lines[0][indentation.len..];
+
+ var len = str.indexOfChar(self.input[lines[0]..], '\n') orelse unreachable;
+ if (len <= std.math.maxInt(Ast.StrLen)) {
+ _ = try self.appendNode(gpa, .{
+ .text = .{
+ .off = lines[0],
+ .len = @truncate(len),
+ },
+ });
+ } else {
+ @branchHint(.cold);
+ while (len > 0) {
+ const chunk_len = @min(len, std.math.maxInt(Ast.StrLen));
+ _ = try self.appendNode(gpa, .{
+ .text = .{
+ .off = lines[0],
+ .len = chunk_len,
+ },
+ });
+ lines[0] += chunk_len;
+ len -= chunk_len;
+ }
+ }
+ lines = lines[1..];
+
+ while (true) {
+ // Skip and error on empty lines
+ while (true) : (lines = lines[1..]) {
+ if (lines.len == 0) break :outer;
+ if (self.input[lines[0]] != '\n') break;
+ // empty line detected
+ empty_line_off = lines[0];
+ }
+
+            if (empty_line_off) |off| {
+                // Report each empty line only once.
+                empty_line_off = null;
+                try self.errors.append(gpa, .fromTagged(.{
+                    .empty_line_in_inline_block = .{ .idx = self.nextNodeIdx(), .off = off },
+                }));
+            }
+
+ const diff_idx = std.mem.indexOfDiff(u8, self.input[lines[0]..], indentation) orelse unreachable;
+ // std.debug.assert(diff_idx != lines[0].len);
+ if (diff_idx != indentation.len) {
+ try self.errors.append(gpa, .fromTagged(.{
+ .inconsistent_indentation = .{ .idx = self.nextNodeIdx(), .off = lines[0] },
+ }));
+ // Recover by stripping all whitespace on this line
+ const recover_indentation_idx = std.mem.indexOfNone(u8, self.input[lines[0]..], " \t\r\n") orelse unreachable;
+ lines[0] += @truncate(recover_indentation_idx);
+ } else {
+ lines[0] += @truncate(indentation.len);
+ }
+
+ self.getNode(parent_idx).incrementNumChildren();
+
+ var len2 = str.indexOfChar(self.input[lines[0]..], '\n') orelse unreachable;
+ if (len2 <= std.math.maxInt(Ast.StrLen)) {
+ _ = try self.appendNode(gpa, .{
+ .space_text = .{
+ .off = lines[0],
+ .len = @truncate(len2),
+ },
+ });
+ } else {
+ @branchHint(.cold);
+ _ = try self.appendNode(gpa, .{
+ .space_text = .{
+ .off = lines[0],
+ .len = std.math.maxInt(Ast.StrLen),
+ },
+ });
+ len2 -= std.math.maxInt(Ast.StrLen);
+ lines[0] += std.math.maxInt(Ast.StrLen);
+ while (len2 > 0) {
+ const chunk_len = @min(len2, std.math.maxInt(Ast.StrLen));
+ _ = try self.appendNode(gpa, .{
+ .text = .{
+ .off = lines[0],
+ .len = chunk_len,
+ },
+ });
+ lines[0] += chunk_len;
+ len2 -= chunk_len;
+ }
+ }
+ lines = lines[1..];
+ }
+ }
+}
+
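+/// Block-level parsing loop: skips empty lines, dispatches on the first byte
+/// of each block via `parseBlockStart`, then collects the lines belonging to
+/// that block and parses them according to the returned `ParseMode`.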
+fn parseColumn(self: *AstGen, gpa: Allocator, lines_: []Ast.StrOffset, parent_idx: Node.Idx) !void {
+ var lines = lines_;
+ outer: while (true) {
+        // Skip empty lines.
+        // Special case: the first line may consist of only whitespace,
+        // because that whitespace may have been introduced by marker replacement.
+ if (lines.len > 0) {
+ if (str.indexOfNone(self.input[lines[0]..], " \t\r")) |idx| {
+ if (self.input[lines[0] + idx] == '\n') {
+ lines = lines[1..];
+ while (true) : (lines = lines[1..]) {
+ if (lines.len == 0) break :outer;
+ if (self.input[lines[0]] != '\n') break;
+ }
+ }
+ }
+ }
+ if (lines.len == 0) break :outer;
+
+ // Use first character to determine marker
+ const mode, const child = try self.parseBlockStart(gpa, lines[0]);
+ self.getNode(parent_idx).incrementNumChildren();
+
+ switch (mode) {
+ .paragraph => {
+ // take indented or non-block-marker lines
+ var num_lines: usize = 1;
+ for (lines[1..]) |line| {
+ if (self.input[line] == '\n') break;
+ if (block_specs[self.input[line]] != null) break;
+ num_lines += 1;
+ }
+
+ const paragraph_lines = lines[0..num_lines];
+ lines = lines[num_lines..];
+ try self.parseInlineBlock(gpa, paragraph_lines, child);
+ },
+ .indented_inline_block => {
+ // take indented or empty lines
+ var num_lines: usize = 1;
+ for (lines[1..]) |line| {
+ if (str.isNoneOf(self.input[line], " \t\r\n")) break;
+ num_lines += 1;
+ }
+
+ const inline_block_lines = try self.findIndentedColumn(gpa, lines[0..num_lines], child);
+ lines = lines[num_lines..];
+ try self.parseInlineBlock(gpa, inline_block_lines, child);
+ },
+ .indented_column => {
+ // take indented or empty lines
+ var num_lines: usize = 1;
+ for (lines[1..]) |line| {
+ if (str.isNoneOf(self.input[line], " \t\r\n")) break;
+ num_lines += 1;
+ }
+
+ const column_lines = try self.findIndentedColumn(gpa, lines[0..num_lines], child);
+ lines = lines[num_lines..];
+ try self.parseColumn(gpa, column_lines, child);
+ },
+ .no_children => {
+ lines = lines[1..];
+ },
+ else => unreachable,
+ }
+ }
+}
+
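+/// How the lines following a block marker should be parsed; `.raw` is not
+/// produced by any marker yet.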
+const ParseMode = union(enum) {
+ indented_column,
+ indented_inline_block,
+ paragraph,
+ raw: struct { fence: []u8 },
+ no_children,
+};
+
+const MarkerSpec = union(enum) {
+ exact: []const u8,
+ starts_with: []const u8,
+ starts_with_multi: struct {
+ marker_char: u8,
+ extra: []const []const u8 = &.{""}, // any extra characters to check after the marker
+ max_chars: ?u32 = null,
+ },
+};
+const BlockSpecCase = struct {
+ tag: Node.Tag,
+ marker: MarkerSpec,
+ mode: ParseMode,
+ store_marker_child: enum { store, no_store },
+};
+
+const BlockSpec = ?[]const BlockSpecCase;
+
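+/// Builds a 256-entry lookup table, indexed by a line's first byte, from the
+/// public decls of `spec`: each decl is keyed on the first character of its
+/// name and lists the marker cases to try for lines starting with that byte.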
+fn blockSpecs(comptime spec: type) [256]BlockSpec {
+ var arr: [256]BlockSpec = undefined;
+ for (0..256) |c| arr[c] = null;
+ for (@typeInfo(spec).@"struct".decls) |decl| {
+ const c = decl.name[0];
+ arr[c] = @field(spec, decl.name);
+ }
+ return arr;
+}
+
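+// Cases are tried in declaration order, so for '-' the task-item markers
+// ("- [ ]", "- [x]", "- [X]") are checked before the plain unordered-item marker.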
+const block_specs = blockSpecs(struct {
+ pub const @"*": BlockSpec = &.{
+ .{
+ .tag = .thematic_break,
+ .marker = .{ .exact = "***" },
+ .mode = .no_children,
+ .store_marker_child = .no_store,
+ },
+ };
+ pub const @"#": BlockSpec = &.{
+ .{
+ .tag = .heading,
+ .marker = .{ .starts_with_multi = .{ .marker_char = '#', .max_chars = 6 } },
+ .mode = .indented_inline_block,
+ .store_marker_child = .store,
+ },
+ };
+ pub const @"-": BlockSpec = &.{
+ .{
+ .tag = .task_item,
+ .marker = .{ .starts_with_multi = .{ .marker_char = '-', .extra = &.{ " [ ]", " [x]", " [X]" } } },
+ .mode = .indented_inline_block,
+ .store_marker_child = .store,
+ },
+ .{
+ .tag = .unordered_item,
+ .marker = .{ .starts_with_multi = .{ .marker_char = '-' } },
+ .mode = .indented_inline_block,
+ .store_marker_child = .store,
+ },
+ };
+ pub const @".": BlockSpec = &.{
+ .{
+ .tag = .ordered_item,
+ .marker = .{ .starts_with_multi = .{ .marker_char = '.' } },
+ .mode = .indented_inline_block,
+ .store_marker_child = .store,
+ },
+ };
+ pub const @":": BlockSpec = &.{
+ .{
+ .tag = .term_item,
+ .marker = .{ .starts_with_multi = .{ .marker_char = ':' } },
+ .mode = .indented_inline_block,
+ .store_marker_child = .store,
+ },
+ };
+ pub const @">": BlockSpec = &.{
+ .{
+ .tag = .quote,
+ .marker = .{ .starts_with = ">" },
+ .mode = .indented_column,
+ .store_marker_child = .no_store,
+ },
+ };
+ pub const @"+": BlockSpec = &.{
+ .{
+ .tag = .elaboration,
+ .marker = .{ .starts_with = "+" },
+ .mode = .indented_column,
+ .store_marker_child = .no_store,
+ },
+ };
+});
+
+/// Appends the suitable block node to the ast,
+/// then returns how parsing should proceed for the children of this block.
+/// Also returns the idx of the container node created.
+fn parseBlockStart(self: *AstGen, gpa: Allocator, line: Ast.StrOffset) !struct { ParseMode, Node.Idx } {
+ switch (self.input[line]) {
+ inline else => |c| {
+ const spec_or_null = block_specs[c];
+ if (spec_or_null) |spec| {
+ inline for (spec) |case| {
+ switch (case.marker) {
+ .exact, .starts_with => |marker| {
+ if (std.mem.startsWith(u8, self.input[line..], marker)) {
+ const node = if (case.mode == .no_children) try self.appendNode(gpa, @unionInit(Node.Tagged, @tagName(case.tag), @as(Node.Tagged.Leaf, .{
+ .off = line,
+ .len = marker.len,
+ }))) else try self.appendNode(gpa, @unionInit(Node.Tagged, @tagName(case.tag), @as(Node.Tagged.Container, .{
+ .off = line,
+ .num_children = if (case.store_marker_child == .store) 1 else 0,
+ })));
+ @memset(self.input[line .. line + marker.len], ' ');
+ if (case.store_marker_child == .store) {
+ _ = try self.appendNode(gpa, .{ .marker = .{
+ .off = line,
+                                .len = marker.len,
+ } });
+ }
+ return .{ case.mode, node };
+ }
+ },
+ .starts_with_multi => |marker_spec| {
+ var marker_len = str.indexOfNotChar(self.input[line..], marker_spec.marker_char) orelse str.indexOfChar(self.input[line..], '\n') orelse unreachable;
+
+ inline for (marker_spec.extra) |extra| {
+ if (std.mem.startsWith(u8, self.input[line + marker_len ..], extra)) {
+ marker_len += extra.len;
+
+ const node = try self.appendNode(gpa, @unionInit(Node.Tagged, @tagName(case.tag), @as(Node.Tagged.Container, .{
+ .off = line,
+ .num_children = if (case.store_marker_child == .store) 1 else 0,
+ })));
+
+ if (marker_spec.max_chars) |max|
+ if (marker_len > max)
+ try self.errors.append(gpa, .fromTagged(.{
+ .marker_too_long = .{
+ .idx = if (case.store_marker_child == .no_store)
+ self.lastNodeIdx()
+ else
+ self.nextNodeIdx(),
+ },
+ }));
+
+ @memset(self.input[line .. line + marker_len], ' ');
+ if (case.store_marker_child == .store) {
+ _ = try self.appendNode(gpa, .{ .marker = .{
+ .off = line,
+ .len = utils.safeIntCast(Ast.StrLen, marker_len),
+ } });
+ }
+ return .{ case.mode, node };
+ }
+ }
+ },
+ }
+ }
+ } else {
+ // Default behaviour is to parse a paragraph until the next newline or block character
+ return .{
+ .paragraph,
+ try self.appendNode(gpa, .{
+ .paragraph = .{
+ .off = line,
+ },
+ }),
+ };
+ }
+ },
+ }
+
+ // Line started with a special character, but it didn't match any markers
+ // Fallback to paragraph, but place a warning.
+ try self.errors.append(gpa, .fromTagged(.{
+ .invalid_marker = .{
+ .idx = self.nextNodeIdx(),
+ .off = line,
+ },
+ }));
+
+ return .{
+ .paragraph,
+ try self.appendNode(gpa, .{
+ .paragraph = .{
+ .off = line,
+ },
+ }),
+ };
+}
+
+test {
+ _ = @import("AstGen/test.zig");
+}
diff --git a/src/AstGen/test.zig b/src/AstGen/test.zig
@@ -0,0 +1,346 @@
+const std = @import("std");
+const parse = @import("../AstGen.zig").parse;
+const Ast = @import("../Ast.zig");
+
+const GeneralPurposeAllocator = std.heap.GeneralPurposeAllocator(.{});
+const ArenaAllocator = std.heap.ArenaAllocator;
+
+fn testParse(input: []const u8, expected: Ast.Tagged) !void {
+ var arena: ArenaAllocator = .init(std.testing.allocator);
+ defer arena.deinit();
+ const ast = try parse(std.testing.allocator, arena.allocator(), input);
+ const tagged_ast = try ast.toTagged(arena.allocator());
+ // try std.testing.expectEqualDeep(expected.nodes.len, tagged_ast.nodes.len);
+ try std.testing.expectEqualDeep(expected, tagged_ast);
+}
+
+test "Empty" {
+ try testParse("", .{
+ .nodes = &.{
+ .{ .document = .{ .num_children = 0 } },
+ },
+ .errors = &.{},
+ .extra = &.{},
+ });
+}
+
+test "Happy path paragraph" {
+ try testParse(
+ \\text
+ \\
+ \\text
+ \\text
+ \\
+ \\text
+ \\ text
+ \\
+ , .{
+ .nodes = &.{
+ .{ .document = .{ .num_children = 3 } },
+ .{ .paragraph = .{ .off = 0, .num_children = 1 } },
+ .{ .text = .{ .off = 0, .len = 4 } },
+ .{ .paragraph = .{ .off = 6, .num_children = 2 } },
+ .{ .text = .{ .off = 6, .len = 4 } },
+ .{ .space_text = .{ .off = 11, .len = 4 } },
+ .{ .paragraph = .{ .off = 17, .num_children = 2 } },
+ .{ .text = .{ .off = 17, .len = 4 } },
+ .{ .space_text = .{ .off = 22, .len = 7 } },
+ },
+ .errors = &.{},
+ .extra = &.{},
+ });
+}
+
+test "Happy path headings" {
+ try testParse(
+ \\# text
+ \\# text
+ \\# text
+ \\ text
+ \\
+ \\# text
+ \\
+ \\# text
+ \\ text
+ \\
+ \\# text
+ \\ text
+ \\
+ \\## text
+ \\## text
+ \\## text
+ \\ text
+ \\
+ \\## text
+ \\
+ \\## text
+ \\ text
+ \\
+ \\## text
+ \\ text
+ \\
+ , .{
+ .nodes = &.{
+ .{ .document = .{ .num_children = 12 } },
+ .{ .heading = .{ .off = 0, .num_children = 2 } },
+ .{ .marker = .{ .off = 0, .len = 1 } },
+ .{ .text = .{ .off = 2, .len = 4 } },
+ .{ .heading = .{ .off = 7, .num_children = 2 } },
+ .{ .marker = .{ .off = 7, .len = 1 } },
+ .{ .text = .{ .off = 9, .len = 4 } },
+ .{ .heading = .{ .off = 14, .num_children = 3 } },
+ .{ .marker = .{ .off = 14, .len = 1 } },
+ .{ .text = .{ .off = 16, .len = 4 } },
+ .{ .space_text = .{ .off = 23, .len = 4 } },
+ .{ .heading = .{ .off = 29, .num_children = 2 } },
+ .{ .marker = .{ .off = 29, .len = 1 } },
+ .{ .text = .{ .off = 31, .len = 4 } },
+ .{ .heading = .{ .off = 37, .num_children = 3 } },
+ .{ .marker = .{ .off = 37, .len = 1 } },
+ .{ .text = .{ .off = 39, .len = 4 } },
+ .{ .space_text = .{ .off = 46, .len = 4 } },
+ .{ .heading = .{ .off = 52, .num_children = 3 } },
+ .{ .marker = .{ .off = 52, .len = 1 } },
+ .{ .text = .{ .off = 54, .len = 4 } },
+ .{ .space_text = .{ .off = 61, .len = 6 } },
+ .{ .heading = .{ .off = 69, .num_children = 2 } },
+ .{ .marker = .{ .off = 69, .len = 2 } },
+ .{ .text = .{ .off = 72, .len = 4 } },
+ .{ .heading = .{ .off = 77, .num_children = 2 } },
+ .{ .marker = .{ .off = 77, .len = 2 } },
+ .{ .text = .{ .off = 80, .len = 4 } },
+ .{ .heading = .{ .off = 85, .num_children = 3 } },
+ .{ .marker = .{ .off = 85, .len = 2 } },
+ .{ .text = .{ .off = 88, .len = 4 } },
+ .{ .space_text = .{ .off = 96, .len = 4 } },
+ .{ .heading = .{ .off = 102, .num_children = 2 } },
+ .{ .marker = .{ .off = 102, .len = 2 } },
+ .{ .text = .{ .off = 105, .len = 4 } },
+ .{ .heading = .{ .off = 111, .num_children = 3 } },
+ .{ .marker = .{ .off = 111, .len = 2 } },
+ .{ .text = .{ .off = 114, .len = 4 } },
+ .{ .space_text = .{ .off = 122, .len = 4 } },
+ .{ .heading = .{ .off = 128, .num_children = 3 } },
+ .{ .marker = .{ .off = 128, .len = 2 } },
+ .{ .text = .{ .off = 131, .len = 4 } },
+ .{ .space_text = .{ .off = 139, .len = 6 } },
+ },
+ .errors = &.{},
+ .extra = &.{},
+ });
+}
+
+test "Happy path quote" {
+ try testParse(
+ \\> text
+ \\ text
+ \\
+ \\> text
+ \\ text
+ \\> text
+ \\> text
+ \\text
+ \\
+ , .{
+ .nodes = &.{
+ .{ .document = .{ .num_children = 5 } },
+ .{ .quote = .{ .off = 0, .num_children = 1 } },
+ .{ .paragraph = .{ .off = 2, .num_children = 2 } },
+ .{ .text = .{ .off = 2, .len = 4 } },
+ .{ .space_text = .{ .off = 9, .len = 4 } },
+ .{ .quote = .{ .off = 15, .num_children = 1 } },
+ .{ .paragraph = .{ .off = 17, .num_children = 2 } },
+ .{ .text = .{ .off = 17, .len = 4 } },
+ .{ .space_text = .{ .off = 24, .len = 6 } },
+ .{ .quote = .{ .off = 31, .num_children = 1 } },
+ .{ .paragraph = .{ .off = 33, .num_children = 1 } },
+ .{ .text = .{ .off = 33, .len = 4 } },
+ .{ .quote = .{ .off = 38, .num_children = 1 } },
+ .{ .paragraph = .{ .off = 40, .num_children = 1 } },
+ .{ .text = .{ .off = 40, .len = 4 } },
+ .{ .paragraph = .{ .off = 45, .num_children = 1 } },
+ .{ .text = .{ .off = 45, .len = 4 } },
+ },
+ .errors = &.{},
+ .extra = &.{},
+ });
+}
+
+test "Happy path list" {
+ try testParse(
+ \\- text
+ \\- [ ] text
+ \\. text
+ \\: text
+ \\-- text
+ \\-- [ ] text
+ \\.. text
+ \\:: text
+ \\
+ , .{
+ .nodes = &.{
+ .{ .document = .{ .num_children = 8 } },
+ .{ .unordered_item = .{ .off = 0, .num_children = 2 } },
+ .{ .marker = .{ .off = 0, .len = 1 } },
+ .{ .text = .{ .off = 2, .len = 4 } },
+ .{ .task_item = .{ .off = 7, .num_children = 2 } },
+ .{ .marker = .{ .off = 7, .len = 5 } },
+ .{ .text = .{ .off = 13, .len = 4 } },
+ .{ .ordered_item = .{ .off = 18, .num_children = 2 } },
+ .{ .marker = .{ .off = 18, .len = 1 } },
+ .{ .text = .{ .off = 20, .len = 4 } },
+ .{ .term_item = .{ .off = 25, .num_children = 2 } },
+ .{ .marker = .{ .off = 25, .len = 1 } },
+ .{ .text = .{ .off = 27, .len = 4 } },
+ .{ .unordered_item = .{ .off = 32, .num_children = 2 } },
+ .{ .marker = .{ .off = 32, .len = 2 } },
+ .{ .text = .{ .off = 35, .len = 4 } },
+ .{ .task_item = .{ .off = 40, .num_children = 2 } },
+ .{ .marker = .{ .off = 40, .len = 6 } },
+ .{ .text = .{ .off = 47, .len = 4 } },
+ .{ .ordered_item = .{ .off = 52, .num_children = 2 } },
+ .{ .marker = .{ .off = 52, .len = 2 } },
+ .{ .text = .{ .off = 55, .len = 4 } },
+ .{ .term_item = .{ .off = 60, .num_children = 2 } },
+ .{ .marker = .{ .off = 60, .len = 2 } },
+ .{ .text = .{ .off = 63, .len = 4 } },
+ },
+ .errors = &.{},
+ .extra = &.{},
+ });
+}
+
+test "Happy path list elaboration" {
+ try testParse(
+ \\- a
+ \\+ bb
+ \\
+ \\ ccc
+ \\
+ , .{
+ .nodes = &.{
+ .{ .document = .{ .num_children = 2 } },
+ .{ .unordered_item = .{ .off = 0, .num_children = 2 } },
+ .{ .marker = .{ .off = 0, .len = 1 } },
+ .{ .text = .{ .off = 2, .len = 1 } },
+ .{ .elaboration = .{ .off = 4, .num_children = 2 } },
+ .{ .paragraph = .{ .off = 6, .num_children = 1 } },
+ .{ .text = .{ .off = 6, .len = 2 } },
+ .{ .paragraph = .{ .off = 12, .num_children = 1 } },
+ .{ .text = .{ .off = 12, .len = 3 } },
+ },
+ .errors = &.{},
+ .extra = &.{},
+ });
+}
+
+test "Thematic break" {
+ try testParse(
+ \\a
+ \\***
+ \\b
+ \\*
+ \\c
+ \\
+ , .{
+ .nodes = &.{
+ .{ .document = .{ .num_children = 4 } },
+ .{ .paragraph = .{ .off = 0, .num_children = 1 } },
+ .{ .text = .{ .off = 0, .len = 1 } },
+ .{ .thematic_break = .{ .off = 2, .len = 3 } },
+ .{ .paragraph = .{ .off = 6, .num_children = 1 } },
+ .{ .text = .{ .off = 6, .len = 1 } },
+ .{ .paragraph = .{ .off = 8, .num_children = 2 } },
+ .{ .text = .{ .off = 8, .len = 1 } },
+ .{ .space_text = .{ .off = 10, .len = 1 } },
+ },
+ .errors = &.{
+ .{ .invalid_marker = .{ .idx = @enumFromInt(6), .off = 8 } },
+ },
+ .extra = &.{},
+ });
+}
+
+test "Mixed indentation" {
+ try testParse(
+ \\+ aaa
+ \\
+ \\
+ ++ "\tbbbbb\n", .{
+ .nodes = &.{
+ .{ .document = .{ .num_children = 1 } },
+ .{ .elaboration = .{ .off = 0, .num_children = 2 } },
+ .{ .paragraph = .{ .off = 2, .num_children = 1 } },
+ .{ .text = .{ .off = 2, .len = 3 } },
+ .{ .paragraph = .{ .off = 8, .num_children = 1 } },
+ .{ .text = .{ .off = 8, .len = 5 } },
+ },
+ .errors = &.{
+ .{ .inconsistent_indentation = .{ .idx = @enumFromInt(1), .off = 7 } },
+ },
+ .extra = &.{},
+ });
+}
+
+test "Empty line in heading" {
+ try testParse(
+ \\# heading
+ \\
+ \\ text
+ \\
+ \\text
+ \\
+ , .{
+ .nodes = &.{
+ .{ .document = .{ .num_children = 2 } },
+ .{ .heading = .{ .off = 0, .num_children = 3 } },
+ .{ .marker = .{ .off = 0, .len = 1 } },
+ .{ .text = .{ .off = 2, .len = 7 } },
+ .{ .space_text = .{ .off = 13, .len = 4 } },
+ .{ .paragraph = .{ .off = 19, .num_children = 1 } },
+ .{ .text = .{ .off = 19, .len = 4 } },
+ },
+ .errors = &.{
+ .{ .empty_line_in_inline_block = .{ .idx = @enumFromInt(4), .off = 10 } },
+ },
+ .extra = &.{},
+ });
+}
+
+// test "Super long line" {
+// const input = try std.testing.allocator.create([(1 << 24) * 4]u8);
+// defer std.testing.allocator.destroy(input);
+// @memset(input, 'a');
+// input[1] = '\n';
+// try testParse(input, .{
+// .nodes = &.{
+// .{ .document = .{ .num_children = 1 } },
+// .{ .paragraph = .{ .off = 0, .num_children = 2 } },
+// .{ .text = .{ .off = 0, .len = 1 } },
+// .{ .space_text = .{ .off = 2, .len = 16777215 } },
+// .{ .text = .{ .off = 2, .len = 16777215 } },
+// .{ .text = .{ .off = 2, .len = 16777215 } },
+// .{ .text = .{ .off = 2, .len = 16777215 } },
+// .{ .text = .{ .off = 2, .len = 2 } },
+// },
+// .errors = &.{},
+// .extra = &.{},
+// });
+// }
+
+// test "Many short lines" {
+// const input = try std.testing.allocator.create([(1 << 23) - 2][2]u8);
+// defer std.testing.allocator.destroy(input);
+// @memset(input, [2]u8{ 'a', '\n' });
+
+// var arena: ArenaAllocator = .init(std.testing.allocator);
+// defer arena.deinit();
+// const ast = try parse(std.testing.allocator, arena.allocator(), @as([*]u8, @ptrCast(input))[0 .. (1 << 23) * 2 - 4]);
+// try std.testing.expectEqual(1 << 23, ast.nodes.len);
+// try std.testing.expectEqual(@as(Ast.Node.Tagged, .{ .document = .{ .num_children = 1 } }), ast.nodes[0].toTagged());
+// try std.testing.expectEqual(@as(Ast.Node.Tagged, .{ .paragraph = .{ .off = 0, .num_children = (1 << 23) - 2 } }), ast.nodes[1].toTagged());
+// try std.testing.expectEqual(@as(Ast.Node.Tagged, .{ .text = .{ .off = 0, .len = 1 } }), ast.nodes[2].toTagged());
+// for (1..(1 << 23) - 2) |i| {
+// try std.testing.expectEqual(@as(Ast.Node.Tagged, .{ .space_text = .{ .off = @intCast(i * 2), .len = 1 } }), ast.nodes[i + 2].toTagged());
+// }
+// }
diff --git a/src/main.zig b/src/main.zig
@@ -0,0 +1,22 @@
+const std = @import("std");
+const mymarkdown = @import("mymarkdown");
+
+const GeneralPurposeAllocator = std.heap.GeneralPurposeAllocator(.{});
+const ArenaAllocator = std.heap.ArenaAllocator;
+
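+// Reads the whole document from stdin, parses it, and renders the resulting
+// AST as HTML to stdout.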
+pub fn main() !void {
+ var gpa: GeneralPurposeAllocator = .{};
+ var arena: ArenaAllocator = .init(gpa.allocator());
+ defer arena.deinit();
+
+ const input = try std.io.getStdIn().readToEndAlloc(arena.allocator(), std.math.maxInt(u32));
+
+ const ast = try mymarkdown.parse(gpa.allocator(), arena.allocator(), input);
+ // std.mem.doNotOptimizeAway(ast);
+
+ var bw = std.io.bufferedWriter(std.io.getStdOut().writer());
+ const stdout = bw.writer();
+ // try stdout.print("{}\n", .{ast});
+ _ = try ast.render(stdout, input, null);
+ try bw.flush();
+}
diff --git a/src/root.zig b/src/root.zig
@@ -0,0 +1,40 @@
+const std = @import("std");
+pub const Ast = @import("Ast.zig");
+pub const AstGen = @import("AstGen.zig");
+pub const parse = AstGen.parse;
+
+test {
+ _ = Ast;
+ _ = AstGen;
+}
+
+// test {
+// var arena: std.heap.ArenaAllocator = .init(std.testing.allocator);
+// defer arena.deinit();
+
+// // const input = try std.io.getStdIn().readToEndAlloc(arena.allocator(), std.math.maxInt(u32));
+// const input =
+// \\# heading
+// \\
+// \\blah
+// \\
+// \\blah
+// \\
+// ;
+// const ast = try parse(std.testing.allocator, arena.allocator(), input);
+// try std.testing.expectEqualDeep(6, ast.nodes.len);
+// // try std.testing.expectEqualDeep(Ast{
+// // .nodes = &.{
+// // .{ .document = .{ .num_children = 2 } },
+// // .{ .heading = .{ .off = 0, .level = .h1, .num_children = 2 } },
+// // .{ .text = .{ .off = 0, .len = 8 } },
+// // .{ .space_text = .{ .off = 11, .len = 5 } },
+// // .{ .paragraph = .{ .off = 19, .num_children = 1 } },
+// // .{ .text = .{ .off = 19, .len = 3 } },
+// // },
+// // .errors = &.{
+// // .{ .empty_line_in_inline_block = .{ .idx = @enumFromInt(3) } },
+// // },
+// // .extra = &.{},
+// // }, ast);
+// }
diff --git a/src/str.zig b/src/str.zig
@@ -0,0 +1,95 @@
+//! Utils for "strings", []u8 or []const u8 slices
+//!
+//! The only purpose of this file is to reduce typing.
+//! `std.mem.indexOfScalar(u8, s, c)` is very long, and
+//! this file lets you just type `str.indexOfChar(s, c)`.
+//!
+//! If I need any extra functionality I will also just put it here,
+//! so this file may have functions that are not in `std.mem`.
+const std = @import("std");
+const mem = std.mem;
+pub const Char = u8;
+pub const Str = []const u8;
+pub const Charset = []const u8;
+
+pub fn isAnyOf(c: Char, cs: Charset) bool {
+ return indexOfChar(cs, c) != null;
+}
+
+pub fn isNoneOf(c: Char, cs: Charset) bool {
+ return !isAnyOf(c, cs);
+}
+
+pub fn indexOfChar(s: Str, c: Char) ?usize {
+ return mem.indexOfScalar(u8, s, c);
+}
+
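+/// Returns the index of the first byte that differs from `value`, or null if
+/// every byte equals `value`. The SIMD fast path mirrors the one in
+/// `std.mem.indexOfScalar`.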
+pub fn indexOfNotChar(slice: Str, value: Char) ?usize {
+ var i: usize = 0;
+ if (switch (@import("builtin").zig_backend) {
+ .stage2_llvm, .stage2_c => true,
+ else => false,
+ } and
+ !std.debug.inValgrind() and // https://github.com/ziglang/zig/issues/17717
+ !@inComptime())
+ {
+ if (std.simd.suggestVectorLength(Char)) |block_len| {
+ // For Intel Nehalem (2009) and AMD Bulldozer (2012) or later, unaligned loads on aligned data result
+ // in the same execution as aligned loads. We ignore older arch's here and don't bother pre-aligning.
+ //
+ // Use `std.simd.suggestVectorLength(T)` to get the same alignment as used in this function
+ // however this usually isn't necessary unless your arch has a performance penalty due to this.
+ //
+ // This may differ for other arch's. Arm for example costs a cycle when loading across a cache
+ // line so explicit alignment prologues may be worth exploration.
+
+ // Unrolling here is ~10% improvement. We can then do one bounds check every 2 blocks
+ // instead of one which adds up.
+ const Block = @Vector(block_len, Char);
+ if (i + 2 * block_len < slice.len) {
+ const mask: Block = @splat(value);
+ while (true) {
+ inline for (0..2) |_| {
+ const block: Block = slice[i..][0..block_len].*;
+ const matches = block != mask;
+ if (@reduce(.Or, matches)) {
+ return i + std.simd.firstTrue(matches).?;
+ }
+ i += block_len;
+ }
+ if (i + 2 * block_len >= slice.len) break;
+ }
+ }
+
+ // {block_len, block_len / 2} check
+ inline for (0..2) |j| {
+ const block_x_len = block_len / (1 << j);
+ comptime if (block_x_len < 4) break;
+
+ const BlockX = @Vector(block_x_len, Char);
+ if (i + block_x_len < slice.len) {
+ const mask: BlockX = @splat(value);
+ const block: BlockX = slice[i..][0..block_x_len].*;
+ const matches = block != mask;
+ if (@reduce(.Or, matches)) {
+ return i + std.simd.firstTrue(matches).?;
+ }
+ i += block_x_len;
+ }
+ }
+ }
+ }
+
+ for (slice[i..], i..) |c, j| {
+ if (c != value) return j;
+ }
+ return null;
+}
+
+pub fn indexOfNone(s: Str, cs: Charset) ?usize {
+ return mem.indexOfNone(u8, s, cs);
+}
+
+pub fn lastIndexOfNone(s: Str, cs: Charset) ?usize {
+ return mem.lastIndexOfNone(u8, s, cs);
+}
diff --git a/src/utils.zig b/src/utils.zig
@@ -0,0 +1,126 @@
+const std = @import("std");
+const ziggy = @import("ziggy");
+
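+/// Wraps an integer in a distinct non-exhaustive enum so that different kinds
+/// of indices cannot be mixed up. `dummy_type_` exists only to make each
+/// instantiation a unique type.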
+pub fn NewType(comptime int_type: type, comptime dummy_type_: type) type {
+ return enum(int_type) {
+ _,
+
+ const Self = @This();
+
+ pub fn next(self: @This()) ?@This() {
+ if (@intFromEnum(self) == std.math.maxInt(int_type))
+ return null;
+ return @enumFromInt(@intFromEnum(self) + 1);
+ }
+
+ pub fn format(self: @This(), comptime _: []const u8, _: anytype, writer: anytype) !void {
+ try writer.print("@enumFromInt({})", .{@intFromEnum(self)});
+ }
+
+ pub const ziggy_options = struct {
+ const dummy_type = dummy_type_;
+ pub fn parse(
+ self: *ziggy.Parser,
+ first_tok: ziggy.Tokenizer.Token,
+ ) !Self {
+ return @enumFromInt(try self.parseValue(u32, first_tok));
+ }
+ pub fn stringify(
+ self: Self,
+ opts: ziggy.serializer.StringifyOptions,
+ indent_level: usize,
+ depth: usize,
+ writer: anytype,
+ ) !void {
+ const serialized: u32 = @intFromEnum(self);
+ return ziggy.serializer.stringifyInner(serialized, opts, indent_level, depth, writer);
+ }
+ };
+ };
+}
+
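+/// Stores a tagged union's payload and tag side by side in a packed struct
+/// (packed unions cannot carry a tag), with `fromTagged`/`toTagged` converting
+/// to and from the ordinary tagged representation.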
+pub fn Packed(comptime Tagged_: type) type {
+ return packed struct {
+ data: Data,
+ tag: Tag,
+
+ const Self = @This();
+ pub const Tagged = Tagged_;
+ pub const Tag = @typeInfo(Tagged_).@"union".tag_type.?;
+ pub const Data = @Type(.{ .@"union" = .{
+ .layout = .@"packed",
+ .tag_type = null,
+ .fields = @typeInfo(Tagged_).@"union".fields,
+ .decls = &.{},
+ } });
+
+ pub fn fromTagged(tagged: Tagged_) Self {
+ switch (@as(Tag, tagged)) {
+ inline else => |t| return .{
+ .tag = tagged,
+ .data = @unionInit(
+ Data,
+ @tagName(t),
+ @field(tagged, @tagName(t)),
+ ),
+ },
+ }
+ }
+
+ pub fn toTagged(self: Self) Tagged_ {
+ switch (self.tag) {
+ inline else => |t| return @unionInit(
+ Tagged_,
+ @tagName(t),
+ @field(self.data, @tagName(t)),
+ ),
+ }
+ }
+
+ pub const ziggy_options = struct {
+ pub fn parse(
+ self: *ziggy.Parser,
+ first_tok: ziggy.Tokenizer.Token,
+ ) !Self {
+                return .fromTagged(
+ try self.parseValue(Tagged_, first_tok),
+ );
+ }
+ pub fn stringify(
+ self: Self,
+ opts: ziggy.serializer.StringifyOptions,
+ indent_level: usize,
+ depth: usize,
+ writer: anytype,
+ ) !void {
+ return ziggy.serializer.stringifyInner(
+ self.toTagged(),
+ opts,
+ indent_level,
+ depth,
+ writer,
+ );
+ }
+ };
+
+ /// May not exist, but we can define it anyway thanks to lazy decl analysis.
+ pub const Idx = Tagged_.Idx;
+ /// May not exist, but we can define it anyway thanks to lazy decl analysis.
+ pub const HeadingLevel = Tagged_.HeadingLevel;
+ /// May not exist, but we can define it anyway thanks to lazy decl analysis.
+ pub const Leaf = Tagged_.Leaf;
+ // /// May not exist, but we can define it anyway thanks to lazy decl analysis.
+ // pub const format = Tagged_.format;
+ /// May not exist, but we can define it anyway thanks to lazy decl analysis.
+ pub const incrementNumChildren = Tagged_.incrementNumChildren;
+
+ pub fn format(self: @This(), comptime _: []const u8, _: anytype, writer: anytype) !void {
+ try writer.print("{}", .{self.toTagged()});
+ }
+ };
+}
+
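+/// `@intCast` with runtime safety checks forced on, even in release builds.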
+pub fn safeIntCast(comptime T: type, value: anytype) T {
+ @setRuntimeSafety(true);
+ return @intCast(value);
+}