v0 vs v1 - mymarkdown

commit 561fee2b93050e3cc92927e9a73b14252d45ed85
parent c08e3785171f96ac5779ee9c5577dc2ce4fa6c9a
Author: gracefu <81774659+gracefuu@users.noreply.github.com>
Date:   Sun, 18 May 2025 01:48:45 +0800

v0 vs v1

Diffstat:
M src/Ast.zig  | 169 +++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------
M src/AstGen.zig  | 24 +++++++++++++++++-------
A src/AstGen2.zig  | 801 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/main.zig  | 40 ++++++++++++++++++++++------------------
M src/root.zig  | 2 ++
M src/str.zig  | 18 ++++++++++++++++++
M src/test/test.zig  | 6 +++++-
M src/utils.zig  | 10 ++++++----

8 files changed, 980 insertions(+), 90 deletions(-)
diff --git a/src/Ast.zig b/src/Ast.zig
@@ -37,12 +37,12 @@ pub const Node = utils.Packed(union(enum(u8)) {
 
     pub const Idx = utils.NewType(u24, opaque {});
     pub const Root = packed struct {
-        num_children: u24 = 0,
+        num_children: Idx.IntType = 0,
         pub const format = utils.structFormat(@This());
     };
     pub const Container = packed struct {
         off: StrOffset,
-        num_children: u24 = 0,
+        num_children: Idx.IntType = 0,
         pub const format = utils.structFormat(@This());
     };
     pub const Leaf = packed struct {
@@ -55,7 +55,9 @@ pub const Node = utils.Packed(union(enum(u8)) {
     pub fn incrementNumChildren(self: *Node) void {
         switch (self.tag) {
             inline else => |t| {
-                if (@TypeOf(@field(self.data, @tagName(t))) == Container or @TypeOf(@field(self.data, @tagName(t))) == Root) {
+                if (@TypeOf(@field(self.data, @tagName(t))) == Container or
+                    @TypeOf(@field(self.data, @tagName(t))) == Root)
+                {
                     @field(self.data, @tagName(t)).num_children += 1;
                 } else unreachable;
             },
@@ -67,8 +69,8 @@ pub const Node = utils.Packed(union(enum(u8)) {
 
 pub const Error = utils.Packed(union(enum(u8)) {
     marker_too_long: NodeError,
+    unexpected_block_in_inline_context: NodeError,
     invalid_marker: PointError,
-    empty_line_in_inline_block: PointError,
     inconsistent_indentation: PointError,
 
     /// Used when the error diagnostic spans the entire node
@@ -126,7 +128,13 @@ pub fn toTagged(self: Ast, gpa: Allocator) !Tagged {
 }
 
 pub fn renderAst(self: Ast, writer: anytype, input: []const u8) !void {
-    var renderer: AstRenderer(@TypeOf(writer)) = .{ .self = self, .writer = writer, .input = input, .cached_offset = 0, .cached_pos = .{ .row = 1, .col = 1 } };
+    var renderer: AstRenderer(@TypeOf(writer)) = .{
+        .self = self,
+        .writer = writer,
+        .input = input,
+        .cached_offset = 0,
+        .cached_pos = .{ .row = 1, .col = 1 },
+    };
     const node_idx, const error_idx = try renderer.renderAstInner(0, 0, 0);
     assert(node_idx == self.nodes.len);
     assert(error_idx == self.errors.len);
@@ -175,72 +183,110 @@ fn AstRenderer(Writer: type) type {
             }
         }
 
-        pub fn renderAstInner(renderer: *@This(), start_node: usize, start_error: usize, indent: usize) !struct { usize, usize } {
+        pub fn renderAstInner(
+            renderer: *@This(),
+            start_node: usize,
+            start_error: usize,
+            indent: usize,
+        ) !struct { usize, usize } {
             const tracy_frame = tracy.trace(@src());
             defer tracy_frame.end();
+            try renderer.writer.writeByteNTimes(' ', indent);
+            try renderer.writer.writeByte('.');
+            try renderer.writer.writeAll(@tagName(renderer.self.nodes[start_node].tag));
+            try renderer.writer.writeByte('\n');
+            var cur_node: usize = start_node + 1;
+            var cur_error: usize = start_error;
+            while (cur_error < renderer.self.errors.len and
+                @intFromEnum(renderer.self.errors[cur_error].get(.idx)) == start_node)
+            {
+                try renderer.writer.writeByteNTimes(' ', indent + 2);
+                try renderer.writer.writeAll(".error .");
+                try renderer.writer.writeAll(@tagName(renderer.self.errors[cur_error].tag));
+                switch (renderer.self.errors[cur_error].toTagged()) {
+                    // PointError
+                    .inconsistent_indentation,
+                    .invalid_marker,
+                    => |error_data| {
+                        try renderer.writer.print(
+                            " at {}:{}",
+                            renderer.offsetToPos(error_data.off),
+                        );
+                    },
+                    // NodeError
+                    .unexpected_block_in_inline_context,
+                    .marker_too_long,
+                    => {},
+                }
+                try renderer.writer.writeByte('\n');
+                cur_error += 1;
+            }
             switch (renderer.self.nodes[start_node].toTagged()) {
-                inline else => |data, tag| {
-                    try renderer.writer.writeByteNTimes(' ', indent);
-                    try renderer.writer.writeByte('.');
-                    try renderer.writer.writeAll(@tagName(tag));
-                    try renderer.writer.writeByte('\n');
-                    var cur_node: usize = start_node + 1;
-                    var cur_error: usize = start_error;
-                    while (cur_error < renderer.self.errors.len and @intFromEnum(renderer.self.errors[cur_error].get(.idx)) == start_node) {
-                        switch (renderer.self.errors[cur_error].toTagged()) {
-                            // PointError
-                            .empty_line_in_inline_block,
-                            .inconsistent_indentation,
-                            .invalid_marker,
-                            => |error_data| {
-                                try renderer.writer.writeByteNTimes(' ', indent + 2);
-                                try renderer.writer.writeAll(".error .");
-                                try renderer.writer.writeAll(@tagName(renderer.self.errors[cur_error].tag));
-                                try renderer.writer.print(" at {}:{}", renderer.offsetToPos(error_data.off));
-                                try renderer.writer.writeByte('\n');
-                            },
-                            // NodeError
-                            .marker_too_long => {
-                                try renderer.writer.writeByteNTimes(' ', indent + 2);
-                                try renderer.writer.writeAll("error .");
-                                try renderer.writer.writeAll(@tagName(renderer.self.errors[cur_error].tag));
-                                try renderer.writer.writeByte('\n');
-                            },
-                        }
-                        cur_error += 1;
+                .document,
+                => |data| {
+                    for (0..data.num_children) |_| {
+                        assert(cur_node < renderer.self.nodes.len);
+                        cur_node, cur_error = try renderer.renderAstInner(
+                            cur_node,
+                            cur_error,
+                            indent + 2,
+                        );
+                        assert(cur_node <= renderer.self.nodes.len);
                     }
-                    if (@TypeOf(data) == Node.Tagged.Container or @TypeOf(data) == Node.Tagged.Root) {
-                        for (0..data.num_children) |_| {
-                            assert(cur_node < renderer.self.nodes.len);
-                            cur_node, cur_error = try renderer.renderAstInner(cur_node, cur_error, indent + 2);
-                            assert(cur_node <= renderer.self.nodes.len);
-                        }
-                        return .{ cur_node, cur_error };
-                    } else if (@TypeOf(data) == Node.Tagged.Leaf) {
-                        try renderer.writer.writeByteNTimes(' ', indent + 2);
-                        // This was too slow!
-                        // try renderer.writer.print("\"{}\"", .{
-                        //     std.zig.fmtEscapes(renderer.input[data.off .. data.off + data.len]),
-                        // });
-                        try renderer.writer.writeByte('"');
-                        try str.escapeStringForDoubleQuotedString(
-                            renderer.writer,
-                            renderer.input[data.off .. data.off + data.len],
+                    return .{ cur_node, cur_error };
+                },
+
+                .heading,
+                .quote,
+                .paragraph,
+                .unordered_item,
+                .ordered_item,
+                .term_item,
+                .task_item,
+                .elaboration,
+                => |data| {
+                    for (0..data.num_children) |_| {
+                        assert(cur_node < renderer.self.nodes.len);
+                        cur_node, cur_error = try renderer.renderAstInner(
+                            cur_node,
+                            cur_error,
+                            indent + 2,
                         );
-                        try renderer.writer.writeByte('"');
-                        try renderer.writer.writeByte('\n');
-                        return .{ cur_node, cur_error };
-                    } else {
-                        @compileError(@typeName(@TypeOf(data)));
+                        assert(cur_node <= renderer.self.nodes.len);
                     }
+                    return .{ cur_node, cur_error };
+                },
+
+                .marker,
+                .thematic_break,
+                .text,
+                .space_text,
+                => |data| {
+                    try renderer.writer.writeByteNTimes(' ', indent + 2);
+                    // This was too slow!
+                    // try renderer.writer.print("\"{}\"", .{
+                    //     std.zig.fmtEscapes(renderer.input[data.off .. data.off + data.len]),
+                    // });
+                    try renderer.writer.writeByte('"');
+                    try str.escapeStringForDoubleQuotedString(
+                        renderer.writer,
+                        renderer.input[data.off .. data.off + data.len],
+                    );
+                    try renderer.writer.writeByte('"');
+                    try renderer.writer.writeByte('\n');
+                    return .{ cur_node, cur_error };
                 },
             }
-            unreachable;
         }
     };
 }
 
-pub fn renderHtml(self: Ast, writer: anytype, input: []const u8, start_: ?Node.Idx) !?Node.Idx {
+pub fn renderHtml(
+    self: Ast,
+    writer: anytype,
+    input: []const u8,
+    start_: ?Node.Idx,
+) !?Node.Idx {
     const tracy_frame = tracy.trace(@src());
     defer tracy_frame.end();
     const start: Node.Idx = start_ orelse @enumFromInt(0);
@@ -261,7 +307,10 @@ pub fn renderHtml(self: Ast, writer: anytype, input: []const u8, start_: ?Node.I
     var cur_idx: ?Node.Idx = start.next();
     switch (self.nodes[@intFromEnum(start)].tag) {
         inline .document, .paragraph => |t| {
-            const data = @field(self.nodes[@intFromEnum(start)].data, @tagName(t));
+            const data = @field(
+                self.nodes[@intFromEnum(start)].data,
+                @tagName(t),
+            );
             for (0..data.num_children) |_| {
                 if (cur_idx) |idx| {
                     cur_idx = try self.renderHtml(writer, input, idx);
diff --git a/src/AstGen.zig b/src/AstGen.zig
@@ -187,7 +187,7 @@ fn parseInlineBlock(self: *AstGen, gpa: Allocator, lines_: [][]u8, parent_idx: N
     const tracy_frame = tracy.trace(@src());
     defer tracy_frame.end();
     var lines = lines_;
-    var empty_line_off: ?u32 = null;
+    var saw_empty_line: bool = false;
 
     outer: {
         // empty lines at the start of the inline block are fine, just skip these
@@ -230,12 +230,13 @@ fn parseInlineBlock(self: *AstGen, gpa: Allocator, lines_: [][]u8, parent_idx: N
                 if (lines.len == 0) break :outer;
                 if (lines[0].len != 0) break;
                 // empty line detected
-                empty_line_off = self.calcOffset(@ptrCast(lines[0].ptr));
+                saw_empty_line = true;
             }
 
-            if (empty_line_off) |off| {
+            if (saw_empty_line) {
+                saw_empty_line = false;
                 try self.errors.append(gpa, .fromTagged(.{
-                    .empty_line_in_inline_block = .{ .idx = self.nextNodeIdx(), .off = off },
+                    .unexpected_block_in_inline_context = .{ .idx = self.nextNodeIdx() },
                 }));
             }
 
@@ -309,7 +310,16 @@ fn parseColumn(self: *AstGen, gpa: Allocator, lines_: [][]u8, parent_idx: Node.I
                     num_lines += 1;
                 }
 
-                const paragraph_lines = lines[0..num_lines];
+                var paragraph_lines = lines[0..num_lines];
+                if (lines[0][0] == ' ' or lines[0][0] == '\t') {
+                    try self.errors.append(gpa, .fromTagged(.{
+                        .inconsistent_indentation = .{
+                            .idx = self.lastNodeIdx(),
+                            .off = self.calcOffset(&lines[0][0]),
+                        },
+                    }));
+                    paragraph_lines = try self.findIndentedColumn(gpa, paragraph_lines, child);
+                }
                 lines = lines[num_lines..];
                 try self.parseInlineBlock(gpa, paragraph_lines, child);
             },
@@ -446,9 +456,9 @@ const block_specs = blockSpecs(struct {
     pub const @"+": BlockSpec = &.{
         .{
             .tag = .elaboration,
-            .marker = .{ .starts_with = "+" },
+            .marker = .{ .starts_with_multi = .{ .marker_char = '+' } },
             .mode = .indented_column,
-            .store_marker_child = .no_store,
+            .store_marker_child = .store,
         },
     };
     pub const @";": BlockSpec = &.{
diff --git a/src/AstGen2.zig b/src/AstGen2.zig
@@ -0,0 +1,801 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+const assert = std.debug.assert;
+
+const ziggy = @import("ziggy");
+const tracy = @import("tracy");
+
+const utils = @import("utils.zig");
+const str = @import("str.zig");
+const Ast = @import("Ast.zig");
+const StrOffset = Ast.StrOffset;
+const StrLen = Ast.StrLen;
+const Node = Ast.Node;
+const Error = Ast.Error;
+
+const Column = u10;
+
+const AstGen = @This();
+
+gpa: Allocator,
+output_gpa: Allocator,
+output_gpa_same_as_gpa: bool,
+input: []const u8,
+cursor: []const u8, // suffix of input
+indentation: [std.math.maxInt(Column)]u8,
+nodes: std.ArrayListUnmanaged(Node),
+errors: std.ArrayListUnmanaged(Error),
+extra: std.ArrayListUnmanaged(u32),
+
+fn cursorOffset(self: *AstGen) StrOffset {
+    return @intCast(self.cursor.ptr - self.input.ptr);
+}
+fn advanceCursor(self: *AstGen, advance: usize) void {
+    // NOTE: `advance` should really be u32, but this makes it easier to work with other str functions.
+    self.cursor = self.cursor[advance..];
+}
+fn findMarkerEnd(self: AstGen, m: u8) error{IndentationTooLong}!Column {
+    // NOTE: null is impossible because input is guaranteed to end in newlines.
+    const idx = str.indexOfNotChar(self.cursor, m) orelse unreachable;
+    // Explicitly check for marker length because malicious input is possible
+    if (idx > std.math.maxInt(Column))
+        return error.IndentationTooLong;
+    return @intCast(idx);
+}
+
+fn getNode(self: AstGen, idx: Node.Idx) *Node {
+    @setRuntimeSafety(true);
+    return &self.nodes.items[@intFromEnum(idx)];
+}
+fn lastNodeIdx(self: AstGen) Node.Idx {
+    @setRuntimeSafety(true);
+    return @enumFromInt(self.nodes.items.len - 1);
+}
+fn nextNodeIdx(self: AstGen) Node.Idx {
+    @setRuntimeSafety(true);
+    return @enumFromInt(self.nodes.items.len);
+}
+fn appendNode(self: *AstGen, node: Node.Tagged) !Node.Idx {
+    if (self.nodes.items.len > std.math.maxInt(
+        @typeInfo(Node.Idx).@"enum".tag_type,
+    )) return error.OutOfNodeIdx;
+    const idx = self.nodes.items.len;
+    try self.nodes.append(self.gpa, .fromTagged(node));
+    return @enumFromInt(idx);
+}
+fn appendContainerNodeAtCursor(self: *AstGen, parent_idx: Node.Idx, comptime container_node_tag: Node.Tag) !Node.Idx {
+    self.getNode(parent_idx).incrementNumChildren();
+    return try self.appendNode(
+        @unionInit(
+            Node.Tagged,
+            @tagName(container_node_tag),
+            .{ .off = self.cursorOffset() },
+        ),
+    );
+}
+fn appendLeafNodeAtCursor(self: *AstGen, parent_idx: Node.Idx, comptime leaf_node_tag: Node.Tag, len: StrLen) !Node.Idx {
+    self.getNode(parent_idx).incrementNumChildren();
+    return try self.appendNode(
+        @unionInit(
+            Node.Tagged,
+            @tagName(leaf_node_tag),
+            .{ .off = self.cursorOffset(), .len = len },
+        ),
+    );
+}
+fn appendError(self: *AstGen, err: Error.Tagged) !void {
+    if (self.errors.items.len > std.math.maxInt(
+        @typeInfo(Error.Idx).@"enum".tag_type,
+    )) return error.OutOfErrorIdx;
+    try self.errors.append(self.gpa, .fromTagged(err));
+}
+fn appendPointErrorAtCursor(self: *AstGen, comptime tag: Error.Tag, idx: Node.Idx) !void {
+    try self.appendError(
+        @unionInit(
+            Error.Tagged,
+            @tagName(tag),
+            .{ .idx = idx, .off = self.cursorOffset() },
+        ),
+    );
+}
+fn appendNodeErrorAtCursor(self: *AstGen, comptime tag: Error.Tag, idx: Node.Idx) !void {
+    try self.appendError(
+        @unionInit(
+            Error.Tagged,
+            @tagName(tag),
+            .{ .idx = idx },
+        ),
+    );
+}
+
+pub fn deinit(self: *AstGen) void {
+    self.nodes.deinit(self.gpa);
+    self.errors.deinit(self.gpa);
+    self.extra.deinit(self.gpa);
+}
+
+/// Parses mymarkdown
+///
+/// : `gpa`
+/// +   A suitable allocator for scratch allocations that supports free and ideally remap.
+/// : `output_gpa`
+/// +   If passed, no scratch allocations will outlive this function,
+///     and any allocations returned will be allocated on this.
+/// : `input`
+/// +   The input slice to be parsed. Must end in at least 1024 \n characters.
+///
+/// Errors:
+/// : `IndentationTooLong`
+/// +   This implementation of mymarkdown supports up to 1023 characters of indentation.
+pub fn parse(
+    gpa: Allocator,
+    output_gpa: ?Allocator,
+    input: []const u8,
+) !Ast {
+    const tracy_frame = tracy.trace(@src());
+    defer tracy_frame.end();
+
+    if (@typeInfo(Column).int.bits > @typeInfo(StrLen).int.bits)
+        @compileError("Column should have less bits than StrLen");
+    if (input.len < 128 or !std.mem.eql(u8, input[input.len - 128 ..], "\n" ** 128))
+        return error.InputUnsafe;
+    if (input.len > std.math.maxInt(u32))
+        return error.InputTooLarge;
+
+    var ast: AstGen = .{
+        .gpa = gpa,
+        .output_gpa = output_gpa orelse gpa,
+        .output_gpa_same_as_gpa = output_gpa == null,
+        .input = input,
+        .cursor = input,
+        .indentation = undefined,
+        .nodes = .empty,
+        .errors = .empty,
+        .extra = .empty,
+    };
+    defer ast.deinit();
+
+    try ast.parseRoot();
+
+    if (output_gpa) |gpa2| {
+        return .{
+            .nodes = try gpa2.dupe(Node, ast.nodes.items),
+            .errors = try gpa2.dupe(Error, ast.errors.items),
+            .extra = try gpa2.dupe(u32, ast.extra.items),
+        };
+    } else {
+        return .{
+            .nodes = try ast.nodes.toOwnedSlice(gpa),
+            .errors = try ast.errors.toOwnedSlice(gpa),
+            .extra = try ast.extra.toOwnedSlice(gpa),
+        };
+    }
+}
+
+const ParsingContext = enum { block_context, inline_context };
+
+fn parseRoot(self: *AstGen) !void {
+    const root = try self.appendNode(.{ .document = .{} });
+    assert(root == .root);
+    assert(self.input.ptr == self.cursor.ptr);
+    assert(self.input.len == self.cursor.len);
+
+    if (str.indexOfNone(self.input, " \t\r\n")) |idx| {
+        if (idx == 0 or self.input[idx - 1] != '\n') {
+            // Happy case: input starts at the start of the line :)
+            self.advanceCursor(idx);
+        } else {
+            // Input doesn't start at the start of the line :(
+            // Log an error but otherwise proceed as usual
+            try self.appendPointErrorAtCursor(.inconsistent_indentation, root);
+            self.advanceCursor(idx);
+        }
+
+        // The actual parse
+        // "inline" hack to get different branch predictors for the root column
+        _ = try self.parseColumnInline(root, 0, 0, .block_context);
+    } else {
+        // Input is completely empty, return without really parsing anything
+    }
+}
+
+fn parseColumn(
+    self: *AstGen,
+    parent_idx: Node.Idx,
+    parent_col: Column,
+    cursor_col: Column,
+    comptime parsing_context: ParsingContext,
+) error{
+    IndentationTooLong,
+    OutOfNodeIdx,
+    OutOfErrorIdx,
+    OutOfMemory,
+}!Column {
+    return self.parseColumnInline(parent_idx, parent_col, cursor_col, parsing_context);
+}
+
+fn parseMarkerItem(
+    self: *AstGen,
+    comptime node_tag: Node.Tag,
+    parent_idx: Node.Idx,
+    block_col: Column,
+    marker_len: Column,
+    comptime output_marker: enum { output_marker, no_output_marker },
+    comptime parent_parsing_context: ParsingContext,
+    comptime child_parsing_context: ParsingContext,
+) !Column {
+    const block_idx = try self.appendContainerNodeAtCursor(parent_idx, node_tag);
+    if (parent_parsing_context == .inline_context)
+        try self.appendNodeErrorAtCursor(.unexpected_block_in_inline_context, block_idx);
+    if (output_marker == .output_marker) {
+        _ = try self.appendLeafNodeAtCursor(block_idx, .marker, marker_len);
+    }
+    switch (try self.findIndentation(block_col, marker_len)) {
+        .found_column => |child_col| {
+            return try self.parseColumn(block_idx, block_col, child_col, child_parsing_context);
+        },
+        .mismatched_indentation => |indentation_idx_found| return indentation_idx_found,
+    }
+}
+
+inline fn parseColumnInline(
+    self: *AstGen,
+    parent_idx: Node.Idx,
+    parent_col: Column,
+    cursor_col: Column,
+    comptime parsing_context: ParsingContext,
+) !Column {
+    const tracy_frame = tracy.trace(@src());
+    defer tracy_frame.end();
+    assert(cursor_col == 0 or parent_col < cursor_col);
+
+    // Used for "indentation correction".
+    // For simplicity, just think of this as this column's indentation.
+    var block_col = cursor_col;
+
+    var parsed_first_paragraph_for_inline_context: bool = false;
+
+    // # parseColumn's input parameter explanation
+    // + AKA: How to deal with indentation
+    //
+    // Our cursor points at the first (usually non-whitespace) char of the column.
+    // (The caller is in charge of finding the start of the column.)
+    //
+    // We also need both our own and our parent's indentations.
+    // These are represented as column indices that indicate
+    // how many characters of `self.indentation` should match.
+    // indicating where the parent column's ends,
+    // as well as where this column's indentation ends.
+    // For the root column, both of these values are 0, which means the column
+    // can never be exited from until it reaches the end of the file.
+    //
+    // ===
+    //  column 0, file starts here
+    //  | column 2, parent column starts here
+    //  | |  column 5, our column starts here
+    //  v v  v
+    //
+    // |  Parent column (parent's indentation = "  ", represented as parent_col = 2)
+    // |     v (cursor points at H, our indentation = "     ", represented as cursor_col = 5)
+    // |     Hello there
+    // |     Same column
+    // |  Parent column
+    // ===
+    //
+    // Lines that match with our indentation are considered part of our column,
+    // and the column ends upon finding a line aligns with the parent indentation.
+    // When a line matches the parent's indentation but NOT ours, we log an error,
+    // then do error recovery by pretending it does match with our indentation.
+    //
+    // ===
+    // |  Parent column
+    // |     v (cursor)
+    // |     Hello there
+    // |  \t Same column but syntax error! ("  \t " does not match with "     ", but matches "  ")
+    // |  Parent column
+    // ===
+    //
+    // This indentation recovery system is not the best and it can fix indentation "incorrectly",
+    // but I don't have anything better. (Btw, the original AstGen.zig uses the same recovery logic.)
+
+    parse_another_block: while (true) {
+        assert(self.cursor.len > 0 and str.isNoneOf(self.cursor[0], " \t\r\n"));
+
+        // Will be set by the recursive call (if any),
+        // to indicate how much indentation was previously checked.
+        var indentation_idx: Column = undefined;
+
+        finish_parsing_block: {
+            switch (self.cursor[0]) {
+                // Par-like repeatable markers
+                inline '-', '.', ':', '#' => |m| {
+                    const marker_len = try self.findMarkerEnd(m);
+                    if (m == '-') {
+                        const potential_task_item = str.indexOfNone(self.cursor[marker_len..], "[ ]xX") orelse unreachable;
+                        if (potential_task_item >= 3 and
+                            self.cursor[marker_len + potential_task_item - 1] == ']' and
+                            self.cursor[marker_len + potential_task_item - 3] == '[' and
+                            (self.cursor[marker_len + potential_task_item - 2] == ' ' or
+                                self.cursor[marker_len + potential_task_item - 2] == 'x' or
+                                self.cursor[marker_len + potential_task_item - 2] == 'X') and
+                            std.mem.allEqual(u8, self.cursor[marker_len .. marker_len + potential_task_item - 3], ' '))
+                        {
+                            if (marker_len + potential_task_item > std.math.maxInt(Column))
+                                return error.IndentationTooLong;
+                            indentation_idx = try self.parseMarkerItem(
+                                .task_item,
+                                parent_idx,
+                                block_col,
+                                @intCast(marker_len + potential_task_item),
+                                .output_marker,
+                                parsing_context,
+                                .inline_context,
+                            );
+                            break :finish_parsing_block;
+                        }
+                    }
+                    indentation_idx = try self.parseMarkerItem(
+                        switch (m) {
+                            '-' => .unordered_item,
+                            '.' => .ordered_item,
+                            ':' => .term_item,
+                            '#' => .heading,
+                            else => unreachable,
+                        },
+                        parent_idx,
+                        block_col,
+                        marker_len,
+                        .output_marker,
+                        parsing_context,
+                        .inline_context,
+                    );
+                    break :finish_parsing_block;
+                },
+
+                // Div-like repeatable markers
+                inline '+' => |m| {
+                    const marker_len = try self.findMarkerEnd(m);
+                    indentation_idx = try self.parseMarkerItem(
+                        switch (m) {
+                            '+' => .elaboration,
+                            else => unreachable,
+                        },
+                        parent_idx,
+                        block_col,
+                        marker_len,
+                        .output_marker,
+                        parsing_context,
+                        .block_context,
+                    );
+                    break :finish_parsing_block;
+                },
+
+                // Par-like single markers
+                inline ';' => |m| {
+                    indentation_idx = try self.parseMarkerItem(
+                        switch (m) {
+                            ';' => .paragraph,
+                            else => unreachable,
+                        },
+                        parent_idx,
+                        block_col,
+                        1,
+                        .no_output_marker,
+                        parsing_context,
+                        .inline_context,
+                    );
+                    break :finish_parsing_block;
+                },
+
+                // Div-like single markers
+                inline '>' => |m| {
+                    indentation_idx = try self.parseMarkerItem(
+                        switch (m) {
+                            '>' => .quote,
+                            else => unreachable,
+                        },
+                        parent_idx,
+                        block_col,
+                        1,
+                        .no_output_marker,
+                        parsing_context,
+                        .block_context,
+                    );
+                    break :finish_parsing_block;
+                },
+
+                '*' => {
+                    if (std.mem.eql(u8, self.cursor[0..3], "***") and
+                        self.cursor[
+                            3 + (str.indexOfNone(
+                                self.cursor[3..],
+                                " \t",
+                            ) orelse unreachable)
+                        ] == '\n')
+                    {
+                        _ = try self.appendLeafNodeAtCursor(parent_idx, .thematic_break, 3);
+                        break :finish_parsing_block;
+                    }
+                },
+
+                else => {},
+            }
+
+            // Handle paragraph
+            switch (parsing_context) {
+                .inline_context => {
+                    if (parsed_first_paragraph_for_inline_context) {
+                        try self.appendNodeErrorAtCursor(.unexpected_block_in_inline_context, self.nextNodeIdx());
+                        indentation_idx = try self.parseParagraph(.space_text, parent_idx, parent_col, block_col);
+                    } else {
+                        indentation_idx = try self.parseParagraph(.text, parent_idx, parent_col, block_col);
+                        parsed_first_paragraph_for_inline_context = true;
+                    }
+                },
+                .block_context => {
+                    const paragraph_idx = try self.appendContainerNodeAtCursor(parent_idx, .paragraph);
+                    indentation_idx = try self.parseParagraph(.text, paragraph_idx, parent_col, block_col);
+                },
+            }
+        }
+
+        // We just finished parsing a block, so cursor points at the start of a line:
+        //
+        // ===
+        // |  Parent column
+        // |     Hello there
+        // |     Same column
+        // |v----------------- (cursor)
+        // |  Parent column
+        // ===
+        //
+        // We need to find the next block. This involves checking for empty lines and indentation.
+        //
+        // We find where the indentation differs, if any. There are 6 cases:
+        // - **The line is empty.**
+        // +   We loop again, looking for another block.
+        // - **Matches us, next char is non-whitespace.**
+        // +   Happy path, we continue parsing as usual.
+        // - **Matches us, next char is whitespace.**
+        // +   Log and error and recover by treating all leading whitespace
+        //     as if it matched our indentation level exactly.
+        // - **Does not even match parent.**
+        // +   Return with no errors.
+        // - **Matches parent and not us, next char is non-whitespace.**
+        // +   Return with no errors.
+        // - **Matches parent and not us, next char is whitespace.**
+        // +   Log and error and recover by treating all leading whitespace
+        //     as if it matched our indentation level exactly.
+
+        block_col = cursor_col;
+        // finding_block:
+        while (true) {
+            // Special case: when we hit EOF, there's nothing left to parse.
+            if (self.cursor.len == 0) return 0;
+
+            assert(self.cursor[indentation_idx] != '\n');
+            // if (self.cursor[indentation_idx] == '\n') {
+            //     std.debug.assert(false); // TODO: Check this logic
+            //     // Empty line
+            //     self.advanceCursor(indentation_idx + 1);
+            //     // NOTE: null is impossible because input is guaranteed to end in newlines.
+            //     indentation_idx = if (str.indexOfNone(self.cursor, " \t") orelse unreachable) |idx|
+            //         // Explicitly check for indentation length because malicious input is possible
+            //         if (idx > std.math.maxInt(Column))
+            //             return error.IndentationTooLong
+            //         else
+            //             @intCast(idx);
+            //     continue :finding_block;
+            // } else
+            if (indentation_idx > cursor_col) {
+                // Matches us but there's too much whitespace.
+                // Fix the indentation.
+                // Here, we fix the indentation by pretending that
+                // the block starts from wherever the whitespace ended.
+                block_col = indentation_idx;
+                @memcpy(
+                    self.indentation[cursor_col..block_col],
+                    self.cursor[cursor_col..block_col],
+                );
+                // Log the error.
+                self.advanceCursor(cursor_col);
+                try self.appendPointErrorAtCursor(.inconsistent_indentation, self.nextNodeIdx());
+                // Continue parsing.
+                self.advanceCursor(indentation_idx - cursor_col);
+                continue :parse_another_block;
+            } else if (indentation_idx == cursor_col) {
+                // Matches us exactly.
+                self.advanceCursor(indentation_idx);
+                continue :parse_another_block;
+            } else if (indentation_idx > parent_col) {
+                // Matches parent but there's extra whitespace that doesn't match us.
+                // Log the error.
+                self.advanceCursor(parent_col);
+                try self.appendPointErrorAtCursor(.inconsistent_indentation, parent_idx);
+                // Fix the indentation.
+                // Here, we fix the indentation by pretending that
+                // the block starts from the correct amount of whitespace.
+                // <no op>
+
+                // Continue parsing.
+                self.advanceCursor(indentation_idx - parent_col);
+                continue :parse_another_block;
+            } else {
+                // Matches parent exactly or doesn't match parent, return.
+                return indentation_idx;
+            }
+        }
+    }
+}
+
+/// Finds where the indented block starts
+fn findIndentation(
+    self: *AstGen,
+    parent_col: Column,
+    skip: Column,
+) !union(enum) {
+    found_column: Column,
+    mismatched_indentation: Column,
+} {
+    // We're given the input at the marker.
+    //
+    // ===
+    //    parent_col
+    //    v
+    //
+    // |  Parent column
+    // |  v------------ (cursor)
+    // |  -  Hello there
+    // |     Same column
+    // |  Parent column
+    // ===
+    //
+    // We first skip some number of characters, and then scan forward until we find non-whitespace.
+    //
+    // ===
+    // first skip...
+    //
+    //    parent_col
+    //    v
+    //
+    // |  Parent column
+    // |   v------------ (cursor)
+    // |  -  Hello there
+    // |     Same column
+    // |  Parent column
+    //
+    // then scan for non-whitespace:
+    //
+    //    parent_col
+    //    v
+    //
+    // |  Parent column
+    // |     v------------ (cursor)
+    // |  -  Hello there
+    // |     Same column
+    // |  Parent column
+    // ===
+    //
+    // Then we store the indentation found in `self.indentation` and return the indentation column (in this case 5).
+    // Because in this case we found the non-whitespace character on the first line, we will memset the marker to spaces.
+    //
+    // - return = 5
+    // - self.indentation = `     `
+    //
+    // ***
+    //
+    // There are a couple other cases.
+    //
+    // This next case is when the non-whitespace char is not on the same line.
+    // In this case we can simply copy the indentation from the input.
+    //
+    // ===
+    //    parent_col
+    //    v
+    //
+    // |  Parent column
+    // |
+    // |  -  v------------ (cursor)
+    // |     Hello there
+    // |     Same column
+    // |  Parent column
+    // ===
+    //
+    // - return = 5
+    // - self.indentation = `     `
+    //
+    // ***
+    //
+    // This next case is we find that the indentation does not match the parent's indentation.
+    // Note that this can only happen when the non-whitespace char is not on the same line as the initial cursor,
+    // since the indentation behind the initial cursor has already been checked by the caller.
+    //
+    // ===
+    //    parent_col
+    //    v
+    //
+    // |  Parent column
+    // |
+    // |  -  v------------ (cursor)
+    // | \t  Hello there
+    // |     Same column
+    // |  Parent column
+    // ===
+    //
+    // - return = mismatched indentation at column 1
+    //
+    // Here, parent's indentation is `  ` but we saw ` \t  `.
+    // In this case we return how many characters did match to the caller.
+    // The caller should interpret this case as that no indented column was found.
+    //
+    // Note that we do not break out of the loop until we find a non-whitespace char,
+    // even if we see non-matching indentation. In this next example, we ignore the `\t`
+    // and continue to the H, where the indentation actually does match.
+    //
+    // ===
+    //    parent_col
+    //    v
+    //
+    // |  Parent column
+    // |
+    // |  -
+    // | \t  v------------ (cursor)
+    // |     Hello there
+    // |     Same column
+    // |  Parent column
+    // ===
+    //
+    // - return = 5
+    // - self.indentation = `     `
+
+    // Handle first line separately
+    {
+        self.advanceCursor(skip);
+        const inner_block_idx = str.indexOfNone(self.cursor, " \t") orelse unreachable;
+        if (self.cursor[inner_block_idx] != '\n') {
+            // We found the indentation!
+            // Because this is the first line, we need to memset the marker into spaces.
+            if (parent_col + skip + inner_block_idx > std.math.maxInt(Column))
+                return error.IndentationTooLong;
+            @memset(self.indentation[parent_col .. parent_col + skip], ' ');
+            @memcpy(self.indentation[parent_col + skip .. parent_col + skip + inner_block_idx], self.cursor[0..inner_block_idx]);
+            self.advanceCursor(inner_block_idx);
+            return .{ .found_column = @intCast(parent_col + skip + inner_block_idx) };
+        } else {
+            // I lied, inner_block_idx doesn't point to the inner block.
+            self.advanceCursor(inner_block_idx + 1);
+        }
+    }
+
+    // Remaining lines don't need to memset the marker into spaces.
+    while (true) {
+        // Find column
+        const inner_block_idx = str.indexOfNone(self.cursor, " \t") orelse unreachable;
+        if (self.cursor[inner_block_idx] != '\n') {
+            // Verify parent indentation
+            const indentation_idx = std.mem.indexOfDiff(u8, self.cursor, self.indentation[0..parent_col]) orelse unreachable;
+            if (indentation_idx != parent_col) {
+                return .{ .mismatched_indentation = @intCast(indentation_idx) };
+            }
+            // We found the indentation!
+            if (parent_col + inner_block_idx > std.math.maxInt(Column))
+                return error.IndentationTooLong;
+            @memcpy(self.indentation[parent_col .. parent_col + inner_block_idx], self.cursor[parent_col..inner_block_idx]);
+            self.advanceCursor(inner_block_idx);
+            return .{ .found_column = @intCast(parent_col + inner_block_idx) };
+        } else {
+            // I lied, inner_block_idx doesn't point to the inner block.
+            self.advanceCursor(inner_block_idx + 1);
+        }
+    }
+}
+
+fn insertTextLine(
+    self: *AstGen,
+    comptime first_text_tag: Node.Tag,
+    comptime rest_text_tag: Node.Tag,
+    parent_idx: Node.Idx,
+    len_: usize,
+) !void {
+    var len = len_;
+    if (len <= std.math.maxInt(StrLen)) {
+        _ = try self.appendLeafNodeAtCursor(parent_idx, first_text_tag, @intCast(len));
+        self.advanceCursor(len);
+    } else {
+        @branchHint(.cold);
+        {
+            const consumed_len = @min(len, std.math.maxInt(StrLen));
+            _ = try self.appendLeafNodeAtCursor(parent_idx, first_text_tag, @intCast(consumed_len));
+            self.advanceCursor(consumed_len);
+            len -= consumed_len;
+        }
+        while (len > 0) {
+            const consumed_len = @min(len, std.math.maxInt(StrLen));
+            _ = try self.appendLeafNodeAtCursor(parent_idx, rest_text_tag, @intCast(consumed_len));
+            self.advanceCursor(consumed_len);
+            len -= consumed_len;
+        }
+    }
+}
+
+fn parseParagraph(
+    self: *AstGen,
+    comptime first_text_tag: Node.Tag,
+    parent_idx: Node.Idx,
+    parent_col: Column,
+    block_col: Column,
+) !Column {
+    const tracy_frame = tracy.trace(@src());
+    defer tracy_frame.end();
+
+    {
+        const newline = str.indexOfChar(self.cursor, '\n') orelse unreachable;
+        try self.insertTextLine(first_text_tag, .text, parent_idx, newline);
+        self.advanceCursor(1);
+    }
+
+    while (true) {
+        if (self.cursor.len == 0) return 0;
+
+        const indentation_idx = str.indexOfNone(self.cursor, " \t") orelse unreachable;
+        // block line found, exit
+        if (str.isAnyOf(self.cursor[indentation_idx], "-.:+>#;")) {
+            const verified_indentation_idx = std.mem.indexOfDiff(
+                u8,
+                self.cursor,
+                self.indentation[0..block_col],
+            ) orelse unreachable;
+            if (verified_indentation_idx == block_col) {
+                return @intCast(indentation_idx);
+            } else {
+                return @intCast(verified_indentation_idx);
+            }
+        }
+        // empty line found, consume to next nonwhitespace and exit
+        if (self.cursor[indentation_idx] == '\n') {
+            self.advanceCursor(indentation_idx + 1);
+            while (true) {
+                if (self.cursor.len == 0) return 0;
+
+                const next_idx = str.indexOfNone(self.cursor, " \t") orelse unreachable;
+                if (self.cursor[next_idx] == '\n') {
+                    self.advanceCursor(next_idx + 1);
+                    continue;
+                }
+
+                const verified_indentation_idx = std.mem.indexOfDiff(
+                    u8,
+                    self.cursor,
+                    self.indentation[0..block_col],
+                ) orelse unreachable;
+                if (verified_indentation_idx == block_col) {
+                    return @intCast(next_idx);
+                } else {
+                    return @intCast(verified_indentation_idx);
+                }
+            }
+        }
+
+        // verify indentation
+        const verified_indentation_idx = std.mem.indexOfDiff(
+            u8,
+            self.cursor,
+            self.indentation[0..block_col],
+        ) orelse unreachable;
+        if (verified_indentation_idx == block_col) {
+            self.advanceCursor(verified_indentation_idx);
+            const newline = str.indexOfChar(self.cursor, '\n') orelse unreachable;
+            try self.insertTextLine(.space_text, .text, parent_idx, newline);
+            self.advanceCursor(1);
+        } else if (verified_indentation_idx > parent_col) {
+            self.advanceCursor(parent_col);
+            try self.appendPointErrorAtCursor(.inconsistent_indentation, parent_idx);
+            self.advanceCursor(indentation_idx - parent_col);
+            const newline = str.indexOfChar(self.cursor, '\n') orelse unreachable;
+            try self.insertTextLine(.space_text, .text, parent_idx, newline);
+            self.advanceCursor(1);
+        } else {
+            return @intCast(verified_indentation_idx);
+        }
+    }
+}
diff --git a/src/main.zig b/src/main.zig
@@ -17,23 +17,23 @@ pub fn main() !void {
         defer tracy_frame.end();
         if (stdin.stat()) |stat| {
             if (stat.size > 0) {
-                var al: std.ArrayList(u8) = try .initCapacity(arena.allocator(), stat.size + 128);
-                try stdin.reader().readAllArrayList(&al, std.math.maxInt(u32) - 128);
-                try al.appendNTimes('\n', 128);
+                var al: std.ArrayList(u8) = try .initCapacity(arena.allocator(), stat.size + 1024);
+                try stdin.reader().readAllArrayList(&al, std.math.maxInt(u32) - 1024);
+                try al.appendNTimes('\n', 1024);
                 break :blk al;
             }
         } else |_| {}
-        var al: std.ArrayList(u8) = try .initCapacity(gpa.allocator(), 1024);
+        var al: std.ArrayList(u8) = try .initCapacity(gpa.allocator(), 4096);
         errdefer al.deinit();
-        try stdin.reader().readAllArrayList(&al, std.math.maxInt(u32) - 128);
-        try al.appendNTimes('\n', 128);
+        try stdin.reader().readAllArrayList(&al, std.math.maxInt(u32) - 1024);
+        try al.appendNTimes('\n', 1024);
         break :blk al;
     };
     defer input_arraylist.deinit();
     const input = input_arraylist.items;
 
     const ast = blk: {
-        const tracy_frame = tracy.namedFrame("parse");
+        const tracy_frame = tracy.namedFrame("parse 0");
         defer tracy_frame.end();
         break :blk try mymarkdown.parse(
             gpa.allocator(),
@@ -41,24 +41,28 @@ pub fn main() !void {
             input,
         );
     };
-    // const ast2 = blk: {
-    //     const tracy_frame = tracy.namedFrame("parse 2");
-    //     tracy_frame.end();
-    //     break :blk try mymarkdown.parse2(
-    //         gpa.allocator(),
-    //         arena.allocator(),
-    //         input,
-    //     );
-    // };
-    // try std.testing.expectEqualDeep(ast, ast2);
+    const ast2 = blk: {
+        const tracy_frame = tracy.namedFrame("parse 1");
+        defer tracy_frame.end();
+        break :blk try mymarkdown.parse2(
+            gpa.allocator(),
+            arena.allocator(),
+            input,
+        );
+    };
 
     var bw = std.io.bufferedWriter(std.io.getStdOut().writer());
     const stdout = bw.writer();
     {
-        const tracy_frame = tracy.namedFrame("render");
+        const tracy_frame = tracy.namedFrame("render 0");
         defer tracy_frame.end();
         _ = try ast.renderAst(stdout, input);
     }
+    {
+        const tracy_frame = tracy.namedFrame("render 1");
+        defer tracy_frame.end();
+        _ = try ast2.renderAst(stdout, input);
+    }
     try bw.flush();
 
     if (tracy.enable) {
diff --git a/src/root.zig b/src/root.zig
@@ -2,7 +2,9 @@ const std = @import("std");
 
 pub const Ast = @import("Ast.zig");
 pub const AstGen = @import("AstGen.zig");
+pub const AstGen2 = @import("AstGen2.zig");
 pub const parse = AstGen.parse;
+pub const parse2 = AstGen2.parse;
 
 test {
     _ = @import("test/test.zig");
diff --git a/src/str.zig b/src/str.zig
@@ -111,6 +111,24 @@ pub fn escapeStringForSingleQuotedString(
     return escapeString(writer, slice, .double_quoted_string);
 }
 
+pub fn fmtEscapes(bytes: []const u8) std.fmt.Formatter(stringEscapeFormatter) {
+    return .{ .data = bytes };
+}
+
+pub fn stringEscapeFormatter(
+    bytes: []const u8,
+    comptime f: []const u8,
+    options: std.fmt.FormatOptions,
+    writer: anytype,
+) !void {
+    _ = options;
+    if (f.len == 1 and f[0] == '\'') {
+        try escapeString(writer, bytes, .single_quoted_string);
+    } else {
+        try escapeString(writer, bytes, .double_quoted_string);
+    }
+}
+
 pub fn escapeString(
     writer: anytype,
     slice: []const u8,
diff --git a/src/test/test.zig b/src/test/test.zig
@@ -269,6 +269,8 @@ test "Happy path list elaboration" {
         \\    .text
         \\      "a"
         \\  .elaboration
+        \\    .marker
+        \\      "+"
         \\    .paragraph
         \\      .text
         \\        "bb"
@@ -318,6 +320,8 @@ test "Mixed indentation" {
         \\.document
         \\  .elaboration
         \\    .error .inconsistent_indentation at 3:1
+        \\    .marker
+        \\      "+"
         \\    .paragraph
         \\      .text
         \\        "aaa"
@@ -358,7 +362,7 @@ test "Empty line in heading" {
         \\    .text
         \\      "heading"
         \\    .space_text
-        \\      .error .empty_line_in_inline_block at 2:1
+        \\      error .unexpected_block_in_inline_context
         \\      "text"
         \\  .paragraph
         \\    .text
diff --git a/src/utils.zig b/src/utils.zig
@@ -2,14 +2,16 @@ const std = @import("std");
 
 const ziggy = @import("ziggy");
 
-pub fn NewType(comptime int_type: type, comptime dummy_type_: type) type {
-    return enum(int_type) {
+pub fn NewType(comptime IntType_: type, comptime DummyType_: type) type {
+    return enum(IntType_) {
+        root = 0,
         _,
 
         const Self = @This();
+        pub const IntType = IntType_;
 
         pub fn next(self: @This()) ?@This() {
-            if (@intFromEnum(self) == std.math.maxInt(int_type))
+            if (@intFromEnum(self) == std.math.maxInt(IntType_))
                 return null;
             return @enumFromInt(@intFromEnum(self) + 1);
         }
@@ -19,7 +21,7 @@ pub fn NewType(comptime int_type: type, comptime dummy_type_: type) type {
         }
 
         pub const ziggy_options = struct {
-            const dummy_type = dummy_type_;
+            const DummyType = DummyType_;
             pub fn parse(
                 self: *ziggy.Parser,
                 first_tok: ziggy.Tokenizer.Token,

	mymarkdown My markdown
	git clone https://git.grace.moe/mymarkdown
	Log \| Files \| Refs

M	src/Ast.zig	\|	169	+++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------
M	src/AstGen.zig	\|	24	+++++++++++++++++-------
A	src/AstGen2.zig	\|	801	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	src/main.zig	\|	40	++++++++++++++++++++++------------------
M	src/root.zig	\|	2	++
M	src/str.zig	\|	18	++++++++++++++++++
M	src/test/test.zig	\|	6	+++++-
M	src/utils.zig	\|	10	++++++----