more microopts - mymarkdown

commit bd95922ce51c9eb723dde54821c792f7a40b9ad7
parent 710682847dfa6ef35230d275a79fe35bbca21fc6
Author: gracefu <81774659+gracefuu@users.noreply.github.com>
Date:   Wed, 21 May 2025 21:13:50 +0800

more microopts

Diffstat:
M src/Ast.zig  | 23 ++++++++---------------
M src/AstGen3.zig  | 206 +++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
M src/IndentationScanner.zig  | 9 ++++-----

3 files changed, 135 insertions(+), 103 deletions(-)
diff --git a/src/Ast.zig b/src/Ast.zig
@@ -25,11 +25,8 @@ pub const StrLen = u24;
 
 pub const Tag = enum(u8) {
     document = 255,
-    marker = 254, // First child of nodes like heading, list items, ...
 
     list = 253,
-
-    thematic_break = 252,
     heading = '#',
     quote = '>',
     paragraph = ';',
@@ -39,17 +36,16 @@ pub const Tag = enum(u8) {
     task_item = 251,
     elaboration = '+',
 
+    marker = 254, // First child of nodes like heading, list items, ...
+    thematic_break = 252,
     text = 250,
     space_text = 249,
 };
 
 pub const Node = utils.Packed(union(Tag) {
     document: Root,
-    marker: Leaf, // First child of nodes like heading, list items, ...
 
     list: Container,
-
-    thematic_break: Leaf,
     heading: Container,
     quote: Container,
     paragraph: Container,
@@ -59,6 +55,8 @@ pub const Node = utils.Packed(union(Tag) {
     task_item: Container,
     elaboration: Container,
 
+    marker: Leaf, // First child of nodes like heading, list items, ...
+    thematic_break: Leaf,
     text: Leaf,
     space_text: Leaf, // text with 1 space added before it
 
@@ -68,8 +66,8 @@ pub const Node = utils.Packed(union(Tag) {
         pub const format = utils.structFormat(@This());
     };
     pub const Container = packed struct {
-        off: StrOffset,
         num_children: Idx.IntType = 0,
+        off: StrOffset,
         pub const format = utils.structFormat(@This());
     };
     pub const Leaf = packed struct {
@@ -79,15 +77,10 @@ pub const Node = utils.Packed(union(Tag) {
         pub const format = utils.structFormat(@This());
     };
 
-    pub inline fn incrementNumChildren(self: *Node) void {
+    pub fn incrementNumChildren(self: *Node) void {
         switch (self.tag) {
-            inline else => |t| {
-                if (@TypeOf(@field(self.data, @tagName(t))) == Container or
-                    @TypeOf(@field(self.data, @tagName(t))) == Root)
-                {
-                    @field(self.data, @tagName(t)).num_children += 1;
-                } else unreachable;
-            },
+            .marker, .thematic_break, .text, .space_text => unreachable,
+            else => @as(*Idx.IntType, @ptrCast(&self.data)).* += 1,
         }
     }
 
diff --git a/src/AstGen3.zig b/src/AstGen3.zig
@@ -54,65 +54,112 @@ const PRINT_ALLOC_STATS = false;
 // The same applies for Error / Error.Tagged below, but the impact is less severe
 // as appending Errors is a much less common operation. Nevertheless, we inline it
 // despite not having any data to back it up, because I have vibes that it should be faster.
-inline fn appendNode(self: *AstGen, node: Node.Tagged) !Node.Idx {
+fn appendNode(self: *AstGen, node: Node) !Node.Idx {
     if (self.nodes.items.len > std.math.maxInt(
         @typeInfo(Node.Idx).@"enum".tag_type,
     )) return error.OutOfNodeIdx;
     const idx = self.nodes.items.len;
     const cap = if (PRINT_ALLOC_STATS) self.nodes.capacity;
-    try self.nodes.append(self.gpa, .fromTagged(node));
+    try self.nodes.append(self.gpa, node);
     if (PRINT_ALLOC_STATS and cap != self.nodes.capacity) {
         self.num_node_allocs += 1;
     }
     return @enumFromInt(idx);
 }
-inline fn appendContainerNode(self: *AstGen, parent_idx: Node.Idx, comptime container_node_tag: Node.Tag, ptr: PaddedMany) !Node.Idx {
+fn appendContainerNode(self: *AstGen, parent_idx: Node.Idx, container_node_tag: Node.Tag, ptr: PaddedMany) !Node.Idx {
     self.getNode(parent_idx).incrementNumChildren();
-    return try self.appendNode(
-        @unionInit(
-            Node.Tagged,
-            @tagName(container_node_tag),
-            @as(Node.Tagged.Container, .{ .off = @intCast(self.input.ptr.calcOffsetTo(ptr)) }),
-        ),
-    );
+    switch (container_node_tag) {
+        .document => unreachable,
+
+        .list,
+        .heading,
+        .quote,
+        .paragraph,
+        .unordered_item,
+        .ordered_item,
+        .term_item,
+        .task_item,
+        .elaboration,
+        => return self.appendNode(.{
+            .data = .{ .list = .{ .off = @intCast(self.input.ptr.calcOffsetTo(ptr)) } },
+            .tag = container_node_tag,
+        }),
+
+        .marker,
+        .thematic_break,
+        .text,
+        .space_text,
+        => unreachable,
+    }
 }
-inline fn appendLeafNode(self: *AstGen, parent_idx: Node.Idx, comptime leaf_node_tag: Node.Tag, ptr: PaddedMany, len: StrLen) !Node.Idx {
+fn appendLeafNode(self: *AstGen, parent_idx: Node.Idx, leaf_node_tag: Node.Tag, ptr: PaddedMany, len: StrLen) !Node.Idx {
     self.getNode(parent_idx).incrementNumChildren();
-    return try self.appendNode(
-        @unionInit(
-            Node.Tagged,
-            @tagName(leaf_node_tag),
-            @as(Node.Leaf, .{ .off = @intCast(self.input.ptr.calcOffsetTo(ptr)), .len = len }),
-        ),
-    );
+    switch (leaf_node_tag) {
+        .document => unreachable,
+
+        .list,
+        .heading,
+        .quote,
+        .paragraph,
+        .unordered_item,
+        .ordered_item,
+        .term_item,
+        .task_item,
+        .elaboration,
+        => unreachable,
+
+        .marker,
+        .thematic_break,
+        .text,
+        .space_text,
+        => return self.appendNode(.{
+            .data = .{ .text = .{ .off = @intCast(self.input.ptr.calcOffsetTo(ptr)), .len = len } },
+            .tag = leaf_node_tag,
+        }),
+    }
 }
-inline fn appendError(self: *AstGen, err: Error.Tagged) !void {
+
+fn appendError(self: *AstGen, err: Error) !void {
     if (self.errors.items.len > std.math.maxInt(
         @typeInfo(Error.Idx).@"enum".tag_type,
     )) return error.OutOfErrorIdx;
     const cap = if (PRINT_ALLOC_STATS) self.errors.capacity;
-    try self.errors.append(self.gpa, .fromTagged(err));
+    try self.errors.append(self.gpa, err);
     if (PRINT_ALLOC_STATS and cap != self.errors.capacity) {
         self.num_error_allocs += 1;
     }
 }
-inline fn appendPointError(self: *AstGen, comptime tag: Error.Tag, idx: Node.Idx, ptr: PaddedMany) !void {
-    try self.appendError(
-        @unionInit(
-            Error.Tagged,
-            @tagName(tag),
-            .{ .idx = idx, .off = @intCast(self.input.ptr.calcOffsetTo(ptr)) },
-        ),
-    );
+fn appendPointError(self: *AstGen, tag: Error.Tag, idx: Node.Idx, ptr: PaddedMany) !void {
+    switch (tag) {
+        .marker_too_long,
+        .unexpected_block_in_inline_context,
+        .elaboration_after_unelaboratable_node,
+        .incorrect_elaboration_marker,
+        => unreachable,
+
+        .invalid_marker,
+        .inconsistent_indentation,
+        => try self.appendError(.{
+            .data = .{ .invalid_marker = .{ .idx = idx, .off = @intCast(self.input.ptr.calcOffsetTo(ptr)) } },
+            .tag = tag,
+        }),
+    }
 }
-inline fn appendNodeError(self: *AstGen, comptime tag: Error.Tag, idx: Node.Idx) !void {
-    try self.appendError(
-        @unionInit(
-            Error.Tagged,
-            @tagName(tag),
-            .{ .idx = idx },
-        ),
-    );
+fn appendNodeError(self: *AstGen, tag: Error.Tag, idx: Node.Idx) !void {
+    switch (tag) {
+        .marker_too_long,
+        .unexpected_block_in_inline_context,
+        .elaboration_after_unelaboratable_node,
+        .incorrect_elaboration_marker,
+        => try self.appendError(.{
+            .data = .{ .marker_too_long = .{ .idx = idx } },
+            .tag = tag,
+        }),
+
+        .invalid_marker,
+        .inconsistent_indentation,
+        => unreachable,
+    }
 }
 fn castStrLen(len: usize, comptime err: anytype) @TypeOf(err)!StrLen {
     return if (len <= std.math.maxInt(StrLen)) @intCast(len) else return err;
@@ -226,27 +273,14 @@ const ParsingContext = enum { block_context, inline_context };
 fn parseRoot(self: *AstGen) !void {
     const tracy_frame = tracy.trace(@src());
     defer tracy_frame.end();
-    const root = try self.appendNode(.{ .document = .{} });
+    const root = try self.appendNode(.fromTagged(.{ .document = .{} }));
     assert(root == .root);
     if (self.scanner.peek()) |p| assert(self.input.ptr._ptr == p.@"1".ptr._ptr);
 
-    // "inline" hack to get different branch predictors for the root column
-    _ = try self.parseColumnImpl(root);
+    _ = try self.parseColumn(root);
     assert(self.scanner.peek() == null);
 }
 
-fn parseColumn(
-    self: *AstGen,
-    parent_idx: Node.Idx,
-) (error{
-    TooMuchIndentation,
-    MarkerTooLong,
-    OutOfNodeIdx,
-    OutOfErrorIdx,
-} || std.mem.Allocator.Error)!void {
-    return self.parseColumnImpl(parent_idx);
-}
-
 /// Used in headings, semicolon paragraphs, etc.
 /// Has different rules as regular unmarked paragraphs, because
 /// unmarked paragraphs are interrupted by block elements and
@@ -313,7 +347,7 @@ fn parseInlineColumn(
     }
 }
 
-inline fn parseColumnImpl(
+fn parseColumn(
     self: *AstGen,
     parent_idx: Node.Idx,
 ) !void {
@@ -473,37 +507,8 @@ inline fn parseColumnImpl(
                 last_elaboratable_idx = null;
                 try self.insertTextLine(.text, .text, paragraph_idx, line);
                 self.scanner.advance();
-
-                while (self.scanner.peek()) |peek2| {
-                    _, const line2 = peek2;
-                    const line_load4: u32 = @bitCast(line2.ptr._ptr[0..4].*);
-                    const line_load3: u32 = line_load4 & @as(u32, @bitCast("\xff\xff\xff\x00\x00"[0..4].*));
-                    const line_load2: u32 = line_load4 & @as(u32, @bitCast("\xff\xff\x00\x00\x00"[0..4].*));
-
-                    switch (line2.ptr._ptr[0]) {
-                        // Special block chars / actually empty line
-                        '\n', '-', '.', ':', '+', '>', '#', '@' => break,
-                        // Empty line (only whitespace)
-                        ' ', '\t' => if (line2.len == line2.indexOfNotSpaceOrTab()) break,
-                        // string literals have extra chars so [0..4] doesn't give a sentinel ptr
-                        // Thematic break
-                        '*' => if (line_load3 ==
-                            @as(u32, @bitCast("***\x00\x00"[0..4].*)) and
-                            (line2.len == 3 or
-                                line2.len == 3 + line2.sliceOpen(3).indexOfNotSpaceOrTab())) break,
-                        // Verbatim block
-                        '=' => if (line_load2 ==
-                            @as(u32, @bitCast("==\x00\x00\x00"[0..4].*))) break,
-                        // Math block
-                        '$' => if (line_load3 ==
-                            @as(u32, @bitCast("$==\x00\x00"[0..4].*))) break,
-                        else => {},
-                    }
-
-                    try self.insertTextLine(.space_text, .text, paragraph_idx, line2);
-                    self.scanner.advance();
-                }
-
+                // Split into separate function so parseRoot / parseColumn share the same branch predictors
+                try self.parseRestParagraph(paragraph_idx);
                 break :block_idx paragraph_idx;
             }
         };
@@ -514,6 +519,41 @@ inline fn parseColumnImpl(
     }
 }
 
+fn parseRestParagraph(
+    self: *AstGen,
+    paragraph_idx: Node.Idx,
+) !void {
+    while (self.scanner.peek()) |peek2| {
+        _, const line2 = peek2;
+        const line_load4: u32 = @bitCast(line2.ptr._ptr[0..4].*);
+        const line_load3: u32 = line_load4 & @as(u32, @bitCast("\xff\xff\xff\x00\x00"[0..4].*));
+        const line_load2: u32 = line_load4 & @as(u32, @bitCast("\xff\xff\x00\x00\x00"[0..4].*));
+
+        switch (line2.ptr._ptr[0]) {
+            // Special block chars / actually empty line
+            '\n', '-', '.', ':', '+', '>', '#', '@' => break,
+            // Empty line (only whitespace)
+            ' ', '\t' => if (line2.len == line2.indexOfNotSpaceOrTab()) break,
+            // string literals have extra chars so [0..4] doesn't give a sentinel ptr
+            // Thematic break
+            '*' => if (line_load3 ==
+                @as(u32, @bitCast("***\x00\x00"[0..4].*)) and
+                (line2.len == 3 or
+                    line2.len == 3 + line2.sliceOpen(3).indexOfNotSpaceOrTab())) break,
+            // Verbatim block
+            '=' => if (line_load2 ==
+                @as(u32, @bitCast("==\x00\x00\x00"[0..4].*))) break,
+            // Math block
+            '$' => if (line_load3 ==
+                @as(u32, @bitCast("$==\x00\x00"[0..4].*))) break,
+            else => {},
+        }
+
+        try self.insertTextLine(.space_text, .text, paragraph_idx, line2);
+        self.scanner.advance();
+    }
+}
+
 fn insertTextLine(
     self: *AstGen,
     comptime first_text_tag: Node.Tag,
diff --git a/src/IndentationScanner.zig b/src/IndentationScanner.zig
@@ -243,18 +243,17 @@ pub fn peek(it: *const IndentationScanner) PeekResult {
 
 /// Advances the cursor to the next line.
 /// Precondition: peek() would not have returned null.
-pub fn advance(it: *IndentationScanner) void {
+pub noinline fn advance(it: *IndentationScanner) void {
     assert(it.peek() != null);
     it._cur_line = null;
     const line_maybe, it._rest_lines = it._rest_lines.splitOneLine();
     const line = line_maybe orelse return;
-    const spaces_or_tabs = line.indexOfNotSpaceOrTab();
-    if (spaces_or_tabs == line.len) {
+    const spaces = line.indexOfNotSpace();
+    if (spaces == line.len) {
         // Lie to the caller -- pretend empty lines have been indented to the correct level
-        it._cur_line = .{ it.curLevel(), it.curExpectedIndents().correct_start, line.sliceOpen(spaces_or_tabs) };
+        it._cur_line = .{ it.curLevel(), it.curExpectedIndents().correct_start, line.sliceOpen(spaces) };
         return;
     }
-    const spaces = line.indexOfNotSpace();
     it._cur_line = for (0..it._expected_indents.len) |rev_i| {
         const i = it._expected_indents.len - 1 - rev_i;
         const misalignment_start = it._expected_indents.buffer[i].misalignment_start;

	mymarkdown My markdown
	git clone https://git.grace.moe/mymarkdown
	Log | Files | Refs

M	src/Ast.zig	|	23	++++++++---------------
M	src/AstGen3.zig	|	206	+++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
M	src/IndentationScanner.zig	|	9	++++-----