mymarkdown

My markdown
git clone https://git.grace.moe/mymarkdown
Log | Files | Refs

commit bd95922ce51c9eb723dde54821c792f7a40b9ad7
parent 710682847dfa6ef35230d275a79fe35bbca21fc6
Author: gracefu <81774659+gracefuu@users.noreply.github.com>
Date:   Wed, 21 May 2025 21:13:50 +0800

more microopts

Diffstat:
M src/Ast.zig | 23 ++++++++---------------
M src/AstGen3.zig | 206 +++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
M src/IndentationScanner.zig | 9 ++++-----
3 files changed, 135 insertions(+), 103 deletions(-)

diff --git a/src/Ast.zig b/src/Ast.zig @@ -25,11 +25,8 @@ pub const StrLen = u24; pub const Tag = enum(u8) { document = 255, - marker = 254, // First child of nodes like heading, list items, ... list = 253, - - thematic_break = 252, heading = '#', quote = '>', paragraph = ';', @@ -39,17 +36,16 @@ pub const Tag = enum(u8) { task_item = 251, elaboration = '+', + marker = 254, // First child of nodes like heading, list items, ... + thematic_break = 252, text = 250, space_text = 249, }; pub const Node = utils.Packed(union(Tag) { document: Root, - marker: Leaf, // First child of nodes like heading, list items, ... list: Container, - - thematic_break: Leaf, heading: Container, quote: Container, paragraph: Container, @@ -59,6 +55,8 @@ pub const Node = utils.Packed(union(Tag) { task_item: Container, elaboration: Container, + marker: Leaf, // First child of nodes like heading, list items, ... + thematic_break: Leaf, text: Leaf, space_text: Leaf, // text with 1 space added before it @@ -68,8 +66,8 @@ pub const Node = utils.Packed(union(Tag) { pub const format = utils.structFormat(@This()); }; pub const Container = packed struct { - off: StrOffset, num_children: Idx.IntType = 0, + off: StrOffset, pub const format = utils.structFormat(@This()); }; pub const Leaf = packed struct { @@ -79,15 +77,10 @@ pub const Node = utils.Packed(union(Tag) { pub const format = utils.structFormat(@This()); }; - pub inline fn incrementNumChildren(self: *Node) void { + pub fn incrementNumChildren(self: *Node) void { switch (self.tag) { - inline else => |t| { - if (@TypeOf(@field(self.data, @tagName(t))) == Container or - @TypeOf(@field(self.data, @tagName(t))) == Root) - { - @field(self.data, @tagName(t)).num_children += 1; - } else unreachable; - }, + .marker, .thematic_break, .text, .space_text => unreachable, + else => @as(*Idx.IntType, @ptrCast(&self.data)).* += 1, } } diff --git a/src/AstGen3.zig b/src/AstGen3.zig @@ -54,65 +54,112 @@ const PRINT_ALLOC_STATS = false; // The same applies for 
Error / Error.Tagged below, but the impact is less severe // as appending Errors is a much less common operation. Nevertheless, we inline it // despite not having any data to back it up, because I have vibes that it should be faster. -inline fn appendNode(self: *AstGen, node: Node.Tagged) !Node.Idx { +fn appendNode(self: *AstGen, node: Node) !Node.Idx { if (self.nodes.items.len > std.math.maxInt( @typeInfo(Node.Idx).@"enum".tag_type, )) return error.OutOfNodeIdx; const idx = self.nodes.items.len; const cap = if (PRINT_ALLOC_STATS) self.nodes.capacity; - try self.nodes.append(self.gpa, .fromTagged(node)); + try self.nodes.append(self.gpa, node); if (PRINT_ALLOC_STATS and cap != self.nodes.capacity) { self.num_node_allocs += 1; } return @enumFromInt(idx); } -inline fn appendContainerNode(self: *AstGen, parent_idx: Node.Idx, comptime container_node_tag: Node.Tag, ptr: PaddedMany) !Node.Idx { +fn appendContainerNode(self: *AstGen, parent_idx: Node.Idx, container_node_tag: Node.Tag, ptr: PaddedMany) !Node.Idx { self.getNode(parent_idx).incrementNumChildren(); - return try self.appendNode( - @unionInit( - Node.Tagged, - @tagName(container_node_tag), - @as(Node.Tagged.Container, .{ .off = @intCast(self.input.ptr.calcOffsetTo(ptr)) }), - ), - ); + switch (container_node_tag) { + .document => unreachable, + + .list, + .heading, + .quote, + .paragraph, + .unordered_item, + .ordered_item, + .term_item, + .task_item, + .elaboration, + => return self.appendNode(.{ + .data = .{ .list = .{ .off = @intCast(self.input.ptr.calcOffsetTo(ptr)) } }, + .tag = container_node_tag, + }), + + .marker, + .thematic_break, + .text, + .space_text, + => unreachable, + } } -inline fn appendLeafNode(self: *AstGen, parent_idx: Node.Idx, comptime leaf_node_tag: Node.Tag, ptr: PaddedMany, len: StrLen) !Node.Idx { +fn appendLeafNode(self: *AstGen, parent_idx: Node.Idx, leaf_node_tag: Node.Tag, ptr: PaddedMany, len: StrLen) !Node.Idx { self.getNode(parent_idx).incrementNumChildren(); - return try 
self.appendNode( - @unionInit( - Node.Tagged, - @tagName(leaf_node_tag), - @as(Node.Leaf, .{ .off = @intCast(self.input.ptr.calcOffsetTo(ptr)), .len = len }), - ), - ); + switch (leaf_node_tag) { + .document => unreachable, + + .list, + .heading, + .quote, + .paragraph, + .unordered_item, + .ordered_item, + .term_item, + .task_item, + .elaboration, + => unreachable, + + .marker, + .thematic_break, + .text, + .space_text, + => return self.appendNode(.{ + .data = .{ .text = .{ .off = @intCast(self.input.ptr.calcOffsetTo(ptr)), .len = len } }, + .tag = leaf_node_tag, + }), + } } -inline fn appendError(self: *AstGen, err: Error.Tagged) !void { + +fn appendError(self: *AstGen, err: Error) !void { if (self.errors.items.len > std.math.maxInt( @typeInfo(Error.Idx).@"enum".tag_type, )) return error.OutOfErrorIdx; const cap = if (PRINT_ALLOC_STATS) self.errors.capacity; - try self.errors.append(self.gpa, .fromTagged(err)); + try self.errors.append(self.gpa, err); if (PRINT_ALLOC_STATS and cap != self.errors.capacity) { self.num_error_allocs += 1; } } -inline fn appendPointError(self: *AstGen, comptime tag: Error.Tag, idx: Node.Idx, ptr: PaddedMany) !void { - try self.appendError( - @unionInit( - Error.Tagged, - @tagName(tag), - .{ .idx = idx, .off = @intCast(self.input.ptr.calcOffsetTo(ptr)) }, - ), - ); +fn appendPointError(self: *AstGen, tag: Error.Tag, idx: Node.Idx, ptr: PaddedMany) !void { + switch (tag) { + .marker_too_long, + .unexpected_block_in_inline_context, + .elaboration_after_unelaboratable_node, + .incorrect_elaboration_marker, + => unreachable, + + .invalid_marker, + .inconsistent_indentation, + => try self.appendError(.{ + .data = .{ .invalid_marker = .{ .idx = idx, .off = @intCast(self.input.ptr.calcOffsetTo(ptr)) } }, + .tag = tag, + }), + } } -inline fn appendNodeError(self: *AstGen, comptime tag: Error.Tag, idx: Node.Idx) !void { - try self.appendError( - @unionInit( - Error.Tagged, - @tagName(tag), - .{ .idx = idx }, - ), - ); +fn appendNodeError(self: 
*AstGen, tag: Error.Tag, idx: Node.Idx) !void { + switch (tag) { + .marker_too_long, + .unexpected_block_in_inline_context, + .elaboration_after_unelaboratable_node, + .incorrect_elaboration_marker, + => try self.appendError(.{ + .data = .{ .marker_too_long = .{ .idx = idx } }, + .tag = tag, + }), + + .invalid_marker, + .inconsistent_indentation, + => unreachable, + } } fn castStrLen(len: usize, comptime err: anytype) @TypeOf(err)!StrLen { return if (len <= std.math.maxInt(StrLen)) @intCast(len) else return err; @@ -226,27 +273,14 @@ const ParsingContext = enum { block_context, inline_context }; fn parseRoot(self: *AstGen) !void { const tracy_frame = tracy.trace(@src()); defer tracy_frame.end(); - const root = try self.appendNode(.{ .document = .{} }); + const root = try self.appendNode(.fromTagged(.{ .document = .{} })); assert(root == .root); if (self.scanner.peek()) |p| assert(self.input.ptr._ptr == p.@"1".ptr._ptr); - // "inline" hack to get different branch predictors for the root column - _ = try self.parseColumnImpl(root); + _ = try self.parseColumn(root); assert(self.scanner.peek() == null); } -fn parseColumn( - self: *AstGen, - parent_idx: Node.Idx, -) (error{ - TooMuchIndentation, - MarkerTooLong, - OutOfNodeIdx, - OutOfErrorIdx, -} || std.mem.Allocator.Error)!void { - return self.parseColumnImpl(parent_idx); -} - /// Used in headings, semicolon paragraphs, etc. 
/// Has different rules as regular unmarked paragraphs, because /// unmarked paragraphs are interrupted by block elements and @@ -313,7 +347,7 @@ fn parseInlineColumn( } } -inline fn parseColumnImpl( +fn parseColumn( self: *AstGen, parent_idx: Node.Idx, ) !void { @@ -473,37 +507,8 @@ inline fn parseColumnImpl( last_elaboratable_idx = null; try self.insertTextLine(.text, .text, paragraph_idx, line); self.scanner.advance(); - - while (self.scanner.peek()) |peek2| { - _, const line2 = peek2; - const line_load4: u32 = @bitCast(line2.ptr._ptr[0..4].*); - const line_load3: u32 = line_load4 & @as(u32, @bitCast("\xff\xff\xff\x00\x00"[0..4].*)); - const line_load2: u32 = line_load4 & @as(u32, @bitCast("\xff\xff\x00\x00\x00"[0..4].*)); - - switch (line2.ptr._ptr[0]) { - // Special block chars / actually empty line - '\n', '-', '.', ':', '+', '>', '#', '@' => break, - // Empty line (only whitespace) - ' ', '\t' => if (line2.len == line2.indexOfNotSpaceOrTab()) break, - // string literals have extra chars so [0..4] doesn't give a sentinel ptr - // Thematic break - '*' => if (line_load3 == - @as(u32, @bitCast("***\x00\x00"[0..4].*)) and - (line2.len == 3 or - line2.len == 3 + line2.sliceOpen(3).indexOfNotSpaceOrTab())) break, - // Verbatim block - '=' => if (line_load2 == - @as(u32, @bitCast("==\x00\x00\x00"[0..4].*))) break, - // Math block - '$' => if (line_load3 == - @as(u32, @bitCast("$==\x00\x00"[0..4].*))) break, - else => {}, - } - - try self.insertTextLine(.space_text, .text, paragraph_idx, line2); - self.scanner.advance(); - } - + // Split into separate function so parseRoot / parseColumn share the same branch predictors + try self.parseRestParagraph(paragraph_idx); break :block_idx paragraph_idx; } }; @@ -514,6 +519,41 @@ inline fn parseColumnImpl( } } +fn parseRestParagraph( + self: *AstGen, + paragraph_idx: Node.Idx, +) !void { + while (self.scanner.peek()) |peek2| { + _, const line2 = peek2; + const line_load4: u32 = @bitCast(line2.ptr._ptr[0..4].*); + const 
line_load3: u32 = line_load4 & @as(u32, @bitCast("\xff\xff\xff\x00\x00"[0..4].*)); + const line_load2: u32 = line_load4 & @as(u32, @bitCast("\xff\xff\x00\x00\x00"[0..4].*)); + + switch (line2.ptr._ptr[0]) { + // Special block chars / actually empty line + '\n', '-', '.', ':', '+', '>', '#', '@' => break, + // Empty line (only whitespace) + ' ', '\t' => if (line2.len == line2.indexOfNotSpaceOrTab()) break, + // string literals have extra chars so [0..4] doesn't give a sentinel ptr + // Thematic break + '*' => if (line_load3 == + @as(u32, @bitCast("***\x00\x00"[0..4].*)) and + (line2.len == 3 or + line2.len == 3 + line2.sliceOpen(3).indexOfNotSpaceOrTab())) break, + // Verbatim block + '=' => if (line_load2 == + @as(u32, @bitCast("==\x00\x00\x00"[0..4].*))) break, + // Math block + '$' => if (line_load3 == + @as(u32, @bitCast("$==\x00\x00"[0..4].*))) break, + else => {}, + } + + try self.insertTextLine(.space_text, .text, paragraph_idx, line2); + self.scanner.advance(); + } +} + fn insertTextLine( self: *AstGen, comptime first_text_tag: Node.Tag, diff --git a/src/IndentationScanner.zig b/src/IndentationScanner.zig @@ -243,18 +243,17 @@ pub fn peek(it: *const IndentationScanner) PeekResult { /// Advances the cursor to the next line. /// Precondition: peek() would not have returned null. 
-pub fn advance(it: *IndentationScanner) void { +pub noinline fn advance(it: *IndentationScanner) void { assert(it.peek() != null); it._cur_line = null; const line_maybe, it._rest_lines = it._rest_lines.splitOneLine(); const line = line_maybe orelse return; - const spaces_or_tabs = line.indexOfNotSpaceOrTab(); - if (spaces_or_tabs == line.len) { + const spaces = line.indexOfNotSpace(); + if (spaces == line.len) { // Lie to the caller -- pretend empty lines have been indented to the correct level - it._cur_line = .{ it.curLevel(), it.curExpectedIndents().correct_start, line.sliceOpen(spaces_or_tabs) }; + it._cur_line = .{ it.curLevel(), it.curExpectedIndents().correct_start, line.sliceOpen(spaces) }; return; } - const spaces = line.indexOfNotSpace(); it._cur_line = for (0..it._expected_indents.len) |rev_i| { const i = it._expected_indents.len - 1 - rev_i; const misalignment_start = it._expected_indents.buffer[i].misalignment_start;