commit bd95922ce51c9eb723dde54821c792f7a40b9ad7
parent 710682847dfa6ef35230d275a79fe35bbca21fc6
Author: gracefu <81774659+gracefuu@users.noreply.github.com>
Date: Wed, 21 May 2025 21:13:50 +0800
more microopts
Diffstat:
3 files changed, 135 insertions(+), 103 deletions(-)
diff --git a/src/Ast.zig b/src/Ast.zig
@@ -25,11 +25,8 @@ pub const StrLen = u24;
pub const Tag = enum(u8) {
document = 255,
- marker = 254, // First child of nodes like heading, list items, ...
list = 253,
-
- thematic_break = 252,
heading = '#',
quote = '>',
paragraph = ';',
@@ -39,17 +36,16 @@ pub const Tag = enum(u8) {
task_item = 251,
elaboration = '+',
+ marker = 254, // First child of nodes like heading, list items, ...
+ thematic_break = 252,
text = 250,
space_text = 249,
};
pub const Node = utils.Packed(union(Tag) {
document: Root,
- marker: Leaf, // First child of nodes like heading, list items, ...
list: Container,
-
- thematic_break: Leaf,
heading: Container,
quote: Container,
paragraph: Container,
@@ -59,6 +55,8 @@ pub const Node = utils.Packed(union(Tag) {
task_item: Container,
elaboration: Container,
+ marker: Leaf, // First child of nodes like heading, list items, ...
+ thematic_break: Leaf,
text: Leaf,
space_text: Leaf, // text with 1 space added before it
@@ -68,8 +66,8 @@ pub const Node = utils.Packed(union(Tag) {
pub const format = utils.structFormat(@This());
};
pub const Container = packed struct {
- off: StrOffset,
num_children: Idx.IntType = 0,
+ off: StrOffset,
pub const format = utils.structFormat(@This());
};
pub const Leaf = packed struct {
@@ -79,15 +77,10 @@ pub const Node = utils.Packed(union(Tag) {
pub const format = utils.structFormat(@This());
};
- pub inline fn incrementNumChildren(self: *Node) void {
+ pub fn incrementNumChildren(self: *Node) void {
switch (self.tag) {
- inline else => |t| {
- if (@TypeOf(@field(self.data, @tagName(t))) == Container or
- @TypeOf(@field(self.data, @tagName(t))) == Root)
- {
- @field(self.data, @tagName(t)).num_children += 1;
- } else unreachable;
- },
+ .marker, .thematic_break, .text, .space_text => unreachable,
+ else => @as(*Idx.IntType, @ptrCast(&self.data)).* += 1,
}
}
diff --git a/src/AstGen3.zig b/src/AstGen3.zig
@@ -54,65 +54,112 @@ const PRINT_ALLOC_STATS = false;
// The same applies to Error / Error.Tagged below, but the impact is less severe
// because appending Errors is a much less common operation. Nevertheless, we inline it
// despite not having any data to back it up, on a hunch that it should be faster.
-inline fn appendNode(self: *AstGen, node: Node.Tagged) !Node.Idx {
+fn appendNode(self: *AstGen, node: Node) !Node.Idx {
if (self.nodes.items.len > std.math.maxInt(
@typeInfo(Node.Idx).@"enum".tag_type,
)) return error.OutOfNodeIdx;
const idx = self.nodes.items.len;
const cap = if (PRINT_ALLOC_STATS) self.nodes.capacity;
- try self.nodes.append(self.gpa, .fromTagged(node));
+ try self.nodes.append(self.gpa, node);
if (PRINT_ALLOC_STATS and cap != self.nodes.capacity) {
self.num_node_allocs += 1;
}
return @enumFromInt(idx);
}
-inline fn appendContainerNode(self: *AstGen, parent_idx: Node.Idx, comptime container_node_tag: Node.Tag, ptr: PaddedMany) !Node.Idx {
+fn appendContainerNode(self: *AstGen, parent_idx: Node.Idx, container_node_tag: Node.Tag, ptr: PaddedMany) !Node.Idx {
self.getNode(parent_idx).incrementNumChildren();
- return try self.appendNode(
- @unionInit(
- Node.Tagged,
- @tagName(container_node_tag),
- @as(Node.Tagged.Container, .{ .off = @intCast(self.input.ptr.calcOffsetTo(ptr)) }),
- ),
- );
+ switch (container_node_tag) {
+ .document => unreachable,
+
+ .list,
+ .heading,
+ .quote,
+ .paragraph,
+ .unordered_item,
+ .ordered_item,
+ .term_item,
+ .task_item,
+ .elaboration,
+ => return self.appendNode(.{
+ .data = .{ .list = .{ .off = @intCast(self.input.ptr.calcOffsetTo(ptr)) } },
+ .tag = container_node_tag,
+ }),
+
+ .marker,
+ .thematic_break,
+ .text,
+ .space_text,
+ => unreachable,
+ }
}
-inline fn appendLeafNode(self: *AstGen, parent_idx: Node.Idx, comptime leaf_node_tag: Node.Tag, ptr: PaddedMany, len: StrLen) !Node.Idx {
+fn appendLeafNode(self: *AstGen, parent_idx: Node.Idx, leaf_node_tag: Node.Tag, ptr: PaddedMany, len: StrLen) !Node.Idx {
self.getNode(parent_idx).incrementNumChildren();
- return try self.appendNode(
- @unionInit(
- Node.Tagged,
- @tagName(leaf_node_tag),
- @as(Node.Leaf, .{ .off = @intCast(self.input.ptr.calcOffsetTo(ptr)), .len = len }),
- ),
- );
+ switch (leaf_node_tag) {
+ .document => unreachable,
+
+ .list,
+ .heading,
+ .quote,
+ .paragraph,
+ .unordered_item,
+ .ordered_item,
+ .term_item,
+ .task_item,
+ .elaboration,
+ => unreachable,
+
+ .marker,
+ .thematic_break,
+ .text,
+ .space_text,
+ => return self.appendNode(.{
+ .data = .{ .text = .{ .off = @intCast(self.input.ptr.calcOffsetTo(ptr)), .len = len } },
+ .tag = leaf_node_tag,
+ }),
+ }
}
-inline fn appendError(self: *AstGen, err: Error.Tagged) !void {
+
+fn appendError(self: *AstGen, err: Error) !void {
if (self.errors.items.len > std.math.maxInt(
@typeInfo(Error.Idx).@"enum".tag_type,
)) return error.OutOfErrorIdx;
const cap = if (PRINT_ALLOC_STATS) self.errors.capacity;
- try self.errors.append(self.gpa, .fromTagged(err));
+ try self.errors.append(self.gpa, err);
if (PRINT_ALLOC_STATS and cap != self.errors.capacity) {
self.num_error_allocs += 1;
}
}
-inline fn appendPointError(self: *AstGen, comptime tag: Error.Tag, idx: Node.Idx, ptr: PaddedMany) !void {
- try self.appendError(
- @unionInit(
- Error.Tagged,
- @tagName(tag),
- .{ .idx = idx, .off = @intCast(self.input.ptr.calcOffsetTo(ptr)) },
- ),
- );
+fn appendPointError(self: *AstGen, tag: Error.Tag, idx: Node.Idx, ptr: PaddedMany) !void {
+ switch (tag) {
+ .marker_too_long,
+ .unexpected_block_in_inline_context,
+ .elaboration_after_unelaboratable_node,
+ .incorrect_elaboration_marker,
+ => unreachable,
+
+ .invalid_marker,
+ .inconsistent_indentation,
+ => try self.appendError(.{
+ .data = .{ .invalid_marker = .{ .idx = idx, .off = @intCast(self.input.ptr.calcOffsetTo(ptr)) } },
+ .tag = tag,
+ }),
+ }
}
-inline fn appendNodeError(self: *AstGen, comptime tag: Error.Tag, idx: Node.Idx) !void {
- try self.appendError(
- @unionInit(
- Error.Tagged,
- @tagName(tag),
- .{ .idx = idx },
- ),
- );
+fn appendNodeError(self: *AstGen, tag: Error.Tag, idx: Node.Idx) !void {
+ switch (tag) {
+ .marker_too_long,
+ .unexpected_block_in_inline_context,
+ .elaboration_after_unelaboratable_node,
+ .incorrect_elaboration_marker,
+ => try self.appendError(.{
+ .data = .{ .marker_too_long = .{ .idx = idx } },
+ .tag = tag,
+ }),
+
+ .invalid_marker,
+ .inconsistent_indentation,
+ => unreachable,
+ }
}
fn castStrLen(len: usize, comptime err: anytype) @TypeOf(err)!StrLen {
return if (len <= std.math.maxInt(StrLen)) @intCast(len) else return err;
@@ -226,27 +273,14 @@ const ParsingContext = enum { block_context, inline_context };
fn parseRoot(self: *AstGen) !void {
const tracy_frame = tracy.trace(@src());
defer tracy_frame.end();
- const root = try self.appendNode(.{ .document = .{} });
+ const root = try self.appendNode(.fromTagged(.{ .document = .{} }));
assert(root == .root);
if (self.scanner.peek()) |p| assert(self.input.ptr._ptr == p.@"1".ptr._ptr);
- // "inline" hack to get different branch predictors for the root column
- _ = try self.parseColumnImpl(root);
+ _ = try self.parseColumn(root);
assert(self.scanner.peek() == null);
}
-fn parseColumn(
- self: *AstGen,
- parent_idx: Node.Idx,
-) (error{
- TooMuchIndentation,
- MarkerTooLong,
- OutOfNodeIdx,
- OutOfErrorIdx,
-} || std.mem.Allocator.Error)!void {
- return self.parseColumnImpl(parent_idx);
-}
-
/// Used in headings, semicolon paragraphs, etc.
/// Has different rules as regular unmarked paragraphs, because
/// unmarked paragraphs are interrupted by block elements and
@@ -313,7 +347,7 @@ fn parseInlineColumn(
}
}
-inline fn parseColumnImpl(
+fn parseColumn(
self: *AstGen,
parent_idx: Node.Idx,
) !void {
@@ -473,37 +507,8 @@ inline fn parseColumnImpl(
last_elaboratable_idx = null;
try self.insertTextLine(.text, .text, paragraph_idx, line);
self.scanner.advance();
-
- while (self.scanner.peek()) |peek2| {
- _, const line2 = peek2;
- const line_load4: u32 = @bitCast(line2.ptr._ptr[0..4].*);
- const line_load3: u32 = line_load4 & @as(u32, @bitCast("\xff\xff\xff\x00\x00"[0..4].*));
- const line_load2: u32 = line_load4 & @as(u32, @bitCast("\xff\xff\x00\x00\x00"[0..4].*));
-
- switch (line2.ptr._ptr[0]) {
- // Special block chars / actually empty line
- '\n', '-', '.', ':', '+', '>', '#', '@' => break,
- // Empty line (only whitespace)
- ' ', '\t' => if (line2.len == line2.indexOfNotSpaceOrTab()) break,
- // string literals have extra chars so [0..4] doesn't give a sentinel ptr
- // Thematic break
- '*' => if (line_load3 ==
- @as(u32, @bitCast("***\x00\x00"[0..4].*)) and
- (line2.len == 3 or
- line2.len == 3 + line2.sliceOpen(3).indexOfNotSpaceOrTab())) break,
- // Verbatim block
- '=' => if (line_load2 ==
- @as(u32, @bitCast("==\x00\x00\x00"[0..4].*))) break,
- // Math block
- '$' => if (line_load3 ==
- @as(u32, @bitCast("$==\x00\x00"[0..4].*))) break,
- else => {},
- }
-
- try self.insertTextLine(.space_text, .text, paragraph_idx, line2);
- self.scanner.advance();
- }
-
+ // Split into a separate function so that parseRoot / parseColumn share the same branch predictors
+ try self.parseRestParagraph(paragraph_idx);
break :block_idx paragraph_idx;
}
};
@@ -514,6 +519,41 @@ inline fn parseColumnImpl(
}
}
+fn parseRestParagraph(
+ self: *AstGen,
+ paragraph_idx: Node.Idx,
+) !void {
+ while (self.scanner.peek()) |peek2| {
+ _, const line2 = peek2;
+ const line_load4: u32 = @bitCast(line2.ptr._ptr[0..4].*);
+ const line_load3: u32 = line_load4 & @as(u32, @bitCast("\xff\xff\xff\x00\x00"[0..4].*));
+ const line_load2: u32 = line_load4 & @as(u32, @bitCast("\xff\xff\x00\x00\x00"[0..4].*));
+
+ switch (line2.ptr._ptr[0]) {
+ // Special block chars / actually empty line
+ '\n', '-', '.', ':', '+', '>', '#', '@' => break,
+ // Empty line (only whitespace)
+ ' ', '\t' => if (line2.len == line2.indexOfNotSpaceOrTab()) break,
+ // string literals have extra chars so [0..4] doesn't give a sentinel ptr
+ // Thematic break
+ '*' => if (line_load3 ==
+ @as(u32, @bitCast("***\x00\x00"[0..4].*)) and
+ (line2.len == 3 or
+ line2.len == 3 + line2.sliceOpen(3).indexOfNotSpaceOrTab())) break,
+ // Verbatim block
+ '=' => if (line_load2 ==
+ @as(u32, @bitCast("==\x00\x00\x00"[0..4].*))) break,
+ // Math block
+ '$' => if (line_load3 ==
+ @as(u32, @bitCast("$==\x00\x00"[0..4].*))) break,
+ else => {},
+ }
+
+ try self.insertTextLine(.space_text, .text, paragraph_idx, line2);
+ self.scanner.advance();
+ }
+}
+
fn insertTextLine(
self: *AstGen,
comptime first_text_tag: Node.Tag,
diff --git a/src/IndentationScanner.zig b/src/IndentationScanner.zig
@@ -243,18 +243,17 @@ pub fn peek(it: *const IndentationScanner) PeekResult {
/// Advances the cursor to the next line.
/// Precondition: peek() would not have returned null.
-pub fn advance(it: *IndentationScanner) void {
+pub noinline fn advance(it: *IndentationScanner) void {
assert(it.peek() != null);
it._cur_line = null;
const line_maybe, it._rest_lines = it._rest_lines.splitOneLine();
const line = line_maybe orelse return;
- const spaces_or_tabs = line.indexOfNotSpaceOrTab();
- if (spaces_or_tabs == line.len) {
+ const spaces = line.indexOfNotSpace();
+ if (spaces == line.len) {
// Lie to the caller -- pretend empty lines have been indented to the correct level
- it._cur_line = .{ it.curLevel(), it.curExpectedIndents().correct_start, line.sliceOpen(spaces_or_tabs) };
+ it._cur_line = .{ it.curLevel(), it.curExpectedIndents().correct_start, line.sliceOpen(spaces) };
return;
}
- const spaces = line.indexOfNotSpace();
it._cur_line = for (0..it._expected_indents.len) |rev_i| {
const i = it._expected_indents.len - 1 - rev_i;
const misalignment_start = it._expected_indents.buffer[i].misalignment_start;