mymarkdown

My markdown
git clone https://git.grace.moe/mymarkdown

commit 54df051635b265ec8aa5f3ef95ba814e654f1a02
parent 5c05d98ae750e4df8ddf2771738f278239156314
Author: gracefu <81774659+gracefuu@users.noreply.github.com>
Date:   Fri, 23 May 2025 03:33:35 +0800

Redesign AST format

Diffstat:
Msrc/Ast.zig | 439++++++++++++++++++++++++++++++++++++++++---------------------------------------
Msrc/AstGen.zig | 272++++++++++++++++++++++---------------------------------------------------------
Msrc/padded_str_impl.zig | 22+++++++++++++++-------
Msrc/test.zig | 66++++++++++++++++++++++++++++++++++++------------------------------
Msrc/utils.zig | 138+------------------------------------------------------------------------------
5 files changed, 351 insertions(+), 586 deletions(-)
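
For orientation before the full diff: the commit's central change, in src/Ast.zig, replaces the old utils.Packed tagged-union Node (and the side extra array) with a flat packed struct(u64) whose tag byte encodes both the node kind and its layout, container or leaf. The sketch below condenses that layout so the diff is easier to follow. It is an illustration, not the code from the diff: Idx is simplified to a bare u24, StrOffset and StrLen appear as plain integers, and only three of the tags are shown.

const std = @import("std");

// Condensed from the new src/Ast.zig in the diff below; types are simplified.
const Node = packed struct(u64) {
    /// Which arm is valid is decided by the layout bit of `tag`.
    _data: packed union {
        container: packed struct { num_children: u24 },
        leaf: packed struct { len: u24 },
    },
    /// Source offset of the node (StrOffset in the real code).
    off: u32,
    /// Node kind; its top bit doubles as the layout discriminant.
    tag: Tag,

    const Layout = enum(u1) { container, leaf };
    const TagBacking = packed struct(u8) { tag: u7, layout: Layout };

    const Tag = enum(u8) {
        document = @bitCast(TagBacking{ .tag = 0, .layout = .container }),
        paragraph,
        text = @bitCast(TagBacking{ .tag = 0, .layout = .leaf }),

        fn layout(tag: Tag) Layout {
            return @as(TagBacking, @bitCast(@intFromEnum(tag))).layout;
        }
    };

    fn numChildren(node: Node) ?u24 {
        if (node.tag.layout() != .container) return null;
        return node._data.container.num_children;
    }

    fn len(node: Node) ?u24 {
        if (node.tag.layout() != .leaf) return null;
        return node._data.leaf.len;
    }
};

test "layout bit picks the active union arm" {
    const doc: Node = .{
        ._data = .{ .container = .{ .num_children = 2 } },
        .off = 0,
        .tag = .document,
    };
    try std.testing.expectEqual(2, doc.numChildren());
    try std.testing.expectEqual(null, doc.len());

    const txt: Node = .{
        ._data = .{ .leaf = .{ .len = 5 } },
        .off = 10,
        .tag = .text,
    };
    try std.testing.expectEqual(5, txt.len());
    try std.testing.expectEqual(null, txt.numChildren());
}

Error is rebuilt the same way, with point and node layouts in place of container and leaf, which is how both Node and Error stay at 8 bytes, matching the comptime size asserts in Ast.zig.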

diff --git a/src/Ast.zig b/src/Ast.zig @@ -1,5 +1,6 @@ const std = @import("std"); const assert = std.debug.assert; +const panic = std.debug.panic; const tracy = @import("tracy"); @@ -10,109 +11,225 @@ const Ast = @This(); nodes: []const Node, errors: []const Error, -extra: []const u32, -pub const empty: Ast = .{ .nodes = &.{}, .errors = &.{}, .extra = &.{} }; +pub const empty: Ast = .{ .nodes = &.{}, .errors = &.{} }; pub fn deinit(self: Ast, gpa: std.mem.Allocator) void { gpa.free(self.nodes); gpa.free(self.errors); - gpa.free(self.extra); } pub const StrOffset = u32; pub const StrLen = u24; -pub const Tag = enum(u8) { - document = 255, - - list = 253, - heading = '#', - quote = '>', - paragraph = ';', - unordered_item = '-', - ordered_item = '.', - term_item = ':', - task_item = 251, - elaboration = '+', - - marker = 254, // First child of nodes like heading, list items, ... - thematic_break = 252, - text = 250, - space_text = 249, -}; +pub const Node = packed struct(u64) { + /// Do not get or set directly + _data: packed union { + /// For container nodes, which have nodes nested within them + container: packed struct { num_children: Idx.Int }, + /// For leaf nodes, which have no children but point to a range in the source (and hence has a length) + leaf: packed struct { len: StrLen }, + }, + + /// Source location of the node + off: StrOffset, + + /// The type of node + tag: Tag, + + // ================ + // pub fns + + // Getters + + pub fn numChildren(node: Node) ?Idx.Int { + if (node.tag.layout() != .container) return null; + return node._data.container.num_children; + } + pub fn numChildrenMut(node: *Node) ?*align(8:0:8) Idx.Int { + if (node.tag.layout() != .container) return null; + return &node._data.container.num_children; + } + pub fn len(node: Node) ?StrLen { + if (node.tag.layout() != .leaf) return null; + return node._data.leaf.len; + } -pub const Node = utils.Packed(union(Tag) { - document: Root, + // Constructors - list: Container, - heading: Container, - quote: Container, - paragraph: Container, - unordered_item: Container, - ordered_item: Container, - term_item: Container, - task_item: Container, - elaboration: Container, + /// The recommended way to construct a Node with .container layout. + pub fn initContainerNode(comptime tag: Tag, off: StrOffset, num_children: Idx.Int) Node { + comptime assert(tag.layout() == .container); + return @call(.always_inline, initContainerNodeRuntime, .{ tag, off, num_children }).?; + } + /// The recommended way to construct a Node with .leaf layout. + pub fn initLeafNode(comptime tag: Tag, off: StrOffset, len_: StrLen) Node { + comptime assert(tag.layout() == .leaf); + return @call(.always_inline, initLeafNodeRuntime, .{ tag, off, len_ }).?; + } - marker: Leaf, // First child of nodes like heading, list items, ... - thematic_break: Leaf, - text: Leaf, - space_text: Leaf, // text with 1 space added before it + /// Construct a Node with .container layout when the tag is runtime known. + pub fn initContainerNodeRuntime(tag: Tag, off: StrOffset, num_children: Idx.Int) ?Node { + if (tag.layout() != .container) return null; + return .{ + ._data = .{ .container = .{ .num_children = num_children } }, + .off = off, + .tag = tag, + }; + } + /// Construct a Node with .leaf layout when the tag is runtime known. 
+ pub fn initLeafNodeRuntime(tag: Tag, off: StrOffset, len_: StrLen) ?Node { + if (tag.layout() != .leaf) return null; + return .{ + ._data = .{ .leaf = .{ .len = len_ } }, + .off = off, + .tag = tag, + }; + } + // ================ + // pub types pub const Idx = utils.NewType(u24, opaque {}); - pub const Root = packed struct { - num_children: Idx.IntType = 0, - pub const format = utils.structFormat(@This()); - }; - pub const Container = packed struct { - num_children: Idx.IntType = 0, - off: StrOffset, - pub const format = utils.structFormat(@This()); + + pub const Tag = enum(TagBacking.Int) { + // Node with .container layout (has source location + num_children) + document = @bitCast(TagBacking{ .tag = 0, .layout = .container }), + list, + heading, + quote, + paragraph, + unordered_item, + ordered_item, + term_item, + task_item, + elaboration, + + // Node with .leaf layout (has source location + len) + marker = @bitCast(TagBacking{ .tag = 0, .layout = .leaf }), + thematic_break, + text, + space_text, + + pub fn fromMarker(comptime c: u8) Tag { + return comptime .fromMarkerRuntime(c) orelse + panic("Invalid marker char '{c}' in fromMarker", .{c}); + } + pub fn fromMarkerRuntime(c: u8) ?Tag { + return switch (c) { + '#' => .heading, + '>' => .quote, + ';' => .paragraph, + '-' => .unordered_item, + '.' => .ordered_item, + ':' => .term_item, + '+' => .elaboration, + else => null, + }; + } + + fn layout(tag: Tag) Layout { + return @as(TagBacking, @bitCast(@intFromEnum(tag))).layout; + } }; - pub const Leaf = packed struct { - off: StrOffset, - len: StrLen, - const num_children = 0; - pub const format = utils.structFormat(@This()); + + // ================ + // Implementation detail + const Layout = enum(u1) { container, leaf }; + const TagBacking = packed struct(u8) { + tag: u7, + layout: Layout, + + const Int = u8; }; +}; - pub fn incrementNumChildren(self: *Node) void { - switch (self.tag) { - .marker, .thematic_break, .text, .space_text => unreachable, - else => @as(*Idx.IntType, @ptrCast(&self.data)).* += 1, - } +pub const Error = packed struct(u64) { + /// Do not get or set directly + _data: packed union { + /// For errors with a precise error position in the source + point: packed struct { off: StrOffset }, + /// For errors that simply say something is wrong about the node + node: void, + }, + + /// The idx of the node the error points to + node_idx: Node.Idx, + + /// The type of error + tag: Tag, + + // ================ + // pub fns + + // Getters + + pub fn off(err: Error) ?StrOffset { + if (err.tag.layout() != .point) return null; + return err._data.point.off; } - pub const format = utils.unionFormat(@This()); -}); + // Constructors -pub const Error = utils.Packed(union(enum(u8)) { - marker_too_long: NodeError, - unexpected_block_in_inline_context: NodeError, - elaboration_after_unelaboratable_node: NodeError, - incorrect_elaboration_marker: NodeError, - invalid_marker: PointError, - inconsistent_indentation: PointError, + /// The recommended way to construct an Error with .point layout. + pub fn initPointError(comptime tag: Tag, node_idx: Node.Idx, off_: StrOffset) Error { + comptime assert(tag.layout() == .point); + return @call(.always_inline, initPointErrorRuntime, .{ tag, node_idx, off_ }).?; + } + /// The recommended way to construct an Error with .node layout. 
+ pub fn initNodeError(comptime tag: Tag, node_idx: Node.Idx) Error { + comptime assert(tag.layout() == .node); + return @call(.always_inline, initNodeErrorRuntime, .{ tag, node_idx }).?; + } - /// Used when the error diagnostic spans the entire node - pub const NodeError = packed struct { - idx: Node.Idx, + /// Construct an Error with .point layout when the tag is runtime known. + pub fn initPointErrorRuntime(tag: Tag, node_idx: Node.Idx, off_: StrOffset) ?Error { + if (tag.layout() != .point) return null; + return .{ + ._data = .{ .point = .{ .off = off_ } }, + .node_idx = node_idx, + .tag = tag, + }; + } + /// Construct an Error with .node layout when the tag is runtime known. + pub fn initNodeErrorRuntime(tag: Tag, node_idx: Node.Idx) ?Error { + if (tag.layout() != .node) return null; + return .{ + ._data = .{ .node = {} }, + .node_idx = node_idx, + .tag = tag, + }; + } - pub const format = utils.structFormat(@This()); - }; + // ================ + // pub types + pub const Idx = utils.NewType(u24, opaque {}); + + pub const Tag = enum(TagBacking.Int) { + // Errors with .point layout (has node_idx and source location) + invalid_marker = @bitCast(TagBacking{ .tag = 0, .layout = .point }), + inconsistent_indentation, - /// Used when the error diagnostic should point at a single location - pub const PointError = packed struct { - idx: Node.Idx, - off: StrOffset, + // Errors with .node layout (has node_idx) + marker_too_long = @bitCast(TagBacking{ .tag = 0, .layout = .node }), + unexpected_block_in_inline_context, + elaboration_after_unelaboratable_node, + incorrect_elaboration_marker, - pub const format = utils.structFormat(@This()); + fn layout(tag: Tag) Layout { + return @as(TagBacking, @bitCast(@intFromEnum(tag))).layout; + } }; - pub const Idx = utils.NewType(u24, opaque {}); - pub const format = utils.unionFormat(@This()); -}); + // ================ + // Implementation detail + const Layout = enum(u1) { point, node }; + const TagBacking = packed struct(u8) { + tag: u7, + layout: Layout, + + const Int = u8; + }; +}; comptime { assert(24 == @bitSizeOf(Node.Idx)); @@ -126,22 +243,6 @@ comptime { assert(8 == @sizeOf(Error)); } -pub const Tagged = struct { - nodes: []const Node.Tagged, - errors: []const Error.Tagged, - extra: []const u32, - - pub const empty: Tagged = .{ .nodes = &.{}, .errors = &.{}, .extra = &.{} }; -}; -pub fn toTagged(self: Ast, gpa: std.mem.Allocator) !Tagged { - const nodes = try gpa.alloc(Node.Tagged, self.nodes.len); - const errors = try gpa.alloc(Error.Tagged, self.errors.len); - const extra = try gpa.dupe(u32, self.extra); - for (self.nodes, nodes) |node, *out| out.* = node.toTagged(); - for (self.errors, errors) |err, *out| out.* = err.toTagged(); - return .{ .nodes = nodes, .errors = errors, .extra = extra }; -} - pub fn renderAst(self: Ast, writer: anytype, input: []const u8) !void { var renderer: AstRenderer(@TypeOf(writer)) = .{ .self = self, @@ -213,138 +314,46 @@ fn AstRenderer(Writer: type) type { var cur_node: usize = start_node + 1; var cur_error: usize = start_error; while (cur_error < renderer.self.errors.len and - @intFromEnum(renderer.self.errors[cur_error].get(.idx)) == start_node) + @intFromEnum(renderer.self.errors[cur_error].node_idx) == start_node) { try renderer.writer.writeByteNTimes(' ', indent + 2); try renderer.writer.writeAll(".error ."); try renderer.writer.writeAll(@tagName(renderer.self.errors[cur_error].tag)); - switch (renderer.self.errors[cur_error].toTagged()) { - // PointError - .inconsistent_indentation, - .invalid_marker, - => 
|error_data| { - try renderer.writer.print( - " at {}:{}", - renderer.offsetToPos(error_data.off), - ); - }, - // NodeError - .marker_too_long, - .unexpected_block_in_inline_context, - .elaboration_after_unelaboratable_node, - .incorrect_elaboration_marker, - => {}, - } + if (renderer.self.errors[cur_error].off()) |off| + try renderer.writer.print(" at {}:{}", renderer.offsetToPos(off)); try renderer.writer.writeByte('\n'); cur_error += 1; } - switch (renderer.self.nodes[start_node].toTagged()) { - .document, - => |data| { - for (0..data.num_children) |_| { - assert(cur_node < renderer.self.nodes.len); - cur_node, cur_error = try renderer.renderAstInner( - cur_node, - cur_error, - indent + 2, - ); - assert(cur_node <= renderer.self.nodes.len); - } - return .{ cur_node, cur_error }; - }, - - .list, - .heading, - .quote, - .paragraph, - .unordered_item, - .ordered_item, - .term_item, - .task_item, - .elaboration, - => |data| { - for (0..data.num_children) |_| { - assert(cur_node < renderer.self.nodes.len); - cur_node, cur_error = try renderer.renderAstInner( - cur_node, - cur_error, - indent + 2, - ); - assert(cur_node <= renderer.self.nodes.len); - } - return .{ cur_node, cur_error }; - }, - - .marker, - .thematic_break, - .text, - .space_text, - => |data| { - try renderer.writer.writeByteNTimes(' ', indent + 2); - // This was too slow! - // try renderer.writer.print("\"{}\"", .{ - // std.zig.fmtEscapes(renderer.input[data.off .. data.off + data.len]), - // }); - try renderer.writer.writeByte('"'); - try str.escapeStringForDoubleQuotedString( - renderer.writer, - renderer.input[data.off .. data.off + data.len], - .padded, + + if (renderer.self.nodes[start_node].numChildren()) |num_children| { + for (0..num_children) |_| { + assert(cur_node < renderer.self.nodes.len); + cur_node, cur_error = try renderer.renderAstInner( + cur_node, + cur_error, + indent + 2, ); - try renderer.writer.writeByte('"'); - try renderer.writer.writeByte('\n'); - return .{ cur_node, cur_error }; - }, + assert(cur_node <= renderer.self.nodes.len); + } } - } - }; -} -pub fn renderHtml( - self: Ast, - writer: anytype, - input: []const u8, - start_: ?Node.Idx, -) !?Node.Idx { - const tracy_frame = tracy.trace(@src()); - defer tracy_frame.end(); - const start: Node.Idx = start_ orelse @enumFromInt(0); - switch (self.nodes[@intFromEnum(start)].tag) { - .document => try writer.writeAll("<body>\n"), - .paragraph => try writer.writeAll("<p>"), - .text => { - const data: Node.Leaf = self.nodes[@intFromEnum(start)].data.text; - try writer.writeAll(input[data.off .. data.off + data.len]); - }, - .space_text => { - const data: Node.Leaf = self.nodes[@intFromEnum(start)].data.text; - try writer.writeByte(' '); - try writer.writeAll(input[data.off .. data.off + data.len]); - }, - else => {}, - } - var cur_idx: ?Node.Idx = start.next(); - switch (self.nodes[@intFromEnum(start)].tag) { - inline .document, .paragraph => |t| { - const data = @field( - self.nodes[@intFromEnum(start)].data, - @tagName(t), - ); - for (0..data.num_children) |_| { - if (cur_idx) |idx| { - cur_idx = try self.renderHtml(writer, input, idx); - } else { - unreachable; - } + if (renderer.self.nodes[start_node].len()) |len| { + try renderer.writer.writeByteNTimes(' ', indent + 2); + // This was too slow! + // try renderer.writer.print("\"{}\"", .{ + // std.zig.fmtEscapes(renderer.input[data.off .. 
data.off + data.len]), + // }); + try renderer.writer.writeByte('"'); + try str.escapeStringForDoubleQuotedString( + renderer.writer, + renderer.input[renderer.self.nodes[start_node].off .. renderer.self.nodes[start_node].off + len], + .padded, + ); + try renderer.writer.writeByte('"'); + try renderer.writer.writeByte('\n'); } - }, - else => {}, - } - switch (self.nodes[@intFromEnum(start)].tag) { - .document => try writer.writeAll("</body>\n"), - .paragraph => try writer.writeAll("</p>\n"), - .text, .space_text => {}, - else => {}, - } - return cur_idx; + + return .{ cur_node, cur_error }; + } + }; } diff --git a/src/AstGen.zig b/src/AstGen.zig @@ -45,27 +45,13 @@ fn nextNodeIdx(self: *const AstGen) Node.Idx { const PRINT_ALLOC_STATS = false; -// These need manual inlining for some reason. -// -// LLVM doesn't seem to think that inlining these are worth it, but LLVM is wrong. -// Because this constructs the Node using .fromTagged, and Node.Tagged and Node -// have different memory representations, the only way to construct in place and -// elide copies is for every function from this one to the callsite to be inlined. -// -// The same applies for Error / Error.Tagged below, but the impact is less severe -// as appending Errors is a much less common operation. Nevertheless, we inline it -// despite not having any data to back it up, because I have vibes that it should be faster. +fn appendChildNode(self: *AstGen, parent_idx: Node.Idx, node: Node) !Node.Idx { + try self.ensureUnusedCapacityNode(1); + return self.appendAssumeCapacityChildNode(parent_idx, node); +} fn appendNode(self: *AstGen, node: Node) !Node.Idx { - if (self.nodes.items.len > std.math.maxInt( - @typeInfo(Node.Idx).@"enum".tag_type, - )) return error.OutOfNodeIdx; - const idx = self.nodes.items.len; - const cap = if (PRINT_ALLOC_STATS) self.nodes.capacity; - try self.nodes.append(self.gpa, node); - if (PRINT_ALLOC_STATS and cap != self.nodes.capacity) { - self.num_node_allocs += 1; - } - return @enumFromInt(idx); + try self.ensureUnusedCapacityNode(1); + return self.appendAssumeCapacityNode(node); } fn ensureUnusedCapacityNode(self: *AstGen, additional_count: usize) !void { if (self.nodes.items.len > (1 << @bitSizeOf( @@ -73,6 +59,11 @@ fn ensureUnusedCapacityNode(self: *AstGen, additional_count: usize) !void { )) - additional_count) return error.OutOfNodeIdx; return self.nodes.ensureUnusedCapacity(self.gpa, additional_count); } + +fn appendAssumeCapacityChildNode(self: *AstGen, parent_idx: Node.Idx, node: Node) Node.Idx { + self.getNode(parent_idx).numChildrenMut().?.* += 1; + return self.appendAssumeCapacityNode(node); +} fn appendAssumeCapacityNode(self: *AstGen, node: Node) Node.Idx { const idx = self.nodes.items.len; const cap = if (PRINT_ALLOC_STATS) self.nodes.capacity; @@ -83,110 +74,17 @@ fn appendAssumeCapacityNode(self: *AstGen, node: Node) Node.Idx { return @enumFromInt(idx); } -fn appendContainerNode(self: *AstGen, parent_idx: Node.Idx, container_node_tag: Node.Tag, ptr: PaddedMany) !Node.Idx { - self.getNode(parent_idx).incrementNumChildren(); - switch (container_node_tag) { - .document => unreachable, - - .list, - .heading, - .quote, - .paragraph, - .unordered_item, - .ordered_item, - .term_item, - .task_item, - .elaboration, - => return self.appendNode(.{ - .data = .{ .list = .{ .off = @intCast(self.input.ptr.calcOffsetTo(ptr)) } }, - .tag = container_node_tag, - }), - - .marker, - .thematic_break, - .text, - .space_text, - => unreachable, - } +fn makeContainerNode(self: *AstGen, comptime container_node_tag: 
Node.Tag, ptr: PaddedMany) Node { + return .initContainerNode(container_node_tag, @intCast(self.input.ptr.calcOffsetTo(ptr)), 0); } -fn appendLeafNode(self: *AstGen, parent_idx: Node.Idx, leaf_node_tag: Node.Tag, ptr: PaddedMany, len: StrLen) !Node.Idx { - self.getNode(parent_idx).incrementNumChildren(); - switch (leaf_node_tag) { - .document => unreachable, - - .list, - .heading, - .quote, - .paragraph, - .unordered_item, - .ordered_item, - .term_item, - .task_item, - .elaboration, - => unreachable, - - .marker, - .thematic_break, - .text, - .space_text, - => return self.appendNode(.{ - .data = .{ .text = .{ .off = @intCast(self.input.ptr.calcOffsetTo(ptr)), .len = len } }, - .tag = leaf_node_tag, - }), - } +fn makeContainerNodeRuntime(self: *AstGen, container_node_tag: Node.Tag, ptr: PaddedMany) ?Node { + return .initContainerNodeRuntime(container_node_tag, @intCast(self.input.ptr.calcOffsetTo(ptr)), 0); } - -fn appendAssumeCapacityContainerNode(self: *AstGen, parent_idx: Node.Idx, container_node_tag: Node.Tag, ptr: PaddedMany) Node.Idx { - self.getNode(parent_idx).incrementNumChildren(); - switch (container_node_tag) { - .document => unreachable, - - .list, - .heading, - .quote, - .paragraph, - .unordered_item, - .ordered_item, - .term_item, - .task_item, - .elaboration, - => return self.appendAssumeCapacityNode(.{ - .data = .{ .list = .{ .off = @intCast(self.input.ptr.calcOffsetTo(ptr)) } }, - .tag = container_node_tag, - }), - - .marker, - .thematic_break, - .text, - .space_text, - => unreachable, - } +fn makeLeafNode(self: *AstGen, comptime leaf_node_tag: Node.Tag, ptr: PaddedMany, len: StrLen) Node { + return .initLeafNode(leaf_node_tag, @intCast(self.input.ptr.calcOffsetTo(ptr)), len); } -fn appendAssumeCapacityLeafNode(self: *AstGen, parent_idx: Node.Idx, leaf_node_tag: Node.Tag, ptr: PaddedMany, len: StrLen) Node.Idx { - self.getNode(parent_idx).incrementNumChildren(); - switch (leaf_node_tag) { - .document => unreachable, - - .list, - .heading, - .quote, - .paragraph, - .unordered_item, - .ordered_item, - .term_item, - .task_item, - .elaboration, - => unreachable, - - .marker, - .thematic_break, - .text, - .space_text, - => return self.appendAssumeCapacityNode(.{ - .data = .{ .text = .{ .off = @intCast(self.input.ptr.calcOffsetTo(ptr)), .len = len } }, - .tag = leaf_node_tag, - }), - } +fn makeLeafNodeRuntime(self: *AstGen, leaf_node_tag: Node.Tag, ptr: PaddedMany, len: StrLen) ?Node { + return .initLeafNodeRuntime(leaf_node_tag, @intCast(self.input.ptr.calcOffsetTo(ptr)), len); } fn appendError(self: *AstGen, err: Error) !void { @@ -199,38 +97,13 @@ fn appendError(self: *AstGen, err: Error) !void { self.num_error_allocs += 1; } } -fn appendPointError(self: *AstGen, tag: Error.Tag, idx: Node.Idx, ptr: PaddedMany) !void { - switch (tag) { - .marker_too_long, - .unexpected_block_in_inline_context, - .elaboration_after_unelaboratable_node, - .incorrect_elaboration_marker, - => unreachable, - - .invalid_marker, - .inconsistent_indentation, - => try self.appendError(.{ - .data = .{ .invalid_marker = .{ .idx = idx, .off = @intCast(self.input.ptr.calcOffsetTo(ptr)) } }, - .tag = tag, - }), - } +fn makePointError(self: *AstGen, comptime point_error_tag: Error.Tag, node_idx: Node.Idx, ptr: PaddedMany) Error { + return .initPointError(point_error_tag, node_idx, @intCast(self.input.ptr.calcOffsetTo(ptr))); } -fn appendNodeError(self: *AstGen, tag: Error.Tag, idx: Node.Idx) !void { - switch (tag) { - .marker_too_long, - .unexpected_block_in_inline_context, - 
.elaboration_after_unelaboratable_node, - .incorrect_elaboration_marker, - => try self.appendError(.{ - .data = .{ .marker_too_long = .{ .idx = idx } }, - .tag = tag, - }), - - .invalid_marker, - .inconsistent_indentation, - => unreachable, - } +fn makePointErrorRuntime(self: *AstGen, point_error_tag: Error.Tag, node_idx: Node.Idx, ptr: PaddedMany) ?Error { + return .initPointErrorRuntime(point_error_tag, node_idx, @intCast(self.input.ptr.calcOffsetTo(ptr))); } + fn castStrLen(len: usize, comptime err: anytype) @TypeOf(err)!StrLen { return if (len <= std.math.maxInt(StrLen)) @intCast(len) else return err; } @@ -296,7 +169,7 @@ pub fn parse( std.sort.pdq(Error, ast.errors.items, {}, struct { fn func(_: void, lhs: Error, rhs: Error) bool { - return @intFromEnum(lhs.get(.idx)) < @intFromEnum(rhs.get(.idx)); + return @intFromEnum(lhs.node_idx) < @intFromEnum(rhs.node_idx); } }.func); @@ -328,24 +201,18 @@ pub fn parse( errdefer gpa2.free(nodes); const errors = try gpa2.dupe(Error, ast.errors.items); errdefer gpa2.free(errors); - const extra = try gpa2.dupe(u32, ast.extra.items); - errdefer gpa2.free(extra); return .{ .nodes = nodes, .errors = errors, - .extra = extra, }; } else { const nodes = try ast.nodes.toOwnedSlice(gpa); errdefer gpa.free(nodes); const errors = try ast.errors.toOwnedSlice(gpa); errdefer gpa.free(errors); - const extra = try ast.extra.toOwnedSlice(gpa); - errdefer gpa.free(extra); return .{ .nodes = nodes, .errors = errors, - .extra = extra, }; } } @@ -353,7 +220,7 @@ pub fn parse( fn parseRoot(self: *AstGen) !void { const tracy_frame = tracy.trace(@src()); defer tracy_frame.end(); - const root = try self.appendNode(.fromTagged(.{ .document = .{} })); + const root = try self.appendNode(self.makeContainerNode(.document, self.input.ptr)); assert(root == .root); if (self.scanner.peek()) |p| assert(self.input.ptr._ptr == p.@"1".ptr._ptr); @@ -384,7 +251,7 @@ fn parseInlineColumn( defer tracy_frame.end(); tracy_frame.setName("inline"); tracy_frame.addText(peek_maybe.?.@"1".toUnpaddedSlice()); - var text_tag: Node.Tag = .text; + var text_tag: TextTag = .text; var saw_empty_line = false; while (peek_maybe) |peek| { const alignment, const line = peek; @@ -395,9 +262,9 @@ fn parseInlineColumn( continue; } if (alignment == .misaligned) - try self.appendPointError(.inconsistent_indentation, self.nextNodeIdx(), line.ptr); + try self.appendError(self.makePointError(.inconsistent_indentation, self.nextNodeIdx(), line.ptr)); if (saw_empty_line) { - try self.appendNodeError(.unexpected_block_in_inline_context, self.nextNodeIdx()); + try self.appendError(.initNodeError(.unexpected_block_in_inline_context, self.nextNodeIdx())); saw_empty_line = false; } try self.insertTextLine(text_tag, .text, parent_idx, line); @@ -451,22 +318,22 @@ fn parseColumn( switch (line.index(0)) { // Headings - inline '#' => { - const marker_len = line.indexOfNotChar('#'); + '#' => { + const marker_len = line.indexOfNotCharUnsafe('#'); if (TRACY_INSTRUMENT_HERE) tracy_frame.setName(@tagName(.heading)); try self.ensureUnusedCapacityNode(2); - const block_idx = self.appendAssumeCapacityContainerNode(parent_idx, .heading, line.ptr); - _ = self.appendAssumeCapacityLeafNode(block_idx, .marker, line.ptr, try castStrLen(marker_len, error.MarkerTooLong)); + const block_idx = self.appendAssumeCapacityChildNode(parent_idx, self.makeContainerNode(.heading, line.ptr)); + _ = self.appendAssumeCapacityChildNode(block_idx, self.makeLeafNode(.marker, line.ptr, try castStrLen(marker_len, error.MarkerTooLong))); 
last_elaboratable_idx = .{ block_idx, marker_len }; try self.parseInlineColumn(block_idx, .{ .skip = line_start + marker_len }); break :block_idx block_idx; }, // List markers - inline '-', '.', ':' => |m| { - const base_marker_len = line.indexOfNotChar(m); + '-', '.', ':' => |m| { + const base_marker_len = line.indexOfNotCharUnsafe(m); const marker_len, const list_type: Node.Tag = blk: { - if (comptime m == '-') { + if (m == '-') { var potential_task_item = line.sliceOpen(base_marker_len).indexOfNone("[ ]xX"); while (potential_task_item >= 3 and line.index(base_marker_len + potential_task_item - 1) == ' ') potential_task_item -= 1; @@ -478,12 +345,13 @@ fn parseColumn( line.index(base_marker_len + potential_task_item - 2) == 'X') and std.mem.allEqual(u8, line.toUnpaddedSlice()[base_marker_len .. base_marker_len + potential_task_item - 3], ' ')) { - if (TRACY_INSTRUMENT_HERE) tracy_frame.setName(@tagName(.task_item)); + if (TRACY_INSTRUMENT_HERE) tracy_frame.setName("task_item"); break :blk .{ base_marker_len + potential_task_item, .task_item }; } } - if (TRACY_INSTRUMENT_HERE) tracy_frame.setName(@tagName(@as(Node.Tag, @enumFromInt(m)))); - break :blk .{ base_marker_len, @enumFromInt(m) }; + const list_type = Node.Tag.fromMarkerRuntime(m).?; + if (TRACY_INSTRUMENT_HERE) tracy_frame.setName(@tagName(list_type)); + break :blk .{ base_marker_len, list_type }; }; try self.ensureUnusedCapacityNode(3); const list_node_idx = blk: { @@ -493,51 +361,51 @@ fn parseColumn( break :blk cur_node_idx; } } - const list_node_idx = self.appendAssumeCapacityContainerNode(parent_idx, .list, line.ptr); + const list_node_idx = self.appendAssumeCapacityChildNode(parent_idx, self.makeContainerNode(.list, line.ptr)); current_list = .{ list_type, list_node_idx }; break :blk list_node_idx; }; - const block_idx = self.appendAssumeCapacityContainerNode(list_node_idx, list_type, line.ptr); - _ = self.appendAssumeCapacityLeafNode(block_idx, .marker, line.ptr, try castStrLen(marker_len, error.MarkerTooLong)); + const block_idx = self.appendAssumeCapacityChildNode(list_node_idx, self.makeContainerNodeRuntime(list_type, line.ptr).?); + _ = self.appendAssumeCapacityChildNode(block_idx, self.makeLeafNode(.marker, line.ptr, try castStrLen(marker_len, error.MarkerTooLong))); last_elaboratable_idx = .{ block_idx, base_marker_len }; try self.parseInlineColumn(block_idx, .{ .skip = line_start + marker_len }); break :block_idx block_idx; }, // Elaborations - inline '+' => { + '+' => { if (TRACY_INSTRUMENT_HERE) tracy_frame.setName(@tagName(.elaboration)); - const marker_len = try castStrLen(line.indexOfNotChar('+'), error.MarkerTooLong); + const marker_len = try castStrLen(line.indexOfNotCharUnsafe('+'), error.MarkerTooLong); try self.ensureUnusedCapacityNode(2); const block_idx = if (last_elaboratable_idx) |elab| blk: { const elaboratable_idx, const required_marker_len = elab; - const block_idx = self.appendAssumeCapacityContainerNode(elaboratable_idx, .elaboration, line.ptr); + const block_idx = self.appendAssumeCapacityChildNode(elaboratable_idx, self.makeContainerNode(.elaboration, line.ptr)); if (marker_len != required_marker_len) - try self.appendNodeError(.incorrect_elaboration_marker, block_idx); + try self.appendError(.initNodeError(.incorrect_elaboration_marker, block_idx)); break :blk block_idx; } else blk: { - const block_idx = self.appendAssumeCapacityContainerNode(parent_idx, .elaboration, line.ptr); - try self.appendNodeError(.elaboration_after_unelaboratable_node, block_idx); + const block_idx = 
self.appendAssumeCapacityChildNode(parent_idx, self.makeContainerNode(.elaboration, line.ptr)); + try self.appendError(.initNodeError(.elaboration_after_unelaboratable_node, block_idx)); break :blk block_idx; }; - _ = self.appendAssumeCapacityLeafNode(block_idx, .marker, line.ptr, marker_len); + _ = self.appendAssumeCapacityChildNode(block_idx, self.makeLeafNode(.marker, line.ptr, marker_len)); try self.parseColumn(block_idx, .{ .skip = line_start + marker_len }); break :block_idx block_idx; }, // Par-like single markers - inline ';' => |m| { - if (TRACY_INSTRUMENT_HERE) tracy_frame.setName(@tagName(@as(Node.Tag, @enumFromInt(m)))); - const block_idx = try self.appendContainerNode(parent_idx, @enumFromInt(m), line.ptr); + ';' => { + if (TRACY_INSTRUMENT_HERE) tracy_frame.setName("paragraph"); + const block_idx = try self.appendChildNode(parent_idx, self.makeContainerNode(.paragraph, line.ptr)); last_elaboratable_idx = null; try self.parseInlineColumn(block_idx, .{ .skip = line_start + 1 }); break :block_idx block_idx; }, // Div-like single markers - inline '>' => |m| { - if (TRACY_INSTRUMENT_HERE) tracy_frame.setName(@tagName(@as(Node.Tag, @enumFromInt(m)))); - const block_idx = try self.appendContainerNode(parent_idx, @enumFromInt(m), line.ptr); + '>' => { + if (TRACY_INSTRUMENT_HERE) tracy_frame.setName("quote"); + const block_idx = try self.appendChildNode(parent_idx, self.makeContainerNode(.quote, line.ptr)); last_elaboratable_idx = null; try self.parseColumn(block_idx, .{ .skip = line_start + 1 }); break :block_idx block_idx; @@ -550,7 +418,7 @@ fn parseColumn( self.scanner.advance(); if (TRACY_INSTRUMENT_HERE) tracy_frame.setName(@tagName(.thematic_break)); last_elaboratable_idx = null; - break :block_idx try self.appendLeafNode(parent_idx, .thematic_break, line.ptr, 3); + break :block_idx try self.appendChildNode(parent_idx, self.makeLeafNode(.thematic_break, line.ptr, 3)); } } }, @@ -562,7 +430,7 @@ fn parseColumn( { if (TRACY_INSTRUMENT_HERE) tracy_frame.setName(@tagName(.paragraph)); try self.ensureUnusedCapacityNode(3); - const paragraph_idx = self.appendAssumeCapacityContainerNode(parent_idx, .paragraph, line.ptr); + const paragraph_idx = self.appendAssumeCapacityChildNode(parent_idx, self.makeContainerNode(.paragraph, line.ptr)); last_elaboratable_idx = null; try self.insertTextLine(.text, .text, paragraph_idx, line); self.scanner.advance(); @@ -594,7 +462,7 @@ fn parseColumn( } if (alignment2 == .misaligned) - try self.appendPointError(.inconsistent_indentation, self.nextNodeIdx(), line2.ptr); + try self.appendError(self.makePointError(.inconsistent_indentation, self.nextNodeIdx(), line2.ptr)); try self.insertTextLine(.space_text, .text, paragraph_idx, line2); self.scanner.advance(); } @@ -604,31 +472,41 @@ fn parseColumn( }; if (alignment == .misaligned or overaligned) { - try self.appendPointError(.inconsistent_indentation, block_idx, raw_line.ptr); + try self.appendError(self.makePointError(.inconsistent_indentation, block_idx, raw_line.ptr)); } } } +const TextTag = enum { text, space_text }; fn insertTextLine( self: *AstGen, - first_text_tag: Node.Tag, - rest_text_tag: Node.Tag, + first_text_tag_: TextTag, + rest_text_tag_: TextTag, parent_idx: Node.Idx, line_: PaddedSlice, ) !void { + const first_text_tag: Node.Tag = switch (first_text_tag_) { + .text => .text, + .space_text => .space_text, + }; + const rest_text_tag: Node.Tag = switch (rest_text_tag_) { + .text => .text, + .space_text => .space_text, + }; + var line = line_; if (line.len <= std.math.maxInt(StrLen)) { - 
_ = try self.appendLeafNode(parent_idx, first_text_tag, line.ptr, @intCast(line.len)); + _ = try self.appendChildNode(parent_idx, self.makeLeafNodeRuntime(first_text_tag, line.ptr, @intCast(line.len)).?); } else { @branchHint(.cold); { const consumed_len = @min(line.len, std.math.maxInt(StrLen)); - _ = try self.appendLeafNode(parent_idx, first_text_tag, line.ptr, @intCast(consumed_len)); + _ = try self.appendChildNode(parent_idx, self.makeLeafNodeRuntime(first_text_tag, line.ptr, @intCast(consumed_len)).?); line = line.sliceOpen(consumed_len); } while (line.len > 0) { const consumed_len = @min(line.len, std.math.maxInt(StrLen)); - _ = try self.appendLeafNode(parent_idx, rest_text_tag, line.ptr, @intCast(consumed_len)); + _ = try self.appendChildNode(parent_idx, self.makeLeafNodeRuntime(rest_text_tag, line.ptr, @intCast(consumed_len)).?); line = line.sliceOpen(consumed_len); } } diff --git a/src/padded_str_impl.zig b/src/padded_str_impl.zig @@ -1,5 +1,6 @@ const std = @import("std"); const assert = std.debug.assert; +const panic = std.debug.panic; /// A slice pointer with a "many sentinel": /// not only must there be a newline at the end, @@ -95,8 +96,8 @@ pub fn PaddedSlice(comptime PADDING_: usize) type { return self.ptr.indexOfAny(charset); } - pub fn indexOfNotChar(self: @This(), comptime char: u8) usize { - return self.ptr.indexOfNone(&.{char}); + pub fn indexOfNotCharUnsafe(self: @This(), char: u8) usize { + return self.ptr.indexOfNotCharUnsafe(char); } }; } @@ -222,6 +223,17 @@ pub fn PaddedMany(comptime PADDING_: usize) type { } } + /// Non-SIMD implementation here because all current callers expect this to be a short string. + /// Must not be used with newline as that can be unsafe. Use std.mem.indexOfNone for that. + pub fn indexOfNotCharUnsafe(ptr: @This(), c: u8) usize { + if (c == '\n') panic("Unsafe use of indexOfNotCharUnsafe, {c} must not be newline", .{c}); + var it = ptr._ptr; + while (true) { + if (it[0] != c) return it - ptr._ptr; + it += 1; + } + } + /// charset should be as small as possible -- one vector `cmpeq` and `or` /// is emitted for each char in the charset. if you want range queries, /// copy paste this and write your own custom impl. 
@@ -272,16 +284,12 @@ pub fn PaddedMany(comptime PADDING_: usize) type { } } - pub fn indexOfNotChar(ptr: @This(), comptime char: u8) usize { - return ptr.indexOfNone(&.{char}); - } - pub fn indexOfNotSpaceOrTab(ptr: @This()) usize { return ptr.indexOfNone(" \t"); } pub fn indexOfNotSpace(ptr: @This()) usize { - return ptr.indexOfNotChar(' '); + return ptr.indexOfNotCharUnsafe(' '); } pub fn indexOfNewline(ptr: @This()) usize { diff --git a/src/test.zig b/src/test.zig @@ -159,20 +159,29 @@ test "Super long line" { var arena: ArenaAllocator = .init(std.testing.allocator); defer arena.deinit(); const ast = try parse(std.testing.allocator, arena.allocator(), input); - const taggedAst = try ast.toTagged(arena.allocator()); - try std.testing.expectEqualDeep(@as(Ast.Tagged, .{ - .nodes = &.{ - .{ .document = .{ .num_children = 1 } }, - .{ .paragraph = .{ .off = 0, .num_children = 5 } }, - .{ .text = .{ .off = 0, .len = 16777215 } }, - .{ .text = .{ .off = 16777215, .len = 16777215 } }, - .{ .text = .{ .off = 33554430, .len = 16777215 } }, - .{ .text = .{ .off = 50331645, .len = 16777215 } }, - .{ .text = .{ .off = 67108860, .len = 4 } }, - }, - .errors = &.{}, - .extra = &.{}, - }), taggedAst); + try std.testing.expectEqual(7, ast.nodes.len); + try std.testing.expectEqual(0, ast.errors.len); + try std.testing.expectEqual(.document, ast.nodes[0].tag); + try std.testing.expectEqual(0, ast.nodes[0].off); + try std.testing.expectEqual(1, ast.nodes[0].numChildren()); + try std.testing.expectEqual(.paragraph, ast.nodes[1].tag); + try std.testing.expectEqual(0, ast.nodes[1].off); + try std.testing.expectEqual(5, ast.nodes[1].numChildren()); + try std.testing.expectEqual(.text, ast.nodes[2].tag); + try std.testing.expectEqual(0, ast.nodes[2].off); + try std.testing.expectEqual(16777215, ast.nodes[2].len()); + try std.testing.expectEqual(.text, ast.nodes[3].tag); + try std.testing.expectEqual(16777215, ast.nodes[3].off); + try std.testing.expectEqual(16777215, ast.nodes[3].len()); + try std.testing.expectEqual(.text, ast.nodes[4].tag); + try std.testing.expectEqual(33554430, ast.nodes[4].off); + try std.testing.expectEqual(16777215, ast.nodes[4].len()); + try std.testing.expectEqual(.text, ast.nodes[5].tag); + try std.testing.expectEqual(50331645, ast.nodes[5].off); + try std.testing.expectEqual(16777215, ast.nodes[5].len()); + try std.testing.expectEqual(.text, ast.nodes[6].tag); + try std.testing.expectEqual(67108860, ast.nodes[6].off); + try std.testing.expectEqual(4, ast.nodes[6].len()); } test "Many short lines" { @@ -186,22 +195,19 @@ test "Many short lines" { const ast = try parse(std.testing.allocator, arena.allocator(), input); try std.testing.expectEqual(1 << 24, ast.nodes.len); - try std.testing.expectEqual( - @as(Ast.Node.Tagged, .{ .document = .{ .num_children = 1 } }), - ast.nodes[0].toTagged(), - ); - try std.testing.expectEqual( - @as(Ast.Node.Tagged, .{ .paragraph = .{ .off = 0, .num_children = (1 << 24) - 2 } }), - ast.nodes[1].toTagged(), - ); - try std.testing.expectEqual( - @as(Ast.Node.Tagged, .{ .text = .{ .off = 0, .len = 1 } }), - ast.nodes[2].toTagged(), - ); + try std.testing.expectEqual(0, ast.errors.len); + try std.testing.expectEqual(.document, ast.nodes[0].tag); + try std.testing.expectEqual(0, ast.nodes[0].off); + try std.testing.expectEqual(1, ast.nodes[0].numChildren()); + try std.testing.expectEqual(.paragraph, ast.nodes[1].tag); + try std.testing.expectEqual(0, ast.nodes[1].off); + try std.testing.expectEqual((1 << 24) - 2, ast.nodes[1].numChildren()); + try 
std.testing.expectEqual(.text, ast.nodes[2].tag); + try std.testing.expectEqual(0, ast.nodes[2].off); + try std.testing.expectEqual(1, ast.nodes[2].len()); for (1..(1 << 24) - 2) |i| { - try std.testing.expectEqual( - @as(Ast.Node.Tagged, .{ .space_text = .{ .off = @intCast(i * 2), .len = 1 } }), - ast.nodes[i + 2].toTagged(), - ); + try std.testing.expectEqual(.space_text, ast.nodes[i + 2].tag); + try std.testing.expectEqual(i * 2, ast.nodes[i + 2].off); + try std.testing.expectEqual(1, ast.nodes[i + 2].len()); } } diff --git a/src/utils.zig b/src/utils.zig @@ -6,7 +6,7 @@ pub fn NewType(comptime IntType_: type, comptime DummyType: type) type { _, const Self = @This(); - pub const IntType = IntType_; + pub const Int = IntType_; const _DummyType = DummyType; pub fn next(self: @This()) ?@This() { @@ -20,139 +20,3 @@ pub fn NewType(comptime IntType_: type, comptime DummyType: type) type { } }; } - -pub fn Packed(comptime Tagged_: type) type { - return packed struct { - data: Data, - tag: Tag, - - const Self = @This(); - pub const Tagged = Tagged_; - pub const Tag = @typeInfo(Tagged_).@"union".tag_type.?; - pub const Data = @Type(.{ .@"union" = .{ - .layout = .@"packed", - .tag_type = null, - .fields = @typeInfo(Tagged_).@"union".fields, - .decls = &.{}, - } }); - - pub inline fn fromTagged(tagged: Tagged_) Self { - switch (tagged) { - inline else => |data, tag| return .{ - .tag = tagged, - .data = @unionInit( - Data, - @tagName(tag), - data, - ), - }, - } - } - - pub inline fn toTagged(self: Self) Tagged_ { - switch (self.tag) { - inline else => |t| return @unionInit( - Tagged_, - @tagName(t), - @field(self.data, @tagName(t)), - ), - } - } - - fn GetType(field: anytype) type { - return @FieldType(@typeInfo(Tagged_).@"union".fields[0].type, @tagName(field)); - } - - pub fn get(self: Self, field: anytype) GetType(field) { - switch (self.tag) { - inline else => |t| { - return @field( - @field( - self.data, - @tagName(t), - ), - @tagName(field), - ); - }, - } - } - - /// May not exist, but we can define it anyway thanks to lazy decl analysis. - pub const Idx = Tagged_.Idx; - /// May not exist, but we can define it anyway thanks to lazy decl analysis. - pub const HeadingLevel = Tagged_.HeadingLevel; - /// May not exist, but we can define it anyway thanks to lazy decl analysis. - pub const Leaf = Tagged_.Leaf; - // /// May not exist, but we can define it anyway thanks to lazy decl analysis. - // pub const format = Tagged_.format; - /// May not exist, but we can define it anyway thanks to lazy decl analysis. 
- pub const incrementNumChildren = Tagged_.incrementNumChildren; - - pub fn format(self: @This(), comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - try std.fmt.formatType(self.toTagged(), fmt, options, writer); - } - }; -} - -fn UnionFormat(comptime T: type) type { - return struct { - pub fn format(self: T, comptime _: []const u8, _: anytype, writer: anytype) !void { - const info = @typeInfo(T).@"union"; - if (info.tag_type) |UnionTagType| { - try writer.writeAll(".{ ."); - try writer.writeAll(@tagName(@as(UnionTagType, self))); - try writer.writeAll(" = "); - inline for (info.fields) |u_field| { - if (self == @field(UnionTagType, u_field.name)) { - try writer.print("{}", .{@field(self, u_field.name)}); - } - } - try writer.writeAll(" }"); - } else { - try writer.print("@{x}", .{@intFromPtr(&self)}); - } - } - }; -} - -pub fn unionFormat(comptime T: type) @TypeOf(UnionFormat(T).format) { - return UnionFormat(T).format; -} - -fn StructFormat(comptime T: type) type { - return struct { - pub fn format(value: T, comptime actual_fmt: []const u8, _: anytype, writer: anytype) !void { - const info = @typeInfo(T).@"struct"; - if (actual_fmt.len != 0) std.fmt.invalidFmtError(actual_fmt, value); - if (info.is_tuple) { - // Skip the type and field names when formatting tuples. - try writer.writeAll(".{"); - inline for (info.fields, 0..) |f, i| { - if (i == 0) { - try writer.writeAll(" "); - } else { - try writer.writeAll(", "); - } - try writer.print("{}", .{@field(value, f.name)}); - } - return writer.writeAll(" }"); - } - try writer.writeAll(".{"); - inline for (info.fields, 0..) |f, i| { - if (i == 0) { - try writer.writeAll(" ."); - } else { - try writer.writeAll(", ."); - } - try writer.writeAll(f.name); - try writer.writeAll(" = "); - try writer.print("{}", .{@field(value, f.name)}); - } - try writer.writeAll(" }"); - } - }; -} - -pub fn structFormat(comptime T: type) @TypeOf(StructFormat(T).format) { - return StructFormat(T).format; -}