mymarkdown

My markdown
git clone https://git.grace.moe/mymarkdown
Log | Files | Refs

commit 7145b5826c7eca7b203a663b1cada149f2a44891
parent 200c1f7c82ca4056761cd0d0ed2e368eec504501
Author: gracefu <81774659+gracefuu@users.noreply.github.com>
Date:   Thu, 22 May 2025 14:05:31 +0800

Remove old AstGen

Diffstat:
Dsrc/AstGen.zig | 616-------------------------------------------------------------------------------
Dsrc/AstGen2.zig | 848-------------------------------------------------------------------------------
Msrc/main.zig | 143++++++++++++-------------------------------------------------------------------
Msrc/root.zig | 4----
4 files changed, 21 insertions(+), 1590 deletions(-)

diff --git a/src/AstGen.zig b/src/AstGen.zig @@ -1,616 +0,0 @@ -const std = @import("std"); -const assert = std.debug.assert; - -const tracy = @import("tracy"); - -const Ast = @import("Ast.zig"); -const Node = Ast.Node; -const Error = Ast.Error; -const str = @import("str.zig"); -const utils = @import("utils.zig"); - -const AstGen = @This(); - -input_base: [*]u8, -nodes: std.ArrayListUnmanaged(Node), -errors: std.ArrayListUnmanaged(Error), -extra: std.ArrayListUnmanaged(u32), - -fn getNode(self: AstGen, idx: Node.Idx) *Node { - @setRuntimeSafety(true); - return &self.nodes.items[@intFromEnum(idx)]; -} -fn lastNodeIdx(self: AstGen) Node.Idx { - @setRuntimeSafety(true); - return @enumFromInt(self.nodes.items.len - 1); -} -fn nextNodeIdx(self: AstGen) Node.Idx { - @setRuntimeSafety(true); - return @enumFromInt(self.nodes.items.len); -} -fn appendNode(self: *AstGen, gpa: std.mem.Allocator, node: Node.Tagged) !Node.Idx { - if (self.nodes.items.len > std.math.maxInt( - @typeInfo(Node.Idx).@"enum".tag_type, - )) return error.OutOfNodeIdx; - const idx = self.nodes.items.len; - try self.nodes.append(gpa, .fromTagged(node)); - return @enumFromInt(idx); -} - -pub fn deinit(self: *AstGen, gpa: std.mem.Allocator) void { - self.nodes.deinit(gpa); - self.errors.deinit(gpa); - self.extra.deinit(gpa); -} - -/// Parses mymarkdown -/// -/// gpa: A suitable allocator for scratch allocations. -/// output_gpa: If passed, no scratch allocations will outlive this function, -/// and any allocations returned will be allocated on this. -/// input: The input slice to be parsed. -pub fn parse( - gpa: std.mem.Allocator, - output_gpa: ?std.mem.Allocator, - input: []const u8, -) error{ - InputTooLarge, // When the input length exceeds 2^32 bytes - MarkerTooLong, // When the input contains a marker that exceeds 2^24 bytes - OutOfNodeIdx, // When there are more than 2^24 nodes created during parsing - OutOfMemory, // When allocation fails - Todo, // When I'm too lazy -}!Ast { - const tracy_frame = tracy.trace(@src()); - defer tracy_frame.end(); - - if (input.len > std.math.maxInt(u32)) { - return error.InputTooLarge; - } - - const tracy_frame2 = tracy.traceNamed(@src(), "Allocate input copy"); - // const input_copy = try gpa.dupe(u8, input); - // defer gpa.free(input_copy); - var input_copy: std.ArrayListUnmanaged(u8) = .empty; - defer input_copy.deinit(gpa); - try input_copy.ensureTotalCapacityPrecise(gpa, input.len + 1); - tracy_frame2.end(); - var ast: AstGen = .{ - .input_base = input_copy.items.ptr, - .nodes = .empty, - .errors = .empty, - .extra = .empty, - }; - defer ast.deinit(gpa); - const root = try ast.appendNode(gpa, .{ .document = .{} }); - - var lines: std.ArrayListUnmanaged([]u8) = .empty; - defer lines.deinit(gpa); - - const tracy_frame3 = tracy.traceNamed(@src(), "Split into lines and copy"); - var lines_it = std.mem.splitScalar(u8, input, '\n'); - var maybe_line: ?[]u8 = @constCast(lines_it.first()); - var off: usize = 0; - while (maybe_line) |line| : (maybe_line = @constCast(lines_it.next())) { - input_copy.appendSliceAssumeCapacity(line); - input_copy.appendAssumeCapacity('\n'); - if (str.lastIndexOfNone(line, " \t\r\n")) |idx| { - try lines.append(gpa, input_copy.items[off .. off + idx + 1]); - } else { - try lines.append(gpa, input_copy.items[off..off]); - } - off += line.len + 1; - } - tracy_frame3.end(); - - // stripTrailingWhitespace(&lines.items); - - try ast.parseColumn(gpa, lines.items, root); - - // std.time.sleep(std.time.ns_per_hour); - - std.sort.pdq(Error, ast.errors.items, {}, struct { - fn func(_: void, lhs: Error, rhs: Error) bool { - return @intFromEnum(lhs.get(.idx)) < @intFromEnum(rhs.get(.idx)); - } - }.func); - - if (output_gpa) |gpa2| { - return .{ - .nodes = try gpa2.dupe(Node, ast.nodes.items), - .errors = try gpa2.dupe(Error, ast.errors.items), - .extra = try gpa2.dupe(u32, ast.extra.items), - }; - } else { - return .{ - .nodes = try ast.nodes.toOwnedSlice(gpa), - .errors = try ast.errors.toOwnedSlice(gpa), - .extra = try ast.extra.toOwnedSlice(gpa), - }; - } -} - -fn stripTrailingWhitespace(lines: *[][]u8) void { - const tracy_frame = tracy.trace(@src()); - defer tracy_frame.end(); - for (lines.*) |*line| { - if (str.lastIndexOfNone(line.*, " \t\r\n")) |idx| { - line.* = line.*[0 .. idx + 1]; - } else { - line.* = line.*[0..0]; - } - } -} - -fn calcOffset(self: *AstGen, c: *u8) u32 { - return @intCast(c - self.input_base); -} - -fn findIndentedColumn(self: *AstGen, gpa: std.mem.Allocator, lines_: [][]u8, node_idx: Node.Idx) ![][]u8 { - const tracy_frame = tracy.trace(@src()); - defer tracy_frame.end(); - var lines = lines_; - - // empty lines at the start of the inline block are fine, just skip these - // special case: the first line consist of only whitespace - // because they may have been introduced via marker replacement - if (lines.len > 0 and str.indexOfNone(lines[0], " \t\r\n") == null) lines = lines[1..]; - while (true) : (lines = lines[1..]) { - if (lines.len == 0) return &.{}; - if (lines[0].len != 0) break; - } - - // determine indentation - const indentation_idx = str.indexOfNone(lines[0], " \t\r\n") orelse unreachable; - if (indentation_idx == 0) return &.{}; - - const indentation = lines[0][0..indentation_idx]; - - // strip all lines of their indentation - lines[0] = lines[0][indentation.len..]; - for (lines[1..]) |*line| { - if (line.len == 0) continue; - - const diff_idx = std.mem.indexOfDiff(u8, line.*, indentation) orelse unreachable; - assert(diff_idx != line.len); - if (diff_idx != indentation.len) { - // Recover by stripping all whitespace on this line - const recover_indentation_idx = std.mem.indexOfNone(u8, line.*, " \t\r\n") orelse unreachable; - try self.errors.append(gpa, .fromTagged(.{ - .inconsistent_indentation = .{ .idx = node_idx, .off = self.calcOffset(&line.*[diff_idx]) }, - })); - line.* = line.*[recover_indentation_idx..]; - } else { - line.* = line.*[indentation.len..]; - } - } - - return lines; -} - -fn parseInlineBlock(self: *AstGen, gpa: std.mem.Allocator, lines_: [][]u8, parent_idx: Node.Idx) !void { - const tracy_frame = tracy.trace(@src()); - defer tracy_frame.end(); - var lines = lines_; - var saw_empty_line: bool = false; - - outer: { - // empty lines at the start of the inline block are fine, just skip these - // special case: the first line consist of only whitespace - // because they may have been introduced via marker replacement - if (lines.len > 0 and str.indexOfNone(lines[0], " \t\r\n") == null) lines = lines[1..]; - while (true) : (lines = lines[1..]) { - if (lines.len == 0) break :outer; - if (lines[0].len != 0) break; - } - - self.getNode(parent_idx).incrementNumChildren(); - - if (lines[0].len <= std.math.maxInt(Ast.StrLen)) { - _ = try self.appendNode(gpa, .{ - .text = .{ - .off = self.calcOffset(&lines[0][0]), - .len = @intCast(lines[0].len), - }, - }); - } else { - @branchHint(.cold); - var line = lines[0]; - while (line.len > 0) { - const len = @min(line.len, std.math.maxInt(Ast.StrLen)); - _ = try self.appendNode(gpa, .{ - .text = .{ - .off = self.calcOffset(&line[0]), - .len = @intCast(len), - }, - }); - line = line[len..]; - } - } - lines = lines[1..]; - - while (true) { - // Skip and error on empty lines - while (true) : (lines = lines[1..]) { - if (lines.len == 0) break :outer; - if (lines[0].len != 0) break; - // empty line detected - saw_empty_line = true; - } - - if (saw_empty_line) { - saw_empty_line = false; - try self.errors.append(gpa, .fromTagged(.{ - .unexpected_block_in_inline_context = .{ .idx = self.nextNodeIdx() }, - })); - } - - self.getNode(parent_idx).incrementNumChildren(); - - if (lines[0].len <= std.math.maxInt(Ast.StrLen)) { - _ = try self.appendNode(gpa, .{ - .space_text = .{ - .off = self.calcOffset(&lines[0][0]), - .len = @intCast(lines[0].len), - }, - }); - } else { - @branchHint(.cold); - var line = lines[0]; - _ = try self.appendNode(gpa, .{ - .space_text = .{ - .off = self.calcOffset(&line[0]), - .len = @intCast(std.math.maxInt(Ast.StrLen)), - }, - }); - line = line[std.math.maxInt(Ast.StrLen)..]; - while (line.len > 0) { - const len = @min(line.len, std.math.maxInt(Ast.StrLen)); - _ = try self.appendNode(gpa, .{ - .text = .{ - .off = self.calcOffset(&line[0]), - .len = @intCast(len), - }, - }); - line = line[len..]; - } - } - lines = lines[1..]; - } - } -} - -fn parseColumn(self: *AstGen, gpa: std.mem.Allocator, lines_: [][]u8, parent_idx: Node.Idx) !void { - const tracy_frame = tracy.trace(@src()); - defer tracy_frame.end(); - var lines = lines_; - outer: while (true) { - // Skip empty lines - // special case: the first line consist of only whitespace - // because they may have been introduced via marker replacement - { - const tracy_frame_skip = tracy.traceNamed(@src(), "skip empty lines"); - defer tracy_frame_skip.end(); - if (lines.len > 0 and str.indexOfNone(lines[0], " \t\r\n") == null) lines = lines[1..]; - while (true) : (lines = lines[1..]) { - if (lines.len == 0) break :outer; - if (lines[0].len != 0) break; - } - } - - // Use first character to determine marker - const mode, const child = try self.parseBlockStart(gpa, lines[0]); - - self.getNode(parent_idx).incrementNumChildren(); - - switch (mode) { - .paragraph => { - // take indented or non-block-marker lines - var num_lines: usize = 1; - for (lines[1..]) |line| { - if (line.len == 0) break; - if (line[0] == '*') { - if (std.mem.eql(u8, line, "***")) break; - } else if (block_specs[line[0]] != null) break; - num_lines += 1; - } - - var paragraph_lines = lines[0..num_lines]; - if (lines[0][0] == ' ' or lines[0][0] == '\t') { - try self.errors.append(gpa, .fromTagged(.{ - .inconsistent_indentation = .{ - .idx = self.lastNodeIdx(), - .off = self.calcOffset(&lines[0][0]), - }, - })); - paragraph_lines = try self.findIndentedColumn(gpa, paragraph_lines, child); - } - lines = lines[num_lines..]; - try self.parseInlineBlock(gpa, paragraph_lines, child); - }, - .indented_inline_block => { - // take indented or empty lines - var num_lines: usize = 1; - for (lines[1..]) |line| { - if (line.len != 0 and str.isNoneOf(line[0], " \t\r\n")) break; - num_lines += 1; - } - - const inline_block_lines = try self.findIndentedColumn(gpa, lines[0..num_lines], child); - lines = lines[num_lines..]; - try self.parseInlineBlock(gpa, inline_block_lines, child); - }, - .indented_column => { - // take indented or empty lines - var num_lines: usize = 1; - for (lines[1..]) |line| { - if (line.len != 0 and std.mem.indexOfScalar(u8, " \t\r\n", line[0]) == null) break; - num_lines += 1; - } - - const column_lines = try self.findIndentedColumn(gpa, lines[0..num_lines], child); - lines = lines[num_lines..]; - try self.parseColumn(gpa, column_lines, child); - }, - .no_children => { - lines = lines[1..]; - }, - else => return error.Todo, - } - } -} - -const ParseMode = union(enum) { - indented_column, - indented_inline_block, - paragraph, - raw: struct { fence: []u8 }, - no_children, -}; - -const MarkerSpec = union(enum) { - paragraph, - exact: []const u8, - starts_with: []const u8, - starts_with_multi: struct { - marker_char: u8, - extra: []const []const u8 = &.{""}, // any extra characters to check after the marker - max_chars: ?u32 = null, - }, -}; -const BlockSpecCase = struct { - tag: Node.Tag, - marker: MarkerSpec, - mode: ParseMode, - store_marker_child: enum { store, no_store }, -}; - -const BlockSpec = ?[]const BlockSpecCase; - -fn blockSpecs(comptime spec: type) [256]BlockSpec { - var arr: [256]BlockSpec = undefined; - for (0..256) |c| arr[c] = null; - for (@typeInfo(spec).@"struct".decls) |decl| { - const c = decl.name[0]; - arr[c] = @field(spec, decl.name); - } - return arr; -} - -const block_specs = blockSpecs(struct { - pub const @"*": BlockSpec = &.{ - .{ - .tag = .thematic_break, - .marker = .{ .exact = "***" }, - .mode = .no_children, - .store_marker_child = .no_store, - }, - .{ - .tag = .paragraph, - .marker = .paragraph, - .mode = .paragraph, - .store_marker_child = .no_store, - }, - }; - pub const @"#": BlockSpec = &.{ - .{ - .tag = .heading, - .marker = .{ .starts_with_multi = .{ .marker_char = '#', .max_chars = 6 } }, - .mode = .indented_inline_block, - .store_marker_child = .store, - }, - }; - pub const @"-": BlockSpec = &.{ - .{ - .tag = .task_item, - .marker = .{ .starts_with_multi = .{ .marker_char = '-', .extra = &.{ " [ ]", " [x]", " [X]" } } }, - .mode = .indented_inline_block, - .store_marker_child = .store, - }, - .{ - .tag = .unordered_item, - .marker = .{ .starts_with_multi = .{ .marker_char = '-' } }, - .mode = .indented_inline_block, - .store_marker_child = .store, - }, - }; - pub const @".": BlockSpec = &.{ - .{ - .tag = .ordered_item, - .marker = .{ .starts_with_multi = .{ .marker_char = '.' } }, - .mode = .indented_inline_block, - .store_marker_child = .store, - }, - }; - pub const @":": BlockSpec = &.{ - .{ - .tag = .term_item, - .marker = .{ .starts_with_multi = .{ .marker_char = ':' } }, - .mode = .indented_inline_block, - .store_marker_child = .store, - }, - }; - pub const @">": BlockSpec = &.{ - .{ - .tag = .quote, - .marker = .{ .starts_with = ">" }, - .mode = .indented_column, - .store_marker_child = .no_store, - }, - }; - pub const @"+": BlockSpec = &.{ - .{ - .tag = .elaboration, - .marker = .{ .starts_with_multi = .{ .marker_char = '+' } }, - .mode = .indented_column, - .store_marker_child = .store, - }, - }; - pub const @";": BlockSpec = &.{ - .{ - .tag = .paragraph, - .marker = .{ .starts_with = ";" }, - .mode = .indented_inline_block, - .store_marker_child = .no_store, - }, - }; -}); - -/// Appends the suitable block node to the ast, -/// then returns how parsing should proceed for the children of this block. -/// Also returns the idx of the container node created. -fn parseBlockStart(self: *AstGen, gpa: std.mem.Allocator, line: []u8) !struct { ParseMode, Node.Idx } { - const tracy_frame = tracy.trace(@src()); - defer tracy_frame.end(); - if (block_specs[line[0]] == null) { - return .{ - .paragraph, - try self.appendNode(gpa, .{ - .paragraph = .{ - .off = self.calcOffset(&line[0]), - }, - }), - }; - } - - // Inline switch by starting character so codegen proceeds as if each blockspec was converted to code then concatenated. - // Note that we separately handle the null case above, then make the inline case below `unreachable`. - // That makes it so that we don't have 240+ branches that all just do exactly the same thing. - // - // Regardless, the blockspec must be comptime known (the inline for is mandatory) because we do @unionInit with case.tag. - switch (line[0]) { - inline else => |c| { - assert(block_specs[c] != null); - inline for (block_specs[c] orelse unreachable) |case| { - switch (case.marker) { - .exact, .starts_with => |marker| { - if (std.mem.startsWith(u8, line, marker)) { - const node = if (case.mode == .no_children) - try self.appendNode(gpa, @unionInit( - Node.Tagged, - @tagName(case.tag), - @as(Node.Tagged.Leaf, .{ - .off = self.calcOffset(&line[0]), - .len = marker.len, - }), - )) - else - try self.appendNode(gpa, @unionInit( - Node.Tagged, - @tagName(case.tag), - @as(Node.Tagged.Container, .{ - .off = self.calcOffset(&line[0]), - .num_children = if (case.store_marker_child == .store) 1 else 0, - }), - )); - @memset(line[0..marker.len], ' '); - if (case.store_marker_child == .store) { - _ = try self.appendNode(gpa, .{ .marker = .{ - .off = self.calcOffset(&line[0]), - .len = case.marker.len, - } }); - } - return .{ case.mode, node }; - } - }, - .starts_with_multi => |marker_spec| { - var marker_len = str.indexOfNotChar(line, marker_spec.marker_char) orelse line.len; - - inline for (marker_spec.extra) |extra| { - if (std.mem.startsWith(u8, line[marker_len..], extra)) { - marker_len += extra.len; - - const node = if (case.mode == .no_children) - try self.appendNode(gpa, @unionInit( - Node.Tagged, - @tagName(case.tag), - @as(Node.Tagged.Leaf, .{ - .off = self.calcOffset(&line[0]), - .len = marker_len, - }), - )) - else - try self.appendNode(gpa, @unionInit( - Node.Tagged, - @tagName(case.tag), - @as(Node.Tagged.Container, .{ - .off = self.calcOffset(&line[0]), - .num_children = if (case.store_marker_child == .store) 1 else 0, - }), - )); - - if (marker_spec.max_chars) |max| - if (marker_len > max) - try self.errors.append(gpa, .fromTagged(.{ - .marker_too_long = .{ - .idx = if (case.store_marker_child == .no_store) - self.lastNodeIdx() - else - self.nextNodeIdx(), - }, - })); - - @memset(line[0..marker_len], ' '); - if (marker_len > std.math.maxInt(Ast.StrLen)) - return error.MarkerTooLong; - - if (case.store_marker_child == .store) { - _ = try self.appendNode(gpa, .{ .marker = .{ - .off = self.calcOffset(&line[0]), - .len = @intCast(marker_len), - } }); - } - return .{ case.mode, node }; - } - } - }, - .paragraph => return .{ - .paragraph, - try self.appendNode(gpa, .{ - .paragraph = .{ - .off = self.calcOffset(&line[0]), - }, - }), - }, - } - } - }, - } - - // Line started with a special character, but it didn't match any markers - // Fallback to paragraph, but place a warning. - try self.errors.append(gpa, .fromTagged(.{ - .invalid_marker = .{ - .idx = self.nextNodeIdx(), - .off = self.calcOffset(&line[0]), - }, - })); - - return .{ - .paragraph, - try self.appendNode(gpa, .{ - .paragraph = .{ - .off = self.calcOffset(&line[0]), - }, - }), - }; -} diff --git a/src/AstGen2.zig b/src/AstGen2.zig @@ -1,848 +0,0 @@ -const std = @import("std"); -const assert = std.debug.assert; - -const tracy = @import("tracy"); - -const Ast = @import("Ast.zig"); -const StrOffset = Ast.StrOffset; -const StrLen = Ast.StrLen; -const Node = Ast.Node; -const Error = Ast.Error; -const str = @import("str.zig"); -const utils = @import("utils.zig"); - -const AstGen = @This(); - -gpa: std.mem.Allocator, -output_gpa: std.mem.Allocator, -output_gpa_same_as_gpa: bool, -input: []const u8, -cursor: []const u8, // suffix of input -indentation: [std.math.maxInt(Column)]u8, -nodes: std.ArrayListUnmanaged(Node), -errors: std.ArrayListUnmanaged(Error), -extra: std.ArrayListUnmanaged(u32), - -const Column = u10; - -fn cursorOffset(self: *AstGen) StrOffset { - return @intCast(self.cursor.ptr - self.input.ptr); -} -fn advanceCursor(self: *AstGen, advance: usize) void { - // NOTE: `advance` should really be u32, but this makes it easier to work with other str functions. - self.cursor = self.cursor[advance..]; -} -fn findMarkerEnd(self: AstGen, m: u8) error{IndentationTooLong}!Column { - // NOTE: null is impossible because input is guaranteed to end in newlines. - const idx = str.indexOfNotChar(self.cursor, m) orelse unreachable; - // Explicitly check for marker length because malicious input is possible - if (idx > std.math.maxInt(Column)) - return error.IndentationTooLong; - return @intCast(idx); -} - -fn getNode(self: AstGen, idx: Node.Idx) *Node { - @setRuntimeSafety(true); - return &self.nodes.items[@intFromEnum(idx)]; -} -fn lastNodeIdx(self: AstGen) Node.Idx { - @setRuntimeSafety(true); - return @enumFromInt(self.nodes.items.len - 1); -} -fn nextNodeIdx(self: AstGen) Node.Idx { - @setRuntimeSafety(true); - return @enumFromInt(self.nodes.items.len); -} - -// These need manual inlining for some reason. -// -// LLVM doesn't seem to think that inlining these are worth it, but LLVM is wrong. -// Because this constructs the Node using .fromTagged, and Node.Tagged and Node -// have different memory representations, the only way to construct in place and -// elide copies is for every function from this one to the callsite to be inlined. -// -// The same applies for Error / Error.Tagged below, but the impact is less severe -// as appending Errors is a much less common operation. Nevertheless, we inline it -// despite not having any data to back it up, because I have vibes that it should be faster. -inline fn appendNode(self: *AstGen, node: Node.Tagged) !Node.Idx { - if (self.nodes.items.len > std.math.maxInt( - @typeInfo(Node.Idx).@"enum".tag_type, - )) return error.OutOfNodeIdx; - const idx = self.nodes.items.len; - try self.nodes.append(self.gpa, .fromTagged(node)); - return @enumFromInt(idx); -} -inline fn appendContainerNodeAtCursor(self: *AstGen, parent_idx: Node.Idx, comptime container_node_tag: Node.Tag) !Node.Idx { - self.getNode(parent_idx).incrementNumChildren(); - return try self.appendNode( - @unionInit( - Node.Tagged, - @tagName(container_node_tag), - .{ .off = self.cursorOffset() }, - ), - ); -} -inline fn appendLeafNodeAtCursor(self: *AstGen, parent_idx: Node.Idx, comptime leaf_node_tag: Node.Tag, len: StrLen) !Node.Idx { - self.getNode(parent_idx).incrementNumChildren(); - return try self.appendNode( - @unionInit( - Node.Tagged, - @tagName(leaf_node_tag), - .{ .off = self.cursorOffset(), .len = len }, - ), - ); -} -inline fn appendError(self: *AstGen, err: Error.Tagged) !void { - if (self.errors.items.len > std.math.maxInt( - @typeInfo(Error.Idx).@"enum".tag_type, - )) return error.OutOfErrorIdx; - try self.errors.append(self.gpa, .fromTagged(err)); -} -inline fn appendPointErrorAtCursor(self: *AstGen, comptime tag: Error.Tag, idx: Node.Idx) !void { - try self.appendError( - @unionInit( - Error.Tagged, - @tagName(tag), - .{ .idx = idx, .off = self.cursorOffset() }, - ), - ); -} -inline fn appendNodeErrorAtCursor(self: *AstGen, comptime tag: Error.Tag, idx: Node.Idx) !void { - try self.appendError( - @unionInit( - Error.Tagged, - @tagName(tag), - .{ .idx = idx }, - ), - ); -} - -pub fn deinit(self: *AstGen) void { - self.nodes.deinit(self.gpa); - self.errors.deinit(self.gpa); - self.extra.deinit(self.gpa); -} - -/// Parses mymarkdown -/// -/// : `gpa` -/// + A suitable allocator for scratch allocations that supports free and ideally remap. -/// : `output_gpa` -/// + If passed, no scratch allocations will outlive this function, -/// and any allocations returned will be allocated on this. -/// : `input` -/// + The input slice to be parsed. Must end in at least 1024 \n characters. -/// -/// Errors: -/// : `IndentationTooLong` -/// + This implementation of mymarkdown supports up to 1023 characters of indentation. -pub fn parse( - gpa: std.mem.Allocator, - output_gpa: ?std.mem.Allocator, - input: []const u8, -) !Ast { - const tracy_frame = tracy.trace(@src()); - defer tracy_frame.end(); - - if (@typeInfo(Column).int.bits > @typeInfo(StrLen).int.bits) - @compileError("Column should have less bits than StrLen"); - if (input.len < 128 or !std.mem.eql(u8, input[input.len - 128 ..], "\n" ** 128)) - return error.InputUnsafe; - if (input.len > std.math.maxInt(u32)) - return error.InputTooLarge; - - var ast: AstGen = .{ - .gpa = gpa, - .output_gpa = output_gpa orelse gpa, - .output_gpa_same_as_gpa = output_gpa == null, - .input = input, - .cursor = input, - .indentation = undefined, - .nodes = .empty, - .errors = .empty, - .extra = .empty, - }; - defer ast.deinit(); - - try ast.parseRoot(); - - std.sort.pdq(Error, ast.errors.items, {}, struct { - fn func(_: void, lhs: Error, rhs: Error) bool { - return @intFromEnum(lhs.get(.idx)) < @intFromEnum(rhs.get(.idx)); - } - }.func); - - if (output_gpa) |gpa2| { - return .{ - .nodes = try gpa2.dupe(Node, ast.nodes.items), - .errors = try gpa2.dupe(Error, ast.errors.items), - .extra = try gpa2.dupe(u32, ast.extra.items), - }; - } else { - return .{ - .nodes = try ast.nodes.toOwnedSlice(gpa), - .errors = try ast.errors.toOwnedSlice(gpa), - .extra = try ast.extra.toOwnedSlice(gpa), - }; - } -} - -const ParsingContext = enum { block_context, inline_context }; - -fn parseRoot(self: *AstGen) !void { - const tracy_frame = tracy.trace(@src()); - defer tracy_frame.end(); - const root = try self.appendNode(.{ .document = .{} }); - assert(root == .root); - assert(self.input.ptr == self.cursor.ptr); - assert(self.input.len == self.cursor.len); - - if (str.indexOfNone(self.input, " \t\r\n")) |idx| { - if (idx == 0 or self.input[idx - 1] != '\n') { - // Happy case: input starts at the start of the line :) - self.advanceCursor(idx); - } else { - // Input doesn't start at the start of the line :( - // Log an error but otherwise proceed as usual - try self.appendPointErrorAtCursor(.inconsistent_indentation, root); - self.advanceCursor(idx); - } - - // The actual parse - // "inline" hack to get different branch predictors for the root column - _ = try self.parseColumnInline(root, 0, 0, .block_context); - } else { - // Input is completely empty, return without really parsing anything - } -} - -fn parseColumn( - self: *AstGen, - parent_idx: Node.Idx, - parent_col: Column, - cursor_col: Column, - comptime parsing_context: ParsingContext, -) error{ - IndentationTooLong, - OutOfNodeIdx, - OutOfErrorIdx, - OutOfMemory, -}!Column { - return self.parseColumnInline(parent_idx, parent_col, cursor_col, parsing_context); -} - -fn parseMarkerItem( - self: *AstGen, - comptime node_tag: Node.Tag, - parent_idx: Node.Idx, - block_col: Column, - marker_len: Column, - comptime output_marker: enum { output_marker, no_output_marker }, - comptime parent_parsing_context: ParsingContext, - comptime child_parsing_context: ParsingContext, -) !Column { - const block_idx = try self.appendContainerNodeAtCursor(parent_idx, node_tag); - if (parent_parsing_context == .inline_context) - try self.appendNodeErrorAtCursor(.unexpected_block_in_inline_context, block_idx); - if (output_marker == .output_marker) { - _ = try self.appendLeafNodeAtCursor(block_idx, .marker, marker_len); - } - switch (try self.findIndentation(block_col, marker_len)) { - .found_column => |child_col| { - return try self.parseColumn(block_idx, block_col, child_col, child_parsing_context); - }, - .mismatched_indentation => |indentation_idx_found| return indentation_idx_found, - } -} - -inline fn parseColumnInline( - self: *AstGen, - parent_idx: Node.Idx, - parent_col: Column, - cursor_col: Column, - comptime parsing_context: ParsingContext, -) !Column { - assert(cursor_col == 0 or parent_col < cursor_col); - - // Used for "indentation correction". - // For simplicity, just think of this as this column's indentation. - var block_col = cursor_col; - - var parsed_first_paragraph_for_inline_context: bool = false; - - // # parseColumn's input parameter explanation - // + AKA: How to deal with indentation - // - // Our cursor points at the first (usually non-whitespace) char of the column. - // (The caller is in charge of finding the start of the column.) - // - // We also need both our own and our parent's indentations. - // These are represented as column indices that indicate - // how many characters of `self.indentation` should match. - // indicating where the parent column's ends, - // as well as where this column's indentation ends. - // For the root column, both of these values are 0, which means the column - // can never be exited from until it reaches the end of the file. - // - // === - // column 0, file starts here - // | column 2, parent column starts here - // | | column 5, our column starts here - // v v v - // - // | Parent column (parent's indentation = " ", represented as parent_col = 2) - // | v (cursor points at H, our indentation = " ", represented as cursor_col = 5) - // | Hello there - // | Same column - // | Parent column - // === - // - // Lines that match with our indentation are considered part of our column, - // and the column ends upon finding a line aligns with the parent indentation. - // When a line matches the parent's indentation but NOT ours, we log an error, - // then do error recovery by pretending it does match with our indentation. - // - // === - // | Parent column - // | v (cursor) - // | Hello there - // | \t Same column but syntax error! (" \t " does not match with " ", but matches " ") - // | Parent column - // === - // - // This indentation recovery system is not the best and it can fix indentation "incorrectly", - // but I don't have anything better. (Btw, the original AstGen.zig uses the same recovery logic.) - - parse_another_block: while (true) { - assert(self.cursor.len > 0); - assert(str.isNoneOf(self.cursor[0], " \t\r\n")); - - // Will be set by the recursive call (if any), - // to indicate how much indentation was previously checked. - var indentation_idx: Column = undefined; - - finish_parsing_block: { - const tracy_frame = tracy.trace(@src()); - defer tracy_frame.end(); - switch (self.cursor[0]) { - // Par-like repeatable markers - inline '-', '.', ':', '#' => |m| { - const marker_len = try self.findMarkerEnd(m); - if (m == '-') { - var potential_task_item = str.indexOfNone(self.cursor[marker_len..], "[ ]xX") orelse unreachable; - while (potential_task_item >= 3 and self.cursor[marker_len + potential_task_item - 1] == ' ') - potential_task_item -= 1; - if (potential_task_item >= 3 and - self.cursor[marker_len + potential_task_item - 1] == ']' and - self.cursor[marker_len + potential_task_item - 3] == '[' and - (self.cursor[marker_len + potential_task_item - 2] == ' ' or - self.cursor[marker_len + potential_task_item - 2] == 'x' or - self.cursor[marker_len + potential_task_item - 2] == 'X') and - std.mem.allEqual(u8, self.cursor[marker_len .. marker_len + potential_task_item - 3], ' ')) - { - if (marker_len + potential_task_item > std.math.maxInt(Column)) - return error.IndentationTooLong; - tracy_frame.setName(@tagName(.task_item)); - indentation_idx = try self.parseMarkerItem( - .task_item, - parent_idx, - block_col, - @intCast(marker_len + potential_task_item), - .output_marker, - parsing_context, - .inline_context, - ); - break :finish_parsing_block; - } - } - const tag = switch (m) { - '-' => .unordered_item, - '.' => .ordered_item, - ':' => .term_item, - '#' => .heading, - else => unreachable, - }; - tracy_frame.setName(@tagName(tag)); - indentation_idx = try self.parseMarkerItem( - tag, - parent_idx, - block_col, - marker_len, - .output_marker, - parsing_context, - .inline_context, - ); - break :finish_parsing_block; - }, - - // Div-like repeatable markers - inline '+' => |m| { - tracy_frame.setName(@tagName(.elaboration)); - const marker_len = try self.findMarkerEnd(m); - indentation_idx = try self.parseMarkerItem( - switch (m) { - '+' => .elaboration, - else => unreachable, - }, - parent_idx, - block_col, - marker_len, - .output_marker, - parsing_context, - .block_context, - ); - break :finish_parsing_block; - }, - - // Par-like single markers - inline ';' => |m| { - tracy_frame.setName(@tagName(.paragraph)); - indentation_idx = try self.parseMarkerItem( - switch (m) { - ';' => .paragraph, - else => unreachable, - }, - parent_idx, - block_col, - 1, - .no_output_marker, - parsing_context, - .inline_context, - ); - break :finish_parsing_block; - }, - - // Div-like single markers - inline '>' => |m| { - tracy_frame.setName(@tagName(.quote)); - indentation_idx = try self.parseMarkerItem( - switch (m) { - '>' => .quote, - else => unreachable, - }, - parent_idx, - block_col, - 1, - .no_output_marker, - parsing_context, - .block_context, - ); - break :finish_parsing_block; - }, - - '*' => { - if (std.mem.eql(u8, self.cursor[0..3], "***")) { - const after_stars = self.cursor[3..]; - const skip_whitespace_idx = str.indexOfNone(after_stars, " \t") orelse unreachable; - if (after_stars[skip_whitespace_idx] == '\n') { - tracy_frame.setName(@tagName(.thematic_break)); - _ = try self.appendLeafNodeAtCursor(parent_idx, .thematic_break, 3); - self.advanceCursor(3 + skip_whitespace_idx + 1); - while (true) { - if (self.cursor.len == 0) return 0; - - const next_idx = str.indexOfNone(self.cursor, " \t") orelse unreachable; - if (self.cursor[next_idx] == '\n') { - self.advanceCursor(next_idx + 1); - continue; - } - - const verified_indentation_idx = std.mem.indexOfDiff( - u8, - self.cursor, - self.indentation[0..block_col], - ) orelse unreachable; - if (verified_indentation_idx == block_col) { - indentation_idx = @intCast(next_idx); - break :finish_parsing_block; - } else { - indentation_idx = @intCast(verified_indentation_idx); - break :finish_parsing_block; - } - } - } - } - }, - - else => {}, - } - - // Handle paragraph - switch (parsing_context) { - .inline_context => { - tracy_frame.setName(@tagName(.text)); - if (parsed_first_paragraph_for_inline_context) { - try self.appendNodeErrorAtCursor(.unexpected_block_in_inline_context, self.nextNodeIdx()); - indentation_idx = try self.parseParagraph(.space_text, parent_idx, parent_col, block_col); - } else { - indentation_idx = try self.parseParagraph(.text, parent_idx, parent_col, block_col); - parsed_first_paragraph_for_inline_context = true; - } - }, - .block_context => { - tracy_frame.setName(@tagName(.paragraph)); - const paragraph_idx = try self.appendContainerNodeAtCursor(parent_idx, .paragraph); - indentation_idx = try self.parseParagraph(.text, paragraph_idx, parent_col, block_col); - }, - } - } - - // We just finished parsing a block, so cursor points at the start of a line: - // - // === - // | Parent column - // | Hello there - // | Same column - // |v----------------- (cursor) - // | Parent column - // === - // - // We need to find the next block. This involves checking for empty lines and indentation. - // - // We find where the indentation differs, if any. There are 6 cases: - // - **The line is empty.** - // + We loop again, looking for another block. - // - **Matches us, next char is non-whitespace.** - // + Happy path, we continue parsing as usual. - // - **Matches us, next char is whitespace.** - // + Log and error and recover by treating all leading whitespace - // as if it matched our indentation level exactly. - // - **Does not even match parent.** - // + Return with no errors. - // - **Matches parent and not us, next char is non-whitespace.** - // + Return with no errors. - // - **Matches parent and not us, next char is whitespace.** - // + Log and error and recover by treating all leading whitespace - // as if it matched our indentation level exactly. - - block_col = cursor_col; - // finding_block: - while (true) { - // Special case: when we hit EOF, there's nothing left to parse. - if (self.cursor.len == 0) return 0; - - assert(self.cursor[indentation_idx] != '\n'); - if (indentation_idx > cursor_col) { - // Matches us but there's too much whitespace. - // Fix the indentation. - // Here, we fix the indentation by pretending that - // the block starts from wherever the whitespace ended. - block_col = indentation_idx; - @memcpy( - self.indentation[cursor_col..block_col], - self.cursor[cursor_col..block_col], - ); - // Log the error. - self.advanceCursor(cursor_col); - try self.appendPointErrorAtCursor(.inconsistent_indentation, self.nextNodeIdx()); - // Continue parsing. - self.advanceCursor(indentation_idx - cursor_col); - continue :parse_another_block; - } else if (indentation_idx == cursor_col) { - // Matches us exactly. - self.advanceCursor(indentation_idx); - continue :parse_another_block; - } else if (indentation_idx > parent_col) { - // Matches parent but there's extra whitespace that doesn't match us. - // Log the error. - self.advanceCursor(indentation_idx); - try self.appendPointErrorAtCursor(.inconsistent_indentation, parent_idx); - // Fix the indentation. - // Here, we fix the indentation by pretending that - // the block starts from the correct amount of whitespace. - // <no op> - - // Continue parsing. - continue :parse_another_block; - } else { - // Matches parent exactly or doesn't match parent, return. - return indentation_idx; - } - } - } -} - -/// Finds where the indented block starts -fn findIndentation( - self: *AstGen, - parent_col: Column, - skip: Column, -) !union(enum) { - found_column: Column, - mismatched_indentation: Column, -} { - // We're given the input at the marker. - // - // === - // parent_col - // v - // - // | Parent column - // | v------------ (cursor) - // | - Hello there - // | Same column - // | Parent column - // === - // - // We first skip some number of characters, and then scan forward until we find non-whitespace. - // - // === - // first skip... - // - // parent_col - // v - // - // | Parent column - // | v------------ (cursor) - // | - Hello there - // | Same column - // | Parent column - // - // then scan for non-whitespace: - // - // parent_col - // v - // - // | Parent column - // | v------------ (cursor) - // | - Hello there - // | Same column - // | Parent column - // === - // - // Then we store the indentation found in `self.indentation` and return the indentation column (in this case 5). - // Because in this case we found the non-whitespace character on the first line, we will memset the marker to spaces. - // - // - return = 5 - // - self.indentation = ` ` - // - // *** - // - // There are a couple other cases. - // - // This next case is when the non-whitespace char is not on the same line. - // In this case we can simply copy the indentation from the input. - // - // === - // parent_col - // v - // - // | Parent column - // | - // | - v------------ (cursor) - // | Hello there - // | Same column - // | Parent column - // === - // - // - return = 5 - // - self.indentation = ` ` - // - // *** - // - // This next case is we find that the indentation does not match the parent's indentation. - // Note that this can only happen when the non-whitespace char is not on the same line as the initial cursor, - // since the indentation behind the initial cursor has already been checked by the caller. - // - // === - // parent_col - // v - // - // | Parent column - // | - // | - v------------ (cursor) - // | \t Hello there - // | Same column - // | Parent column - // === - // - // - return = mismatched indentation at column 1 - // - // Here, parent's indentation is ` ` but we saw ` \t `. - // In this case we return how many characters did match to the caller. - // The caller should interpret this case as that no indented column was found. - // - // Note that we do not break out of the loop until we find a non-whitespace char, - // even if we see non-matching indentation. In this next example, we ignore the `\t` - // and continue to the H, where the indentation actually does match. - // - // === - // parent_col - // v - // - // | Parent column - // | - // | - - // | \t v------------ (cursor) - // | Hello there - // | Same column - // | Parent column - // === - // - // - return = 5 - // - self.indentation = ` ` - - // Handle first line separately - { - self.advanceCursor(skip); - const inner_block_idx = str.indexOfNone(self.cursor, " \t") orelse unreachable; - if (self.cursor[inner_block_idx] != '\n') { - // We found the indentation! - // Because this is the first line, we need to memset the marker into spaces. - if (parent_col + skip + inner_block_idx > std.math.maxInt(Column)) - return error.IndentationTooLong; - @memset(self.indentation[parent_col .. parent_col + skip], ' '); - @memcpy(self.indentation[parent_col + skip .. parent_col + skip + inner_block_idx], self.cursor[0..inner_block_idx]); - self.advanceCursor(inner_block_idx); - return .{ .found_column = @intCast(parent_col + skip + inner_block_idx) }; - } else { - // I lied, inner_block_idx doesn't point to the inner block. - self.advanceCursor(inner_block_idx + 1); - } - } - - // Remaining lines don't need to memset the marker into spaces. - while (true) { - // Find column - const inner_block_idx = str.indexOfNone(self.cursor, " \t") orelse unreachable; - if (self.cursor[inner_block_idx] != '\n') { - // Verify parent indentation - const indentation_idx = std.mem.indexOfDiff(u8, self.cursor, self.indentation[0..parent_col]) orelse unreachable; - if (indentation_idx != parent_col) { - return .{ .mismatched_indentation = @intCast(indentation_idx) }; - } - // We found the indentation! - if (parent_col + inner_block_idx > std.math.maxInt(Column)) - return error.IndentationTooLong; - @memcpy(self.indentation[parent_col .. parent_col + inner_block_idx], self.cursor[parent_col..inner_block_idx]); - self.advanceCursor(inner_block_idx); - return .{ .found_column = @intCast(parent_col + inner_block_idx) }; - } else { - // I lied, inner_block_idx doesn't point to the inner block. - self.advanceCursor(inner_block_idx + 1); - } - } -} - -fn insertTextLine( - self: *AstGen, - comptime first_text_tag: Node.Tag, - comptime rest_text_tag: Node.Tag, - parent_idx: Node.Idx, - len_: usize, -) !void { - var len = len_; - if (len <= std.math.maxInt(StrLen)) { - _ = try self.appendLeafNodeAtCursor(parent_idx, first_text_tag, @intCast(len)); - self.advanceCursor(len); - } else { - @branchHint(.cold); - { - const consumed_len = @min(len, std.math.maxInt(StrLen)); - _ = try self.appendLeafNodeAtCursor(parent_idx, first_text_tag, @intCast(consumed_len)); - self.advanceCursor(consumed_len); - len -= consumed_len; - } - while (len > 0) { - const consumed_len = @min(len, std.math.maxInt(StrLen)); - _ = try self.appendLeafNodeAtCursor(parent_idx, rest_text_tag, @intCast(consumed_len)); - self.advanceCursor(consumed_len); - len -= consumed_len; - } - } -} - -fn parseParagraph( - self: *AstGen, - comptime first_text_tag: Node.Tag, - parent_idx: Node.Idx, - parent_col: Column, - block_col: Column, -) !Column { - { - const newline = str.indexOfChar(self.cursor, '\n') orelse unreachable; - try self.insertTextLine(first_text_tag, .text, parent_idx, newline); - self.advanceCursor(1); - } - - while (true) { - if (self.cursor.len == 0) return 0; - - const indentation_idx = str.indexOfNone(self.cursor, " \t") orelse unreachable; - if (str.isAnyOf(self.cursor[indentation_idx], "-.:+>#;")) { - // block line found, exit - const verified_indentation_idx = std.mem.indexOfDiff( - u8, - self.cursor, - self.indentation[0..block_col], - ) orelse unreachable; - if (verified_indentation_idx == block_col) { - return @intCast(indentation_idx); - } else { - return @intCast(verified_indentation_idx); - } - } - if (self.cursor[indentation_idx] == '*') { - const after_stars = self.cursor[3..]; - const skip_whitespace_idx = str.indexOfNone(after_stars, " \t") orelse unreachable; - if (after_stars[skip_whitespace_idx] == '\n') { - // block line found, exit - const verified_indentation_idx = std.mem.indexOfDiff( - u8, - self.cursor, - self.indentation[0..block_col], - ) orelse unreachable; - if (verified_indentation_idx == block_col) { - return @intCast(indentation_idx); - } else { - return @intCast(verified_indentation_idx); - } - } - } - // empty line found, consume to next nonwhitespace and exit - if (self.cursor[indentation_idx] == '\n') { - self.advanceCursor(indentation_idx + 1); - while (true) { - if (self.cursor.len == 0) return 0; - - const next_idx = str.indexOfNone(self.cursor, " \t") orelse unreachable; - if (self.cursor[next_idx] == '\n') { - self.advanceCursor(next_idx + 1); - continue; - } - - const verified_indentation_idx = std.mem.indexOfDiff( - u8, - self.cursor, - self.indentation[0..block_col], - ) orelse unreachable; - if (verified_indentation_idx == block_col) { - return @intCast(next_idx); - } else { - return @intCast(verified_indentation_idx); - } - } - } - - // verify indentation - const verified_indentation_idx = std.mem.indexOfDiff( - u8, - self.cursor, - self.indentation[0..block_col], - ) orelse unreachable; - if (verified_indentation_idx == block_col) { - self.advanceCursor(verified_indentation_idx); - const newline = str.indexOfChar(self.cursor, '\n') orelse unreachable; - try self.insertTextLine(.space_text, .text, parent_idx, newline); - self.advanceCursor(1); - } else if (verified_indentation_idx > parent_col) { - self.advanceCursor(indentation_idx); - try self.appendPointErrorAtCursor(.inconsistent_indentation, parent_idx); - const newline = str.indexOfChar(self.cursor, '\n') orelse unreachable; - try self.insertTextLine(.space_text, .text, parent_idx, newline); - self.advanceCursor(1); - } else { - return @intCast(verified_indentation_idx); - } - } -} diff --git a/src/main.zig b/src/main.zig @@ -38,48 +38,28 @@ pub fn main() !void { const args = try std.process.argsAlloc(arena); - const bench1, const bench2, const bench3, // - const run1, const run2, const run3, // - const check1, const check2, const check3, // - const render1, const render2, const render3, // - const print1, const print2, const print3, // + const bench3, // + const run3, // + const check3, // + const render3, // + const print3, // const iters = blk: { - var bench1, var bench2, var bench3 = .{ false, false, false }; - var run1, var run2, var run3 = .{ false, false, false }; - var check1, var check2, var check3 = .{ false, false, false }; - var render1, var render2, var render3 = .{ false, false, false }; - var print1, var print2, var print3 = .{ false, false, false }; + var bench3 = false; + var run3 = false; + var check3 = false; + var render3 = false; + var print3 = false; var iters: usize = 8; for (args) |arg| { - if (std.mem.eql(u8, arg, "--bench1")) - bench1 = true; - if (std.mem.eql(u8, arg, "--bench2")) - bench2 = true; if (std.mem.eql(u8, arg, "--bench3")) bench3 = true; - if (std.mem.eql(u8, arg, "--run1")) - run1 = true; - if (std.mem.eql(u8, arg, "--run2")) - run2 = true; if (std.mem.eql(u8, arg, "--run3")) run3 = true; - if (std.mem.eql(u8, arg, "--check1")) - check1, run1 = .{ true, true }; - if (std.mem.eql(u8, arg, "--check2")) - check2, run2 = .{ true, true }; if (std.mem.eql(u8, arg, "--check3")) check3, run3 = .{ true, true }; - if (std.mem.eql(u8, arg, "--render1")) - render1, run1 = .{ true, true }; - if (std.mem.eql(u8, arg, "--render2")) - render2, run2 = .{ true, true }; if (std.mem.eql(u8, arg, "--render3")) render3, run3 = .{ true, true }; - if (std.mem.eql(u8, arg, "--print1")) - print1, run1 = .{ true, true }; - if (std.mem.eql(u8, arg, "--print2")) - print2, run2 = .{ true, true }; if (std.mem.eql(u8, arg, "--print3")) print3, run3 = .{ true, true }; if (std.mem.startsWith(u8, arg, "--iters=")) @@ -98,11 +78,11 @@ pub fn main() !void { std.Thread.sleep(2e9); } break :blk .{ - bench1, bench2, bench3, - run1, run2, run3, - check1, check2, check3, - render1, render2, render3, - print1, print2, print3, + bench3, + run3, + check3, + render3, + print3, iters, }; }; @@ -112,30 +92,6 @@ pub fn main() !void { const input = input_arraylist.items; for (0..iters) |_| { - if (bench1) { - const ast1 = blk: { - const tracy_frame = tracy.namedFrame("parse 1"); - defer tracy_frame.end(); - break :blk try mymarkdown.parse( - gpa, - gpa, - input, - ); - }; - ast1.deinit(gpa); - } - if (bench2) { - const ast2 = blk: { - const tracy_frame = tracy.namedFrame("parse 2"); - defer tracy_frame.end(); - break :blk try mymarkdown.parse2( - gpa, - gpa, - input, - ); - }; - ast2.deinit(gpa); - } if (bench3) { const ast3 = blk: { const tracy_frame = tracy.namedFrame("parse 3"); @@ -150,25 +106,7 @@ pub fn main() !void { } } - if (!bench1 and !bench2 and !bench3) { - const ast: mymarkdown.Ast = if (run1) blk: { - const tracy_frame = tracy.namedFrame("parse 1"); - defer tracy_frame.end(); - break :blk try mymarkdown.parse( - gpa, - arena, - input, - ); - } else .empty; - const ast2: mymarkdown.Ast = if (run2) blk: { - const tracy_frame = tracy.namedFrame("parse 2"); - defer tracy_frame.end(); - break :blk try mymarkdown.parse2( - gpa, - arena, - input, - ); - } else .empty; + if (!bench3) { const ast3: mymarkdown.Ast = if (run3) blk: { const tracy_frame = tracy.namedFrame("parse 3"); defer tracy_frame.end(); @@ -179,22 +117,6 @@ pub fn main() !void { ); } else .empty; - var render_arraylist1: std.ArrayList(u8) = .init(gpa); - defer render_arraylist1.deinit(); - if (check1 or render1 or print1) { - std.debug.print("Rendering 1\n", .{}); - const tracy_frame = tracy.namedFrame("Render 1"); - defer tracy_frame.end(); - _ = try ast.renderAst(render_arraylist1.writer(), input); - } - var render_arraylist2: std.ArrayList(u8) = .init(gpa); - defer render_arraylist2.deinit(); - if (check2 or render2 or print2) { - std.debug.print("Rendering 2\n", .{}); - const tracy_frame = tracy.namedFrame("Render 2"); - defer tracy_frame.end(); - _ = try ast2.renderAst(render_arraylist2.writer(), input); - } var render_arraylist3: std.ArrayList(u8) = .init(gpa); defer render_arraylist3.deinit(); if (check3 or render3 or print3) { @@ -203,34 +125,13 @@ pub fn main() !void { defer tracy_frame.end(); _ = try ast3.renderAst(render_arraylist3.writer(), input); } - if (check1 and check3) { - std.debug.print("check 1 vs 3\n", .{}); - try std.testing.expectEqualStrings(render_arraylist1.items, render_arraylist3.items); - } - if (check2 and check3) { - std.debug.print("check 2 vs 3\n", .{}); - try std.testing.expectEqualStrings(render_arraylist2.items, render_arraylist3.items); - } - if (check1 and check2 and !check3) { - std.debug.print("check 1 vs 2\n", .{}); - try std.testing.expectEqualStrings(render_arraylist1.items, render_arraylist2.items); - } + + // if (check1 and check3) { + // std.debug.print("check 1 vs 3\n", .{}); + // try std.testing.expectEqualStrings(render_arraylist1.items, render_arraylist3.items); + // } for (0..10) |_| { - if (render1) { - std.debug.print("Re-rendering 1\n", .{}); - render_arraylist1.clearRetainingCapacity(); - const tracy_frame = tracy.namedFrame("re-render 1"); - defer tracy_frame.end(); - _ = try ast.renderAst(render_arraylist1.writer(), input); - } - if (render2) { - std.debug.print("Re-rendering 2\n", .{}); - render_arraylist2.clearRetainingCapacity(); - const tracy_frame = tracy.namedFrame("re-render 2"); - defer tracy_frame.end(); - _ = try ast2.renderAst(render_arraylist2.writer(), input); - } if (render3) { std.debug.print("Re-rendering 3\n", .{}); render_arraylist3.clearRetainingCapacity(); @@ -240,8 +141,6 @@ pub fn main() !void { } } - if (print1) try std.io.getStdOut().writeAll(render_arraylist1.items); - if (print2) try std.io.getStdOut().writeAll(render_arraylist2.items); if (print3) try std.io.getStdOut().writeAll(render_arraylist3.items); } diff --git a/src/root.zig b/src/root.zig @@ -1,10 +1,6 @@ const std = @import("std"); pub const Ast = @import("Ast.zig"); -pub const AstGen = @import("AstGen.zig"); -pub const parse = AstGen.parse; -pub const AstGen2 = @import("AstGen2.zig"); -pub const parse2 = AstGen2.parse; pub const AstGen3 = @import("AstGen3.zig"); pub const parse3 = AstGen3.parse;