commit 7145b5826c7eca7b203a663b1cada149f2a44891
parent 200c1f7c82ca4056761cd0d0ed2e368eec504501
Author: gracefu <81774659+gracefuu@users.noreply.github.com>
Date: Thu, 22 May 2025 14:05:31 +0800
Remove old AstGen
Diffstat:
| D | src/AstGen.zig | | | 616 | ------------------------------------------------------------------------------- |
| D | src/AstGen2.zig | | | 848 | ------------------------------------------------------------------------------- |
| M | src/main.zig | | | 143 | ++++++++++++------------------------------------------------------------------- |
| M | src/root.zig | | | 4 | ---- |
4 files changed, 21 insertions(+), 1590 deletions(-)
diff --git a/src/AstGen.zig b/src/AstGen.zig
@@ -1,616 +0,0 @@
-const std = @import("std");
-const assert = std.debug.assert;
-
-const tracy = @import("tracy");
-
-const Ast = @import("Ast.zig");
-const Node = Ast.Node;
-const Error = Ast.Error;
-const str = @import("str.zig");
-const utils = @import("utils.zig");
-
-const AstGen = @This();
-
-input_base: [*]u8,
-nodes: std.ArrayListUnmanaged(Node),
-errors: std.ArrayListUnmanaged(Error),
-extra: std.ArrayListUnmanaged(u32),
-
-/// Returns a mutable pointer to the node stored at `idx`.
-///
-/// The pointer aims into `nodes.items`, so do not hold it across an append.
-/// Runtime safety is forced on so an out-of-range index is always caught.
-fn getNode(self: AstGen, idx: Node.Idx) *Node {
-    @setRuntimeSafety(true);
-    const slot: usize = @intFromEnum(idx);
-    return &self.nodes.items[slot];
-}
-/// Returns the index of the most recently appended node.
-///
-/// Runtime safety is forced on, so calling this with no nodes traps on the subtraction.
-fn lastNodeIdx(self: AstGen) Node.Idx {
-    @setRuntimeSafety(true);
-    const count = self.nodes.items.len;
-    return @enumFromInt(count - 1);
-}
-/// Returns the index the next appended node will receive (the current node count).
-fn nextNodeIdx(self: AstGen) Node.Idx {
-    @setRuntimeSafety(true);
-    const count = self.nodes.items.len;
-    return @enumFromInt(count);
-}
-/// Converts `node` to its stored form and appends it, returning the new node's index.
-///
-/// Fails with `error.OutOfNodeIdx` when the new index would not fit in the
-/// integer type backing `Node.Idx`, and with `error.OutOfMemory` when the list cannot grow.
-fn appendNode(self: *AstGen, gpa: std.mem.Allocator, node: Node.Tagged) !Node.Idx {
-    const IdxInt = @typeInfo(Node.Idx).@"enum".tag_type;
-    const new_idx = self.nodes.items.len;
-    if (new_idx > std.math.maxInt(IdxInt)) return error.OutOfNodeIdx;
-    try self.nodes.append(gpa, .fromTagged(node));
-    return @enumFromInt(new_idx);
-}
-
-/// Releases the three lists owned by this generator.
-///
-/// `gpa` must be the allocator the lists were grown with.
-pub fn deinit(self: *AstGen, gpa: std.mem.Allocator) void {
-    self.extra.deinit(gpa);
-    self.errors.deinit(gpa);
-    self.nodes.deinit(gpa);
-}
-
-/// Parses mymarkdown
-///
-/// gpa: A suitable allocator for scratch allocations.
-/// output_gpa: If passed, no scratch allocations will outlive this function,
-/// and any allocations returned will be allocated on this.
-/// If null, the returned slices are allocated on `gpa`.
-/// input: The input slice to be parsed. It is only read; parsing works on a
-/// private copy because markers are overwritten in place while parsing.
-///
-/// On error, every allocation made by this function has been released.
-pub fn parse(
-    gpa: std.mem.Allocator,
-    output_gpa: ?std.mem.Allocator,
-    input: []const u8,
-) error{
-    InputTooLarge, // When the input length exceeds 2^32 bytes
-    MarkerTooLong, // When the input contains a marker that exceeds 2^24 bytes
-    OutOfNodeIdx, // When there are more than 2^24 nodes created during parsing
-    OutOfMemory, // When allocation fails
-    Todo, // When I'm too lazy
-}!Ast {
-    const tracy_frame = tracy.trace(@src());
-    defer tracy_frame.end();
-
-    if (input.len > std.math.maxInt(u32)) {
-        return error.InputTooLarge;
-    }
-
-    // Every line is copied followed by one '\n'. Splitting on '\n' yields one
-    // more piece than there are newlines, so the copy needs exactly input.len + 1 bytes.
-    var input_copy: std.ArrayListUnmanaged(u8) = .empty;
-    defer input_copy.deinit(gpa);
-    {
-        const tracy_frame_alloc = tracy.traceNamed(@src(), "Allocate input copy");
-        defer tracy_frame_alloc.end();
-        try input_copy.ensureTotalCapacityPrecise(gpa, input.len + 1);
-    }
-
-    var ast: AstGen = .{
-        // Capacity is reserved up front, so this pointer stays valid while the copy is filled.
-        .input_base = input_copy.items.ptr,
-        .nodes = .empty,
-        .errors = .empty,
-        .extra = .empty,
-    };
-    defer ast.deinit(gpa);
-    const root = try ast.appendNode(gpa, .{ .document = .{} });
-
-    var lines: std.ArrayListUnmanaged([]u8) = .empty;
-    defer lines.deinit(gpa);
-
-    {
-        const tracy_frame_split = tracy.traceNamed(@src(), "Split into lines and copy");
-        defer tracy_frame_split.end();
-        var lines_it = std.mem.splitScalar(u8, input, '\n');
-        var maybe_line: ?[]u8 = @constCast(lines_it.first());
-        var off: usize = 0;
-        while (maybe_line) |line| : (maybe_line = @constCast(lines_it.next())) {
-            input_copy.appendSliceAssumeCapacity(line);
-            input_copy.appendAssumeCapacity('\n');
-            // Each stored line is a view into the copy with trailing whitespace cut off.
-            if (str.lastIndexOfNone(line, " \t\r\n")) |idx| {
-                try lines.append(gpa, input_copy.items[off .. off + idx + 1]);
-            } else {
-                try lines.append(gpa, input_copy.items[off..off]);
-            }
-            off += line.len + 1;
-        }
-    }
-
-    try ast.parseColumn(gpa, lines.items, root);
-
-    // Report errors ordered by the index of the node they refer to.
-    std.sort.pdq(Error, ast.errors.items, {}, struct {
-        fn func(_: void, lhs: Error, rhs: Error) bool {
-            return @intFromEnum(lhs.get(.idx)) < @intFromEnum(rhs.get(.idx));
-        }
-    }.func);
-
-    // Build the three result slices one at a time so that a failure part-way
-    // through frees the slices that were already produced.
-    if (output_gpa) |gpa2| {
-        const nodes = try gpa2.dupe(Node, ast.nodes.items);
-        errdefer gpa2.free(nodes);
-        const errors = try gpa2.dupe(Error, ast.errors.items);
-        errdefer gpa2.free(errors);
-        const extra = try gpa2.dupe(u32, ast.extra.items);
-        return .{
-            .nodes = nodes,
-            .errors = errors,
-            .extra = extra,
-        };
-    } else {
-        const nodes = try ast.nodes.toOwnedSlice(gpa);
-        errdefer gpa.free(nodes);
-        const errors = try ast.errors.toOwnedSlice(gpa);
-        errdefer gpa.free(errors);
-        const extra = try ast.extra.toOwnedSlice(gpa);
-        return .{
-            .nodes = nodes,
-            .errors = errors,
-            .extra = extra,
-        };
-    }
-}
-
-/// Shrinks every line in `lines` so that it no longer ends in space, tab, CR or LF.
-/// A line made up only of those characters becomes empty.
-fn stripTrailingWhitespace(lines: *[][]u8) void {
-    const tracy_frame = tracy.trace(@src());
-    defer tracy_frame.end();
-    for (lines.*) |*line| {
-        const kept_len = if (str.lastIndexOfNone(line.*, " \t\r\n")) |last| last + 1 else 0;
-        line.* = line.*[0..kept_len];
-    }
-}
-
-/// Returns the distance of `c` from `input_base`, i.e. the byte offset of `c`
-/// within the parser's private copy of the input.
-fn calcOffset(self: *AstGen, c: *u8) u32 {
-    const base = self.input_base;
-    return @intCast(c - base);
-}
-
-/// Strips the common indentation from `lines_` and returns the de-indented lines.
-///
-/// Leading blank lines are dropped first. The leading whitespace of the first
-/// remaining line defines the indentation; an empty slice is returned when no
-/// line remains or when that line is not indented at all.
-///
-/// The line slices are rewritten in place. A later non-empty line whose prefix
-/// differs from the indentation gets an `inconsistent_indentation` error
-/// (attributed to `node_idx`) and has all of its leading whitespace removed instead.
-fn findIndentedColumn(self: *AstGen, gpa: std.mem.Allocator, lines_: [][]u8, node_idx: Node.Idx) ![][]u8 {
- const tracy_frame = tracy.trace(@src());
- defer tracy_frame.end();
- var lines = lines_;
-
- // empty lines at the start of the inline block are fine, just skip these
- // special case: the first line consists of only whitespace
- // because it may have been introduced via marker replacement
- if (lines.len > 0 and str.indexOfNone(lines[0], " \t\r\n") == null) lines = lines[1..];
- while (true) : (lines = lines[1..]) {
- if (lines.len == 0) return &.{};
- if (lines[0].len != 0) break;
- }
-
- // determine indentation
- // (lines[0] is non-empty here; the `unreachable` additionally assumes it is not whitespace-only)
- const indentation_idx = str.indexOfNone(lines[0], " \t\r\n") orelse unreachable;
- if (indentation_idx == 0) return &.{};
-
- const indentation = lines[0][0..indentation_idx];
-
- // strip all lines of their indentation
- lines[0] = lines[0][indentation.len..];
- for (lines[1..]) |*line| {
- if (line.len == 0) continue;
-
- // Index of the first byte where the line and the expected indentation differ.
- // Null would mean the line equals the indentation exactly, which is assumed impossible here.
- const diff_idx = std.mem.indexOfDiff(u8, line.*, indentation) orelse unreachable;
- assert(diff_idx != line.len);
- if (diff_idx != indentation.len) {
- // Recover by stripping all whitespace on this line
- const recover_indentation_idx = std.mem.indexOfNone(u8, line.*, " \t\r\n") orelse unreachable;
- try self.errors.append(gpa, .fromTagged(.{
- .inconsistent_indentation = .{ .idx = node_idx, .off = self.calcOffset(&line.*[diff_idx]) },
- }));
- line.* = line.*[recover_indentation_idx..];
- } else {
- line.* = line.*[indentation.len..];
- }
- }
-
- return lines;
-}
-
-/// Parses `lines_` as inline content of `parent_idx`, appending leaf nodes for each line.
-///
-/// Leading blank lines are skipped, as is a first line consisting only of
-/// whitespace (marker replacement can leave one behind). The first remaining
-/// line becomes a `text` node and every later non-empty line a `space_text` node.
-/// Blank lines between content lines are skipped, and an
-/// `unexpected_block_in_inline_context` error is recorded for the node that follows them.
-///
-/// A line longer than `maxInt(Ast.StrLen)` is split into several nodes, with
-/// continuation pieces emitted as `text`. Every piece is counted as a child of
-/// the parent, so the parent's child count always matches the nodes appended here.
-fn parseInlineBlock(self: *AstGen, gpa: std.mem.Allocator, lines_: [][]u8, parent_idx: Node.Idx) !void {
-    const tracy_frame = tracy.trace(@src());
-    defer tracy_frame.end();
-    const max_len = std.math.maxInt(Ast.StrLen);
-    var lines = lines_;
-
-    // special case: the first line consists of only whitespace
-    // because it may have been introduced via marker replacement
-    if (lines.len > 0 and str.indexOfNone(lines[0], " \t\r\n") == null) lines = lines[1..];
-
-    var is_first_line = true;
-    while (true) {
-        // Skip empty lines, remembering whether there were any.
-        var saw_empty_line = false;
-        while (true) : (lines = lines[1..]) {
-            if (lines.len == 0) return;
-            if (lines[0].len != 0) break;
-            saw_empty_line = true;
-        }
-
-        // Empty lines before the first content line are fine; after it they
-        // would start a new block, which is not allowed in inline context.
-        if (saw_empty_line and !is_first_line) {
-            try self.errors.append(gpa, .fromTagged(.{
-                .unexpected_block_in_inline_context = .{ .idx = self.nextNodeIdx() },
-            }));
-        }
-
-        // Emit the line in pieces of at most `max_len` bytes (normally one piece).
-        var rest = lines[0];
-        var is_first_piece = true;
-        while (rest.len > 0) {
-            const piece_len = @min(rest.len, max_len);
-            const off = self.calcOffset(&rest[0]);
-            // Count every emitted piece, not just the first one of the line.
-            self.getNode(parent_idx).incrementNumChildren();
-            if (is_first_piece and !is_first_line) {
-                _ = try self.appendNode(gpa, .{
-                    .space_text = .{ .off = off, .len = @intCast(piece_len) },
-                });
-            } else {
-                _ = try self.appendNode(gpa, .{
-                    .text = .{ .off = off, .len = @intCast(piece_len) },
-                });
-            }
-            is_first_piece = false;
-            rest = rest[piece_len..];
-        }
-
-        is_first_line = false;
-        lines = lines[1..];
-    }
-}
-
-/// Parses `lines_` as a sequence of blocks and appends them as children of `parent_idx`.
-///
-/// For every block: blank lines are skipped, `parseBlockStart` creates the
-/// block's node from the first line, the lines belonging to the block are
-/// collected according to the returned mode, and those lines are parsed
-/// recursively (as inline content or as a nested column).
-///
-/// Returns `error.Todo` for modes that are not implemented (currently `raw`).
-fn parseColumn(self: *AstGen, gpa: std.mem.Allocator, lines_: [][]u8, parent_idx: Node.Idx) !void {
- const tracy_frame = tracy.trace(@src());
- defer tracy_frame.end();
- var lines = lines_;
- outer: while (true) {
- // Skip empty lines
- // special case: the first line consists of only whitespace
- // because it may have been introduced via marker replacement
- {
- const tracy_frame_skip = tracy.traceNamed(@src(), "skip empty lines");
- defer tracy_frame_skip.end();
- if (lines.len > 0 and str.indexOfNone(lines[0], " \t\r\n") == null) lines = lines[1..];
- while (true) : (lines = lines[1..]) {
- if (lines.len == 0) break :outer;
- if (lines[0].len != 0) break;
- }
- }
-
- // Use first character to determine marker
- const mode, const child = try self.parseBlockStart(gpa, lines[0]);
-
- self.getNode(parent_idx).incrementNumChildren();
-
- switch (mode) {
- .paragraph => {
- // take indented or non-block-marker lines
- // (stops at an empty line, at a line that is exactly "***", or at a line
- // starting with any other character that has a block spec)
- var num_lines: usize = 1;
- for (lines[1..]) |line| {
- if (line.len == 0) break;
- if (line[0] == '*') {
- if (std.mem.eql(u8, line, "***")) break;
- } else if (block_specs[line[0]] != null) break;
- num_lines += 1;
- }
-
- var paragraph_lines = lines[0..num_lines];
- // A paragraph that starts with whitespace is reported, then de-indented.
- if (lines[0][0] == ' ' or lines[0][0] == '\t') {
- try self.errors.append(gpa, .fromTagged(.{
- .inconsistent_indentation = .{
- .idx = self.lastNodeIdx(),
- .off = self.calcOffset(&lines[0][0]),
- },
- }));
- paragraph_lines = try self.findIndentedColumn(gpa, paragraph_lines, child);
- }
- lines = lines[num_lines..];
- try self.parseInlineBlock(gpa, paragraph_lines, child);
- },
- .indented_inline_block => {
- // take indented or empty lines
- var num_lines: usize = 1;
- for (lines[1..]) |line| {
- if (line.len != 0 and str.isNoneOf(line[0], " \t\r\n")) break;
- num_lines += 1;
- }
-
- const inline_block_lines = try self.findIndentedColumn(gpa, lines[0..num_lines], child);
- lines = lines[num_lines..];
- try self.parseInlineBlock(gpa, inline_block_lines, child);
- },
- .indented_column => {
- // take indented or empty lines
- var num_lines: usize = 1;
- for (lines[1..]) |line| {
- if (line.len != 0 and std.mem.indexOfScalar(u8, " \t\r\n", line[0]) == null) break;
- num_lines += 1;
- }
-
- const column_lines = try self.findIndentedColumn(gpa, lines[0..num_lines], child);
- lines = lines[num_lines..];
- try self.parseColumn(gpa, column_lines, child);
- },
- .no_children => {
- // The block is complete on its own line; just consume that line.
- lines = lines[1..];
- },
- else => return error.Todo,
- }
- }
-}
-
-/// How `parseColumn` handles the lines that follow a block's first line.
-const ParseMode = union(enum) {
- /// The block's indented lines are parsed as a nested column of blocks.
- indented_column,
- /// The block's indented lines are parsed as inline content.
- indented_inline_block,
- /// Following lines are taken until an empty line or a block marker, then parsed as inline content.
- paragraph,
- /// Not handled by `parseColumn` yet (it returns `error.Todo`).
- raw: struct { fence: []u8 },
- /// The block consists of its first line only.
- no_children,
-};
-
-/// Describes what the start of a line must look like for a `BlockSpecCase` to apply.
-/// See `parseBlockStart` for how each variant is actually checked.
-const MarkerSpec = union(enum) {
- /// No marker; the line is taken as a paragraph.
- paragraph,
- /// A fixed marker text; by its name, intended to be the whole line.
- exact: []const u8,
- /// A fixed marker text the line begins with.
- starts_with: []const u8,
- /// A run of one repeated marker character, followed by one of the `extra` strings.
- starts_with_multi: struct {
- marker_char: u8,
- extra: []const []const u8 = &.{""}, // any extra characters to check after the marker
- /// When set, a longer marker is reported as `marker_too_long`.
- max_chars: ?u32 = null,
- },
-};
-/// One candidate interpretation of a line that starts with a given character.
-const BlockSpecCase = struct {
- /// Node tag to create when the marker matches.
- tag: Node.Tag,
- /// What the start of the line must look like.
- marker: MarkerSpec,
- /// How the block's remaining lines are parsed.
- mode: ParseMode,
- /// Whether a `marker` leaf node is appended as the first child of the block.
- store_marker_child: enum { store, no_store },
-};
-
-/// The cases for one starting character, or null when the character starts no block.
-const BlockSpec = ?[]const BlockSpecCase;
-
-/// Builds a lookup table from a byte value to its `BlockSpec`.
-///
-/// Every declaration of `spec` is stored in the slot selected by the first
-/// byte of the declaration's name; all other slots are null.
-fn blockSpecs(comptime spec: type) [256]BlockSpec {
-    var table = [_]BlockSpec{null} ** 256;
-    for (@typeInfo(spec).@"struct".decls) |decl| {
-        table[decl.name[0]] = @field(spec, decl.name);
-    }
-    return table;
-}
-
-/// Block marker table, indexed by the first byte of a line.
-///
-/// Each declaration is named after the character it handles. Its cases are
-/// tried in order by `parseBlockStart`, and the first matching case wins.
-const block_specs = blockSpecs(struct {
- // "***" is a thematic break; any other line starting with '*' is a paragraph.
- pub const @"*": BlockSpec = &.{
- .{
- .tag = .thematic_break,
- .marker = .{ .exact = "***" },
- .mode = .no_children,
- .store_marker_child = .no_store,
- },
- .{
- .tag = .paragraph,
- .marker = .paragraph,
- .mode = .paragraph,
- .store_marker_child = .no_store,
- },
- };
- // Headings; more than 6 '#' characters is reported as marker_too_long.
- pub const @"#": BlockSpec = &.{
- .{
- .tag = .heading,
- .marker = .{ .starts_with_multi = .{ .marker_char = '#', .max_chars = 6 } },
- .mode = .indented_inline_block,
- .store_marker_child = .store,
- },
- };
- // Task items are tried before plain unordered items, so the checkbox becomes part of the marker.
- pub const @"-": BlockSpec = &.{
- .{
- .tag = .task_item,
- .marker = .{ .starts_with_multi = .{ .marker_char = '-', .extra = &.{ " [ ]", " [x]", " [X]" } } },
- .mode = .indented_inline_block,
- .store_marker_child = .store,
- },
- .{
- .tag = .unordered_item,
- .marker = .{ .starts_with_multi = .{ .marker_char = '-' } },
- .mode = .indented_inline_block,
- .store_marker_child = .store,
- },
- };
- pub const @".": BlockSpec = &.{
- .{
- .tag = .ordered_item,
- .marker = .{ .starts_with_multi = .{ .marker_char = '.' } },
- .mode = .indented_inline_block,
- .store_marker_child = .store,
- },
- };
- pub const @":": BlockSpec = &.{
- .{
- .tag = .term_item,
- .marker = .{ .starts_with_multi = .{ .marker_char = ':' } },
- .mode = .indented_inline_block,
- .store_marker_child = .store,
- },
- };
- // Quotes contain nested blocks rather than inline content.
- pub const @">": BlockSpec = &.{
- .{
- .tag = .quote,
- .marker = .{ .starts_with = ">" },
- .mode = .indented_column,
- .store_marker_child = .no_store,
- },
- };
- pub const @"+": BlockSpec = &.{
- .{
- .tag = .elaboration,
- .marker = .{ .starts_with_multi = .{ .marker_char = '+' } },
- .mode = .indented_column,
- .store_marker_child = .store,
- },
- };
- // ';' starts an explicitly marked paragraph.
- pub const @";": BlockSpec = &.{
- .{
- .tag = .paragraph,
- .marker = .{ .starts_with = ";" },
- .mode = .indented_inline_block,
- .store_marker_child = .no_store,
- },
- };
-});
-
-/// Appends the suitable block node to the ast,
-/// then returns how parsing should proceed for the children of this block.
-/// Also returns the idx of the container node created.
-///
-/// `line` must be non-empty. When a marker matches, its bytes in `line` are
-/// overwritten with spaces so that the rest of the line reads as indented content.
-///
-/// `exact` markers must equal the whole line; `starts_with` markers only need
-/// to be a prefix of it. A line that starts with a marker character but
-/// matches none of its cases becomes a paragraph, with an `invalid_marker` error.
-///
-/// Returns `error.MarkerTooLong` when a repeated marker does not fit in `Ast.StrLen`.
-fn parseBlockStart(self: *AstGen, gpa: std.mem.Allocator, line: []u8) !struct { ParseMode, Node.Idx } {
-    const tracy_frame = tracy.trace(@src());
-    defer tracy_frame.end();
-    if (block_specs[line[0]] == null) {
-        return .{
-            .paragraph,
-            try self.appendNode(gpa, .{
-                .paragraph = .{
-                    .off = self.calcOffset(&line[0]),
-                },
-            }),
-        };
-    }
-
-    // Inline switch by starting character so codegen proceeds as if each blockspec was converted to code then concatenated.
-    // Note that we separately handle the null case above, then make the inline case below `unreachable`.
-    // That makes it so that we don't have 240+ branches that all just do exactly the same thing.
-    //
-    // Regardless, the blockspec must be comptime known (the inline for is mandatory) because we do @unionInit with case.tag.
-    switch (line[0]) {
-        inline else => |c| {
-            assert(block_specs[c] != null);
-            inline for (block_specs[c] orelse unreachable) |case| {
-                switch (case.marker) {
-                    .exact, .starts_with => |marker| {
-                        // `exact` has to cover the whole line, e.g. "***" but not "***bold***".
-                        // Lines reach this function with trailing whitespace already removed.
-                        const matches = if (case.marker == .exact)
-                            std.mem.eql(u8, line, marker)
-                        else
-                            std.mem.startsWith(u8, line, marker);
-                        if (matches) {
-                            const node = if (case.mode == .no_children)
-                                try self.appendNode(gpa, @unionInit(
-                                    Node.Tagged,
-                                    @tagName(case.tag),
-                                    @as(Node.Tagged.Leaf, .{
-                                        .off = self.calcOffset(&line[0]),
-                                        .len = marker.len,
-                                    }),
-                                ))
-                            else
-                                try self.appendNode(gpa, @unionInit(
-                                    Node.Tagged,
-                                    @tagName(case.tag),
-                                    @as(Node.Tagged.Container, .{
-                                        .off = self.calcOffset(&line[0]),
-                                        .num_children = if (case.store_marker_child == .store) 1 else 0,
-                                    }),
-                                ));
-                            @memset(line[0..marker.len], ' ');
-                            if (case.store_marker_child == .store) {
-                                _ = try self.appendNode(gpa, .{ .marker = .{
-                                    .off = self.calcOffset(&line[0]),
-                                    .len = marker.len,
-                                } });
-                            }
-                            return .{ case.mode, node };
-                        }
-                    },
-                    .starts_with_multi => |marker_spec| {
-                        var marker_len = str.indexOfNotChar(line, marker_spec.marker_char) orelse line.len;
-
-                        inline for (marker_spec.extra) |extra| {
-                            if (std.mem.startsWith(u8, line[marker_len..], extra)) {
-                                marker_len += extra.len;
-
-                                // Checked before anything narrows `marker_len` to `Ast.StrLen`.
-                                // The whole parse fails on this error, so nothing appended so far is lost.
-                                if (marker_len > std.math.maxInt(Ast.StrLen))
-                                    return error.MarkerTooLong;
-
-                                const node = if (case.mode == .no_children)
-                                    try self.appendNode(gpa, @unionInit(
-                                        Node.Tagged,
-                                        @tagName(case.tag),
-                                        @as(Node.Tagged.Leaf, .{
-                                            .off = self.calcOffset(&line[0]),
-                                            .len = @intCast(marker_len),
-                                        }),
-                                    ))
-                                else
-                                    try self.appendNode(gpa, @unionInit(
-                                        Node.Tagged,
-                                        @tagName(case.tag),
-                                        @as(Node.Tagged.Container, .{
-                                            .off = self.calcOffset(&line[0]),
-                                            .num_children = if (case.store_marker_child == .store) 1 else 0,
-                                        }),
-                                    ));
-
-                                // The error points at the marker node when one is stored (it is
-                                // appended next), otherwise at the block node that was just appended.
-                                if (marker_spec.max_chars) |max|
-                                    if (marker_len > max)
-                                        try self.errors.append(gpa, .fromTagged(.{
-                                            .marker_too_long = .{
-                                                .idx = if (case.store_marker_child == .no_store)
-                                                    self.lastNodeIdx()
-                                                else
-                                                    self.nextNodeIdx(),
-                                            },
-                                        }));
-
-                                @memset(line[0..marker_len], ' ');
-
-                                if (case.store_marker_child == .store) {
-                                    _ = try self.appendNode(gpa, .{ .marker = .{
-                                        .off = self.calcOffset(&line[0]),
-                                        .len = @intCast(marker_len),
-                                    } });
-                                }
-                                return .{ case.mode, node };
-                            }
-                        }
-                    },
-                    .paragraph => return .{
-                        .paragraph,
-                        try self.appendNode(gpa, .{
-                            .paragraph = .{
-                                .off = self.calcOffset(&line[0]),
-                            },
-                        }),
-                    },
-                }
-            }
-        },
-    }
-
-    // Line started with a special character, but it didn't match any markers
-    // Fallback to paragraph, but place a warning.
-    try self.errors.append(gpa, .fromTagged(.{
-        .invalid_marker = .{
-            .idx = self.nextNodeIdx(),
-            .off = self.calcOffset(&line[0]),
-        },
-    }));
-
-    return .{
-        .paragraph,
-        try self.appendNode(gpa, .{
-            .paragraph = .{
-                .off = self.calcOffset(&line[0]),
-            },
-        }),
-    };
-}
diff --git a/src/AstGen2.zig b/src/AstGen2.zig
@@ -1,848 +0,0 @@
-const std = @import("std");
-const assert = std.debug.assert;
-
-const tracy = @import("tracy");
-
-const Ast = @import("Ast.zig");
-const StrOffset = Ast.StrOffset;
-const StrLen = Ast.StrLen;
-const Node = Ast.Node;
-const Error = Ast.Error;
-const str = @import("str.zig");
-const utils = @import("utils.zig");
-
-const AstGen = @This();
-
-gpa: std.mem.Allocator,
-output_gpa: std.mem.Allocator,
-output_gpa_same_as_gpa: bool,
-input: []const u8,
-cursor: []const u8, // suffix of input
-indentation: [std.math.maxInt(Column)]u8,
-nodes: std.ArrayListUnmanaged(Node),
-errors: std.ArrayListUnmanaged(Error),
-extra: std.ArrayListUnmanaged(u32),
-
-/// Column index within a line, also used for marker and indentation lengths.
-/// Being a u10, it caps supported indentation at 1023 characters.
-const Column = u10;
-
-/// Returns how far the cursor has advanced from the start of the input, in bytes.
-fn cursorOffset(self: *AstGen) StrOffset {
-    const consumed = self.cursor.ptr - self.input.ptr;
-    return @intCast(consumed);
-}
-/// Moves the cursor forward by `advance` bytes.
-fn advanceCursor(self: *AstGen, advance: usize) void {
-    // NOTE: `advance` should really be u32, but this makes it easier to work with other str functions.
-    const remaining = self.cursor[advance..];
-    self.cursor = remaining;
-}
-/// Returns the length of the run of `m` characters at the cursor.
-///
-/// Fails with `error.IndentationTooLong` when the run does not fit in `Column`;
-/// this is checked explicitly because malicious input is possible.
-fn findMarkerEnd(self: AstGen, m: u8) error{IndentationTooLong}!Column {
-    // NOTE: null is impossible because input is guaranteed to end in newlines.
-    const run_len = str.indexOfNotChar(self.cursor, m) orelse unreachable;
-    const col = std.math.cast(Column, run_len) orelse return error.IndentationTooLong;
-    return col;
-}
-
-/// Returns a mutable pointer to the node stored at `idx`.
-///
-/// The pointer aims into `nodes.items`, so do not hold it across an append.
-fn getNode(self: AstGen, idx: Node.Idx) *Node {
-    @setRuntimeSafety(true);
-    const slot: usize = @intFromEnum(idx);
-    return &self.nodes.items[slot];
-}
-/// Returns the index of the most recently appended node.
-fn lastNodeIdx(self: AstGen) Node.Idx {
-    @setRuntimeSafety(true);
-    const count = self.nodes.items.len;
-    return @enumFromInt(count - 1);
-}
-/// Returns the index the next appended node will receive (the current node count).
-fn nextNodeIdx(self: AstGen) Node.Idx {
-    @setRuntimeSafety(true);
-    const count = self.nodes.items.len;
-    return @enumFromInt(count);
-}
-
-// These need manual inlining for some reason.
-//
- // LLVM doesn't seem to think that inlining these is worth it, but LLVM is wrong.
-// Because this constructs the Node using .fromTagged, and Node.Tagged and Node
-// have different memory representations, the only way to construct in place and
-// elide copies is for every function from this one to the callsite to be inlined.
-//
-// The same applies for Error / Error.Tagged below, but the impact is less severe
-// as appending Errors is a much less common operation. Nevertheless, we inline it
-// despite not having any data to back it up, because I have vibes that it should be faster.
-/// Converts `node` to its stored form and appends it, returning the new node's index.
-///
-/// Fails with `error.OutOfNodeIdx` when the new index would not fit in the
-/// integer type backing `Node.Idx`.
-inline fn appendNode(self: *AstGen, node: Node.Tagged) !Node.Idx {
-    const IdxInt = @typeInfo(Node.Idx).@"enum".tag_type;
-    const new_idx = self.nodes.items.len;
-    if (new_idx > std.math.maxInt(IdxInt)) return error.OutOfNodeIdx;
-    try self.nodes.append(self.gpa, .fromTagged(node));
-    return @enumFromInt(new_idx);
-}
-/// Appends a container node of kind `container_node_tag` that starts at the
-/// cursor, and counts it as a child of `parent_idx`. Returns the new node's index.
-inline fn appendContainerNodeAtCursor(self: *AstGen, parent_idx: Node.Idx, comptime container_node_tag: Node.Tag) !Node.Idx {
- // The parent is updated first; the pointer from getNode is not kept across the append.
- self.getNode(parent_idx).incrementNumChildren();
- return try self.appendNode(
- @unionInit(
- Node.Tagged,
- @tagName(container_node_tag),
- .{ .off = self.cursorOffset() },
- ),
- );
-}
-/// Appends a leaf node of kind `leaf_node_tag` covering `len` bytes from the
-/// cursor, and counts it as a child of `parent_idx`. Returns the new node's index.
-inline fn appendLeafNodeAtCursor(self: *AstGen, parent_idx: Node.Idx, comptime leaf_node_tag: Node.Tag, len: StrLen) !Node.Idx {
- // The parent is updated first; the pointer from getNode is not kept across the append.
- self.getNode(parent_idx).incrementNumChildren();
- return try self.appendNode(
- @unionInit(
- Node.Tagged,
- @tagName(leaf_node_tag),
- .{ .off = self.cursorOffset(), .len = len },
- ),
- );
-}
-/// Converts `err` to its stored form and appends it to the error list.
-///
-/// Fails with `error.OutOfErrorIdx` when the error count already exceeds what
-/// the integer type backing `Error.Idx` can represent.
-inline fn appendError(self: *AstGen, err: Error.Tagged) !void {
-    const IdxInt = @typeInfo(Error.Idx).@"enum".tag_type;
-    const error_count = self.errors.items.len;
-    if (error_count > std.math.maxInt(IdxInt)) return error.OutOfErrorIdx;
-    try self.errors.append(self.gpa, .fromTagged(err));
-}
-/// Records an error of kind `tag` for node `idx`, located at the current cursor offset.
-inline fn appendPointErrorAtCursor(self: *AstGen, comptime tag: Error.Tag, idx: Node.Idx) !void {
- try self.appendError(
- @unionInit(
- Error.Tagged,
- @tagName(tag),
- .{ .idx = idx, .off = self.cursorOffset() },
- ),
- );
-}
-/// Records an error of kind `tag` for node `idx`.
-/// Despite the name, no cursor offset is stored; the error carries only the node index.
-inline fn appendNodeErrorAtCursor(self: *AstGen, comptime tag: Error.Tag, idx: Node.Idx) !void {
- try self.appendError(
- @unionInit(
- Error.Tagged,
- @tagName(tag),
- .{ .idx = idx },
- ),
- );
-}
-
-/// Releases the three lists owned by this generator, using the stored `gpa`.
-pub fn deinit(self: *AstGen) void {
-    const gpa = self.gpa;
-    self.extra.deinit(gpa);
-    self.errors.deinit(gpa);
-    self.nodes.deinit(gpa);
-}
-
-/// Parses mymarkdown
-///
-/// : `gpa`
-/// + A suitable allocator for scratch allocations that supports free and ideally remap.
-/// : `output_gpa`
-/// + If passed, no scratch allocations will outlive this function,
-/// and any allocations returned will be allocated on this.
-/// If null, the returned slices are allocated on `gpa`.
-/// : `input`
-/// + The input slice to be parsed. Must end in at least 128 \n characters;
-/// the parser relies on this padding instead of bounds-checking every scan.
-///
-/// Errors:
-/// : `InputUnsafe`
-/// + The input does not end in the required newline padding.
-/// : `InputTooLarge`
-/// + The input is longer than a u32 can index.
-/// : `IndentationTooLong`
-/// + This implementation of mymarkdown supports up to 1023 characters of indentation.
-/// : `OutOfNodeIdx`, `OutOfErrorIdx`, `OutOfMemory`
-/// + A node index, error index or allocation could not be obtained.
-///
-/// On error, every allocation made by this function has been released.
-pub fn parse(
-    gpa: std.mem.Allocator,
-    output_gpa: ?std.mem.Allocator,
-    input: []const u8,
-) !Ast {
-    const tracy_frame = tracy.trace(@src());
-    defer tracy_frame.end();
-
-    // Number of trailing '\n' bytes the input must carry.
-    const required_padding = 128;
-
-    if (@typeInfo(Column).int.bits > @typeInfo(StrLen).int.bits)
-        @compileError("Column should have less bits than StrLen");
-    if (input.len < required_padding or
-        !std.mem.eql(u8, input[input.len - required_padding ..], "\n" ** required_padding))
-        return error.InputUnsafe;
-    if (input.len > std.math.maxInt(u32))
-        return error.InputTooLarge;
-
-    var ast: AstGen = .{
-        .gpa = gpa,
-        .output_gpa = output_gpa orelse gpa,
-        .output_gpa_same_as_gpa = output_gpa == null,
-        .input = input,
-        .cursor = input,
-        .indentation = undefined,
-        .nodes = .empty,
-        .errors = .empty,
-        .extra = .empty,
-    };
-    defer ast.deinit();
-
-    try ast.parseRoot();
-
-    // Report errors ordered by the index of the node they refer to.
-    std.sort.pdq(Error, ast.errors.items, {}, struct {
-        fn func(_: void, lhs: Error, rhs: Error) bool {
-            return @intFromEnum(lhs.get(.idx)) < @intFromEnum(rhs.get(.idx));
-        }
-    }.func);
-
-    // Build the three result slices one at a time so that a failure part-way
-    // through frees the slices that were already produced.
-    if (output_gpa) |gpa2| {
-        const nodes = try gpa2.dupe(Node, ast.nodes.items);
-        errdefer gpa2.free(nodes);
-        const errors = try gpa2.dupe(Error, ast.errors.items);
-        errdefer gpa2.free(errors);
-        const extra = try gpa2.dupe(u32, ast.extra.items);
-        return .{
-            .nodes = nodes,
-            .errors = errors,
-            .extra = extra,
-        };
-    } else {
-        const nodes = try ast.nodes.toOwnedSlice(gpa);
-        errdefer gpa.free(nodes);
-        const errors = try ast.errors.toOwnedSlice(gpa);
-        errdefer gpa.free(errors);
-        const extra = try ast.extra.toOwnedSlice(gpa);
-        return .{
-            .nodes = nodes,
-            .errors = errors,
-            .extra = extra,
-        };
-    }
-}
-
-/// Whether a column holds blocks (`block_context`) or inline content
-/// (`inline_context`); a block found in inline context is reported as an error.
-const ParsingContext = enum { block_context, inline_context };
-
-/// Appends the document root node and parses the whole input as its column.
-///
-/// Leading whitespace and blank lines are skipped. If the first content is
-/// indented (the byte before it is neither the start of the input nor a
-/// newline), an `inconsistent_indentation` error is recorded against the root
-/// and parsing continues as if the content started at column 0.
-/// Input that contains only whitespace produces just the root node.
-fn parseRoot(self: *AstGen) !void {
-    const tracy_frame = tracy.trace(@src());
-    defer tracy_frame.end();
-    const root = try self.appendNode(.{ .document = .{} });
-    assert(root == .root);
-    assert(self.input.ptr == self.cursor.ptr);
-    assert(self.input.len == self.cursor.len);
-
-    if (str.indexOfNone(self.input, " \t\r\n")) |idx| {
-        // The content starts a line when nothing precedes it, or when the
-        // byte right before it is the newline that ended the previous line.
-        const starts_at_line_start = idx == 0 or self.input[idx - 1] == '\n';
-        if (!starts_at_line_start) {
-            // Input doesn't start at the start of the line :(
-            // Log an error but otherwise proceed as usual
-            try self.appendPointErrorAtCursor(.inconsistent_indentation, root);
-        }
-        self.advanceCursor(idx);
-
-        // The actual parse
-        // "inline" hack to get different branch predictors for the root column
-        _ = try self.parseColumnInline(root, 0, 0, .block_context);
-    } else {
-        // Input is completely empty, return without really parsing anything
-    }
-}
-
-/// Non-inlined entry point to `parseColumnInline`, used for nested columns.
-///
-/// Forwards all arguments unchanged and spells out the error set explicitly.
-fn parseColumn(
-    self: *AstGen,
-    parent_idx: Node.Idx,
-    parent_col: Column,
-    cursor_col: Column,
-    comptime parsing_context: ParsingContext,
-) error{
-    IndentationTooLong,
-    OutOfNodeIdx,
-    OutOfErrorIdx,
-    OutOfMemory,
-}!Column {
-    const indentation_idx = try self.parseColumnInline(parent_idx, parent_col, cursor_col, parsing_context);
-    return indentation_idx;
-}
-
-/// Parses a block that is introduced by a marker of `marker_len` bytes at the cursor.
-///
-/// Appends the `node_tag` container under `parent_idx`, records
-/// `unexpected_block_in_inline_context` when the parent is in inline context,
-/// optionally appends the marker itself as a leaf child, and then parses the
-/// block's content as a column in `child_parsing_context`.
-///
-/// Returns the column reported by the nested `parseColumn`, or the value
-/// reported by `findIndentation` when the indentation did not match.
-fn parseMarkerItem(
-    self: *AstGen,
-    comptime node_tag: Node.Tag,
-    parent_idx: Node.Idx,
-    block_col: Column,
-    marker_len: Column,
-    comptime output_marker: enum { output_marker, no_output_marker },
-    comptime parent_parsing_context: ParsingContext,
-    comptime child_parsing_context: ParsingContext,
-) !Column {
-    const block_idx = try self.appendContainerNodeAtCursor(parent_idx, node_tag);
-
-    switch (parent_parsing_context) {
-        .inline_context => try self.appendNodeErrorAtCursor(.unexpected_block_in_inline_context, block_idx),
-        .block_context => {},
-    }
-
-    switch (output_marker) {
-        .output_marker => {
-            _ = try self.appendLeafNodeAtCursor(block_idx, .marker, marker_len);
-        },
-        .no_output_marker => {},
-    }
-
-    const indentation = try self.findIndentation(block_col, marker_len);
-    return switch (indentation) {
-        .found_column => |child_col| try self.parseColumn(block_idx, block_col, child_col, child_parsing_context),
-        .mismatched_indentation => |indentation_idx_found| indentation_idx_found,
-    };
-}
-
-inline fn parseColumnInline(
- self: *AstGen,
- parent_idx: Node.Idx,
- parent_col: Column,
- cursor_col: Column,
- comptime parsing_context: ParsingContext,
-) !Column {
- assert(cursor_col == 0 or parent_col < cursor_col);
-
- // Used for "indentation correction".
- // For simplicity, just think of this as this column's indentation.
- var block_col = cursor_col;
-
- var parsed_first_paragraph_for_inline_context: bool = false;
-
- // # parseColumn's input parameter explanation
- // + AKA: How to deal with indentation
- //
- // Our cursor points at the first (usually non-whitespace) char of the column.
- // (The caller is in charge of finding the start of the column.)
- //
- // We also need both our own and our parent's indentations.
- // These are represented as column indices that indicate
- // how many characters of `self.indentation` should match:
- // one index marks where the parent column's indentation ends,
- // and the other marks where this column's indentation ends.
- // For the root column, both of these values are 0, which means the column
- // can never be exited from until it reaches the end of the file.
- //
- // ===
- // column 0, file starts here
- // | column 2, parent column starts here
- // | | column 5, our column starts here
- // v v v
- //
- // | Parent column (parent's indentation = " ", represented as parent_col = 2)
- // | v (cursor points at H, our indentation = " ", represented as cursor_col = 5)
- // | Hello there
- // | Same column
- // | Parent column
- // ===
- //
- // Lines that match with our indentation are considered part of our column,
- // and the column ends upon finding a line that aligns with the parent indentation.
- // When a line matches the parent's indentation but NOT ours, we log an error,
- // then do error recovery by pretending it does match with our indentation.
- //
- // ===
- // | Parent column
- // | v (cursor)
- // | Hello there
- // | \t Same column but syntax error! (" \t " does not match with " ", but matches " ")
- // | Parent column
- // ===
- //
- // This indentation recovery system is not the best and it can fix indentation "incorrectly",
- // but I don't have anything better. (Btw, the original AstGen.zig uses the same recovery logic.)
-
- parse_another_block: while (true) {
- assert(self.cursor.len > 0);
- assert(str.isNoneOf(self.cursor[0], " \t\r\n"));
-
- // Will be set by the recursive call (if any),
- // to indicate how much indentation was previously checked.
- var indentation_idx: Column = undefined;
-
- finish_parsing_block: {
- const tracy_frame = tracy.trace(@src());
- defer tracy_frame.end();
- switch (self.cursor[0]) {
- // Par-like repeatable markers
- inline '-', '.', ':', '#' => |m| {
- const marker_len = try self.findMarkerEnd(m);
- if (m == '-') {
- var potential_task_item = str.indexOfNone(self.cursor[marker_len..], "[ ]xX") orelse unreachable;
- while (potential_task_item >= 3 and self.cursor[marker_len + potential_task_item - 1] == ' ')
- potential_task_item -= 1;
- if (potential_task_item >= 3 and
- self.cursor[marker_len + potential_task_item - 1] == ']' and
- self.cursor[marker_len + potential_task_item - 3] == '[' and
- (self.cursor[marker_len + potential_task_item - 2] == ' ' or
- self.cursor[marker_len + potential_task_item - 2] == 'x' or
- self.cursor[marker_len + potential_task_item - 2] == 'X') and
- std.mem.allEqual(u8, self.cursor[marker_len .. marker_len + potential_task_item - 3], ' '))
- {
- if (marker_len + potential_task_item > std.math.maxInt(Column))
- return error.IndentationTooLong;
- tracy_frame.setName(@tagName(.task_item));
- indentation_idx = try self.parseMarkerItem(
- .task_item,
- parent_idx,
- block_col,
- @intCast(marker_len + potential_task_item),
- .output_marker,
- parsing_context,
- .inline_context,
- );
- break :finish_parsing_block;
- }
- }
- const tag = switch (m) {
- '-' => .unordered_item,
- '.' => .ordered_item,
- ':' => .term_item,
- '#' => .heading,
- else => unreachable,
- };
- tracy_frame.setName(@tagName(tag));
- indentation_idx = try self.parseMarkerItem(
- tag,
- parent_idx,
- block_col,
- marker_len,
- .output_marker,
- parsing_context,
- .inline_context,
- );
- break :finish_parsing_block;
- },
-
- // Div-like repeatable markers
- inline '+' => |m| {
- tracy_frame.setName(@tagName(.elaboration));
- const marker_len = try self.findMarkerEnd(m);
- indentation_idx = try self.parseMarkerItem(
- switch (m) {
- '+' => .elaboration,
- else => unreachable,
- },
- parent_idx,
- block_col,
- marker_len,
- .output_marker,
- parsing_context,
- .block_context,
- );
- break :finish_parsing_block;
- },
-
- // Par-like single markers
- inline ';' => |m| {
- tracy_frame.setName(@tagName(.paragraph));
- indentation_idx = try self.parseMarkerItem(
- switch (m) {
- ';' => .paragraph,
- else => unreachable,
- },
- parent_idx,
- block_col,
- 1,
- .no_output_marker,
- parsing_context,
- .inline_context,
- );
- break :finish_parsing_block;
- },
-
- // Div-like single markers
- inline '>' => |m| {
- tracy_frame.setName(@tagName(.quote));
- indentation_idx = try self.parseMarkerItem(
- switch (m) {
- '>' => .quote,
- else => unreachable,
- },
- parent_idx,
- block_col,
- 1,
- .no_output_marker,
- parsing_context,
- .block_context,
- );
- break :finish_parsing_block;
- },
-
- '*' => {
- if (std.mem.eql(u8, self.cursor[0..3], "***")) {
- const after_stars = self.cursor[3..];
- const skip_whitespace_idx = str.indexOfNone(after_stars, " \t") orelse unreachable;
- if (after_stars[skip_whitespace_idx] == '\n') {
- tracy_frame.setName(@tagName(.thematic_break));
- _ = try self.appendLeafNodeAtCursor(parent_idx, .thematic_break, 3);
- self.advanceCursor(3 + skip_whitespace_idx + 1);
- while (true) {
- if (self.cursor.len == 0) return 0;
-
- const next_idx = str.indexOfNone(self.cursor, " \t") orelse unreachable;
- if (self.cursor[next_idx] == '\n') {
- self.advanceCursor(next_idx + 1);
- continue;
- }
-
- const verified_indentation_idx = std.mem.indexOfDiff(
- u8,
- self.cursor,
- self.indentation[0..block_col],
- ) orelse unreachable;
- if (verified_indentation_idx == block_col) {
- indentation_idx = @intCast(next_idx);
- break :finish_parsing_block;
- } else {
- indentation_idx = @intCast(verified_indentation_idx);
- break :finish_parsing_block;
- }
- }
- }
- }
- },
-
- else => {},
- }
-
- // Handle paragraph
- switch (parsing_context) {
- .inline_context => {
- tracy_frame.setName(@tagName(.text));
- if (parsed_first_paragraph_for_inline_context) {
- try self.appendNodeErrorAtCursor(.unexpected_block_in_inline_context, self.nextNodeIdx());
- indentation_idx = try self.parseParagraph(.space_text, parent_idx, parent_col, block_col);
- } else {
- indentation_idx = try self.parseParagraph(.text, parent_idx, parent_col, block_col);
- parsed_first_paragraph_for_inline_context = true;
- }
- },
- .block_context => {
- tracy_frame.setName(@tagName(.paragraph));
- const paragraph_idx = try self.appendContainerNodeAtCursor(parent_idx, .paragraph);
- indentation_idx = try self.parseParagraph(.text, paragraph_idx, parent_col, block_col);
- },
- }
- }
-
- // We just finished parsing a block, so cursor points at the start of a line:
- //
- // ===
- // | Parent column
- // | Hello there
- // | Same column
- // |v----------------- (cursor)
- // | Parent column
- // ===
- //
- // We need to find the next block. This involves checking for empty lines and indentation.
- //
- // We find where the indentation differs, if any. There are 6 cases:
- // - **The line is empty.**
- // + We loop again, looking for another block.
- // - **Matches us, next char is non-whitespace.**
- // + Happy path, we continue parsing as usual.
- // - **Matches us, next char is whitespace.**
- // + Log and error and recover by treating all leading whitespace
- // as if it matched our indentation level exactly.
- // - **Does not even match parent.**
- // + Return with no errors.
- // - **Matches parent and not us, next char is non-whitespace.**
- // + Return with no errors.
- // - **Matches parent and not us, next char is whitespace.**
- // + Log and error and recover by treating all leading whitespace
- // as if it matched our indentation level exactly.
-
- block_col = cursor_col;
- // finding_block:
- while (true) {
- // Special case: when we hit EOF, there's nothing left to parse.
- if (self.cursor.len == 0) return 0;
-
- assert(self.cursor[indentation_idx] != '\n');
- if (indentation_idx > cursor_col) {
- // Matches us but there's too much whitespace.
- // Fix the indentation.
- // Here, we fix the indentation by pretending that
- // the block starts from wherever the whitespace ended.
- block_col = indentation_idx;
- @memcpy(
- self.indentation[cursor_col..block_col],
- self.cursor[cursor_col..block_col],
- );
- // Log the error.
- self.advanceCursor(cursor_col);
- try self.appendPointErrorAtCursor(.inconsistent_indentation, self.nextNodeIdx());
- // Continue parsing.
- self.advanceCursor(indentation_idx - cursor_col);
- continue :parse_another_block;
- } else if (indentation_idx == cursor_col) {
- // Matches us exactly.
- self.advanceCursor(indentation_idx);
- continue :parse_another_block;
- } else if (indentation_idx > parent_col) {
- // Matches parent but there's extra whitespace that doesn't match us.
- // Log the error.
- self.advanceCursor(indentation_idx);
- try self.appendPointErrorAtCursor(.inconsistent_indentation, parent_idx);
- // Fix the indentation.
- // Here, we fix the indentation by pretending that
- // the block starts from the correct amount of whitespace.
- // <no op>
-
- // Continue parsing.
- continue :parse_another_block;
- } else {
- // Matches parent exactly or doesn't match parent, return.
- return indentation_idx;
- }
- }
- }
-}
-
-/// Finds where the indented block starts
-fn findIndentation(
- self: *AstGen,
- parent_col: Column,
- skip: Column,
-) !union(enum) {
- found_column: Column,
- mismatched_indentation: Column,
-} {
- // We're given the input at the marker.
- //
- // ===
- // parent_col
- // v
- //
- // | Parent column
- // | v------------ (cursor)
- // | - Hello there
- // | Same column
- // | Parent column
- // ===
- //
- // We first skip some number of characters, and then scan forward until we find non-whitespace.
- //
- // ===
- // first skip...
- //
- // parent_col
- // v
- //
- // | Parent column
- // | v------------ (cursor)
- // | - Hello there
- // | Same column
- // | Parent column
- //
- // then scan for non-whitespace:
- //
- // parent_col
- // v
- //
- // | Parent column
- // | v------------ (cursor)
- // | - Hello there
- // | Same column
- // | Parent column
- // ===
- //
- // Then we store the indentation found in `self.indentation` and return the indentation column (in this case 5).
- // Because in this case we found the non-whitespace character on the first line, we will memset the marker to spaces.
- //
- // - return = 5
- // - self.indentation = ` `
- //
- // ***
- //
- // There are a couple other cases.
- //
- // This next case is when the non-whitespace char is not on the same line.
- // In this case we can simply copy the indentation from the input.
- //
- // ===
- // parent_col
- // v
- //
- // | Parent column
- // |
- // | - v------------ (cursor)
- // | Hello there
- // | Same column
- // | Parent column
- // ===
- //
- // - return = 5
- // - self.indentation = ` `
- //
- // ***
- //
- // This next case is we find that the indentation does not match the parent's indentation.
- // Note that this can only happen when the non-whitespace char is not on the same line as the initial cursor,
- // since the indentation behind the initial cursor has already been checked by the caller.
- //
- // ===
- // parent_col
- // v
- //
- // | Parent column
- // |
- // | - v------------ (cursor)
- // | \t Hello there
- // | Same column
- // | Parent column
- // ===
- //
- // - return = mismatched indentation at column 1
- //
- // Here, parent's indentation is ` ` but we saw ` \t `.
- // In this case we return how many characters did match to the caller.
- // The caller should interpret this case as that no indented column was found.
- //
- // Note that we do not break out of the loop until we find a non-whitespace char,
- // even if we see non-matching indentation. In this next example, we ignore the `\t`
- // and continue to the H, where the indentation actually does match.
- //
- // ===
- // parent_col
- // v
- //
- // | Parent column
- // |
- // | -
- // | \t v------------ (cursor)
- // | Hello there
- // | Same column
- // | Parent column
- // ===
- //
- // - return = 5
- // - self.indentation = ` `
-
- // Handle first line separately
- {
- self.advanceCursor(skip);
- const inner_block_idx = str.indexOfNone(self.cursor, " \t") orelse unreachable;
- if (self.cursor[inner_block_idx] != '\n') {
- // We found the indentation!
- // Because this is the first line, we need to memset the marker into spaces.
- if (parent_col + skip + inner_block_idx > std.math.maxInt(Column))
- return error.IndentationTooLong;
- @memset(self.indentation[parent_col .. parent_col + skip], ' ');
- @memcpy(self.indentation[parent_col + skip .. parent_col + skip + inner_block_idx], self.cursor[0..inner_block_idx]);
- self.advanceCursor(inner_block_idx);
- return .{ .found_column = @intCast(parent_col + skip + inner_block_idx) };
- } else {
- // I lied, inner_block_idx doesn't point to the inner block.
- self.advanceCursor(inner_block_idx + 1);
- }
- }
-
- // Remaining lines don't need to memset the marker into spaces.
- while (true) {
- // Find column
- const inner_block_idx = str.indexOfNone(self.cursor, " \t") orelse unreachable;
- if (self.cursor[inner_block_idx] != '\n') {
- // Verify parent indentation
- const indentation_idx = std.mem.indexOfDiff(u8, self.cursor, self.indentation[0..parent_col]) orelse unreachable;
- if (indentation_idx != parent_col) {
- return .{ .mismatched_indentation = @intCast(indentation_idx) };
- }
- // We found the indentation!
- if (parent_col + inner_block_idx > std.math.maxInt(Column))
- return error.IndentationTooLong;
- @memcpy(self.indentation[parent_col .. parent_col + inner_block_idx], self.cursor[parent_col..inner_block_idx]);
- self.advanceCursor(inner_block_idx);
- return .{ .found_column = @intCast(parent_col + inner_block_idx) };
- } else {
- // I lied, inner_block_idx doesn't point to the inner block.
- self.advanceCursor(inner_block_idx + 1);
- }
- }
-}
-
-fn insertTextLine(
- self: *AstGen,
- comptime first_text_tag: Node.Tag,
- comptime rest_text_tag: Node.Tag,
- parent_idx: Node.Idx,
- len_: usize,
-) !void {
- var len = len_;
- if (len <= std.math.maxInt(StrLen)) {
- _ = try self.appendLeafNodeAtCursor(parent_idx, first_text_tag, @intCast(len));
- self.advanceCursor(len);
- } else {
- @branchHint(.cold);
- {
- const consumed_len = @min(len, std.math.maxInt(StrLen));
- _ = try self.appendLeafNodeAtCursor(parent_idx, first_text_tag, @intCast(consumed_len));
- self.advanceCursor(consumed_len);
- len -= consumed_len;
- }
- while (len > 0) {
- const consumed_len = @min(len, std.math.maxInt(StrLen));
- _ = try self.appendLeafNodeAtCursor(parent_idx, rest_text_tag, @intCast(consumed_len));
- self.advanceCursor(consumed_len);
- len -= consumed_len;
- }
- }
-}
-
-fn parseParagraph(
- self: *AstGen,
- comptime first_text_tag: Node.Tag,
- parent_idx: Node.Idx,
- parent_col: Column,
- block_col: Column,
-) !Column {
- {
- const newline = str.indexOfChar(self.cursor, '\n') orelse unreachable;
- try self.insertTextLine(first_text_tag, .text, parent_idx, newline);
- self.advanceCursor(1);
- }
-
- while (true) {
- if (self.cursor.len == 0) return 0;
-
- const indentation_idx = str.indexOfNone(self.cursor, " \t") orelse unreachable;
- if (str.isAnyOf(self.cursor[indentation_idx], "-.:+>#;")) {
- // block line found, exit
- const verified_indentation_idx = std.mem.indexOfDiff(
- u8,
- self.cursor,
- self.indentation[0..block_col],
- ) orelse unreachable;
- if (verified_indentation_idx == block_col) {
- return @intCast(indentation_idx);
- } else {
- return @intCast(verified_indentation_idx);
- }
- }
- if (self.cursor[indentation_idx] == '*') {
- const after_stars = self.cursor[3..];
- const skip_whitespace_idx = str.indexOfNone(after_stars, " \t") orelse unreachable;
- if (after_stars[skip_whitespace_idx] == '\n') {
- // block line found, exit
- const verified_indentation_idx = std.mem.indexOfDiff(
- u8,
- self.cursor,
- self.indentation[0..block_col],
- ) orelse unreachable;
- if (verified_indentation_idx == block_col) {
- return @intCast(indentation_idx);
- } else {
- return @intCast(verified_indentation_idx);
- }
- }
- }
- // empty line found, consume to next nonwhitespace and exit
- if (self.cursor[indentation_idx] == '\n') {
- self.advanceCursor(indentation_idx + 1);
- while (true) {
- if (self.cursor.len == 0) return 0;
-
- const next_idx = str.indexOfNone(self.cursor, " \t") orelse unreachable;
- if (self.cursor[next_idx] == '\n') {
- self.advanceCursor(next_idx + 1);
- continue;
- }
-
- const verified_indentation_idx = std.mem.indexOfDiff(
- u8,
- self.cursor,
- self.indentation[0..block_col],
- ) orelse unreachable;
- if (verified_indentation_idx == block_col) {
- return @intCast(next_idx);
- } else {
- return @intCast(verified_indentation_idx);
- }
- }
- }
-
- // verify indentation
- const verified_indentation_idx = std.mem.indexOfDiff(
- u8,
- self.cursor,
- self.indentation[0..block_col],
- ) orelse unreachable;
- if (verified_indentation_idx == block_col) {
- self.advanceCursor(verified_indentation_idx);
- const newline = str.indexOfChar(self.cursor, '\n') orelse unreachable;
- try self.insertTextLine(.space_text, .text, parent_idx, newline);
- self.advanceCursor(1);
- } else if (verified_indentation_idx > parent_col) {
- self.advanceCursor(indentation_idx);
- try self.appendPointErrorAtCursor(.inconsistent_indentation, parent_idx);
- const newline = str.indexOfChar(self.cursor, '\n') orelse unreachable;
- try self.insertTextLine(.space_text, .text, parent_idx, newline);
- self.advanceCursor(1);
- } else {
- return @intCast(verified_indentation_idx);
- }
- }
-}
diff --git a/src/main.zig b/src/main.zig
@@ -38,48 +38,28 @@ pub fn main() !void {
const args = try std.process.argsAlloc(arena);
- const bench1, const bench2, const bench3, //
- const run1, const run2, const run3, //
- const check1, const check2, const check3, //
- const render1, const render2, const render3, //
- const print1, const print2, const print3, //
+ const bench3, //
+ const run3, //
+ const check3, //
+ const render3, //
+ const print3, //
const iters =
blk: {
- var bench1, var bench2, var bench3 = .{ false, false, false };
- var run1, var run2, var run3 = .{ false, false, false };
- var check1, var check2, var check3 = .{ false, false, false };
- var render1, var render2, var render3 = .{ false, false, false };
- var print1, var print2, var print3 = .{ false, false, false };
+ var bench3 = false;
+ var run3 = false;
+ var check3 = false;
+ var render3 = false;
+ var print3 = false;
var iters: usize = 8;
for (args) |arg| {
- if (std.mem.eql(u8, arg, "--bench1"))
- bench1 = true;
- if (std.mem.eql(u8, arg, "--bench2"))
- bench2 = true;
if (std.mem.eql(u8, arg, "--bench3"))
bench3 = true;
- if (std.mem.eql(u8, arg, "--run1"))
- run1 = true;
- if (std.mem.eql(u8, arg, "--run2"))
- run2 = true;
if (std.mem.eql(u8, arg, "--run3"))
run3 = true;
- if (std.mem.eql(u8, arg, "--check1"))
- check1, run1 = .{ true, true };
- if (std.mem.eql(u8, arg, "--check2"))
- check2, run2 = .{ true, true };
if (std.mem.eql(u8, arg, "--check3"))
check3, run3 = .{ true, true };
- if (std.mem.eql(u8, arg, "--render1"))
- render1, run1 = .{ true, true };
- if (std.mem.eql(u8, arg, "--render2"))
- render2, run2 = .{ true, true };
if (std.mem.eql(u8, arg, "--render3"))
render3, run3 = .{ true, true };
- if (std.mem.eql(u8, arg, "--print1"))
- print1, run1 = .{ true, true };
- if (std.mem.eql(u8, arg, "--print2"))
- print2, run2 = .{ true, true };
if (std.mem.eql(u8, arg, "--print3"))
print3, run3 = .{ true, true };
if (std.mem.startsWith(u8, arg, "--iters="))
@@ -98,11 +78,11 @@ pub fn main() !void {
std.Thread.sleep(2e9);
}
break :blk .{
- bench1, bench2, bench3,
- run1, run2, run3,
- check1, check2, check3,
- render1, render2, render3,
- print1, print2, print3,
+ bench3,
+ run3,
+ check3,
+ render3,
+ print3,
iters,
};
};
@@ -112,30 +92,6 @@ pub fn main() !void {
const input = input_arraylist.items;
for (0..iters) |_| {
- if (bench1) {
- const ast1 = blk: {
- const tracy_frame = tracy.namedFrame("parse 1");
- defer tracy_frame.end();
- break :blk try mymarkdown.parse(
- gpa,
- gpa,
- input,
- );
- };
- ast1.deinit(gpa);
- }
- if (bench2) {
- const ast2 = blk: {
- const tracy_frame = tracy.namedFrame("parse 2");
- defer tracy_frame.end();
- break :blk try mymarkdown.parse2(
- gpa,
- gpa,
- input,
- );
- };
- ast2.deinit(gpa);
- }
if (bench3) {
const ast3 = blk: {
const tracy_frame = tracy.namedFrame("parse 3");
@@ -150,25 +106,7 @@ pub fn main() !void {
}
}
- if (!bench1 and !bench2 and !bench3) {
- const ast: mymarkdown.Ast = if (run1) blk: {
- const tracy_frame = tracy.namedFrame("parse 1");
- defer tracy_frame.end();
- break :blk try mymarkdown.parse(
- gpa,
- arena,
- input,
- );
- } else .empty;
- const ast2: mymarkdown.Ast = if (run2) blk: {
- const tracy_frame = tracy.namedFrame("parse 2");
- defer tracy_frame.end();
- break :blk try mymarkdown.parse2(
- gpa,
- arena,
- input,
- );
- } else .empty;
+ if (!bench3) {
const ast3: mymarkdown.Ast = if (run3) blk: {
const tracy_frame = tracy.namedFrame("parse 3");
defer tracy_frame.end();
@@ -179,22 +117,6 @@ pub fn main() !void {
);
} else .empty;
- var render_arraylist1: std.ArrayList(u8) = .init(gpa);
- defer render_arraylist1.deinit();
- if (check1 or render1 or print1) {
- std.debug.print("Rendering 1\n", .{});
- const tracy_frame = tracy.namedFrame("Render 1");
- defer tracy_frame.end();
- _ = try ast.renderAst(render_arraylist1.writer(), input);
- }
- var render_arraylist2: std.ArrayList(u8) = .init(gpa);
- defer render_arraylist2.deinit();
- if (check2 or render2 or print2) {
- std.debug.print("Rendering 2\n", .{});
- const tracy_frame = tracy.namedFrame("Render 2");
- defer tracy_frame.end();
- _ = try ast2.renderAst(render_arraylist2.writer(), input);
- }
var render_arraylist3: std.ArrayList(u8) = .init(gpa);
defer render_arraylist3.deinit();
if (check3 or render3 or print3) {
@@ -203,34 +125,13 @@ pub fn main() !void {
defer tracy_frame.end();
_ = try ast3.renderAst(render_arraylist3.writer(), input);
}
- if (check1 and check3) {
- std.debug.print("check 1 vs 3\n", .{});
- try std.testing.expectEqualStrings(render_arraylist1.items, render_arraylist3.items);
- }
- if (check2 and check3) {
- std.debug.print("check 2 vs 3\n", .{});
- try std.testing.expectEqualStrings(render_arraylist2.items, render_arraylist3.items);
- }
- if (check1 and check2 and !check3) {
- std.debug.print("check 1 vs 2\n", .{});
- try std.testing.expectEqualStrings(render_arraylist1.items, render_arraylist2.items);
- }
+
+ // if (check1 and check3) {
+ // std.debug.print("check 1 vs 3\n", .{});
+ // try std.testing.expectEqualStrings(render_arraylist1.items, render_arraylist3.items);
+ // }
for (0..10) |_| {
- if (render1) {
- std.debug.print("Re-rendering 1\n", .{});
- render_arraylist1.clearRetainingCapacity();
- const tracy_frame = tracy.namedFrame("re-render 1");
- defer tracy_frame.end();
- _ = try ast.renderAst(render_arraylist1.writer(), input);
- }
- if (render2) {
- std.debug.print("Re-rendering 2\n", .{});
- render_arraylist2.clearRetainingCapacity();
- const tracy_frame = tracy.namedFrame("re-render 2");
- defer tracy_frame.end();
- _ = try ast2.renderAst(render_arraylist2.writer(), input);
- }
if (render3) {
std.debug.print("Re-rendering 3\n", .{});
render_arraylist3.clearRetainingCapacity();
@@ -240,8 +141,6 @@ pub fn main() !void {
}
}
- if (print1) try std.io.getStdOut().writeAll(render_arraylist1.items);
- if (print2) try std.io.getStdOut().writeAll(render_arraylist2.items);
if (print3) try std.io.getStdOut().writeAll(render_arraylist3.items);
}
diff --git a/src/root.zig b/src/root.zig
@@ -1,10 +1,6 @@
const std = @import("std");
pub const Ast = @import("Ast.zig");
-pub const AstGen = @import("AstGen.zig");
-pub const parse = AstGen.parse;
-pub const AstGen2 = @import("AstGen2.zig");
-pub const parse2 = AstGen2.parse;
pub const AstGen3 = @import("AstGen3.zig");
pub const parse3 = AstGen3.parse;