mymarkdown

My markdown
git clone https://git.grace.moe/mymarkdown

commit a274d6f4377b2e6de289fcb2c7b2aa0f42d99903
parent 51081b5ac4913f56f762cec8ac5da1b900f66521
Author: gracefu <81774659+gracefuu@users.noreply.github.com>
Date:   Fri, 16 May 2025 05:36:40 +0800

v0

Diffstat:
A .gitignore          |   1 +
A build.zig           |  63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A build.zig.zon       |  16 ++++++++++++++++
A src/Ast.zig         | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/AstGen.zig      | 559 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/AstGen/test.zig | 346 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/main.zig        |  22 ++++++++++++++++++++++
A src/root.zig        |  40 ++++++++++++++++++++++++++++++++++++++++
A src/str.zig         |  95 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/utils.zig       | 126 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10 files changed, 1504 insertions(+), 0 deletions(-)
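
Not part of the commit, but for orientation before the diff: the block_specs table in src/AstGen.zig below recognizes '#' headings, '>' quotes, '+' elaborations, '***' thematic breaks, and '-', '- [ ]', '.', ':' list items, with indentation continuing a block. A small sketch in the style of src/AstGen/test.zig (it assumes that file's imports; exact node layouts are covered by the real tests there):

const std = @import("std");
const parse = @import("../AstGen.zig").parse; // same import as src/AstGen/test.zig

test "orientation: one of each block marker parses without diagnostics" {
    var arena: std.heap.ArenaAllocator = .init(std.testing.allocator);
    defer arena.deinit();
    // Scratch allocations use the first allocator; the returned Ast slices are
    // duplicated into the second (here: the arena), as in testParse below.
    const ast = try parse(std.testing.allocator, arena.allocator(),
        \\# heading
        \\
        \\> quoted paragraph
        \\
        \\- unordered item
        \\- [ ] task item
        \\. ordered item
        \\: term item
        \\
        \\***
        \\
    );
    try std.testing.expectEqual(0, ast.errors.len);
}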

diff --git a/.gitignore b/.gitignore @@ -0,0 +1 @@ +zig-out diff --git a/build.zig b/build.zig @@ -0,0 +1,63 @@ +const std = @import("std"); + +pub fn build(b: *std.Build) void { + const target = b.standardTargetOptions(.{}); + const optimize = b.standardOptimizeOption(.{}); + + const mymarkdown = b.addModule("mymarkdown", .{ + .root_source_file = b.path("src/root.zig"), + .target = target, + .optimize = optimize, + }); + mymarkdown.addImport("ziggy", b.dependency("ziggy", .{}).module("ziggy")); + const mymarkdown_cli = b.addModule("mymarkdown", .{ + .root_source_file = b.path("src/main.zig"), + .target = target, + .optimize = optimize, + }); + mymarkdown_cli.addImport("mymarkdown", mymarkdown); + + const mymarkdown_cli_compile = b.addExecutable(.{ + .name = "mymarkdown", + .root_module = mymarkdown_cli, + }); + b.installArtifact(mymarkdown_cli_compile); + + const check = b.step("check", "Check if the mymarkdown CLI compiles"); + check.dependOn(&mymarkdown_cli_compile.step); + + setupTestStep(b, target, optimize, mymarkdown, mymarkdown_cli, check); + setupRunStep(b, mymarkdown_cli_compile); +} + +fn setupTestStep( + b: *std.Build, + target: std.Build.ResolvedTarget, + optimize: std.builtin.OptimizeMode, + mymarkdown: *std.Build.Module, + mymarkdown_cli: *std.Build.Module, + check: *std.Build.Step, +) void { + const test_step = b.step("test", "Run unit tests"); + test_step.dependOn(check); + test_step.dependOn(&b.addRunArtifact(b.addTest(.{ + .root_module = mymarkdown, + .target = target, + .optimize = optimize, + })).step); + test_step.dependOn(&b.addRunArtifact(b.addTest(.{ + .root_module = mymarkdown_cli, + .target = target, + .optimize = optimize, + })).step); +} + +fn setupRunStep( + b: *std.Build, + mymarkdown_cli_compile: *std.Build.Step.Compile, +) void { + const run_exe = b.addRunArtifact(mymarkdown_cli_compile); + if (b.args) |args| run_exe.addArgs(args); + const run_exe_step = b.step("run", "Run the mymarkdown CLI"); + run_exe_step.dependOn(&run_exe.step); +} diff --git a/build.zig.zon b/build.zig.zon @@ -0,0 +1,16 @@ +.{ + .name = .mymarkdown, + .version = "0.0.0", + .fingerprint = 0x680fc5b268bbdd89, // Changing this has security and trust implications. 
+ .minimum_zig_version = "0.14.0", + .dependencies = .{ + .ziggy = .{ + .path = "../../../manual-software/ziggy", + }, + }, + .paths = .{ + "build.zig", + "build.zig.zon", + "src", + }, +} diff --git a/src/Ast.zig b/src/Ast.zig @@ -0,0 +1,236 @@ +const std = @import("std"); +const ziggy = @import("ziggy"); +const utils = @import("utils.zig"); +const Allocator = std.mem.Allocator; +const Ast = @This(); + +nodes: []const Node, +errors: []const Error, +extra: []const u32, + +pub const empty: Ast = .{ .nodes = &.{}, .errors = &.{}, .extra = &.{} }; + +fn ZiggyFormat(comptime T: type, opts: ziggy.serializer.StringifyOptions) type { + return struct { + pub fn format(self: T, comptime _: []const u8, _: anytype, writer: anytype) !void { + _ = try writer.writeAll("."); + try ziggy.stringify(self, opts, writer); + } + }; +} + +fn ziggyFormat(comptime T: type, opts: ziggy.serializer.StringifyOptions) @TypeOf(ZiggyFormat(T, opts).format) { + return ZiggyFormat(T, opts).format; +} + +fn UnionFormat(comptime T: type) type { + return struct { + pub fn format(self: T, comptime _: []const u8, _: anytype, writer: anytype) !void { + const info = @typeInfo(T).@"union"; + if (info.tag_type) |UnionTagType| { + try writer.writeAll(".{ ."); + try writer.writeAll(@tagName(@as(UnionTagType, self))); + try writer.writeAll(" = "); + inline for (info.fields) |u_field| { + if (self == @field(UnionTagType, u_field.name)) { + try writer.print("{}", .{@field(self, u_field.name)}); + } + } + try writer.writeAll(" }"); + } else { + try writer.print("@{x}", .{@intFromPtr(&self)}); + } + } + }; +} + +fn unionFormat(comptime T: type) @TypeOf(UnionFormat(T).format) { + return UnionFormat(T).format; +} + +fn StructFormat(comptime T: type) type { + return struct { + pub fn format(value: T, comptime actual_fmt: []const u8, _: anytype, writer: anytype) !void { + const info = @typeInfo(T).@"struct"; + if (actual_fmt.len != 0) std.fmt.invalidFmtError(actual_fmt, value); + if (info.is_tuple) { + // Skip the type and field names when formatting tuples. + try writer.writeAll(".{"); + inline for (info.fields, 0..) |f, i| { + if (i == 0) { + try writer.writeAll(" "); + } else { + try writer.writeAll(", "); + } + try writer.print("{}", .{@field(value, f.name)}); + } + return writer.writeAll(" }"); + } + try writer.writeAll(".{"); + inline for (info.fields, 0..) |f, i| { + if (i == 0) { + try writer.writeAll(" ."); + } else { + try writer.writeAll(", ."); + } + try writer.writeAll(f.name); + try writer.writeAll(" = "); + try writer.print("{}", .{@field(value, f.name)}); + } + try writer.writeAll(" }"); + } + }; +} + +fn structFormat(comptime T: type) @TypeOf(StructFormat(T).format) { + return StructFormat(T).format; +} + +pub const StrOffset = u32; +pub const StrLen = u24; + +pub const Node = utils.Packed(union(enum(u8)) { + document: Root, + marker: Leaf, // First child of nodes like heading, list items, ... 
+ + thematic_break: Leaf, + heading: Container, + quote: Container, + paragraph: Container, + unordered_item: Container, + ordered_item: Container, + term_item: Container, + task_item: Container, + elaboration: Container, + + text: Leaf, + space_text: Leaf, // text with 1 space added before it + + pub const Idx = utils.NewType(u24, opaque {}); + pub const Root = packed struct { + num_children: u24 = 0, + pub const format = structFormat(@This()); + }; + pub const Container = packed struct { + off: StrOffset, + num_children: u24 = 0, + pub const format = structFormat(@This()); + }; + pub const Leaf = packed struct { + off: StrOffset, + len: StrLen, + const num_children = 0; + pub const format = structFormat(@This()); + }; + + pub fn incrementNumChildren(self: *Node) void { + switch (self.tag) { + inline else => |t| { + if (@TypeOf(@field(self.data, @tagName(t))) == Container or @TypeOf(@field(self.data, @tagName(t))) == Root) { + @field(self.data, @tagName(t)).num_children += 1; + } else unreachable; + }, + } + } + + pub const format = unionFormat(@This()); +}); + +pub const Error = utils.Packed(union(enum(u8)) { + marker_too_long: NodeError, + invalid_marker: PointError, + empty_line_in_inline_block: PointError, + inconsistent_indentation: PointError, + + /// Used when the error diagnostic spans the entire node + pub const NodeError = packed struct { + idx: Node.Idx, + + pub const format = structFormat(@This()); + }; + + /// Used when the error diagnostic should point at a single location + pub const PointError = packed struct { + idx: Node.Idx, + off: StrOffset, + + pub const format = structFormat(@This()); + }; + pub const Idx = utils.NewType(u24, opaque {}); + + pub const format = unionFormat(@This()); +}); + +test "Tracking size of Node struct" { + try std.testing.expectEqual(24, @bitSizeOf(Node.Idx)); + try std.testing.expectEqual(4, @sizeOf(Node.Idx)); + try std.testing.expectEqual(64, @bitSizeOf(Node)); + try std.testing.expectEqual(8, @sizeOf(Node)); +} + +test "Tracking size of Error struct" { + try std.testing.expectEqual(24, @bitSizeOf(Error.Idx)); + try std.testing.expectEqual(4, @sizeOf(Error.Idx)); + try std.testing.expectEqual(64, @bitSizeOf(Error)); + try std.testing.expectEqual(8, @sizeOf(Error)); +} + +pub const format = ziggyFormat(@This(), .{ + .whitespace = .space_2, + .omit_top_level_curly = false, +}); + +pub const Tagged = struct { + nodes: []const Node.Tagged, + errors: []const Error.Tagged, + extra: []const u32, + + pub const empty: Tagged = .{ .nodes = &.{}, .errors = &.{}, .extra = &.{} }; +}; +pub fn toTagged(self: Ast, gpa: Allocator) !Tagged { + const nodes = try gpa.alloc(Node.Tagged, self.nodes.len); + const errors = try gpa.alloc(Error.Tagged, self.errors.len); + const extra = try gpa.dupe(u32, self.extra); + for (self.nodes, nodes) |node, *out| out.* = node.toTagged(); + for (self.errors, errors) |err, *out| out.* = err.toTagged(); + return .{ .nodes = nodes, .errors = errors, .extra = extra }; +} + +pub fn render(self: Ast, writer: anytype, input: []const u8, start_: ?Node.Idx) !?Node.Idx { + const start: Node.Idx = start_ orelse @enumFromInt(0); + switch (self.nodes[@intFromEnum(start)].tag) { + .document => try writer.writeAll("<body>\n"), + .paragraph => try writer.writeAll("<p>"), + .text => { + const data: Node.Leaf = self.nodes[@intFromEnum(start)].data.text; + try writer.writeAll(input[data.off .. 
data.off + data.len]); + }, + .space_text => { + const data: Node.Leaf = self.nodes[@intFromEnum(start)].data.text; + try writer.writeByte(' '); + try writer.writeAll(input[data.off .. data.off + data.len]); + }, + else => unreachable, + } + var cur_idx: ?Node.Idx = start.next(); + switch (self.nodes[@intFromEnum(start)].tag) { + inline .document, .paragraph => |t| { + const data = @field(self.nodes[@intFromEnum(start)].data, @tagName(t)); + for (0..data.num_children) |_| { + if (cur_idx) |idx| { + cur_idx = try self.render(writer, input, idx); + } else { + unreachable; + } + } + }, + else => {}, + } + switch (self.nodes[@intFromEnum(start)].tag) { + .document => try writer.writeAll("</body>\n"), + .paragraph => try writer.writeAll("</p>\n"), + .text, .space_text => {}, + else => unreachable, + } + return cur_idx; +} diff --git a/src/AstGen.zig b/src/AstGen.zig @@ -0,0 +1,559 @@ +const std = @import("std"); +const ziggy = @import("ziggy"); +const utils = @import("utils.zig"); +const str = @import("str.zig"); +const ArenaAllocator = std.heap.ArenaAllocator; +const Allocator = std.mem.Allocator; +const AstGen = @This(); +const Ast = @import("Ast.zig"); +const Node = Ast.Node; +const Error = Ast.Error; + +input_base: [*]u8, +input: []u8, +nodes: std.ArrayListUnmanaged(Node), +errors: std.ArrayListUnmanaged(Error), +extra: std.ArrayListUnmanaged(u32), + +fn getNode(self: AstGen, idx: Node.Idx) *Node { + @setRuntimeSafety(true); + return &self.nodes.items[@intFromEnum(idx)]; +} +fn lastNodeIdx(self: AstGen) Node.Idx { + @setRuntimeSafety(true); + return @enumFromInt(self.nodes.items.len - 1); +} +fn nextNodeIdx(self: AstGen) Node.Idx { + @setRuntimeSafety(true); + return @enumFromInt(self.nodes.items.len); +} +fn appendNode(self: *AstGen, gpa: Allocator, node: Node.Tagged) !Node.Idx { + { + @setRuntimeSafety(true); + if (self.nodes.items.len > std.math.maxInt( + @typeInfo(Node.Idx).@"enum".tag_type, + )) unreachable; + } + const idx = self.nodes.items.len; + try self.nodes.append(gpa, .fromTagged(node)); + return @enumFromInt(idx); +} + +pub fn deinit(self: *AstGen, gpa: Allocator) void { + self.nodes.deinit(gpa); + self.errors.deinit(gpa); + self.extra.deinit(gpa); +} + +pub fn parse(gpa: Allocator, output_gpa: ?Allocator, input: []const u8) error{ InputTooLarge, OutOfMemory }!Ast { + if (input.len > std.math.maxInt(u32) - 1) { + return error.InputTooLarge; + } + + // const input_copy = input; + // const input_copy = try gpa.dupe(u8, input); + // defer gpa.free(input_copy); + var input_copy_arraylist: std.ArrayListUnmanaged(u8) = .empty; + defer input_copy_arraylist.deinit(gpa); + try input_copy_arraylist.ensureTotalCapacityPrecise(gpa, input.len + 2); + + var ast: AstGen = .{ + .input_base = input_copy_arraylist.items.ptr, + .input = undefined, + .nodes = .empty, + .errors = .empty, + .extra = .empty, + }; + defer ast.deinit(gpa); + const root = try ast.appendNode(gpa, .{ .document = .{} }); + + var lines: std.ArrayListUnmanaged(Ast.StrOffset) = .empty; + defer lines.deinit(gpa); + // var lines: std.ArrayListUnmanaged([]u8) = .empty; + // defer lines.deinit(gpa); + + var lines_it = std.mem.splitScalar(u8, input, '\n'); + var maybe_line: ?[]u8 = @constCast(lines_it.first()); + while (maybe_line) |line| : (maybe_line = @constCast(lines_it.next())) { + if (str.lastIndexOfNone(line, " \t\r\n")) |idx| { + const old_len = input_copy_arraylist.items.len; + try lines.append(gpa, @intCast(old_len)); + input_copy_arraylist.appendSliceAssumeCapacity(line); + 
input_copy_arraylist.appendAssumeCapacity('\n'); + input_copy_arraylist.items[old_len + idx + 1] = '\n'; + // try lines.append(gpa, input_copy_arraylist.items[old_len .. old_len + idx + 1]); + } else { + try lines.append(gpa, @intCast(input_copy_arraylist.items.len)); + input_copy_arraylist.appendAssumeCapacity('\n'); + // try lines.append(gpa, &.{}); + } + } + input_copy_arraylist.appendAssumeCapacity('\n'); + ast.input = input_copy_arraylist.items; + // stripTrailingWhitespace(&lines.items); + + try ast.parseColumn(gpa, lines.items, root); + + // std.time.sleep(std.time.ns_per_hour); + + if (output_gpa) |gpa2| { + return .{ + .nodes = try gpa2.dupe(Node, ast.nodes.items), + .errors = try gpa2.dupe(Error, ast.errors.items), + .extra = try gpa2.dupe(u32, ast.extra.items), + }; + } else { + return .{ + .nodes = try ast.nodes.toOwnedSlice(gpa), + .errors = try ast.errors.toOwnedSlice(gpa), + .extra = try ast.extra.toOwnedSlice(gpa), + }; + } +} + +fn stripTrailingWhitespace(lines: *[][]u8) void { + for (lines.*) |*line| { + if (str.lastIndexOfNone(line.*, " \t\r\n")) |idx| { + line.* = line.*[0 .. idx + 1]; + } else { + line.* = line.*[0..0]; + } + } +} + +fn calcOffset(self: *AstGen, c: *u8) u32 { + return @intCast(c - self.input_base); +} + +fn findIndentedColumn(self: *AstGen, gpa: Allocator, lines_: []u32, node_idx: Node.Idx) ![]u32 { + var lines = lines_; + + // empty lines at the start of the inline block are fine, just skip these + // special case: the first line consist of only whitespace + // because they may have been introduced via marker replacement + if (lines.len > 0) + if (str.indexOfNone(self.input[lines[0]..], " \t\r")) |idx| + if (self.input[lines[0] + idx] == '\n') { + lines = lines[1..]; + while (true) : (lines = lines[1..]) { + if (lines.len == 0) return &.{}; + if (self.input[lines[0]] != '\n') break; + } + }; + if (lines.len == 0) return &.{}; + + // determine indentation + const indentation_idx = str.indexOfNone(self.input[lines[0]..], " \t\r") orelse unreachable; + if (indentation_idx == 0) return &.{}; + const indentation = self.input[lines[0] .. 
lines[0] + indentation_idx]; + + // strip all lines of their indentation + lines[0] += @truncate(indentation.len); + for (lines[1..]) |*line| { + if (self.input[line.*] == '\n') continue; + + const diff_idx = std.mem.indexOfDiff(u8, self.input[line.*..], indentation) orelse unreachable; + // std.debug.assert(diff_idx != line.len); + if (diff_idx != indentation.len) { + try self.errors.append(gpa, .fromTagged(.{ + .inconsistent_indentation = .{ .idx = node_idx, .off = line.* }, + })); + // Recover by stripping all whitespace on this line + const recover_indentation_idx = std.mem.indexOfNone(u8, self.input[line.*..], " \t\r") orelse unreachable; + line.* += @truncate(recover_indentation_idx); + } else { + line.* += @truncate(indentation.len); + } + } + + return lines; +} + +fn parseInlineBlock(self: *AstGen, gpa: Allocator, lines_: []Ast.StrOffset, parent_idx: Node.Idx) !void { + var lines = lines_; + var empty_line_off: ?u32 = null; + + outer: { + // empty lines at the start of the inline block are fine, just skip these + // special case: the first line consist of only whitespace + // because they may have been introduced via marker replacement + if (lines.len > 0) + if (str.indexOfNone(self.input[lines[0]..], " \t\r")) |idx| + if (self.input[lines[0] + idx] == '\n') { + lines = lines[1..]; + while (true) : (lines = lines[1..]) { + if (lines.len == 0) break :outer; + if (self.input[lines[0]] != '\n') break; + } + }; + if (lines.len == 0) break :outer; + + self.getNode(parent_idx).incrementNumChildren(); + + // determine indentation + const indentation_idx = str.indexOfNone(self.input[lines[0]..], " \t\r") orelse unreachable; + const indentation = self.input[lines[0] .. lines[0] + indentation_idx]; + + lines[0] += @truncate(indentation.len); + // lines[0] = lines[0][indentation.len..]; + + var len = str.indexOfChar(self.input[lines[0]..], '\n') orelse unreachable; + if (len <= std.math.maxInt(Ast.StrLen)) { + _ = try self.appendNode(gpa, .{ + .text = .{ + .off = lines[0], + .len = @truncate(len), + }, + }); + } else { + @branchHint(.cold); + while (len > 0) { + const chunk_len = @min(len, std.math.maxInt(Ast.StrLen)); + _ = try self.appendNode(gpa, .{ + .text = .{ + .off = lines[0], + .len = chunk_len, + }, + }); + lines[0] += chunk_len; + len -= chunk_len; + } + } + lines = lines[1..]; + + while (true) { + // Skip and error on empty lines + while (true) : (lines = lines[1..]) { + if (lines.len == 0) break :outer; + if (self.input[lines[0]] != '\n') break; + // empty line detected + empty_line_off = lines[0]; + } + + if (empty_line_off) |off| { + try self.errors.append(gpa, .fromTagged(.{ + .empty_line_in_inline_block = .{ .idx = self.nextNodeIdx(), .off = off }, + })); + } + + const diff_idx = std.mem.indexOfDiff(u8, self.input[lines[0]..], indentation) orelse unreachable; + // std.debug.assert(diff_idx != lines[0].len); + if (diff_idx != indentation.len) { + try self.errors.append(gpa, .fromTagged(.{ + .inconsistent_indentation = .{ .idx = self.nextNodeIdx(), .off = lines[0] }, + })); + // Recover by stripping all whitespace on this line + const recover_indentation_idx = std.mem.indexOfNone(u8, self.input[lines[0]..], " \t\r\n") orelse unreachable; + lines[0] += @truncate(recover_indentation_idx); + } else { + lines[0] += @truncate(indentation.len); + } + + self.getNode(parent_idx).incrementNumChildren(); + + var len2 = str.indexOfChar(self.input[lines[0]..], '\n') orelse unreachable; + if (len2 <= std.math.maxInt(Ast.StrLen)) { + _ = try self.appendNode(gpa, .{ + .space_text = .{ + .off = 
lines[0], + .len = @truncate(len2), + }, + }); + } else { + @branchHint(.cold); + _ = try self.appendNode(gpa, .{ + .space_text = .{ + .off = lines[0], + .len = std.math.maxInt(Ast.StrLen), + }, + }); + len2 -= std.math.maxInt(Ast.StrLen); + lines[0] += std.math.maxInt(Ast.StrLen); + while (len2 > 0) { + const chunk_len = @min(len2, std.math.maxInt(Ast.StrLen)); + _ = try self.appendNode(gpa, .{ + .text = .{ + .off = lines[0], + .len = chunk_len, + }, + }); + lines[0] += chunk_len; + len2 -= chunk_len; + } + } + lines = lines[1..]; + } + } +} + +fn parseColumn(self: *AstGen, gpa: Allocator, lines_: []Ast.StrOffset, parent_idx: Node.Idx) !void { + var lines = lines_; + outer: while (true) { + // Skip empty lines + // special case: the first line consist of only whitespace + // because they may have been introduced via marker replacement + if (lines.len > 0) { + if (str.indexOfNone(self.input[lines[0]..], " \t\r")) |idx| { + if (self.input[lines[0] + idx] == '\n') { + lines = lines[1..]; + while (true) : (lines = lines[1..]) { + if (lines.len == 0) break :outer; + if (self.input[lines[0]] != '\n') break; + } + } + } + } + if (lines.len == 0) break :outer; + + // Use first character to determine marker + const mode, const child = try self.parseBlockStart(gpa, lines[0]); + self.getNode(parent_idx).incrementNumChildren(); + + switch (mode) { + .paragraph => { + // take indented or non-block-marker lines + var num_lines: usize = 1; + for (lines[1..]) |line| { + if (self.input[line] == '\n') break; + if (block_specs[self.input[line]] != null) break; + num_lines += 1; + } + + const paragraph_lines = lines[0..num_lines]; + lines = lines[num_lines..]; + try self.parseInlineBlock(gpa, paragraph_lines, child); + }, + .indented_inline_block => { + // take indented or empty lines + var num_lines: usize = 1; + for (lines[1..]) |line| { + if (str.isNoneOf(self.input[line], " \t\r\n")) break; + num_lines += 1; + } + + const inline_block_lines = try self.findIndentedColumn(gpa, lines[0..num_lines], child); + lines = lines[num_lines..]; + try self.parseInlineBlock(gpa, inline_block_lines, child); + }, + .indented_column => { + // take indented or empty lines + var num_lines: usize = 1; + for (lines[1..]) |line| { + if (str.isNoneOf(self.input[line], " \t\r\n")) break; + num_lines += 1; + } + + const column_lines = try self.findIndentedColumn(gpa, lines[0..num_lines], child); + lines = lines[num_lines..]; + try self.parseColumn(gpa, column_lines, child); + }, + .no_children => { + lines = lines[1..]; + }, + else => unreachable, + } + } +} + +const ParseMode = union(enum) { + indented_column, + indented_inline_block, + paragraph, + raw: struct { fence: []u8 }, + no_children, +}; + +const MarkerSpec = union(enum) { + exact: []const u8, + starts_with: []const u8, + starts_with_multi: struct { + marker_char: u8, + extra: []const []const u8 = &.{""}, // any extra characters to check after the marker + max_chars: ?u32 = null, + }, +}; +const BlockSpecCase = struct { + tag: Node.Tag, + marker: MarkerSpec, + mode: ParseMode, + store_marker_child: enum { store, no_store }, +}; + +const BlockSpec = ?[]const BlockSpecCase; + +fn blockSpecs(comptime spec: type) [256]BlockSpec { + var arr: [256]BlockSpec = undefined; + for (0..256) |c| arr[c] = null; + for (@typeInfo(spec).@"struct".decls) |decl| { + const c = decl.name[0]; + arr[c] = @field(spec, decl.name); + } + return arr; +} + +const block_specs = blockSpecs(struct { + pub const @"*": BlockSpec = &.{ + .{ + .tag = .thematic_break, + .marker = .{ .exact = "***" }, + 
.mode = .no_children, + .store_marker_child = .no_store, + }, + }; + pub const @"#": BlockSpec = &.{ + .{ + .tag = .heading, + .marker = .{ .starts_with_multi = .{ .marker_char = '#', .max_chars = 6 } }, + .mode = .indented_inline_block, + .store_marker_child = .store, + }, + }; + pub const @"-": BlockSpec = &.{ + .{ + .tag = .task_item, + .marker = .{ .starts_with_multi = .{ .marker_char = '-', .extra = &.{ " [ ]", " [x]", " [X]" } } }, + .mode = .indented_inline_block, + .store_marker_child = .store, + }, + .{ + .tag = .unordered_item, + .marker = .{ .starts_with_multi = .{ .marker_char = '-' } }, + .mode = .indented_inline_block, + .store_marker_child = .store, + }, + }; + pub const @".": BlockSpec = &.{ + .{ + .tag = .ordered_item, + .marker = .{ .starts_with_multi = .{ .marker_char = '.' } }, + .mode = .indented_inline_block, + .store_marker_child = .store, + }, + }; + pub const @":": BlockSpec = &.{ + .{ + .tag = .term_item, + .marker = .{ .starts_with_multi = .{ .marker_char = ':' } }, + .mode = .indented_inline_block, + .store_marker_child = .store, + }, + }; + pub const @">": BlockSpec = &.{ + .{ + .tag = .quote, + .marker = .{ .starts_with = ">" }, + .mode = .indented_column, + .store_marker_child = .no_store, + }, + }; + pub const @"+": BlockSpec = &.{ + .{ + .tag = .elaboration, + .marker = .{ .starts_with = "+" }, + .mode = .indented_column, + .store_marker_child = .no_store, + }, + }; +}); + +/// Appends the suitable block node to the ast, +/// then returns how parsing should proceed for the children of this block. +/// Also returns the idx of the container node created. +fn parseBlockStart(self: *AstGen, gpa: Allocator, line: Ast.StrOffset) !struct { ParseMode, Node.Idx } { + switch (self.input[line]) { + inline else => |c| { + const spec_or_null = block_specs[c]; + if (spec_or_null) |spec| { + inline for (spec) |case| { + switch (case.marker) { + .exact, .starts_with => |marker| { + if (std.mem.startsWith(u8, self.input[line..], marker)) { + const node = if (case.mode == .no_children) try self.appendNode(gpa, @unionInit(Node.Tagged, @tagName(case.tag), @as(Node.Tagged.Leaf, .{ + .off = line, + .len = marker.len, + }))) else try self.appendNode(gpa, @unionInit(Node.Tagged, @tagName(case.tag), @as(Node.Tagged.Container, .{ + .off = line, + .num_children = if (case.store_marker_child == .store) 1 else 0, + }))); + @memset(self.input[line .. line + marker.len], ' '); + if (case.store_marker_child == .store) { + _ = try self.appendNode(gpa, .{ .marker = .{ + .off = line, + .len = case.marker.len, + } }); + } + return .{ case.mode, node }; + } + }, + .starts_with_multi => |marker_spec| { + var marker_len = str.indexOfNotChar(self.input[line..], marker_spec.marker_char) orelse str.indexOfChar(self.input[line..], '\n') orelse unreachable; + + inline for (marker_spec.extra) |extra| { + if (std.mem.startsWith(u8, self.input[line + marker_len ..], extra)) { + marker_len += extra.len; + + const node = try self.appendNode(gpa, @unionInit(Node.Tagged, @tagName(case.tag), @as(Node.Tagged.Container, .{ + .off = line, + .num_children = if (case.store_marker_child == .store) 1 else 0, + }))); + + if (marker_spec.max_chars) |max| + if (marker_len > max) + try self.errors.append(gpa, .fromTagged(.{ + .marker_too_long = .{ + .idx = if (case.store_marker_child == .no_store) + self.lastNodeIdx() + else + self.nextNodeIdx(), + }, + })); + + @memset(self.input[line .. 
line + marker_len], ' '); + if (case.store_marker_child == .store) { + _ = try self.appendNode(gpa, .{ .marker = .{ + .off = line, + .len = utils.safeIntCast(Ast.StrLen, marker_len), + } }); + } + return .{ case.mode, node }; + } + } + }, + } + } + } else { + // Default behaviour is to parse a paragraph until the next newline or block character + return .{ + .paragraph, + try self.appendNode(gpa, .{ + .paragraph = .{ + .off = line, + }, + }), + }; + } + }, + } + + // Line started with a special character, but it didn't match any markers + // Fallback to paragraph, but place a warning. + try self.errors.append(gpa, .fromTagged(.{ + .invalid_marker = .{ + .idx = self.nextNodeIdx(), + .off = line, + }, + })); + + return .{ + .paragraph, + try self.appendNode(gpa, .{ + .paragraph = .{ + .off = line, + }, + }), + }; +} + +test { + _ = @import("AstGen/test.zig"); +} diff --git a/src/AstGen/test.zig b/src/AstGen/test.zig @@ -0,0 +1,346 @@ +const std = @import("std"); +const parse = @import("../AstGen.zig").parse; +const Ast = @import("../Ast.zig"); + +const GeneralPurposeAllocator = std.heap.GeneralPurposeAllocator(.{}); +const ArenaAllocator = std.heap.ArenaAllocator; + +fn testParse(input: []const u8, expected: Ast.Tagged) !void { + var arena: ArenaAllocator = .init(std.testing.allocator); + defer arena.deinit(); + const ast = try parse(std.testing.allocator, arena.allocator(), input); + const tagged_ast = try ast.toTagged(arena.allocator()); + // try std.testing.expectEqualDeep(expected.nodes.len, tagged_ast.nodes.len); + try std.testing.expectEqualDeep(expected, tagged_ast); +} + +test "Empty" { + try testParse("", .{ + .nodes = &.{ + .{ .document = .{ .num_children = 0 } }, + }, + .errors = &.{}, + .extra = &.{}, + }); +} + +test "Happy path paragraph" { + try testParse( + \\text + \\ + \\text + \\text + \\ + \\text + \\ text + \\ + , .{ + .nodes = &.{ + .{ .document = .{ .num_children = 3 } }, + .{ .paragraph = .{ .off = 0, .num_children = 1 } }, + .{ .text = .{ .off = 0, .len = 4 } }, + .{ .paragraph = .{ .off = 6, .num_children = 2 } }, + .{ .text = .{ .off = 6, .len = 4 } }, + .{ .space_text = .{ .off = 11, .len = 4 } }, + .{ .paragraph = .{ .off = 17, .num_children = 2 } }, + .{ .text = .{ .off = 17, .len = 4 } }, + .{ .space_text = .{ .off = 22, .len = 7 } }, + }, + .errors = &.{}, + .extra = &.{}, + }); +} + +test "Happy path headings" { + try testParse( + \\# text + \\# text + \\# text + \\ text + \\ + \\# text + \\ + \\# text + \\ text + \\ + \\# text + \\ text + \\ + \\## text + \\## text + \\## text + \\ text + \\ + \\## text + \\ + \\## text + \\ text + \\ + \\## text + \\ text + \\ + , .{ + .nodes = &.{ + .{ .document = .{ .num_children = 12 } }, + .{ .heading = .{ .off = 0, .num_children = 2 } }, + .{ .marker = .{ .off = 0, .len = 1 } }, + .{ .text = .{ .off = 2, .len = 4 } }, + .{ .heading = .{ .off = 7, .num_children = 2 } }, + .{ .marker = .{ .off = 7, .len = 1 } }, + .{ .text = .{ .off = 9, .len = 4 } }, + .{ .heading = .{ .off = 14, .num_children = 3 } }, + .{ .marker = .{ .off = 14, .len = 1 } }, + .{ .text = .{ .off = 16, .len = 4 } }, + .{ .space_text = .{ .off = 23, .len = 4 } }, + .{ .heading = .{ .off = 29, .num_children = 2 } }, + .{ .marker = .{ .off = 29, .len = 1 } }, + .{ .text = .{ .off = 31, .len = 4 } }, + .{ .heading = .{ .off = 37, .num_children = 3 } }, + .{ .marker = .{ .off = 37, .len = 1 } }, + .{ .text = .{ .off = 39, .len = 4 } }, + .{ .space_text = .{ .off = 46, .len = 4 } }, + .{ .heading = .{ .off = 52, .num_children = 3 } }, + .{ .marker = .{ .off 
= 52, .len = 1 } }, + .{ .text = .{ .off = 54, .len = 4 } }, + .{ .space_text = .{ .off = 61, .len = 6 } }, + .{ .heading = .{ .off = 69, .num_children = 2 } }, + .{ .marker = .{ .off = 69, .len = 2 } }, + .{ .text = .{ .off = 72, .len = 4 } }, + .{ .heading = .{ .off = 77, .num_children = 2 } }, + .{ .marker = .{ .off = 77, .len = 2 } }, + .{ .text = .{ .off = 80, .len = 4 } }, + .{ .heading = .{ .off = 85, .num_children = 3 } }, + .{ .marker = .{ .off = 85, .len = 2 } }, + .{ .text = .{ .off = 88, .len = 4 } }, + .{ .space_text = .{ .off = 96, .len = 4 } }, + .{ .heading = .{ .off = 102, .num_children = 2 } }, + .{ .marker = .{ .off = 102, .len = 2 } }, + .{ .text = .{ .off = 105, .len = 4 } }, + .{ .heading = .{ .off = 111, .num_children = 3 } }, + .{ .marker = .{ .off = 111, .len = 2 } }, + .{ .text = .{ .off = 114, .len = 4 } }, + .{ .space_text = .{ .off = 122, .len = 4 } }, + .{ .heading = .{ .off = 128, .num_children = 3 } }, + .{ .marker = .{ .off = 128, .len = 2 } }, + .{ .text = .{ .off = 131, .len = 4 } }, + .{ .space_text = .{ .off = 139, .len = 6 } }, + }, + .errors = &.{}, + .extra = &.{}, + }); +} + +test "Happy path quote" { + try testParse( + \\> text + \\ text + \\ + \\> text + \\ text + \\> text + \\> text + \\text + \\ + , .{ + .nodes = &.{ + .{ .document = .{ .num_children = 5 } }, + .{ .quote = .{ .off = 0, .num_children = 1 } }, + .{ .paragraph = .{ .off = 2, .num_children = 2 } }, + .{ .text = .{ .off = 2, .len = 4 } }, + .{ .space_text = .{ .off = 9, .len = 4 } }, + .{ .quote = .{ .off = 15, .num_children = 1 } }, + .{ .paragraph = .{ .off = 17, .num_children = 2 } }, + .{ .text = .{ .off = 17, .len = 4 } }, + .{ .space_text = .{ .off = 24, .len = 6 } }, + .{ .quote = .{ .off = 31, .num_children = 1 } }, + .{ .paragraph = .{ .off = 33, .num_children = 1 } }, + .{ .text = .{ .off = 33, .len = 4 } }, + .{ .quote = .{ .off = 38, .num_children = 1 } }, + .{ .paragraph = .{ .off = 40, .num_children = 1 } }, + .{ .text = .{ .off = 40, .len = 4 } }, + .{ .paragraph = .{ .off = 45, .num_children = 1 } }, + .{ .text = .{ .off = 45, .len = 4 } }, + }, + .errors = &.{}, + .extra = &.{}, + }); +} + +test "Happy path list" { + try testParse( + \\- text + \\- [ ] text + \\. text + \\: text + \\-- text + \\-- [ ] text + \\.. 
text + \\:: text + \\ + , .{ + .nodes = &.{ + .{ .document = .{ .num_children = 8 } }, + .{ .unordered_item = .{ .off = 0, .num_children = 2 } }, + .{ .marker = .{ .off = 0, .len = 1 } }, + .{ .text = .{ .off = 2, .len = 4 } }, + .{ .task_item = .{ .off = 7, .num_children = 2 } }, + .{ .marker = .{ .off = 7, .len = 5 } }, + .{ .text = .{ .off = 13, .len = 4 } }, + .{ .ordered_item = .{ .off = 18, .num_children = 2 } }, + .{ .marker = .{ .off = 18, .len = 1 } }, + .{ .text = .{ .off = 20, .len = 4 } }, + .{ .term_item = .{ .off = 25, .num_children = 2 } }, + .{ .marker = .{ .off = 25, .len = 1 } }, + .{ .text = .{ .off = 27, .len = 4 } }, + .{ .unordered_item = .{ .off = 32, .num_children = 2 } }, + .{ .marker = .{ .off = 32, .len = 2 } }, + .{ .text = .{ .off = 35, .len = 4 } }, + .{ .task_item = .{ .off = 40, .num_children = 2 } }, + .{ .marker = .{ .off = 40, .len = 6 } }, + .{ .text = .{ .off = 47, .len = 4 } }, + .{ .ordered_item = .{ .off = 52, .num_children = 2 } }, + .{ .marker = .{ .off = 52, .len = 2 } }, + .{ .text = .{ .off = 55, .len = 4 } }, + .{ .term_item = .{ .off = 60, .num_children = 2 } }, + .{ .marker = .{ .off = 60, .len = 2 } }, + .{ .text = .{ .off = 63, .len = 4 } }, + }, + .errors = &.{}, + .extra = &.{}, + }); +} + +test "Happy path list elaboration" { + try testParse( + \\- a + \\+ bb + \\ + \\ ccc + \\ + , .{ + .nodes = &.{ + .{ .document = .{ .num_children = 2 } }, + .{ .unordered_item = .{ .off = 0, .num_children = 2 } }, + .{ .marker = .{ .off = 0, .len = 1 } }, + .{ .text = .{ .off = 2, .len = 1 } }, + .{ .elaboration = .{ .off = 4, .num_children = 2 } }, + .{ .paragraph = .{ .off = 6, .num_children = 1 } }, + .{ .text = .{ .off = 6, .len = 2 } }, + .{ .paragraph = .{ .off = 12, .num_children = 1 } }, + .{ .text = .{ .off = 12, .len = 3 } }, + }, + .errors = &.{}, + .extra = &.{}, + }); +} + +test "Thematic break" { + try testParse( + \\a + \\*** + \\b + \\* + \\c + \\ + , .{ + .nodes = &.{ + .{ .document = .{ .num_children = 4 } }, + .{ .paragraph = .{ .off = 0, .num_children = 1 } }, + .{ .text = .{ .off = 0, .len = 1 } }, + .{ .thematic_break = .{ .off = 2, .len = 3 } }, + .{ .paragraph = .{ .off = 6, .num_children = 1 } }, + .{ .text = .{ .off = 6, .len = 1 } }, + .{ .paragraph = .{ .off = 8, .num_children = 2 } }, + .{ .text = .{ .off = 8, .len = 1 } }, + .{ .space_text = .{ .off = 10, .len = 1 } }, + }, + .errors = &.{ + .{ .invalid_marker = .{ .idx = @enumFromInt(6), .off = 8 } }, + }, + .extra = &.{}, + }); +} + +test "Mixed indentation" { + try testParse( + \\+ aaa + \\ + \\ + ++ "\tbbbbb\n", .{ + .nodes = &.{ + .{ .document = .{ .num_children = 1 } }, + .{ .elaboration = .{ .off = 0, .num_children = 2 } }, + .{ .paragraph = .{ .off = 2, .num_children = 1 } }, + .{ .text = .{ .off = 2, .len = 3 } }, + .{ .paragraph = .{ .off = 8, .num_children = 1 } }, + .{ .text = .{ .off = 8, .len = 5 } }, + }, + .errors = &.{ + .{ .inconsistent_indentation = .{ .idx = @enumFromInt(1), .off = 7 } }, + }, + .extra = &.{}, + }); +} + +test "Empty line in heading" { + try testParse( + \\# heading + \\ + \\ text + \\ + \\text + \\ + , .{ + .nodes = &.{ + .{ .document = .{ .num_children = 2 } }, + .{ .heading = .{ .off = 0, .num_children = 3 } }, + .{ .marker = .{ .off = 0, .len = 1 } }, + .{ .text = .{ .off = 2, .len = 7 } }, + .{ .space_text = .{ .off = 13, .len = 4 } }, + .{ .paragraph = .{ .off = 19, .num_children = 1 } }, + .{ .text = .{ .off = 19, .len = 4 } }, + }, + .errors = &.{ + .{ .empty_line_in_inline_block = .{ .idx = @enumFromInt(4), .off = 10 } }, + }, 
+ .extra = &.{}, + }); +} + +// test "Super long line" { +// const input = try std.testing.allocator.create([(1 << 24) * 4]u8); +// defer std.testing.allocator.destroy(input); +// @memset(input, 'a'); +// input[1] = '\n'; +// try testParse(input, .{ +// .nodes = &.{ +// .{ .document = .{ .num_children = 1 } }, +// .{ .paragraph = .{ .off = 0, .num_children = 2 } }, +// .{ .text = .{ .off = 0, .len = 1 } }, +// .{ .space_text = .{ .off = 2, .len = 16777215 } }, +// .{ .text = .{ .off = 2, .len = 16777215 } }, +// .{ .text = .{ .off = 2, .len = 16777215 } }, +// .{ .text = .{ .off = 2, .len = 16777215 } }, +// .{ .text = .{ .off = 2, .len = 2 } }, +// }, +// .errors = &.{}, +// .extra = &.{}, +// }); +// } + +// test "Many short lines" { +// const input = try std.testing.allocator.create([(1 << 23) - 2][2]u8); +// defer std.testing.allocator.destroy(input); +// @memset(input, [2]u8{ 'a', '\n' }); + +// var arena: ArenaAllocator = .init(std.testing.allocator); +// defer arena.deinit(); +// const ast = try parse(std.testing.allocator, arena.allocator(), @as([*]u8, @ptrCast(input))[0 .. (1 << 23) * 2 - 4]); +// try std.testing.expectEqual(1 << 23, ast.nodes.len); +// try std.testing.expectEqual(@as(Ast.Node.Tagged, .{ .document = .{ .num_children = 1 } }), ast.nodes[0].toTagged()); +// try std.testing.expectEqual(@as(Ast.Node.Tagged, .{ .paragraph = .{ .off = 0, .num_children = (1 << 23) - 2 } }), ast.nodes[1].toTagged()); +// try std.testing.expectEqual(@as(Ast.Node.Tagged, .{ .text = .{ .off = 0, .len = 1 } }), ast.nodes[2].toTagged()); +// for (1..(1 << 23) - 2) |i| { +// try std.testing.expectEqual(@as(Ast.Node.Tagged, .{ .space_text = .{ .off = @intCast(i * 2), .len = 1 } }), ast.nodes[i + 2].toTagged()); +// } +// } diff --git a/src/main.zig b/src/main.zig @@ -0,0 +1,22 @@ +const std = @import("std"); +const mymarkdown = @import("mymarkdown"); + +const GeneralPurposeAllocator = std.heap.GeneralPurposeAllocator(.{}); +const ArenaAllocator = std.heap.ArenaAllocator; + +pub fn main() !void { + var gpa: GeneralPurposeAllocator = .{}; + var arena: ArenaAllocator = .init(gpa.allocator()); + defer arena.deinit(); + + const input = try std.io.getStdIn().readToEndAlloc(arena.allocator(), std.math.maxInt(u32)); + + const ast = try mymarkdown.parse(gpa.allocator(), arena.allocator(), input); + // std.mem.doNotOptimizeAway(ast); + + var bw = std.io.bufferedWriter(std.io.getStdOut().writer()); + const stdout = bw.writer(); + // try stdout.print("{}\n", .{ast}); + _ = try ast.render(stdout, input, null); + try bw.flush(); +} diff --git a/src/root.zig b/src/root.zig @@ -0,0 +1,40 @@ +const std = @import("std"); +pub const Ast = @import("Ast.zig"); +pub const AstGen = @import("AstGen.zig"); +pub const parse = AstGen.parse; + +test { + _ = Ast; + _ = AstGen; +} + +// test { +// var arena: std.heap.ArenaAllocator = .init(std.testing.allocator); +// defer arena.deinit(); + +// // const input = try std.io.getStdIn().readToEndAlloc(arena.allocator(), std.math.maxInt(u32)); +// const input = +// \\# heading +// \\ +// \\blah +// \\ +// \\blah +// \\ +// ; +// const ast = try parse(std.testing.allocator, arena.allocator(), input); +// try std.testing.expectEqualDeep(6, ast.nodes.len); +// // try std.testing.expectEqualDeep(Ast{ +// // .nodes = &.{ +// // .{ .document = .{ .num_children = 2 } }, +// // .{ .heading = .{ .off = 0, .level = .h1, .num_children = 2 } }, +// // .{ .text = .{ .off = 0, .len = 8 } }, +// // .{ .space_text = .{ .off = 11, .len = 5 } }, +// // .{ .paragraph = .{ .off = 19, .num_children = 
1 } }, +// // .{ .text = .{ .off = 19, .len = 3 } }, +// // }, +// // .errors = &.{ +// // .{ .empty_line_in_inline_block = .{ .idx = @enumFromInt(3) } }, +// // }, +// // .extra = &.{}, +// // }, ast); +// } diff --git a/src/str.zig b/src/str.zig @@ -0,0 +1,95 @@ +//! Utils for "strings", []u8 or []const u8 slices +//! +//! The only purpose of this file is to reduce typing. +//! `std.mem.indexOfScalar(u8, s, c)` is very long, and +//! this file lets you just type `str.indexOfChar(s, c)`. +//! +//! If I need any functionality I will also just put it here, +//! so this file may have functions not in `std.mem`. +const std = @import("std"); +const mem = std.mem; +pub const Char = u8; +pub const Str = []const u8; +pub const Charset = []const u8; + +pub fn isAnyOf(c: Char, cs: Charset) bool { + return indexOfChar(cs, c) != null; +} + +pub fn isNoneOf(c: Char, cs: Charset) bool { + return !isAnyOf(c, cs); +} + +pub fn indexOfChar(s: Str, c: Char) ?usize { + return mem.indexOfScalar(u8, s, c); +} + +pub fn indexOfNotChar(slice: Str, value: Char) ?usize { + var i: usize = 0; + if (switch (@import("builtin").zig_backend) { + .stage2_llvm, .stage2_c => true, + else => false, + } and + !std.debug.inValgrind() and // https://github.com/ziglang/zig/issues/17717 + !@inComptime()) + { + if (std.simd.suggestVectorLength(Char)) |block_len| { + // For Intel Nehalem (2009) and AMD Bulldozer (2012) or later, unaligned loads on aligned data result + // in the same execution as aligned loads. We ignore older arch's here and don't bother pre-aligning. + // + // Use `std.simd.suggestVectorLength(T)` to get the same alignment as used in this function + // however this usually isn't necessary unless your arch has a performance penalty due to this. + // + // This may differ for other arch's. Arm for example costs a cycle when loading across a cache + // line so explicit alignment prologues may be worth exploration. + + // Unrolling here is ~10% improvement. We can then do one bounds check every 2 blocks + // instead of one which adds up. + const Block = @Vector(block_len, Char); + if (i + 2 * block_len < slice.len) { + const mask: Block = @splat(value); + while (true) { + inline for (0..2) |_| { + const block: Block = slice[i..][0..block_len].*; + const matches = block != mask; + if (@reduce(.Or, matches)) { + return i + std.simd.firstTrue(matches).?; + } + i += block_len; + } + if (i + 2 * block_len >= slice.len) break; + } + } + + // {block_len, block_len / 2} check + inline for (0..2) |j| { + const block_x_len = block_len / (1 << j); + comptime if (block_x_len < 4) break; + + const BlockX = @Vector(block_x_len, Char); + if (i + block_x_len < slice.len) { + const mask: BlockX = @splat(value); + const block: BlockX = slice[i..][0..block_x_len].*; + const matches = block != mask; + if (@reduce(.Or, matches)) { + return i + std.simd.firstTrue(matches).?; + } + i += block_x_len; + } + } + } + } + + for (slice[i..], i..) 
|c, j| { + if (c != value) return j; + } + return null; +} + +pub fn indexOfNone(s: Str, cs: Charset) ?usize { + return mem.indexOfNone(u8, s, cs); +} + +pub fn lastIndexOfNone(s: Str, cs: Charset) ?usize { + return mem.lastIndexOfNone(u8, s, cs); +} diff --git a/src/utils.zig b/src/utils.zig @@ -0,0 +1,126 @@ +const std = @import("std"); +const ziggy = @import("ziggy"); + +pub fn NewType(comptime int_type: type, comptime dummy_type_: type) type { + return enum(int_type) { + _, + + const Self = @This(); + + pub fn next(self: @This()) ?@This() { + if (@intFromEnum(self) == std.math.maxInt(int_type)) + return null; + return @enumFromInt(@intFromEnum(self) + 1); + } + + pub fn format(self: @This(), comptime _: []const u8, _: anytype, writer: anytype) !void { + try writer.print("@enumFromInt({})", .{@intFromEnum(self)}); + } + + pub const ziggy_options = struct { + const dummy_type = dummy_type_; + pub fn parse( + self: *ziggy.Parser, + first_tok: ziggy.Tokenizer.Token, + ) !Self { + return @enumFromInt(try self.parseValue(u32, first_tok)); + } + pub fn stringify( + self: Self, + opts: ziggy.serializer.StringifyOptions, + indent_level: usize, + depth: usize, + writer: anytype, + ) !void { + const serialized: u32 = @intFromEnum(self); + return ziggy.serializer.stringifyInner(serialized, opts, indent_level, depth, writer); + } + }; + }; +} + +pub fn Packed(comptime Tagged_: type) type { + return packed struct { + data: Data, + tag: Tag, + + const Self = @This(); + pub const Tagged = Tagged_; + pub const Tag = @typeInfo(Tagged_).@"union".tag_type.?; + pub const Data = @Type(.{ .@"union" = .{ + .layout = .@"packed", + .tag_type = null, + .fields = @typeInfo(Tagged_).@"union".fields, + .decls = &.{}, + } }); + + pub fn fromTagged(tagged: Tagged_) Self { + switch (@as(Tag, tagged)) { + inline else => |t| return .{ + .tag = tagged, + .data = @unionInit( + Data, + @tagName(t), + @field(tagged, @tagName(t)), + ), + }, + } + } + + pub fn toTagged(self: Self) Tagged_ { + switch (self.tag) { + inline else => |t| return @unionInit( + Tagged_, + @tagName(t), + @field(self.data, @tagName(t)), + ), + } + } + + pub const ziggy_options = struct { + pub fn parse( + self: *ziggy.Parser, + first_tok: ziggy.Tokenizer.Token, + ) !Self { + return .fromNode( + try self.parseValue(Tagged_, first_tok), + ); + } + pub fn stringify( + self: Self, + opts: ziggy.serializer.StringifyOptions, + indent_level: usize, + depth: usize, + writer: anytype, + ) !void { + return ziggy.serializer.stringifyInner( + self.toTagged(), + opts, + indent_level, + depth, + writer, + ); + } + }; + + /// May not exist, but we can define it anyway thanks to lazy decl analysis. + pub const Idx = Tagged_.Idx; + /// May not exist, but we can define it anyway thanks to lazy decl analysis. + pub const HeadingLevel = Tagged_.HeadingLevel; + /// May not exist, but we can define it anyway thanks to lazy decl analysis. + pub const Leaf = Tagged_.Leaf; + // /// May not exist, but we can define it anyway thanks to lazy decl analysis. + // pub const format = Tagged_.format; + /// May not exist, but we can define it anyway thanks to lazy decl analysis. + pub const incrementNumChildren = Tagged_.incrementNumChildren; + + pub fn format(self: @This(), comptime _: []const u8, _: anytype, writer: anytype) !void { + try writer.print("{}", .{self.toTagged()}); + } + }; +} + +pub fn safeIntCast(comptime T: type, value: anytype) T { + @setRuntimeSafety(true); + return @intCast(value); +}
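
A minimal library-usage sketch to close with; it is not part of the commit, mirrors src/main.zig above with a fixed string instead of stdin, and assumes the mymarkdown module import that build.zig wires up for the CLI module:

const std = @import("std");
const mymarkdown = @import("mymarkdown");

pub fn main() !void {
    var gpa: std.heap.GeneralPurposeAllocator(.{}) = .{};
    var arena: std.heap.ArenaAllocator = .init(gpa.allocator());
    defer arena.deinit();

    const input = "plain paragraph\nsecond line\n";

    // parse() works on an internal copy of the input with trailing whitespace
    // stripped per line; the returned Ast is owned by the second (output) allocator.
    const ast = try mymarkdown.parse(gpa.allocator(), arena.allocator(), input);

    // render() currently handles document, paragraph, text and space_text nodes,
    // so this should print: <body>\n<p>plain paragraph second line</p>\n</body>\n
    var bw = std.io.bufferedWriter(std.io.getStdOut().writer());
    const stdout = bw.writer();
    _ = try ast.render(stdout, input, null);
    try bw.flush();
}

The installed CLI (b.installArtifact in build.zig, written to zig-out/bin/mymarkdown) does the same thing with stdin and stdout.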