commit c08e3785171f96ac5779ee9c5577dc2ce4fa6c9a
parent 872212778fa2fe51462122f2bfc0a0463d0b564e
Author: gracefu <81774659+gracefuu@users.noreply.github.com>
Date: Sat, 17 May 2025 18:54:29 +0800
v0
Diffstat:
| M | build.zig | | | 6 | +++--- |
| M | src/Ast.zig | | | 219 | +++++++++++++++++++++++++++++++++++++++++++++++-------------------------------- |
| M | src/AstGen.zig | | | 59 | +++++++++++++++++++++++++++++++++++++---------------------- |
| D | src/AstGen/test.zig | | | 347 | ------------------------------------------------------------------------------- |
| M | src/main.zig | | | 54 | +++++++++++++++++++++++++++++++++++++++++++++--------- |
| M | src/root.zig | | | 33 | ++------------------------------- |
| M | src/str.zig | | | 133 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/test/test.zig | | | 420 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| M | src/utils.zig | | | 100 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- |
9 files changed, 866 insertions(+), 505 deletions(-)
diff --git a/build.zig b/build.zig
@@ -7,7 +7,7 @@ pub fn build(b: *std.Build) !void {
const llvm = b.option(
bool,
"llvm",
- "Force llvm to be used or not (default: compiler default for markdown_cli, false for tests)",
+ "Force llvm to be used or not (default: whatever the compiler default is)",
);
const enable_tracy = b.option(
@@ -66,13 +66,13 @@ fn setupTestStep(
.root_module = mymarkdown,
.target = target,
.optimize = optimize,
- .use_llvm = llvm orelse true,
+ .use_llvm = llvm,
})).step);
test_step.dependOn(&b.addRunArtifact(b.addTest(.{
.root_module = mymarkdown_cli,
.target = target,
.optimize = optimize,
- .use_llvm = llvm orelse true,
+ .use_llvm = llvm,
})).step);
}
diff --git a/src/Ast.zig b/src/Ast.zig
@@ -1,8 +1,12 @@
const std = @import("std");
-const ziggy = @import("ziggy");
+const Allocator = std.mem.Allocator;
+const assert = std.debug.assert;
+
const tracy = @import("tracy");
+
const utils = @import("utils.zig");
-const Allocator = std.mem.Allocator;
+const str = @import("str.zig");
+
const Ast = @This();
nodes: []const Node,
@@ -11,82 +15,6 @@ extra: []const u32,
pub const empty: Ast = .{ .nodes = &.{}, .errors = &.{}, .extra = &.{} };
-fn ZiggyFormat(comptime T: type, opts: ziggy.serializer.StringifyOptions) type {
- return struct {
- pub fn format(self: T, comptime _: []const u8, _: anytype, writer: anytype) !void {
- _ = try writer.writeAll(".");
- try ziggy.stringify(self, opts, writer);
- }
- };
-}
-
-fn ziggyFormat(comptime T: type, opts: ziggy.serializer.StringifyOptions) @TypeOf(ZiggyFormat(T, opts).format) {
- return ZiggyFormat(T, opts).format;
-}
-
-fn UnionFormat(comptime T: type) type {
- return struct {
- pub fn format(self: T, comptime _: []const u8, _: anytype, writer: anytype) !void {
- const info = @typeInfo(T).@"union";
- if (info.tag_type) |UnionTagType| {
- try writer.writeAll(".{ .");
- try writer.writeAll(@tagName(@as(UnionTagType, self)));
- try writer.writeAll(" = ");
- inline for (info.fields) |u_field| {
- if (self == @field(UnionTagType, u_field.name)) {
- try writer.print("{}", .{@field(self, u_field.name)});
- }
- }
- try writer.writeAll(" }");
- } else {
- try writer.print("@{x}", .{@intFromPtr(&self)});
- }
- }
- };
-}
-
-fn unionFormat(comptime T: type) @TypeOf(UnionFormat(T).format) {
- return UnionFormat(T).format;
-}
-
-fn StructFormat(comptime T: type) type {
- return struct {
- pub fn format(value: T, comptime actual_fmt: []const u8, _: anytype, writer: anytype) !void {
- const info = @typeInfo(T).@"struct";
- if (actual_fmt.len != 0) std.fmt.invalidFmtError(actual_fmt, value);
- if (info.is_tuple) {
- // Skip the type and field names when formatting tuples.
- try writer.writeAll(".{");
- inline for (info.fields, 0..) |f, i| {
- if (i == 0) {
- try writer.writeAll(" ");
- } else {
- try writer.writeAll(", ");
- }
- try writer.print("{}", .{@field(value, f.name)});
- }
- return writer.writeAll(" }");
- }
- try writer.writeAll(".{");
- inline for (info.fields, 0..) |f, i| {
- if (i == 0) {
- try writer.writeAll(" .");
- } else {
- try writer.writeAll(", .");
- }
- try writer.writeAll(f.name);
- try writer.writeAll(" = ");
- try writer.print("{}", .{@field(value, f.name)});
- }
- try writer.writeAll(" }");
- }
- };
-}
-
-fn structFormat(comptime T: type) @TypeOf(StructFormat(T).format) {
- return StructFormat(T).format;
-}
-
pub const StrOffset = u32;
pub const StrLen = u24;
@@ -110,18 +38,18 @@ pub const Node = utils.Packed(union(enum(u8)) {
pub const Idx = utils.NewType(u24, opaque {});
pub const Root = packed struct {
num_children: u24 = 0,
- pub const format = structFormat(@This());
+ pub const format = utils.structFormat(@This());
};
pub const Container = packed struct {
off: StrOffset,
num_children: u24 = 0,
- pub const format = structFormat(@This());
+ pub const format = utils.structFormat(@This());
};
pub const Leaf = packed struct {
off: StrOffset,
len: StrLen,
const num_children = 0;
- pub const format = structFormat(@This());
+ pub const format = utils.structFormat(@This());
};
pub fn incrementNumChildren(self: *Node) void {
@@ -134,7 +62,7 @@ pub const Node = utils.Packed(union(enum(u8)) {
}
}
- pub const format = unionFormat(@This());
+ pub const format = utils.unionFormat(@This());
});
pub const Error = utils.Packed(union(enum(u8)) {
@@ -147,7 +75,7 @@ pub const Error = utils.Packed(union(enum(u8)) {
pub const NodeError = packed struct {
idx: Node.Idx,
- pub const format = structFormat(@This());
+ pub const format = utils.structFormat(@This());
};
/// Used when the error diagnostic should point at a single location
@@ -155,11 +83,11 @@ pub const Error = utils.Packed(union(enum(u8)) {
idx: Node.Idx,
off: StrOffset,
- pub const format = structFormat(@This());
+ pub const format = utils.structFormat(@This());
};
pub const Idx = utils.NewType(u24, opaque {});
- pub const format = unionFormat(@This());
+ pub const format = utils.unionFormat(@This());
});
test "Tracking size of Node struct" {
@@ -176,7 +104,7 @@ test "Tracking size of Error struct" {
try std.testing.expectEqual(8, @sizeOf(Error));
}
-pub const format = ziggyFormat(@This(), .{
+pub const format = utils.ziggyFormat(@This(), .{
.whitespace = .space_2,
.omit_top_level_curly = false,
});
@@ -197,7 +125,122 @@ pub fn toTagged(self: Ast, gpa: Allocator) !Tagged {
return .{ .nodes = nodes, .errors = errors, .extra = extra };
}
-pub fn render(self: Ast, writer: anytype, input: []const u8, start_: ?Node.Idx) !?Node.Idx {
+/// Writes an indented, line-oriented dump of the whole tree to `writer`.
+///
+/// Rendering starts at node 0 and error 0 with no indentation. `input` is the
+/// text that node and error offsets index into. After the walk, asserts that
+/// every node and every error was consumed.
+pub fn renderAst(self: Ast, writer: anytype, input: []const u8) !void {
+    const Renderer = AstRenderer(@TypeOf(writer));
+    var state: Renderer = .{
+        .self = self,
+        .writer = writer,
+        .input = input,
+        // The position cache starts at the very beginning of the input.
+        .cached_offset = 0,
+        .cached_pos = .{ .row = 1, .col = 1 },
+    };
+    const nodes_consumed, const errors_consumed = try state.renderAstInner(0, 0, 0);
+    assert(nodes_consumed == self.nodes.len);
+    assert(errors_consumed == self.errors.len);
+}
+
+/// Returns a renderer type, specialised on `Writer`, that backs `renderAst`.
+///
+/// The renderer walks `self.nodes` in order. For each node it prints the tag on
+/// its own line, then the errors whose `idx` refers to that node, and then
+/// either the node's children (indented two more spaces) or, for leaves, the
+/// escaped slice of `input` the leaf covers.
+fn AstRenderer(Writer: type) type {
+    return struct {
+        self: Ast,
+        writer: Writer,
+        input: []const u8,
+        /// Offset most recently translated by `offsetToPos`.
+        cached_offset: u32,
+        /// Row/column corresponding to `cached_offset`.
+        cached_pos: Pos,
+
+        /// 1-based row and column within `input`.
+        const Pos = struct { row: u32, col: u32 };
+
+        /// Translates a byte offset into `input` to a row/column pair.
+        ///
+        /// The last answer is cached: a query at or after the cached offset
+        /// only scans the bytes in between, while a query before it rescans
+        /// from the start of `input`.
+        fn offsetToPos(renderer: *@This(), offset: u32) Pos {
+            if (renderer.cached_offset == offset) return renderer.cached_pos;
+
+            // Resume from the cache when moving forward, otherwise start over.
+            var scan_from: u32 = 0;
+            var pos: Pos = .{ .row = 1, .col = 1 };
+            if (renderer.cached_offset < offset) {
+                scan_from = renderer.cached_offset;
+                pos = renderer.cached_pos;
+            }
+
+            for (renderer.input[scan_from..offset]) |c| {
+                if (c == '\n') {
+                    pos.row += 1;
+                    pos.col = 1;
+                } else {
+                    pos.col += 1;
+                }
+            }
+
+            renderer.cached_offset = offset;
+            renderer.cached_pos = pos;
+            return pos;
+        }
+
+        /// Renders the subtree rooted at `start_node`, indented by `indent` spaces.
+        ///
+        /// `start_error` is the index of the first error not yet printed.
+        /// Returns the indices of the first node and the first error that were
+        /// not consumed by this subtree.
+        pub fn renderAstInner(renderer: *@This(), start_node: usize, start_error: usize, indent: usize) !struct { usize, usize } {
+            const tracy_frame = tracy.trace(@src());
+            defer tracy_frame.end();
+            switch (renderer.self.nodes[start_node].toTagged()) {
+                inline else => |data, tag| {
+                    try renderer.writer.writeByteNTimes(' ', indent);
+                    try renderer.writer.writeByte('.');
+                    try renderer.writer.writeAll(@tagName(tag));
+                    try renderer.writer.writeByte('\n');
+                    var cur_node: usize = start_node + 1;
+                    var cur_error: usize = start_error;
+                    // Errors are consumed in order for as long as they refer to this node.
+                    while (cur_error < renderer.self.errors.len and @intFromEnum(renderer.self.errors[cur_error].get(.idx)) == start_node) : (cur_error += 1) {
+                        // Every error line starts with the same ".error .<tag>" prefix,
+                        // whether or not it carries a source position.
+                        try renderer.writer.writeByteNTimes(' ', indent + 2);
+                        try renderer.writer.writeAll(".error .");
+                        try renderer.writer.writeAll(@tagName(renderer.self.errors[cur_error].tag));
+                        switch (renderer.self.errors[cur_error].toTagged()) {
+                            // PointError: also report where in the input it points.
+                            .empty_line_in_inline_block,
+                            .inconsistent_indentation,
+                            .invalid_marker,
+                            => |error_data| {
+                                try renderer.writer.print(" at {}:{}", renderer.offsetToPos(error_data.off));
+                            },
+                            // NodeError: only the node index, nothing more to print.
+                            .marker_too_long => {},
+                        }
+                        try renderer.writer.writeByte('\n');
+                    }
+                    if (@TypeOf(data) == Node.Tagged.Container or @TypeOf(data) == Node.Tagged.Root) {
+                        for (0..data.num_children) |_| {
+                            assert(cur_node < renderer.self.nodes.len);
+                            cur_node, cur_error = try renderer.renderAstInner(cur_node, cur_error, indent + 2);
+                            assert(cur_node <= renderer.self.nodes.len);
+                        }
+                        return .{ cur_node, cur_error };
+                    } else if (@TypeOf(data) == Node.Tagged.Leaf) {
+                        try renderer.writer.writeByteNTimes(' ', indent + 2);
+                        // Formatting through `std.zig.fmtEscapes` with `print` was
+                        // too slow here, so the escaping helper in `str` is used.
+                        try renderer.writer.writeByte('"');
+                        try str.escapeStringForDoubleQuotedString(
+                            renderer.writer,
+                            renderer.input[data.off .. data.off + data.len],
+                        );
+                        try renderer.writer.writeByte('"');
+                        try renderer.writer.writeByte('\n');
+                        return .{ cur_node, cur_error };
+                    } else {
+                        @compileError(@typeName(@TypeOf(data)));
+                    }
+                },
+            }
+            unreachable;
+        }
+    };
+}
+
+pub fn renderHtml(self: Ast, writer: anytype, input: []const u8, start_: ?Node.Idx) !?Node.Idx {
const tracy_frame = tracy.trace(@src());
defer tracy_frame.end();
const start: Node.Idx = start_ orelse @enumFromInt(0);
@@ -221,7 +264,7 @@ pub fn render(self: Ast, writer: anytype, input: []const u8, start_: ?Node.Idx)
const data = @field(self.nodes[@intFromEnum(start)].data, @tagName(t));
for (0..data.num_children) |_| {
if (cur_idx) |idx| {
- cur_idx = try self.render(writer, input, idx);
+ cur_idx = try self.renderHtml(writer, input, idx);
} else {
unreachable;
}
diff --git a/src/AstGen.zig b/src/AstGen.zig
@@ -1,15 +1,18 @@
const std = @import("std");
+const Allocator = std.mem.Allocator;
+const assert = std.debug.assert;
+
const ziggy = @import("ziggy");
const tracy = @import("tracy");
+
const utils = @import("utils.zig");
const str = @import("str.zig");
-const ArenaAllocator = std.heap.ArenaAllocator;
-const Allocator = std.mem.Allocator;
-const AstGen = @This();
const Ast = @import("Ast.zig");
const Node = Ast.Node;
const Error = Ast.Error;
+const AstGen = @This();
+
input_base: [*]u8,
nodes: std.ArrayListUnmanaged(Node),
errors: std.ArrayListUnmanaged(Error),
@@ -28,12 +31,9 @@ fn nextNodeIdx(self: AstGen) Node.Idx {
return @enumFromInt(self.nodes.items.len);
}
fn appendNode(self: *AstGen, gpa: Allocator, node: Node.Tagged) !Node.Idx {
- {
- @setRuntimeSafety(true);
- if (self.nodes.items.len > std.math.maxInt(
- @typeInfo(Node.Idx).@"enum".tag_type,
- )) unreachable;
- }
+ if (self.nodes.items.len > std.math.maxInt(
+ @typeInfo(Node.Idx).@"enum".tag_type,
+ )) return error.OutOfNodeIdx;
const idx = self.nodes.items.len;
try self.nodes.append(gpa, .fromTagged(node));
return @enumFromInt(idx);
@@ -45,7 +45,23 @@ pub fn deinit(self: *AstGen, gpa: Allocator) void {
self.extra.deinit(gpa);
}
-pub fn parse(gpa: Allocator, output_gpa: ?Allocator, input: []const u8) error{ InputTooLarge, OutOfMemory }!Ast {
+/// Parses mymarkdown
+///
+/// gpa: A suitable allocator for scratch allocations.
+/// output_gpa: If passed, no scratch allocations will outlive this function,
+/// and any allocations returned will be allocated on this.
+/// input: The input slice to be parsed.
+pub fn parse(
+ gpa: Allocator,
+ output_gpa: ?Allocator,
+ input: []const u8,
+) error{
+ InputTooLarge, // When the input length exceeds 2^32 bytes
+ MarkerTooLong, // When the input contains a marker that exceeds 2^24 bytes
+ OutOfNodeIdx, // When there are more than 2^24 nodes created during parsing
+ OutOfMemory, // When allocation fails
+ Todo, // When I'm too lazy
+}!Ast {
const tracy_frame = tracy.trace(@src());
defer tracy_frame.end();
@@ -151,7 +167,7 @@ fn findIndentedColumn(self: *AstGen, gpa: Allocator, lines_: [][]u8, node_idx: N
if (line.len == 0) continue;
const diff_idx = std.mem.indexOfDiff(u8, line.*, indentation) orelse unreachable;
- std.debug.assert(diff_idx != line.len);
+ assert(diff_idx != line.len);
if (diff_idx != indentation.len) {
try self.errors.append(gpa, .fromTagged(.{
.inconsistent_indentation = .{ .idx = node_idx, .off = self.calcOffset(&line.*[0]) },
@@ -199,7 +215,7 @@ fn parseInlineBlock(self: *AstGen, gpa: Allocator, lines_: [][]u8, parent_idx: N
const len = @min(line.len, std.math.maxInt(Ast.StrLen));
_ = try self.appendNode(gpa, .{
.text = .{
- .off = self.calcOffset(&lines[0][0]),
+ .off = self.calcOffset(&line[0]),
.len = @intCast(len),
},
});
@@ -237,7 +253,7 @@ fn parseInlineBlock(self: *AstGen, gpa: Allocator, lines_: [][]u8, parent_idx: N
var line = lines[0];
_ = try self.appendNode(gpa, .{
.space_text = .{
- .off = self.calcOffset(&lines[0][0]),
+ .off = self.calcOffset(&line[0]),
.len = @intCast(std.math.maxInt(Ast.StrLen)),
},
});
@@ -246,7 +262,7 @@ fn parseInlineBlock(self: *AstGen, gpa: Allocator, lines_: [][]u8, parent_idx: N
const len = @min(line.len, std.math.maxInt(Ast.StrLen));
_ = try self.appendNode(gpa, .{
.text = .{
- .off = self.calcOffset(&lines[0][0]),
+ .off = self.calcOffset(&line[0]),
.len = @intCast(len),
},
});
@@ -324,7 +340,7 @@ fn parseColumn(self: *AstGen, gpa: Allocator, lines_: [][]u8, parent_idx: Node.I
.no_children => {
lines = lines[1..];
},
- else => unreachable,
+ else => return error.Todo,
}
}
}
@@ -469,8 +485,8 @@ fn parseBlockStart(self: *AstGen, gpa: Allocator, line: []u8) !struct { ParseMod
// Regardless, the blockspec must be comptime known (the inline for is mandatory) because we do @unionInit with case.tag.
switch (line[0]) {
inline else => |c| {
- if (block_specs[c] == null) unreachable;
- inline for (block_specs[c].?) |case| {
+ assert(block_specs[c] != null);
+ inline for (block_specs[c] orelse unreachable) |case| {
switch (case.marker) {
.exact, .starts_with => |marker| {
if (std.mem.startsWith(u8, line, marker)) {
@@ -540,10 +556,13 @@ fn parseBlockStart(self: *AstGen, gpa: Allocator, line: []u8) !struct { ParseMod
}));
@memset(line[0..marker_len], ' ');
+ if (marker_len > std.math.maxInt(Ast.StrLen))
+ return error.MarkerTooLong;
+
if (case.store_marker_child == .store) {
_ = try self.appendNode(gpa, .{ .marker = .{
.off = self.calcOffset(&line[0]),
- .len = utils.safeIntCast(Ast.StrLen, marker_len),
+ .len = @intCast(marker_len),
} });
}
return .{ case.mode, node };
@@ -581,7 +600,3 @@ fn parseBlockStart(self: *AstGen, gpa: Allocator, line: []u8) !struct { ParseMod
}),
};
}
-
-test {
- _ = @import("AstGen/test.zig");
-}
diff --git a/src/AstGen/test.zig b/src/AstGen/test.zig
@@ -1,347 +0,0 @@
-const std = @import("std");
-const parse = @import("../AstGen.zig").parse;
-const Ast = @import("../Ast.zig");
-
-const GeneralPurposeAllocator = std.heap.GeneralPurposeAllocator(.{});
-const ArenaAllocator = std.heap.ArenaAllocator;
-
-fn testParse(input: []const u8, expected: Ast.Tagged) !void {
- var arena: ArenaAllocator = .init(std.testing.allocator);
- defer arena.deinit();
- const ast = try parse(std.testing.allocator, arena.allocator(), input);
- const tagged_ast = try ast.toTagged(arena.allocator());
- // try std.testing.expectEqualDeep(expected.nodes.len, tagged_ast.nodes.len);
- try std.testing.expectEqualDeep(expected, tagged_ast);
-}
-
-test "Empty" {
- try testParse("", .{
- .nodes = &.{
- .{ .document = .{ .num_children = 0 } },
- },
- .errors = &.{},
- .extra = &.{},
- });
-}
-
-test "Happy path paragraph" {
- try testParse(
- \\text
- \\
- \\text
- \\text
- \\
- \\text
- \\ text
- \\
- , .{
- .nodes = &.{
- .{ .document = .{ .num_children = 3 } },
- .{ .paragraph = .{ .off = 0, .num_children = 1 } },
- .{ .text = .{ .off = 0, .len = 4 } },
- .{ .paragraph = .{ .off = 6, .num_children = 2 } },
- .{ .text = .{ .off = 6, .len = 4 } },
- .{ .space_text = .{ .off = 11, .len = 4 } },
- .{ .paragraph = .{ .off = 17, .num_children = 2 } },
- .{ .text = .{ .off = 17, .len = 4 } },
- .{ .space_text = .{ .off = 22, .len = 7 } },
- },
- .errors = &.{},
- .extra = &.{},
- });
-}
-
-test "Happy path headings" {
- try testParse(
- \\# text
- \\# text
- \\# text
- \\ text
- \\
- \\# text
- \\
- \\# text
- \\ text
- \\
- \\# text
- \\ text
- \\
- \\## text
- \\## text
- \\## text
- \\ text
- \\
- \\## text
- \\
- \\## text
- \\ text
- \\
- \\## text
- \\ text
- \\
- , .{
- .nodes = &.{
- .{ .document = .{ .num_children = 12 } },
- .{ .heading = .{ .off = 0, .num_children = 2 } },
- .{ .marker = .{ .off = 0, .len = 1 } },
- .{ .text = .{ .off = 2, .len = 4 } },
- .{ .heading = .{ .off = 7, .num_children = 2 } },
- .{ .marker = .{ .off = 7, .len = 1 } },
- .{ .text = .{ .off = 9, .len = 4 } },
- .{ .heading = .{ .off = 14, .num_children = 3 } },
- .{ .marker = .{ .off = 14, .len = 1 } },
- .{ .text = .{ .off = 16, .len = 4 } },
- .{ .space_text = .{ .off = 23, .len = 4 } },
- .{ .heading = .{ .off = 29, .num_children = 2 } },
- .{ .marker = .{ .off = 29, .len = 1 } },
- .{ .text = .{ .off = 31, .len = 4 } },
- .{ .heading = .{ .off = 37, .num_children = 3 } },
- .{ .marker = .{ .off = 37, .len = 1 } },
- .{ .text = .{ .off = 39, .len = 4 } },
- .{ .space_text = .{ .off = 46, .len = 4 } },
- .{ .heading = .{ .off = 52, .num_children = 3 } },
- .{ .marker = .{ .off = 52, .len = 1 } },
- .{ .text = .{ .off = 54, .len = 4 } },
- .{ .space_text = .{ .off = 61, .len = 6 } },
- .{ .heading = .{ .off = 69, .num_children = 2 } },
- .{ .marker = .{ .off = 69, .len = 2 } },
- .{ .text = .{ .off = 72, .len = 4 } },
- .{ .heading = .{ .off = 77, .num_children = 2 } },
- .{ .marker = .{ .off = 77, .len = 2 } },
- .{ .text = .{ .off = 80, .len = 4 } },
- .{ .heading = .{ .off = 85, .num_children = 3 } },
- .{ .marker = .{ .off = 85, .len = 2 } },
- .{ .text = .{ .off = 88, .len = 4 } },
- .{ .space_text = .{ .off = 96, .len = 4 } },
- .{ .heading = .{ .off = 102, .num_children = 2 } },
- .{ .marker = .{ .off = 102, .len = 2 } },
- .{ .text = .{ .off = 105, .len = 4 } },
- .{ .heading = .{ .off = 111, .num_children = 3 } },
- .{ .marker = .{ .off = 111, .len = 2 } },
- .{ .text = .{ .off = 114, .len = 4 } },
- .{ .space_text = .{ .off = 122, .len = 4 } },
- .{ .heading = .{ .off = 128, .num_children = 3 } },
- .{ .marker = .{ .off = 128, .len = 2 } },
- .{ .text = .{ .off = 131, .len = 4 } },
- .{ .space_text = .{ .off = 139, .len = 6 } },
- },
- .errors = &.{},
- .extra = &.{},
- });
-}
-
-test "Happy path quote" {
- try testParse(
- \\> text
- \\ text
- \\
- \\> text
- \\ text
- \\> text
- \\> text
- \\text
- \\
- , .{
- .nodes = &.{
- .{ .document = .{ .num_children = 5 } },
- .{ .quote = .{ .off = 0, .num_children = 1 } },
- .{ .paragraph = .{ .off = 2, .num_children = 2 } },
- .{ .text = .{ .off = 2, .len = 4 } },
- .{ .space_text = .{ .off = 9, .len = 4 } },
- .{ .quote = .{ .off = 15, .num_children = 1 } },
- .{ .paragraph = .{ .off = 17, .num_children = 2 } },
- .{ .text = .{ .off = 17, .len = 4 } },
- .{ .space_text = .{ .off = 24, .len = 6 } },
- .{ .quote = .{ .off = 31, .num_children = 1 } },
- .{ .paragraph = .{ .off = 33, .num_children = 1 } },
- .{ .text = .{ .off = 33, .len = 4 } },
- .{ .quote = .{ .off = 38, .num_children = 1 } },
- .{ .paragraph = .{ .off = 40, .num_children = 1 } },
- .{ .text = .{ .off = 40, .len = 4 } },
- .{ .paragraph = .{ .off = 45, .num_children = 1 } },
- .{ .text = .{ .off = 45, .len = 4 } },
- },
- .errors = &.{},
- .extra = &.{},
- });
-}
-
-test "Happy path list" {
- try testParse(
- \\- text
- \\- [ ] text
- \\. text
- \\: text
- \\-- text
- \\-- [ ] text
- \\.. text
- \\:: text
- \\
- , .{
- .nodes = &.{
- .{ .document = .{ .num_children = 8 } },
- .{ .unordered_item = .{ .off = 0, .num_children = 2 } },
- .{ .marker = .{ .off = 0, .len = 1 } },
- .{ .text = .{ .off = 2, .len = 4 } },
- .{ .task_item = .{ .off = 7, .num_children = 2 } },
- .{ .marker = .{ .off = 7, .len = 5 } },
- .{ .text = .{ .off = 13, .len = 4 } },
- .{ .ordered_item = .{ .off = 18, .num_children = 2 } },
- .{ .marker = .{ .off = 18, .len = 1 } },
- .{ .text = .{ .off = 20, .len = 4 } },
- .{ .term_item = .{ .off = 25, .num_children = 2 } },
- .{ .marker = .{ .off = 25, .len = 1 } },
- .{ .text = .{ .off = 27, .len = 4 } },
- .{ .unordered_item = .{ .off = 32, .num_children = 2 } },
- .{ .marker = .{ .off = 32, .len = 2 } },
- .{ .text = .{ .off = 35, .len = 4 } },
- .{ .task_item = .{ .off = 40, .num_children = 2 } },
- .{ .marker = .{ .off = 40, .len = 6 } },
- .{ .text = .{ .off = 47, .len = 4 } },
- .{ .ordered_item = .{ .off = 52, .num_children = 2 } },
- .{ .marker = .{ .off = 52, .len = 2 } },
- .{ .text = .{ .off = 55, .len = 4 } },
- .{ .term_item = .{ .off = 60, .num_children = 2 } },
- .{ .marker = .{ .off = 60, .len = 2 } },
- .{ .text = .{ .off = 63, .len = 4 } },
- },
- .errors = &.{},
- .extra = &.{},
- });
-}
-
-test "Happy path list elaboration" {
- try testParse(
- \\- a
- \\+ bb
- \\
- \\ ccc
- \\
- , .{
- .nodes = &.{
- .{ .document = .{ .num_children = 2 } },
- .{ .unordered_item = .{ .off = 0, .num_children = 2 } },
- .{ .marker = .{ .off = 0, .len = 1 } },
- .{ .text = .{ .off = 2, .len = 1 } },
- .{ .elaboration = .{ .off = 4, .num_children = 2 } },
- .{ .paragraph = .{ .off = 6, .num_children = 1 } },
- .{ .text = .{ .off = 6, .len = 2 } },
- .{ .paragraph = .{ .off = 12, .num_children = 1 } },
- .{ .text = .{ .off = 12, .len = 3 } },
- },
- .errors = &.{},
- .extra = &.{},
- });
-}
-
-test "Thematic break" {
- try testParse(
- \\a
- \\***
- \\bb
- \\*
- \\ccc
- \\
- \\**bold text**
- \\
- , .{
- .nodes = &.{
- .{ .document = .{ .num_children = 4 } },
- .{ .paragraph = .{ .off = 0, .num_children = 1 } },
- .{ .text = .{ .off = 0, .len = 1 } },
- .{ .thematic_break = .{ .off = 2, .len = 3 } },
- .{ .paragraph = .{ .off = 6, .num_children = 3 } },
- .{ .text = .{ .off = 6, .len = 2 } },
- .{ .space_text = .{ .off = 9, .len = 1 } },
- .{ .space_text = .{ .off = 11, .len = 3 } },
- .{ .paragraph = .{ .off = 16, .num_children = 1 } },
- .{ .text = .{ .off = 16, .len = 13 } },
- },
- .errors = &.{},
- .extra = &.{},
- });
-}
-
-test "Mixed indentation" {
- try testParse(
- \\+ aaa
- \\
- \\
- ++ "\tbbbbb\n", .{
- .nodes = &.{
- .{ .document = .{ .num_children = 1 } },
- .{ .elaboration = .{ .off = 0, .num_children = 2 } },
- .{ .paragraph = .{ .off = 2, .num_children = 1 } },
- .{ .text = .{ .off = 2, .len = 3 } },
- .{ .paragraph = .{ .off = 8, .num_children = 1 } },
- .{ .text = .{ .off = 8, .len = 5 } },
- },
- .errors = &.{
- .{ .inconsistent_indentation = .{ .idx = @enumFromInt(1), .off = 7 } },
- },
- .extra = &.{},
- });
-}
-
-test "Empty line in heading" {
- try testParse(
- \\# heading
- \\
- \\ text
- \\
- \\text
- \\
- , .{
- .nodes = &.{
- .{ .document = .{ .num_children = 2 } },
- .{ .heading = .{ .off = 0, .num_children = 3 } },
- .{ .marker = .{ .off = 0, .len = 1 } },
- .{ .text = .{ .off = 2, .len = 7 } },
- .{ .space_text = .{ .off = 13, .len = 4 } },
- .{ .paragraph = .{ .off = 19, .num_children = 1 } },
- .{ .text = .{ .off = 19, .len = 4 } },
- },
- .errors = &.{
- .{ .empty_line_in_inline_block = .{ .idx = @enumFromInt(4), .off = 10 } },
- },
- .extra = &.{},
- });
-}
-
-test "Super long line" {
- const input = try std.testing.allocator.create([(1 << 24) * 4]u8);
- defer std.testing.allocator.destroy(input);
- @memset(input, 'a');
- input[1] = '\n';
- try testParse(input, .{
- .nodes = &.{
- .{ .document = .{ .num_children = 1 } },
- .{ .paragraph = .{ .off = 0, .num_children = 2 } },
- .{ .text = .{ .off = 0, .len = 1 } },
- .{ .space_text = .{ .off = 2, .len = 16777215 } },
- .{ .text = .{ .off = 2, .len = 16777215 } },
- .{ .text = .{ .off = 2, .len = 16777215 } },
- .{ .text = .{ .off = 2, .len = 16777215 } },
- .{ .text = .{ .off = 2, .len = 2 } },
- },
- .errors = &.{},
- .extra = &.{},
- });
-}
-
-test "Many short lines" {
- const input = try std.testing.allocator.create([(1 << 23) - 2][2]u8);
- defer std.testing.allocator.destroy(input);
- @memset(input, [2]u8{ 'a', '\n' });
-
- var arena: ArenaAllocator = .init(std.testing.allocator);
- defer arena.deinit();
- const ast = try parse(std.testing.allocator, arena.allocator(), @as([*]u8, @ptrCast(input))[0 .. (1 << 23) * 2 - 4]);
- try std.testing.expectEqual(1 << 23, ast.nodes.len);
- try std.testing.expectEqual(@as(Ast.Node.Tagged, .{ .document = .{ .num_children = 1 } }), ast.nodes[0].toTagged());
- try std.testing.expectEqual(@as(Ast.Node.Tagged, .{ .paragraph = .{ .off = 0, .num_children = (1 << 23) - 2 } }), ast.nodes[1].toTagged());
- try std.testing.expectEqual(@as(Ast.Node.Tagged, .{ .text = .{ .off = 0, .len = 1 } }), ast.nodes[2].toTagged());
- for (1..(1 << 23) - 2) |i| {
- try std.testing.expectEqual(@as(Ast.Node.Tagged, .{ .space_text = .{ .off = @intCast(i * 2), .len = 1 } }), ast.nodes[i + 2].toTagged());
- }
-}
diff --git a/src/main.zig b/src/main.zig
@@ -1,4 +1,5 @@
const std = @import("std");
+
const tracy = @import("tracy");
const mymarkdown = @import("mymarkdown");
@@ -10,19 +11,54 @@ pub fn main() !void {
var arena: ArenaAllocator = .init(gpa.allocator());
defer arena.deinit();
- const input = try std.io.getStdIn().readToEndAlloc(arena.allocator(), std.math.maxInt(u32));
+ const stdin = std.io.getStdIn();
+ var input_arraylist = blk: {
+ const tracy_frame = tracy.namedFrame("reading input");
+ defer tracy_frame.end();
+ if (stdin.stat()) |stat| {
+ if (stat.size > 0) {
+ var al: std.ArrayList(u8) = try .initCapacity(arena.allocator(), stat.size + 128);
+ try stdin.reader().readAllArrayList(&al, std.math.maxInt(u32) - 128);
+ try al.appendNTimes('\n', 128);
+ break :blk al;
+ }
+ } else |_| {}
+ var al: std.ArrayList(u8) = try .initCapacity(gpa.allocator(), 1024);
+ errdefer al.deinit();
+ try stdin.reader().readAllArrayList(&al, std.math.maxInt(u32) - 128);
+ try al.appendNTimes('\n', 128);
+ break :blk al;
+ };
+ defer input_arraylist.deinit();
+ const input = input_arraylist.items;
- const parse_tracy_frame = tracy.namedFrame("parse");
- const ast = try mymarkdown.parse(gpa.allocator(), arena.allocator(), input);
- parse_tracy_frame.end();
- // std.mem.doNotOptimizeAway(ast);
+ const ast = blk: {
+ const tracy_frame = tracy.namedFrame("parse");
+ defer tracy_frame.end();
+ break :blk try mymarkdown.parse(
+ gpa.allocator(),
+ arena.allocator(),
+ input,
+ );
+ };
+ // const ast2 = blk: {
+ // const tracy_frame = tracy.namedFrame("parse 2");
+ // tracy_frame.end();
+ // break :blk try mymarkdown.parse2(
+ // gpa.allocator(),
+ // arena.allocator(),
+ // input,
+ // );
+ // };
+ // try std.testing.expectEqualDeep(ast, ast2);
var bw = std.io.bufferedWriter(std.io.getStdOut().writer());
const stdout = bw.writer();
- // try stdout.print("{}\n", .{ast});
- const render_tracy_frame = tracy.namedFrame("render");
- _ = try ast.render(stdout, input, null);
- render_tracy_frame.end();
+ {
+ const tracy_frame = tracy.namedFrame("render");
+ defer tracy_frame.end();
+ _ = try ast.renderAst(stdout, input);
+ }
try bw.flush();
if (tracy.enable) {
diff --git a/src/root.zig b/src/root.zig
@@ -1,40 +1,11 @@
const std = @import("std");
+
pub const Ast = @import("Ast.zig");
pub const AstGen = @import("AstGen.zig");
pub const parse = AstGen.parse;
test {
+ _ = @import("test/test.zig");
_ = Ast;
_ = AstGen;
}
-
-// test {
-// var arena: std.heap.ArenaAllocator = .init(std.testing.allocator);
-// defer arena.deinit();
-
-// // const input = try std.io.getStdIn().readToEndAlloc(arena.allocator(), std.math.maxInt(u32));
-// const input =
-// \\# heading
-// \\
-// \\blah
-// \\
-// \\blah
-// \\
-// ;
-// const ast = try parse(std.testing.allocator, arena.allocator(), input);
-// try std.testing.expectEqualDeep(6, ast.nodes.len);
-// // try std.testing.expectEqualDeep(Ast{
-// // .nodes = &.{
-// // .{ .document = .{ .num_children = 2 } },
-// // .{ .heading = .{ .off = 0, .level = .h1, .num_children = 2 } },
-// // .{ .text = .{ .off = 0, .len = 8 } },
-// // .{ .space_text = .{ .off = 11, .len = 5 } },
-// // .{ .paragraph = .{ .off = 19, .num_children = 1 } },
-// // .{ .text = .{ .off = 19, .len = 3 } },
-// // },
-// // .errors = &.{
-// // .{ .empty_line_in_inline_block = .{ .idx = @enumFromInt(3) } },
-// // },
-// // .extra = &.{},
-// // }, ast);
-// }
diff --git a/src/str.zig b/src/str.zig
@@ -8,6 +8,9 @@
//! so this file may have functions not in `std.mem`.
const std = @import("std");
const mem = std.mem;
+
+const tracy = @import("tracy");
+
pub const Char = u8;
pub const Str = []const u8;
pub const Charset = []const u8;
@@ -93,3 +96,133 @@ pub fn indexOfNone(s: Str, cs: Charset) ?usize {
pub fn lastIndexOfNone(s: Str, cs: Charset) ?usize {
return mem.lastIndexOfNone(u8, s, cs);
}
+
+/// Writes `slice` to `writer`, escaped for use inside a double-quoted string.
+///
+/// Thin wrapper over `escapeString` with `.double_quoted_string`.
+pub fn escapeStringForDoubleQuotedString(
+    writer: anytype,
+    slice: []const u8,
+) !void {
+    try escapeString(writer, slice, .double_quoted_string);
+}
+
+/// Writes `slice` to `writer`, escaped for use inside a single-quoted string.
+///
+/// Thin wrapper over `escapeString` with `.single_quoted_string`, so `'` is
+/// escaped and `"` is written through unchanged.
+pub fn escapeStringForSingleQuotedString(
+    writer: anytype,
+    slice: []const u8,
+) !void {
+    return escapeString(writer, slice, .single_quoted_string);
+}
+
+/// Writes `slice` to `writer` with escapes for the quoting style `escape_for`.
+///
+/// Tab, carriage return, line feed, backslash and the active quote character
+/// become two-character backslash escapes. Any other byte below 0x20 or at or
+/// above 0x7f is written as `\x` followed by two lowercase hex digits. All
+/// remaining bytes are copied through unchanged.
+///
+/// On the LLVM and C backends (outside Valgrind and comptime), when a vector
+/// length is suggested for `u8`, the input is first checked block by block so
+/// that blocks needing no escaping are written with a single `writeAll`.
+/// Errors from `writer` are propagated.
+pub fn escapeString(
+    writer: anytype,
+    slice: []const u8,
+    comptime escape_for: enum { double_quoted_string, single_quoted_string },
+) !void {
+    const tracy_frame = tracy.trace(@src());
+    defer tracy_frame.end();
+
+    const quote = switch (escape_for) {
+        .double_quoted_string => '"',
+        .single_quoted_string => '\'',
+    };
+    const escaped_quote = switch (escape_for) {
+        .double_quoted_string => "\\\"",
+        .single_quoted_string => "\\'",
+    };
+
+    var i: usize = 0;
+    if (switch (@import("builtin").zig_backend) {
+        .stage2_llvm, .stage2_c => true,
+        else => false,
+    } and
+        !std.debug.inValgrind() and // https://github.com/ziglang/zig/issues/17717
+        !@inComptime())
+    {
+        if (std.simd.suggestVectorLength(u8)) |block_len| {
+            const Block = @Vector(block_len, u8);
+            // Two blocks are handled per iteration; the remainder of the
+            // input falls through to the scalar loop below.
+            while (i + 2 * block_len < slice.len) {
+                inline for (0..2) |_| {
+                    const load: Block = slice[i .. i + block_len][0..block_len].*;
+
+                    // NOTE: LLVM can auto optimize this, but we should check again
+                    // when x86 backend supports @Vector.
+                    // const has_ctrl = load - @as(Block, @splat(0x20)) >= @as(Block, @splat(0x7f - 0x20));
+                    const has_low_ctrl = load < @as(Block, @splat(0x20));
+                    const has_high_ctrl = load >= @as(Block, @splat(0x7f));
+
+                    // '\t', '\r' and '\n' need no separate checks: they are
+                    // already inside the low control range.
+
+                    // Not worth it to merge these into the ctrl range,
+                    // because doing so will include the space character,
+                    // which is super common. Space can be avoided by xor'ing with 0x07,
+                    // but unfortunately that moves DEL from 0x7f to 0x78,
+                    // which means that now has to be separately checked,
+                    // resulting in the same number of instructions as before...
+                    const has_quote = load == @as(Block, @splat(quote));
+                    const has_backslash = load == @as(Block, @splat('\\'));
+
+                    // If any byte in the block needs escaping, take the slow path.
+                    if (@reduce(.Or, has_low_ctrl) or
+                        @reduce(.Or, has_high_ctrl) or
+                        @reduce(.Or, has_quote) or
+                        @reduce(.Or, has_backslash))
+                    {
+                        try escapeBytesScalar(writer, slice[i .. i + block_len], quote, escaped_quote);
+                    } else {
+                        try writer.writeAll(slice[i .. i + block_len]);
+                    }
+
+                    i += block_len;
+                }
+            }
+        }
+    }
+
+    try escapeBytesScalar(writer, slice[i..], quote, escaped_quote);
+}
+
+/// Byte-at-a-time escaping used by `escapeString` for blocks that contain
+/// escapable bytes and for the tail of the input.
+/// Adapted from std.zig.stringEscape.
+fn escapeBytesScalar(
+    writer: anytype,
+    bytes: []const u8,
+    comptime quote: u8,
+    comptime escaped_quote: []const u8,
+) !void {
+    for (bytes) |byte| {
+        switch (byte) {
+            '\t' => try writer.writeAll("\\t"),
+            '\r' => try writer.writeAll("\\r"),
+            '\n' => try writer.writeAll("\\n"),
+            quote => try writer.writeAll(escaped_quote),
+            '\\' => try writer.writeAll("\\\\"),
+            else => if (byte < 0x20 or byte >= 0x7f) {
+                try writer.writeAll("\\x");
+                try std.fmt.formatInt(
+                    byte,
+                    16,
+                    .lower,
+                    .{ .width = 2, .fill = '0' },
+                    writer,
+                );
+            } else try writer.writeByte(byte),
+        }
+    }
+}
diff --git a/src/test/test.zig b/src/test/test.zig
@@ -0,0 +1,420 @@
+const std = @import("std");
+const parse = @import("../AstGen.zig").parse;
+const Ast = @import("../Ast.zig");
+
+const GeneralPurposeAllocator = std.heap.GeneralPurposeAllocator(.{});
+const ArenaAllocator = std.heap.ArenaAllocator;
+
+ /// Parses `input`, renders the resulting AST with `renderAst`, and expects the
+ /// rendered text to equal `expected`. Returns any error from parsing,
+ /// rendering, or the final string comparison.
+ fn testParse(input: []const u8, expected: []const u8) !void {
+ // Arena passed to `parse` as its second allocator; released in one go on exit.
+ // NOTE(review): `ast` is never freed explicitly here; presumably its storage
+ // belongs to this arena. Confirm against `parse`.
+ var arena: ArenaAllocator = .init(std.testing.allocator);
+ defer arena.deinit();
+ const ast = try parse(std.testing.allocator, arena.allocator(), input);
+ // Render into a growable byte buffer so the output can be compared as a string.
+ var ast_render: std.ArrayListUnmanaged(u8) = .empty;
+ defer ast_render.deinit(std.testing.allocator);
+ try ast.renderAst(ast_render.writer(std.testing.allocator), input);
+ try std.testing.expectEqualStrings(expected, ast_render.items);
+ }
+
+ // An empty input is expected to render as a lone `.document` line.
+ test "Empty" {
+ try testParse("",
+ \\.document
+ \\
+ );
+ }
+
+ // Three groups of lines separated by blank lines are expected to render as
+ // three `.paragraph` nodes. Within a paragraph the first line is `.text` and
+ // each following line is `.space_text`; the last expected string (`" text"`)
+ // keeps a leading space.
+ test "Happy path paragraph" {
+ try testParse(
+ \\text
+ \\
+ \\text
+ \\text
+ \\
+ \\text
+ \\ text
+ \\
+ ,
+ \\.document
+ \\ .paragraph
+ \\ .text
+ \\ "text"
+ \\ .paragraph
+ \\ .text
+ \\ "text"
+ \\ .space_text
+ \\ "text"
+ \\ .paragraph
+ \\ .text
+ \\ "text"
+ \\ .space_text
+ \\ " text"
+ \\
+ );
+ }
+
+ // Exercises `#` and `##` headings. Every expected `.heading` carries a
+ // `.marker` (the literal `#` or `##`) followed by `.text`; a line that
+ // continues a heading is expected as `.space_text`. Back-to-back marker lines
+ // are each expected to open a new heading. The same six cases are repeated
+ // for `#` and then for `##`.
+ test "Happy path headings" {
+ try testParse(
+ \\# text
+ \\# text
+ \\# text
+ \\ text
+ \\
+ \\# text
+ \\
+ \\# text
+ \\ text
+ \\
+ \\# text
+ \\ text
+ \\
+ \\## text
+ \\## text
+ \\## text
+ \\ text
+ \\
+ \\## text
+ \\
+ \\## text
+ \\ text
+ \\
+ \\## text
+ \\ text
+ \\
+ ,
+ \\.document
+ \\ .heading
+ \\ .marker
+ \\ "#"
+ \\ .text
+ \\ "text"
+ \\ .heading
+ \\ .marker
+ \\ "#"
+ \\ .text
+ \\ "text"
+ \\ .heading
+ \\ .marker
+ \\ "#"
+ \\ .text
+ \\ "text"
+ \\ .space_text
+ \\ "text"
+ \\ .heading
+ \\ .marker
+ \\ "#"
+ \\ .text
+ \\ "text"
+ \\ .heading
+ \\ .marker
+ \\ "#"
+ \\ .text
+ \\ "text"
+ \\ .space_text
+ \\ "text"
+ \\ .heading
+ \\ .marker
+ \\ "#"
+ \\ .text
+ \\ "text"
+ \\ .space_text
+ \\ " text"
+ \\ .heading
+ \\ .marker
+ \\ "##"
+ \\ .text
+ \\ "text"
+ \\ .heading
+ \\ .marker
+ \\ "##"
+ \\ .text
+ \\ "text"
+ \\ .heading
+ \\ .marker
+ \\ "##"
+ \\ .text
+ \\ "text"
+ \\ .space_text
+ \\ "text"
+ \\ .heading
+ \\ .marker
+ \\ "##"
+ \\ .text
+ \\ "text"
+ \\ .heading
+ \\ .marker
+ \\ "##"
+ \\ .text
+ \\ "text"
+ \\ .space_text
+ \\ "text"
+ \\ .heading
+ \\ .marker
+ \\ "##"
+ \\ .text
+ \\ "text"
+ \\ .space_text
+ \\ " text"
+ \\
+ );
+ }
+
+ // Exercises `>` quotes. Each expected `.quote` wraps a `.paragraph`; a line
+ // continuing the quote is expected as `.space_text`, consecutive `>` lines
+ // are each expected to open their own `.quote`, and the final bare `text`
+ // line is expected as a plain `.paragraph`.
+ test "Happy path quote" {
+ try testParse(
+ \\> text
+ \\ text
+ \\
+ \\> text
+ \\ text
+ \\> text
+ \\> text
+ \\text
+ \\
+ ,
+ \\.document
+ \\ .quote
+ \\ .paragraph
+ \\ .text
+ \\ "text"
+ \\ .space_text
+ \\ "text"
+ \\ .quote
+ \\ .paragraph
+ \\ .text
+ \\ "text"
+ \\ .space_text
+ \\ " text"
+ \\ .quote
+ \\ .paragraph
+ \\ .text
+ \\ "text"
+ \\ .quote
+ \\ .paragraph
+ \\ .text
+ \\ "text"
+ \\ .paragraph
+ \\ .text
+ \\ "text"
+ \\
+ );
+ }
+
+ // Exercises the list markers `-`, `- [ ]`, `.` and `:` plus their doubled
+ // forms (`--`, `-- [ ]`, `..`, `::`). They are expected to render as
+ // `.unordered_item`, `.task_item`, `.ordered_item` and `.term_item`
+ // respectively, each with the marker text preserved in `.marker`.
+ test "Happy path list" {
+ try testParse(
+ \\- text
+ \\- [ ] text
+ \\. text
+ \\: text
+ \\-- text
+ \\-- [ ] text
+ \\.. text
+ \\:: text
+ \\
+ ,
+ \\.document
+ \\ .unordered_item
+ \\ .marker
+ \\ "-"
+ \\ .text
+ \\ "text"
+ \\ .task_item
+ \\ .marker
+ \\ "- [ ]"
+ \\ .text
+ \\ "text"
+ \\ .ordered_item
+ \\ .marker
+ \\ "."
+ \\ .text
+ \\ "text"
+ \\ .term_item
+ \\ .marker
+ \\ ":"
+ \\ .text
+ \\ "text"
+ \\ .unordered_item
+ \\ .marker
+ \\ "--"
+ \\ .text
+ \\ "text"
+ \\ .task_item
+ \\ .marker
+ \\ "-- [ ]"
+ \\ .text
+ \\ "text"
+ \\ .ordered_item
+ \\ .marker
+ \\ ".."
+ \\ .text
+ \\ "text"
+ \\ .term_item
+ \\ .marker
+ \\ "::"
+ \\ .text
+ \\ "text"
+ \\
+ );
+ }
+
+ // A `+` line following a `-` item is expected to produce an `.elaboration`
+ // holding two paragraphs: "bb" from the `+` line itself and "ccc" from the
+ // line after the blank line.
+ test "Happy path list elaboration" {
+ try testParse(
+ \\- a
+ \\+ bb
+ \\
+ \\ ccc
+ \\
+ ,
+ \\.document
+ \\ .unordered_item
+ \\ .marker
+ \\ "-"
+ \\ .text
+ \\ "a"
+ \\ .elaboration
+ \\ .paragraph
+ \\ .text
+ \\ "bb"
+ \\ .paragraph
+ \\ .text
+ \\ "ccc"
+ \\
+ );
+ }
+
+ // A `***` line is expected to render as `.thematic_break` and to end the
+ // paragraph before it. A lone `*` line is expected to stay ordinary paragraph
+ // content (`.space_text`), and `**bold text**` is expected as plain `.text`.
+ test "Thematic break" {
+ try testParse(
+ \\a
+ \\***
+ \\bb
+ \\*
+ \\ccc
+ \\
+ \\**bold text**
+ \\
+ ,
+ \\.document
+ \\ .paragraph
+ \\ .text
+ \\ "a"
+ \\ .thematic_break
+ \\ "***"
+ \\ .paragraph
+ \\ .text
+ \\ "bb"
+ \\ .space_text
+ \\ "*"
+ \\ .space_text
+ \\ "ccc"
+ \\ .paragraph
+ \\ .text
+ \\ "**bold text**"
+ \\
+ );
+ }
+
+ // An elaboration opened with `+ ` whose later line starts with a tab is
+ // expected to report `.inconsistent_indentation` at 3:1 while still yielding
+ // both paragraphs ("aaa" and "bbbbb"). The input is built from ordinary
+ // string literals so the tab character is explicit.
+ test "Mixed indentation" {
+ try testParse("" ++
+ "+ aaa\n" ++
+ "\n" ++
+ "\tbbbbb\n",
+ \\.document
+ \\ .elaboration
+ \\ .error .inconsistent_indentation at 3:1
+ \\ .paragraph
+ \\ .text
+ \\ "aaa"
+ \\ .paragraph
+ \\ .text
+ \\ "bbbbb"
+ \\
+ );
+ }
+
+ // A tab at the start of a paragraph's second line is expected to be kept in
+ // the `.space_text` node; the expected render shows it as the two-character
+ // escape `\t` inside the quoted string.
+ test "Tabs in text" {
+ try testParse("" ++
+ "aaa\n" ++
+ "\tbbbbb\n",
+ \\.document
+ \\ .paragraph
+ \\ .text
+ \\ "aaa"
+ \\ .space_text
+ \\ "\tbbbbb"
+ \\
+ );
+ }
+
+ // A heading whose continuation line comes after a blank line is expected to
+ // keep that line as `.space_text` but attach an
+ // `.empty_line_in_inline_block` error at 2:1. The final `text` line is
+ // expected as a separate `.paragraph`.
+ test "Empty line in heading" {
+ try testParse(
+ \\# heading
+ \\
+ \\ text
+ \\
+ \\text
+ \\
+ ,
+ \\.document
+ \\ .heading
+ \\ .marker
+ \\ "#"
+ \\ .text
+ \\ "heading"
+ \\ .space_text
+ \\ .error .empty_line_in_inline_block at 2:1
+ \\ "text"
+ \\ .paragraph
+ \\ .text
+ \\ "text"
+ \\
+ );
+ }
+
+ // Parses a single line of (1 << 24) * 4 = 67108864 'a' bytes with no newline
+ // and compares the tagged AST against a fixed node list. The line is
+ // expected to be split into four `.text` nodes of 16777215 bytes
+ // ((1 << 24) - 1) each, plus a final 4-byte `.text` node for the remainder.
+ // NOTE(review): the expected `.paragraph` records `num_children = 1` although
+ // five `.text` nodes follow it; confirm this is the intended encoding.
+ test "Super long line" {
+ // Heap-allocate the buffer; it is 64 MiB.
+ const input = try std.testing.allocator.create([(1 << 24) * 4]u8);
+ defer std.testing.allocator.destroy(input);
+ @memset(input, 'a');
+ var arena: ArenaAllocator = .init(std.testing.allocator);
+ defer arena.deinit();
+ const ast = try parse(std.testing.allocator, arena.allocator(), input);
+ const taggedAst = try ast.toTagged(arena.allocator());
+ try std.testing.expectEqualDeep(@as(Ast.Tagged, .{
+ .nodes = &.{
+ .{ .document = .{ .num_children = 1 } },
+ .{ .paragraph = .{ .off = 0, .num_children = 1 } },
+ .{ .text = .{ .off = 0, .len = 16777215 } },
+ .{ .text = .{ .off = 16777215, .len = 16777215 } },
+ .{ .text = .{ .off = 33554430, .len = 16777215 } },
+ .{ .text = .{ .off = 50331645, .len = 16777215 } },
+ .{ .text = .{ .off = 67108860, .len = 4 } },
+ },
+ .errors = &.{},
+ .extra = &.{},
+ }), taggedAst);
+ }
+
+ // Parses (1 << 24) - 2 lines of "a\n" and checks the flat node list:
+ // 1 << 24 nodes in total, i.e. one `.document`, one `.paragraph` with
+ // (1 << 24) - 2 children, a leading `.text`, and a `.space_text` of length 1
+ // for every later line at byte offset `i * 2`.
+ test "Many short lines" {
+ const input = try std.testing.allocator.create([(1 << 25) - 4]u8);
+ defer std.testing.allocator.destroy(input);
+ // View the buffer as (1 << 24) - 2 two-byte pairs and fill each with "a\n".
+ @memset(@as(*[(1 << 24) - 2][2]u8, @ptrCast(input)), "a\n"[0..2].*);
+
+ var arena: ArenaAllocator = .init(std.testing.allocator);
+ defer arena.deinit();
+ const ast = try parse(std.testing.allocator, arena.allocator(), input);
+ try std.testing.expectEqual(1 << 24, ast.nodes.len);
+ try std.testing.expectEqual(
+ @as(Ast.Node.Tagged, .{ .document = .{ .num_children = 1 } }),
+ ast.nodes[0].toTagged(),
+ );
+ try std.testing.expectEqual(
+ @as(Ast.Node.Tagged, .{ .paragraph = .{ .off = 0, .num_children = (1 << 24) - 2 } }),
+ ast.nodes[1].toTagged(),
+ );
+ try std.testing.expectEqual(
+ @as(Ast.Node.Tagged, .{ .text = .{ .off = 0, .len = 1 } }),
+ ast.nodes[2].toTagged(),
+ );
+ // Line 0 is the `.text` node checked above; lines 1.. live at nodes[i + 2].
+ for (1..(1 << 24) - 2) |i| {
+ try std.testing.expectEqual(
+ @as(Ast.Node.Tagged, .{ .space_text = .{ .off = @intCast(i * 2), .len = 1 } }),
+ ast.nodes[i + 2].toTagged(),
+ );
+ }
+ }
diff --git a/src/utils.zig b/src/utils.zig
@@ -1,4 +1,5 @@
const std = @import("std");
+
const ziggy = @import("ziggy");
pub fn NewType(comptime int_type: type, comptime dummy_type_: type) type {
@@ -77,6 +78,24 @@ pub fn Packed(comptime Tagged_: type) type {
}
}
+ /// Result type of `get`: the type of the field named by `field`, looked up
+ /// in the payload type of the FIRST variant of `Tagged_` only.
+ /// NOTE(review): presumably every variant is expected to declare this field
+ /// with the same type; confirm.
+ fn GetType(field: anytype) type {
+ return @FieldType(@typeInfo(Tagged_).@"union".fields[0].type, @tagName(field));
+ }
+
+ /// Returns the payload field named by `field` from whichever variant is
+ /// currently active, without the caller having to switch on the tag.
+ /// `field` is passed to `@tagName` to obtain the field name.
+ pub fn get(self: Self, field: anytype) GetType(field) {
+ switch (self.tag) {
+ // `inline else` makes `t` comptime-known in each prong, so it can be
+ // used as a field name via `@tagName(t)`.
+ inline else => |t| {
+ return @field(
+ @field(
+ self.data,
+ @tagName(t),
+ ),
+ @tagName(field),
+ );
+ },
+ }
+ }
+
pub const ziggy_options = struct {
pub fn parse(
self: *ziggy.Parser,
@@ -114,13 +133,84 @@ pub fn Packed(comptime Tagged_: type) type {
/// May not exist, but we can define it anyway thanks to lazy decl analysis.
pub const incrementNumChildren = Tagged_.incrementNumChildren;
- pub fn format(self: @This(), comptime _: []const u8, _: anytype, writer: anytype) !void {
- try writer.print("{}", .{self.toTagged()});
+ /// Formats the packed value by converting it with `toTagged()` and handing
+ /// the result to `std.fmt.formatType`, forwarding `fmt` and `options`
+ /// unchanged.
+ pub fn format(self: @This(), comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
+ try std.fmt.formatType(self.toTagged(), fmt, options, writer);
+ }
+ };
+}
+
+ /// Builds a namespace whose `format` function writes a value of type `T` as
+ /// a literal "." followed by the output of `ziggy.stringify` with `opts`.
+ /// The format string and format options passed to `format` are ignored.
+ fn ZiggyFormat(comptime T: type, opts: ziggy.serializer.StringifyOptions) type {
+ return struct {
+ pub fn format(self: T, comptime _: []const u8, _: anytype, writer: anytype) !void {
+ _ = try writer.writeAll(".");
+ try ziggy.stringify(self, opts, writer);
+ }
+ };
+ }
+
+ /// Returns the `format` function generated by `ZiggyFormat(T, opts)`.
+ /// NOTE(review): presumably meant to be assigned to a type's `format`
+ /// declaration; confirm at the call sites.
+ pub fn ziggyFormat(comptime T: type, opts: ziggy.serializer.StringifyOptions) @TypeOf(ZiggyFormat(T, opts).format) {
+ return ZiggyFormat(T, opts).format;
+ }
+
+ /// Builds a namespace whose `format` function prints a union of type `T`.
+ /// A tagged union is written as `.{ .<tag> = <payload> }`, with the payload
+ /// printed using `{}`. A union without a tag type is written as `@` followed
+ /// by a hexadecimal address. The format string and options are ignored.
+ fn UnionFormat(comptime T: type) type {
+ return struct {
+ pub fn format(self: T, comptime _: []const u8, _: anytype, writer: anytype) !void {
+ const info = @typeInfo(T).@"union";
+ if (info.tag_type) |UnionTagType| {
+ try writer.writeAll(".{ .");
+ try writer.writeAll(@tagName(@as(UnionTagType, self)));
+ try writer.writeAll(" = ");
+ // Unrolled over all variants at comptime; only the branch matching
+ // the active tag prints at runtime.
+ inline for (info.fields) |u_field| {
+ if (self == @field(UnionTagType, u_field.name)) {
+ try writer.print("{}", .{@field(self, u_field.name)});
+ }
+ }
+ try writer.writeAll(" }");
+ } else {
+ // The address printed is that of the by-value `self` parameter.
+ try writer.print("@{x}", .{@intFromPtr(&self)});
+ }
+ }
+ };
+ }
+
+ /// Returns the `format` function generated by `UnionFormat(T)`.
+ pub fn unionFormat(comptime T: type) @TypeOf(UnionFormat(T).format) {
+ return UnionFormat(T).format;
+ }
+
+ /// Builds a namespace whose `format` function prints a struct of type `T`
+ /// without its type name. Tuples are written as `.{ a, b }` and ordinary
+ /// structs as `.{ .name = value, .other = value }`, with every field value
+ /// printed using `{}`. A non-empty format string is rejected through
+ /// `std.fmt.invalidFmtError`; the format options are ignored.
+ fn StructFormat(comptime T: type) type {
+ return struct {
+ pub fn format(value: T, comptime actual_fmt: []const u8, _: anytype, writer: anytype) !void {
+ const info = @typeInfo(T).@"struct";
+ if (actual_fmt.len != 0) std.fmt.invalidFmtError(actual_fmt, value);
+ if (info.is_tuple) {
+ // Skip the type and field names when formatting tuples.
+ try writer.writeAll(".{");
+ inline for (info.fields, 0..) |f, i| {
+ if (i == 0) {
+ try writer.writeAll(" ");
+ } else {
+ try writer.writeAll(", ");
+ }
+ try writer.print("{}", .{@field(value, f.name)});
+ }
+ return writer.writeAll(" }");
+ }
+ // Named-field struct: same layout as the tuple case, plus `.name = `
+ // in front of every value.
+ try writer.writeAll(".{");
+ inline for (info.fields, 0..) |f, i| {
+ if (i == 0) {
+ try writer.writeAll(" .");
+ } else {
+ try writer.writeAll(", .");
+ }
+ try writer.writeAll(f.name);
+ try writer.writeAll(" = ");
+ try writer.print("{}", .{@field(value, f.name)});
+ }
+ try writer.writeAll(" }");
}
};
}
-pub fn safeIntCast(comptime T: type, value: anytype) T {
- @setRuntimeSafety(true);
- return @intCast(value);
+/// Returns the `format` function generated by `StructFormat(T)`.
+pub fn structFormat(comptime T: type) @TypeOf(StructFormat(T).format) {
+ return StructFormat(T).format;
}