mymarkdown

My markdown
git clone https://git.grace.moe/mymarkdown
Log | Files | Refs

commit 710682847dfa6ef35230d275a79fe35bbca21fc6
parent 568cd19766617323594a89b4710f8ffba32bc98c
Author: gracefu <81774659+gracefuu@users.noreply.github.com>
Date:   Wed, 21 May 2025 18:19:49 +0800

More microopts and better cli

Diffstat:
M src/Ast.zig | 6 ++++++
M src/AstGen3.zig | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------
M src/main.zig | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------
M src/padded_str.zig | 2 +-
M src/root.zig | 2 ++
5 files changed, 154 insertions(+), 50 deletions(-)

diff --git a/src/Ast.zig b/src/Ast.zig @@ -14,6 +14,12 @@ extra: []const u32, pub const empty: Ast = .{ .nodes = &.{}, .errors = &.{}, .extra = &.{} }; +pub fn deinit(self: Ast, gpa: std.mem.Allocator) void { + gpa.free(self.nodes); + gpa.free(self.errors); + gpa.free(self.extra); +} + pub const StrOffset = u32; pub const StrLen = u24; diff --git a/src/AstGen3.zig b/src/AstGen3.zig @@ -26,6 +26,9 @@ nodes: std.ArrayListUnmanaged(Node), errors: std.ArrayListUnmanaged(Error), extra: std.ArrayListUnmanaged(u32), +num_node_allocs: if (PRINT_ALLOC_STATS) usize else void = if (PRINT_ALLOC_STATS) 0, +num_error_allocs: if (PRINT_ALLOC_STATS) usize else void = if (PRINT_ALLOC_STATS) 0, + fn getNode(self: *const AstGen, idx: Node.Idx) *Node { @setRuntimeSafety(true); return &self.nodes.items[@intFromEnum(idx)]; @@ -39,6 +42,8 @@ fn nextNodeIdx(self: *const AstGen) Node.Idx { return @enumFromInt(self.nodes.items.len); } +const PRINT_ALLOC_STATS = false; + // These need manual inlining for some reason. // // LLVM doesn't seem to think that inlining these are worth it, but LLVM is wrong. 
@@ -54,7 +59,11 @@ inline fn appendNode(self: *AstGen, node: Node.Tagged) !Node.Idx { @typeInfo(Node.Idx).@"enum".tag_type, )) return error.OutOfNodeIdx; const idx = self.nodes.items.len; + const cap = if (PRINT_ALLOC_STATS) self.nodes.capacity; try self.nodes.append(self.gpa, .fromTagged(node)); + if (PRINT_ALLOC_STATS and cap != self.nodes.capacity) { + self.num_node_allocs += 1; + } return @enumFromInt(idx); } inline fn appendContainerNode(self: *AstGen, parent_idx: Node.Idx, comptime container_node_tag: Node.Tag, ptr: PaddedMany) !Node.Idx { @@ -81,7 +90,11 @@ inline fn appendError(self: *AstGen, err: Error.Tagged) !void { if (self.errors.items.len > std.math.maxInt( @typeInfo(Error.Idx).@"enum".tag_type, )) return error.OutOfErrorIdx; + const cap = if (PRINT_ALLOC_STATS) self.errors.capacity; try self.errors.append(self.gpa, .fromTagged(err)); + if (PRINT_ALLOC_STATS and cap != self.errors.capacity) { + self.num_error_allocs += 1; + } } inline fn appendPointError(self: *AstGen, comptime tag: Error.Tag, idx: Node.Idx, ptr: PaddedMany) !void { try self.appendError( @@ -148,6 +161,20 @@ pub fn parse( }; defer ast.deinit(); + { + const tracy_frame2 = tracy.traceNamed(@src(), "allocating"); + defer tracy_frame2.end(); + + // Based on sample.my, there is around 1 node for every 26 bytes in the input. + // Divide the # of bytes by 32 as a heuristic. 
+ // TODO: Review this when inline parsing is implemented + if (PRINT_ALLOC_STATS) ast.num_node_allocs += 1; + try ast.nodes.ensureTotalCapacity(gpa, input.len / 32); + + // Expect there to be no errors + // try ast.errors.ensureTotalCapacity(gpa, ...); + } + try ast.parseRoot(); std.sort.pdq(Error, ast.errors.items, {}, struct { @@ -156,6 +183,29 @@ pub fn parse( } }.func); + // If you want to figure out better parameters for allocation + if (PRINT_ALLOC_STATS) { + const num_newlines = blk: { + const tracy_frame2 = tracy.traceNamed(@src(), "counting newlines"); + defer tracy_frame2.end(); + + var num_newlines: usize = 0; + for (input.toUnpaddedSlice()) |c| { + if (c == '\n') num_newlines += 1; + } + break :blk num_newlines; + }; + + std.debug.print("num_newlines = {}\n", .{num_newlines}); + std.debug.print("input.len = {}\n", .{input.len}); + std.debug.print("ast.nodes.capacity = {}\n", .{ast.nodes.capacity}); + std.debug.print("ast.errors.capacity = {}\n", .{ast.errors.capacity}); + std.debug.print("ast.nodes.items.len = {}\n", .{ast.nodes.items.len}); + std.debug.print("ast.errors.items.len = {}\n", .{ast.errors.items.len}); + std.debug.print("ast.num_node_allocs = {}\n", .{ast.num_node_allocs}); + std.debug.print("ast.num_error_allocs = {}\n", .{ast.num_error_allocs}); + } + if (output_gpa) |gpa2| { return .{ .nodes = try gpa2.dupe(Node, ast.nodes.items), @@ -426,19 +476,30 @@ inline fn parseColumnImpl( while (self.scanner.peek()) |peek2| { _, const line2 = peek2; - // Empty line (only spaces / tabs) - if (line2.len == 0) break; - if (line2.len == line2.indexOfNotSpaceOrTab()) break; - // Special block chars - if (str.isAnyOf(line2.index(0), "-.:+>#@")) break; - // Thematic break - if (line2.len >= 3 and - std.mem.allEqual(u8, line2.toUnpaddedSlice()[0..3], '*') and - line2.len == 3 + line2.sliceOpen(3).indexOfNotSpaceOrTab()) break; - // Verbatim block - if (line2.len >= 2 and std.mem.eql(u8, line2.toUnpaddedSlice()[0..2], "==")) break; - // Math block - if 
(line2.len >= 3 and std.mem.eql(u8, line2.toUnpaddedSlice()[0..3], "$==")) break; + const line_load4: u32 = @bitCast(line2.ptr._ptr[0..4].*); + const line_load3: u32 = line_load4 & @as(u32, @bitCast("\xff\xff\xff\x00\x00"[0..4].*)); + const line_load2: u32 = line_load4 & @as(u32, @bitCast("\xff\xff\x00\x00\x00"[0..4].*)); + + switch (line2.ptr._ptr[0]) { + // Special block chars / actually empty line + '\n', '-', '.', ':', '+', '>', '#', '@' => break, + // Empty line (only whitespace) + ' ', '\t' => if (line2.len == line2.indexOfNotSpaceOrTab()) break, + // string literals have extra chars so [0..4] doesn't give a sentinel ptr + // Thematic break + '*' => if (line_load3 == + @as(u32, @bitCast("***\x00\x00"[0..4].*)) and + (line2.len == 3 or + line2.len == 3 + line2.sliceOpen(3).indexOfNotSpaceOrTab())) break, + // Verbatim block + '=' => if (line_load2 == + @as(u32, @bitCast("==\x00\x00\x00"[0..4].*))) break, + // Math block + '$' => if (line_load3 == + @as(u32, @bitCast("$==\x00\x00"[0..4].*))) break, + else => {}, + } + try self.insertTextLine(.space_text, .text, paragraph_idx, line2); self.scanner.advance(); } diff --git a/src/main.zig b/src/main.zig @@ -9,16 +9,16 @@ fn readInput(gpa: std.mem.Allocator, arena: std.mem.Allocator) !std.ArrayList(u8 defer tracy_frame.end(); if (stdin.stat()) |stat| { if (stat.size > 0) { - var al: std.ArrayList(u8) = try .initCapacity(arena, stat.size + 1024); - try stdin.reader().readAllArrayList(&al, std.math.maxInt(u32) - 1024); - try al.appendNTimes('\n', 1024); + var al: std.ArrayList(u8) = try .initCapacity(arena, stat.size + mymarkdown.PADDING); + try stdin.reader().readAllArrayList(&al, std.math.maxInt(u32) - mymarkdown.PADDING); + try al.appendNTimes('\n', mymarkdown.PADDING); return al; } } else |_| {} var al: std.ArrayList(u8) = try .initCapacity(gpa, 4096); errdefer al.deinit(); - try stdin.reader().readAllArrayList(&al, std.math.maxInt(u32) - 1024); - try al.appendNTimes('\n', 1024); + try 
stdin.reader().readAllArrayList(&al, std.math.maxInt(u32) - mymarkdown.PADDING); + try al.appendNTimes('\n', mymarkdown.PADDING); return al; } @@ -28,7 +28,7 @@ pub fn main() !void { const gpa, const is_debug = gpa: { break :gpa switch (@import("builtin").mode) { .Debug, .ReleaseSafe => .{ debug_allocator.allocator(), true }, - .ReleaseFast, .ReleaseSmall => .{ std.heap.page_allocator, false }, + .ReleaseFast, .ReleaseSmall => .{ std.heap.smp_allocator, false }, }; }; defer _ = if (is_debug) debug_allocator.deinit(); @@ -37,15 +37,20 @@ pub fn main() !void { const arena = arena_instance.allocator(); const args = try std.process.argsAlloc(arena); + const bench1, const bench2, const bench3, // const run1, const run2, const run3, // const check1, const check2, const check3, // - const print1, const print2, const print3 = + const render1, const render2, const render3, // + const print1, const print2, const print3, // + const iters = blk: { var bench1, var bench2, var bench3 = .{ false, false, false }; var run1, var run2, var run3 = .{ false, false, false }; var check1, var check2, var check3 = .{ false, false, false }; + var render1, var render2, var render3 = .{ false, false, false }; var print1, var print2, var print3 = .{ false, false, false }; + var iters: usize = 8; for (args) |arg| { if (std.mem.eql(u8, arg, "--bench1")) bench1 = true; @@ -65,20 +70,40 @@ pub fn main() !void { check2, run2 = .{ true, true }; if (std.mem.eql(u8, arg, "--check3")) check3, run3 = .{ true, true }; + if (std.mem.eql(u8, arg, "--render1")) + render1, run1 = .{ true, true }; + if (std.mem.eql(u8, arg, "--render2")) + render2, run2 = .{ true, true }; + if (std.mem.eql(u8, arg, "--render3")) + render3, run3 = .{ true, true }; if (std.mem.eql(u8, arg, "--print1")) print1, run1 = .{ true, true }; if (std.mem.eql(u8, arg, "--print2")) print2, run2 = .{ true, true }; if (std.mem.eql(u8, arg, "--print3")) print3, run3 = .{ true, true }; + if (std.mem.startsWith(u8, arg, "--iters=")) + iters = 
std.fmt.parseInt( + usize, + arg[8..], + 0, + ) catch |err| { + std.debug.print( + "Invalid --iters= argument, expected number: {}\n", + .{err}, + ); + continue; + }; if (std.mem.eql(u8, arg, "--wait")) std.Thread.sleep(2e9); } break :blk .{ - bench1, bench2, bench3, - run1, run2, run3, - check1, check2, check3, - print1, print2, print3, + bench1, bench2, bench3, + run1, run2, run3, + check1, check2, check3, + render1, render2, render3, + print1, print2, print3, + iters, }; }; @@ -86,37 +111,43 @@ pub fn main() !void { defer input_arraylist.deinit(); const input = input_arraylist.items; - for (0..10) |_| { - if (bench1) - std.mem.doNotOptimizeAway(blk: { + for (0..iters) |_| { + if (bench1) { + const ast1 = blk: { const tracy_frame = tracy.namedFrame("parse 1"); defer tracy_frame.end(); break :blk try mymarkdown.parse( gpa, - arena, + gpa, input, ); - }); - if (bench2) - std.mem.doNotOptimizeAway(blk: { + }; + ast1.deinit(gpa); + } + if (bench2) { + const ast2 = blk: { const tracy_frame = tracy.namedFrame("parse 2"); defer tracy_frame.end(); break :blk try mymarkdown.parse2( gpa, - arena, + gpa, input, ); - }); - if (bench3) - std.mem.doNotOptimizeAway(blk: { + }; + ast2.deinit(gpa); + } + if (bench3) { + const ast3 = blk: { const tracy_frame = tracy.namedFrame("parse 3"); defer tracy_frame.end(); break :blk try mymarkdown.parse3( gpa, - arena, + gpa, input, ); - }); + }; + ast3.deinit(gpa); + } } if (!bench1 and !bench2 and !bench3) { @@ -150,57 +181,57 @@ pub fn main() !void { var render_arraylist1: std.ArrayList(u8) = .init(gpa); defer render_arraylist1.deinit(); - if (check1) { + if (check1 or render1 or print1) { std.debug.print("Rendering 1\n", .{}); - const tracy_frame = tracy.namedFrame("check render 1"); + const tracy_frame = tracy.namedFrame("Render 1"); defer tracy_frame.end(); _ = try ast.renderAst(render_arraylist1.writer(), input); } var render_arraylist2: std.ArrayList(u8) = .init(gpa); defer render_arraylist2.deinit(); - if (check2) { + if (check2 
or render2 or print2) { std.debug.print("Rendering 2\n", .{}); - const tracy_frame = tracy.namedFrame("check render 2"); + const tracy_frame = tracy.namedFrame("Render 2"); defer tracy_frame.end(); _ = try ast2.renderAst(render_arraylist2.writer(), input); } var render_arraylist3: std.ArrayList(u8) = .init(gpa); defer render_arraylist3.deinit(); - if (check3) { + if (check3 or render3 or print3) { std.debug.print("Rendering 3\n", .{}); - const tracy_frame = tracy.namedFrame("check render 3"); + const tracy_frame = tracy.namedFrame("Render 3"); defer tracy_frame.end(); _ = try ast3.renderAst(render_arraylist3.writer(), input); } if (check1 and check3) { - std.debug.print("Testing 1 vs 3\n", .{}); + std.debug.print("check 1 vs 3\n", .{}); try std.testing.expectEqualStrings(render_arraylist1.items, render_arraylist3.items); } if (check2 and check3) { - std.debug.print("Testing 2 vs 3\n", .{}); + std.debug.print("check 2 vs 3\n", .{}); try std.testing.expectEqualStrings(render_arraylist2.items, render_arraylist3.items); } if (check1 and check2 and !check3) { - std.debug.print("Testing 1 vs 2\n", .{}); + std.debug.print("check 1 vs 2\n", .{}); try std.testing.expectEqualStrings(render_arraylist1.items, render_arraylist2.items); } for (0..10) |_| { - if (print1) { + if (render1) { std.debug.print("Re-rendering 1\n", .{}); render_arraylist1.clearRetainingCapacity(); const tracy_frame = tracy.namedFrame("re-render 1"); defer tracy_frame.end(); _ = try ast.renderAst(render_arraylist1.writer(), input); } - if (print2) { + if (render2) { std.debug.print("Re-rendering 2\n", .{}); render_arraylist2.clearRetainingCapacity(); const tracy_frame = tracy.namedFrame("re-render 2"); defer tracy_frame.end(); _ = try ast2.renderAst(render_arraylist2.writer(), input); } - if (print3) { + if (render3) { std.debug.print("Re-rendering 3\n", .{}); render_arraylist3.clearRetainingCapacity(); const tracy_frame = tracy.namedFrame("re-render 3"); @@ -208,6 +239,10 @@ pub fn main() !void { _ = 
try ast3.renderAst(render_arraylist3.writer(), input); } } + + if (print1) try std.io.getStdOut().writeAll(render_arraylist1.items); + if (print2) try std.io.getStdOut().writeAll(render_arraylist2.items); + if (print3) try std.io.getStdOut().writeAll(render_arraylist3.items); } if (tracy.enable) { diff --git a/src/padded_str.zig b/src/padded_str.zig @@ -1,3 +1,3 @@ -pub const PADDING = 128; +pub const PADDING = @import("root.zig").PADDING; pub const PaddedSlice = @import("padded_str_impl.zig").PaddedSlice(PADDING); pub const PaddedMany = @import("padded_str_impl.zig").PaddedMany(PADDING); diff --git a/src/root.zig b/src/root.zig @@ -8,6 +8,8 @@ pub const parse2 = AstGen2.parse; pub const AstGen3 = @import("AstGen3.zig"); pub const parse3 = AstGen3.parse; +pub const PADDING = 128; + test { _ = @import("test/test.zig"); _ = Ast;