commit a274d6f4377b2e6de289fcb2c7b2aa0f42d99903
parent 51081b5ac4913f56f762cec8ac5da1b900f66521
Author: gracefu <81774659+gracefuu@users.noreply.github.com>
Date: Fri, 16 May 2025 05:36:40 +0800
v0
Diffstat:
 A  .gitignore          |   1 +
 A  build.zig           |  63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 A  build.zig.zon       |  16 ++++++++++++++++
 A  src/Ast.zig         | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 A  src/AstGen.zig      | 559 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 A  src/AstGen/test.zig | 346 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 A  src/main.zig        |  22 ++++++++++++++++++++++
 A  src/root.zig        |  40 ++++++++++++++++++++++++++++++++++++++++
 A  src/str.zig         |  95 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 A  src/utils.zig       | 126 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10 files changed, 1504 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+zig-out
diff --git a/build.zig b/build.zig
@@ -0,0 +1,63 @@
+const std = @import("std");
+
+pub fn build(b: *std.Build) void {
+ const target = b.standardTargetOptions(.{});
+ const optimize = b.standardOptimizeOption(.{});
+
+ const mymarkdown = b.addModule("mymarkdown", .{
+ .root_source_file = b.path("src/root.zig"),
+ .target = target,
+ .optimize = optimize,
+ });
+ mymarkdown.addImport("ziggy", b.dependency("ziggy", .{}).module("ziggy"));
+    const mymarkdown_cli = b.createModule(.{
+ .root_source_file = b.path("src/main.zig"),
+ .target = target,
+ .optimize = optimize,
+ });
+ mymarkdown_cli.addImport("mymarkdown", mymarkdown);
+
+ const mymarkdown_cli_compile = b.addExecutable(.{
+ .name = "mymarkdown",
+ .root_module = mymarkdown_cli,
+ });
+ b.installArtifact(mymarkdown_cli_compile);
+
+ const check = b.step("check", "Check if the mymarkdown CLI compiles");
+ check.dependOn(&mymarkdown_cli_compile.step);
+
+ setupTestStep(b, target, optimize, mymarkdown, mymarkdown_cli, check);
+ setupRunStep(b, mymarkdown_cli_compile);
+}
+
+fn setupTestStep(
+ b: *std.Build,
+ target: std.Build.ResolvedTarget,
+ optimize: std.builtin.OptimizeMode,
+ mymarkdown: *std.Build.Module,
+ mymarkdown_cli: *std.Build.Module,
+ check: *std.Build.Step,
+) void {
+ const test_step = b.step("test", "Run unit tests");
+ test_step.dependOn(check);
+ test_step.dependOn(&b.addRunArtifact(b.addTest(.{
+ .root_module = mymarkdown,
+ .target = target,
+ .optimize = optimize,
+ })).step);
+ test_step.dependOn(&b.addRunArtifact(b.addTest(.{
+ .root_module = mymarkdown_cli,
+ .target = target,
+ .optimize = optimize,
+ })).step);
+}
+
+fn setupRunStep(
+ b: *std.Build,
+ mymarkdown_cli_compile: *std.Build.Step.Compile,
+) void {
+ const run_exe = b.addRunArtifact(mymarkdown_cli_compile);
+ if (b.args) |args| run_exe.addArgs(args);
+ const run_exe_step = b.step("run", "Run the mymarkdown CLI");
+ run_exe_step.dependOn(&run_exe.step);
+}
diff --git a/build.zig.zon b/build.zig.zon
@@ -0,0 +1,16 @@
+.{
+ .name = .mymarkdown,
+ .version = "0.0.0",
+ .fingerprint = 0x680fc5b268bbdd89, // Changing this has security and trust implications.
+ .minimum_zig_version = "0.14.0",
+ .dependencies = .{
+ .ziggy = .{
+ .path = "../../../manual-software/ziggy",
+ },
+ },
+ .paths = .{
+ "build.zig",
+ "build.zig.zon",
+ "src",
+ },
+}
diff --git a/src/Ast.zig b/src/Ast.zig
@@ -0,0 +1,236 @@
+const std = @import("std");
+const ziggy = @import("ziggy");
+const utils = @import("utils.zig");
+const Allocator = std.mem.Allocator;
+const Ast = @This();
+
+nodes: []const Node,
+errors: []const Error,
+extra: []const u32,
+
+pub const empty: Ast = .{ .nodes = &.{}, .errors = &.{}, .extra = &.{} };
+
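+// The helpers below generate `format` methods that print values in a
+// Zig-literal-like syntax, so printed AST dumps read like the literals used
+// as expected values in the AstGen tests.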
+fn ZiggyFormat(comptime T: type, opts: ziggy.serializer.StringifyOptions) type {
+ return struct {
+ pub fn format(self: T, comptime _: []const u8, _: anytype, writer: anytype) !void {
+ _ = try writer.writeAll(".");
+ try ziggy.stringify(self, opts, writer);
+ }
+ };
+}
+
+fn ziggyFormat(comptime T: type, opts: ziggy.serializer.StringifyOptions) @TypeOf(ZiggyFormat(T, opts).format) {
+ return ZiggyFormat(T, opts).format;
+}
+
+fn UnionFormat(comptime T: type) type {
+ return struct {
+ pub fn format(self: T, comptime _: []const u8, _: anytype, writer: anytype) !void {
+ const info = @typeInfo(T).@"union";
+ if (info.tag_type) |UnionTagType| {
+ try writer.writeAll(".{ .");
+ try writer.writeAll(@tagName(@as(UnionTagType, self)));
+ try writer.writeAll(" = ");
+ inline for (info.fields) |u_field| {
+ if (self == @field(UnionTagType, u_field.name)) {
+ try writer.print("{}", .{@field(self, u_field.name)});
+ }
+ }
+ try writer.writeAll(" }");
+ } else {
+ try writer.print("@{x}", .{@intFromPtr(&self)});
+ }
+ }
+ };
+}
+
+fn unionFormat(comptime T: type) @TypeOf(UnionFormat(T).format) {
+ return UnionFormat(T).format;
+}
+
+fn StructFormat(comptime T: type) type {
+ return struct {
+ pub fn format(value: T, comptime actual_fmt: []const u8, _: anytype, writer: anytype) !void {
+ const info = @typeInfo(T).@"struct";
+ if (actual_fmt.len != 0) std.fmt.invalidFmtError(actual_fmt, value);
+ if (info.is_tuple) {
+ // Skip the type and field names when formatting tuples.
+ try writer.writeAll(".{");
+ inline for (info.fields, 0..) |f, i| {
+ if (i == 0) {
+ try writer.writeAll(" ");
+ } else {
+ try writer.writeAll(", ");
+ }
+ try writer.print("{}", .{@field(value, f.name)});
+ }
+ return writer.writeAll(" }");
+ }
+ try writer.writeAll(".{");
+ inline for (info.fields, 0..) |f, i| {
+ if (i == 0) {
+ try writer.writeAll(" .");
+ } else {
+ try writer.writeAll(", .");
+ }
+ try writer.writeAll(f.name);
+ try writer.writeAll(" = ");
+ try writer.print("{}", .{@field(value, f.name)});
+ }
+ try writer.writeAll(" }");
+ }
+ };
+}
+
+fn structFormat(comptime T: type) @TypeOf(StructFormat(T).format) {
+ return StructFormat(T).format;
+}
+
+pub const StrOffset = u32;
+pub const StrLen = u24;
+
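+// A node is a packed (data, tag) pair built by `utils.Packed`, keeping every
+// node at 64 bits (see the size-tracking tests below). Nodes are stored in
+// preorder: a container's children immediately follow it, which `render`
+// relies on when walking `num_children` entries.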
+pub const Node = utils.Packed(union(enum(u8)) {
+ document: Root,
+ marker: Leaf, // First child of nodes like heading, list items, ...
+
+ thematic_break: Leaf,
+ heading: Container,
+ quote: Container,
+ paragraph: Container,
+ unordered_item: Container,
+ ordered_item: Container,
+ term_item: Container,
+ task_item: Container,
+ elaboration: Container,
+
+ text: Leaf,
+ space_text: Leaf, // text with 1 space added before it
+
+ pub const Idx = utils.NewType(u24, opaque {});
+ pub const Root = packed struct {
+ num_children: u24 = 0,
+ pub const format = structFormat(@This());
+ };
+ pub const Container = packed struct {
+ off: StrOffset,
+ num_children: u24 = 0,
+ pub const format = structFormat(@This());
+ };
+ pub const Leaf = packed struct {
+ off: StrOffset,
+ len: StrLen,
+ const num_children = 0;
+ pub const format = structFormat(@This());
+ };
+
+ pub fn incrementNumChildren(self: *Node) void {
+ switch (self.tag) {
+ inline else => |t| {
+ if (@TypeOf(@field(self.data, @tagName(t))) == Container or @TypeOf(@field(self.data, @tagName(t))) == Root) {
+ @field(self.data, @tagName(t)).num_children += 1;
+ } else unreachable;
+ },
+ }
+ }
+
+ pub const format = unionFormat(@This());
+});
+
+pub const Error = utils.Packed(union(enum(u8)) {
+ marker_too_long: NodeError,
+ invalid_marker: PointError,
+ empty_line_in_inline_block: PointError,
+ inconsistent_indentation: PointError,
+
+ /// Used when the error diagnostic spans the entire node
+ pub const NodeError = packed struct {
+ idx: Node.Idx,
+
+ pub const format = structFormat(@This());
+ };
+
+ /// Used when the error diagnostic should point at a single location
+ pub const PointError = packed struct {
+ idx: Node.Idx,
+ off: StrOffset,
+
+ pub const format = structFormat(@This());
+ };
+ pub const Idx = utils.NewType(u24, opaque {});
+
+ pub const format = unionFormat(@This());
+});
+
+test "Tracking size of Node struct" {
+ try std.testing.expectEqual(24, @bitSizeOf(Node.Idx));
+ try std.testing.expectEqual(4, @sizeOf(Node.Idx));
+ try std.testing.expectEqual(64, @bitSizeOf(Node));
+ try std.testing.expectEqual(8, @sizeOf(Node));
+}
+
+test "Tracking size of Error struct" {
+ try std.testing.expectEqual(24, @bitSizeOf(Error.Idx));
+ try std.testing.expectEqual(4, @sizeOf(Error.Idx));
+ try std.testing.expectEqual(64, @bitSizeOf(Error));
+ try std.testing.expectEqual(8, @sizeOf(Error));
+}
+
+pub const format = ziggyFormat(@This(), .{
+ .whitespace = .space_2,
+ .omit_top_level_curly = false,
+});
+
+pub const Tagged = struct {
+ nodes: []const Node.Tagged,
+ errors: []const Error.Tagged,
+ extra: []const u32,
+
+ pub const empty: Tagged = .{ .nodes = &.{}, .errors = &.{}, .extra = &.{} };
+};
+pub fn toTagged(self: Ast, gpa: Allocator) !Tagged {
+ const nodes = try gpa.alloc(Node.Tagged, self.nodes.len);
+ const errors = try gpa.alloc(Error.Tagged, self.errors.len);
+ const extra = try gpa.dupe(u32, self.extra);
+ for (self.nodes, nodes) |node, *out| out.* = node.toTagged();
+ for (self.errors, errors) |err, *out| out.* = err.toTagged();
+ return .{ .nodes = nodes, .errors = errors, .extra = extra };
+}
+
+pub fn render(self: Ast, writer: anytype, input: []const u8, start_: ?Node.Idx) !?Node.Idx {
+ const start: Node.Idx = start_ orelse @enumFromInt(0);
+ switch (self.nodes[@intFromEnum(start)].tag) {
+ .document => try writer.writeAll("<body>\n"),
+ .paragraph => try writer.writeAll("<p>"),
+ .text => {
+ const data: Node.Leaf = self.nodes[@intFromEnum(start)].data.text;
+ try writer.writeAll(input[data.off .. data.off + data.len]);
+ },
+ .space_text => {
+            const data: Node.Leaf = self.nodes[@intFromEnum(start)].data.space_text;
+ try writer.writeByte(' ');
+ try writer.writeAll(input[data.off .. data.off + data.len]);
+ },
+ else => unreachable,
+ }
+ var cur_idx: ?Node.Idx = start.next();
+ switch (self.nodes[@intFromEnum(start)].tag) {
+ inline .document, .paragraph => |t| {
+ const data = @field(self.nodes[@intFromEnum(start)].data, @tagName(t));
+ for (0..data.num_children) |_| {
+ if (cur_idx) |idx| {
+ cur_idx = try self.render(writer, input, idx);
+ } else {
+ unreachable;
+ }
+ }
+ },
+ else => {},
+ }
+ switch (self.nodes[@intFromEnum(start)].tag) {
+ .document => try writer.writeAll("</body>\n"),
+ .paragraph => try writer.writeAll("</p>\n"),
+ .text, .space_text => {},
+ else => unreachable,
+ }
+ return cur_idx;
+}
diff --git a/src/AstGen.zig b/src/AstGen.zig
@@ -0,0 +1,559 @@
+const std = @import("std");
+const ziggy = @import("ziggy");
+const utils = @import("utils.zig");
+const str = @import("str.zig");
+const ArenaAllocator = std.heap.ArenaAllocator;
+const Allocator = std.mem.Allocator;
+const AstGen = @This();
+const Ast = @import("Ast.zig");
+const Node = Ast.Node;
+const Error = Ast.Error;
+
+input_base: [*]u8,
+input: []u8,
+nodes: std.ArrayListUnmanaged(Node),
+errors: std.ArrayListUnmanaged(Error),
+extra: std.ArrayListUnmanaged(u32),
+
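+// Small index helpers. `@setRuntimeSafety(true)` keeps bounds and overflow
+// checks on even in release builds, since `Node.Idx` is only 24 bits wide.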
+fn getNode(self: AstGen, idx: Node.Idx) *Node {
+ @setRuntimeSafety(true);
+ return &self.nodes.items[@intFromEnum(idx)];
+}
+fn lastNodeIdx(self: AstGen) Node.Idx {
+ @setRuntimeSafety(true);
+ return @enumFromInt(self.nodes.items.len - 1);
+}
+fn nextNodeIdx(self: AstGen) Node.Idx {
+ @setRuntimeSafety(true);
+ return @enumFromInt(self.nodes.items.len);
+}
+fn appendNode(self: *AstGen, gpa: Allocator, node: Node.Tagged) !Node.Idx {
+ {
+ @setRuntimeSafety(true);
+ if (self.nodes.items.len > std.math.maxInt(
+ @typeInfo(Node.Idx).@"enum".tag_type,
+ )) unreachable;
+ }
+ const idx = self.nodes.items.len;
+ try self.nodes.append(gpa, .fromTagged(node));
+ return @enumFromInt(idx);
+}
+
+pub fn deinit(self: *AstGen, gpa: Allocator) void {
+ self.nodes.deinit(gpa);
+ self.errors.deinit(gpa);
+ self.extra.deinit(gpa);
+}
+
+pub fn parse(gpa: Allocator, output_gpa: ?Allocator, input: []const u8) error{ InputTooLarge, OutOfMemory }!Ast {
+ if (input.len > std.math.maxInt(u32) - 1) {
+ return error.InputTooLarge;
+ }
+
+ // const input_copy = input;
+ // const input_copy = try gpa.dupe(u8, input);
+ // defer gpa.free(input_copy);
+ var input_copy_arraylist: std.ArrayListUnmanaged(u8) = .empty;
+ defer input_copy_arraylist.deinit(gpa);
+ try input_copy_arraylist.ensureTotalCapacityPrecise(gpa, input.len + 2);
+
+ var ast: AstGen = .{
+ .input_base = input_copy_arraylist.items.ptr,
+ .input = undefined,
+ .nodes = .empty,
+ .errors = .empty,
+ .extra = .empty,
+ };
+ defer ast.deinit(gpa);
+ const root = try ast.appendNode(gpa, .{ .document = .{} });
+
+ var lines: std.ArrayListUnmanaged(Ast.StrOffset) = .empty;
+ defer lines.deinit(gpa);
+ // var lines: std.ArrayListUnmanaged([]u8) = .empty;
+ // defer lines.deinit(gpa);
+
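+    // Copy the input line by line, recording each line's start offset. A '\n'
+    // is written directly after the last non-whitespace byte of each line (a
+    // whitespace-only line becomes a lone '\n'), so later scans can treat '\n'
+    // as end-of-line and ignore trailing blanks.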
+ var lines_it = std.mem.splitScalar(u8, input, '\n');
+ var maybe_line: ?[]u8 = @constCast(lines_it.first());
+ while (maybe_line) |line| : (maybe_line = @constCast(lines_it.next())) {
+ if (str.lastIndexOfNone(line, " \t\r\n")) |idx| {
+ const old_len = input_copy_arraylist.items.len;
+ try lines.append(gpa, @intCast(old_len));
+ input_copy_arraylist.appendSliceAssumeCapacity(line);
+ input_copy_arraylist.appendAssumeCapacity('\n');
+ input_copy_arraylist.items[old_len + idx + 1] = '\n';
+ // try lines.append(gpa, input_copy_arraylist.items[old_len .. old_len + idx + 1]);
+ } else {
+ try lines.append(gpa, @intCast(input_copy_arraylist.items.len));
+ input_copy_arraylist.appendAssumeCapacity('\n');
+ // try lines.append(gpa, &.{});
+ }
+ }
+ input_copy_arraylist.appendAssumeCapacity('\n');
+ ast.input = input_copy_arraylist.items;
+ // stripTrailingWhitespace(&lines.items);
+
+ try ast.parseColumn(gpa, lines.items, root);
+
+ // std.time.sleep(std.time.ns_per_hour);
+
+ if (output_gpa) |gpa2| {
+ return .{
+ .nodes = try gpa2.dupe(Node, ast.nodes.items),
+ .errors = try gpa2.dupe(Error, ast.errors.items),
+ .extra = try gpa2.dupe(u32, ast.extra.items),
+ };
+ } else {
+ return .{
+ .nodes = try ast.nodes.toOwnedSlice(gpa),
+ .errors = try ast.errors.toOwnedSlice(gpa),
+ .extra = try ast.extra.toOwnedSlice(gpa),
+ };
+ }
+}
+
+fn stripTrailingWhitespace(lines: *[][]u8) void {
+ for (lines.*) |*line| {
+ if (str.lastIndexOfNone(line.*, " \t\r\n")) |idx| {
+ line.* = line.*[0 .. idx + 1];
+ } else {
+ line.* = line.*[0..0];
+ }
+ }
+}
+
+fn calcOffset(self: *AstGen, c: *u8) u32 {
+ return @intCast(c - self.input_base);
+}
+
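+/// Skips leading empty lines, takes the leading whitespace of the first
+/// remaining line as the block's indentation, and strips that prefix from
+/// every following line, reporting `inconsistent_indentation` where it does
+/// not match. Returns the adjusted line offsets; an empty slice means there
+/// is no indented content.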
+fn findIndentedColumn(self: *AstGen, gpa: Allocator, lines_: []u32, node_idx: Node.Idx) ![]u32 {
+ var lines = lines_;
+
+    // Empty lines at the start of the block are fine; just skip them.
+    // Special case: the first line may consist of only whitespace,
+    // because that whitespace may have been introduced by marker replacement.
+ if (lines.len > 0)
+ if (str.indexOfNone(self.input[lines[0]..], " \t\r")) |idx|
+ if (self.input[lines[0] + idx] == '\n') {
+ lines = lines[1..];
+ while (true) : (lines = lines[1..]) {
+ if (lines.len == 0) return &.{};
+ if (self.input[lines[0]] != '\n') break;
+ }
+ };
+ if (lines.len == 0) return &.{};
+
+ // determine indentation
+ const indentation_idx = str.indexOfNone(self.input[lines[0]..], " \t\r") orelse unreachable;
+ if (indentation_idx == 0) return &.{};
+ const indentation = self.input[lines[0] .. lines[0] + indentation_idx];
+
+ // strip all lines of their indentation
+ lines[0] += @truncate(indentation.len);
+ for (lines[1..]) |*line| {
+ if (self.input[line.*] == '\n') continue;
+
+ const diff_idx = std.mem.indexOfDiff(u8, self.input[line.*..], indentation) orelse unreachable;
+ // std.debug.assert(diff_idx != line.len);
+ if (diff_idx != indentation.len) {
+ try self.errors.append(gpa, .fromTagged(.{
+ .inconsistent_indentation = .{ .idx = node_idx, .off = line.* },
+ }));
+ // Recover by stripping all whitespace on this line
+ const recover_indentation_idx = std.mem.indexOfNone(u8, self.input[line.*..], " \t\r") orelse unreachable;
+ line.* += @truncate(recover_indentation_idx);
+ } else {
+ line.* += @truncate(indentation.len);
+ }
+ }
+
+ return lines;
+}
+
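+/// Parses a run of lines as inline content under `parent_idx`: the first line
+/// becomes a `text` node and every continuation line a `space_text` node (so
+/// the lines are joined by single spaces when rendered). Empty lines inside
+/// the block are reported as `empty_line_in_inline_block`, and lines longer
+/// than `Ast.StrLen` allows are split into chunks.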
+fn parseInlineBlock(self: *AstGen, gpa: Allocator, lines_: []Ast.StrOffset, parent_idx: Node.Idx) !void {
+ var lines = lines_;
+ var empty_line_off: ?u32 = null;
+
+ outer: {
+        // Empty lines at the start of the inline block are fine; just skip them.
+        // Special case: the first line may consist of only whitespace,
+        // because that whitespace may have been introduced by marker replacement.
+ if (lines.len > 0)
+ if (str.indexOfNone(self.input[lines[0]..], " \t\r")) |idx|
+ if (self.input[lines[0] + idx] == '\n') {
+ lines = lines[1..];
+ while (true) : (lines = lines[1..]) {
+ if (lines.len == 0) break :outer;
+ if (self.input[lines[0]] != '\n') break;
+ }
+ };
+ if (lines.len == 0) break :outer;
+
+ self.getNode(parent_idx).incrementNumChildren();
+
+ // determine indentation
+ const indentation_idx = str.indexOfNone(self.input[lines[0]..], " \t\r") orelse unreachable;
+ const indentation = self.input[lines[0] .. lines[0] + indentation_idx];
+
+ lines[0] += @truncate(indentation.len);
+ // lines[0] = lines[0][indentation.len..];
+
+ var len = str.indexOfChar(self.input[lines[0]..], '\n') orelse unreachable;
+ if (len <= std.math.maxInt(Ast.StrLen)) {
+ _ = try self.appendNode(gpa, .{
+ .text = .{
+ .off = lines[0],
+ .len = @truncate(len),
+ },
+ });
+ } else {
+ @branchHint(.cold);
+ while (len > 0) {
+ const chunk_len = @min(len, std.math.maxInt(Ast.StrLen));
+ _ = try self.appendNode(gpa, .{
+ .text = .{
+ .off = lines[0],
+ .len = chunk_len,
+ },
+ });
+ lines[0] += chunk_len;
+ len -= chunk_len;
+ }
+ }
+ lines = lines[1..];
+
+ while (true) {
+ // Skip and error on empty lines
+ while (true) : (lines = lines[1..]) {
+ if (lines.len == 0) break :outer;
+ if (self.input[lines[0]] != '\n') break;
+ // empty line detected
+ empty_line_off = lines[0];
+ }
+
+            if (empty_line_off) |off| {
+                // Report each empty line only once.
+                empty_line_off = null;
+                try self.errors.append(gpa, .fromTagged(.{
+                    .empty_line_in_inline_block = .{ .idx = self.nextNodeIdx(), .off = off },
+                }));
+            }
+
+ const diff_idx = std.mem.indexOfDiff(u8, self.input[lines[0]..], indentation) orelse unreachable;
+ // std.debug.assert(diff_idx != lines[0].len);
+ if (diff_idx != indentation.len) {
+ try self.errors.append(gpa, .fromTagged(.{
+ .inconsistent_indentation = .{ .idx = self.nextNodeIdx(), .off = lines[0] },
+ }));
+ // Recover by stripping all whitespace on this line
+ const recover_indentation_idx = std.mem.indexOfNone(u8, self.input[lines[0]..], " \t\r\n") orelse unreachable;
+ lines[0] += @truncate(recover_indentation_idx);
+ } else {
+ lines[0] += @truncate(indentation.len);
+ }
+
+ self.getNode(parent_idx).incrementNumChildren();
+
+ var len2 = str.indexOfChar(self.input[lines[0]..], '\n') orelse unreachable;
+ if (len2 <= std.math.maxInt(Ast.StrLen)) {
+ _ = try self.appendNode(gpa, .{
+ .space_text = .{
+ .off = lines[0],
+ .len = @truncate(len2),
+ },
+ });
+ } else {
+ @branchHint(.cold);
+ _ = try self.appendNode(gpa, .{
+ .space_text = .{
+ .off = lines[0],
+ .len = std.math.maxInt(Ast.StrLen),
+ },
+ });
+ len2 -= std.math.maxInt(Ast.StrLen);
+ lines[0] += std.math.maxInt(Ast.StrLen);
+ while (len2 > 0) {
+ const chunk_len = @min(len2, std.math.maxInt(Ast.StrLen));
+ _ = try self.appendNode(gpa, .{
+ .text = .{
+ .off = lines[0],
+ .len = chunk_len,
+ },
+ });
+ lines[0] += chunk_len;
+ len2 -= chunk_len;
+ }
+ }
+ lines = lines[1..];
+ }
+ }
+}
+
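+/// Block-level parsing loop: skips empty lines, dispatches on the first byte
+/// of each block via `parseBlockStart`, then collects the lines belonging to
+/// that block and parses them according to the returned `ParseMode`.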
+fn parseColumn(self: *AstGen, gpa: Allocator, lines_: []Ast.StrOffset, parent_idx: Node.Idx) !void {
+ var lines = lines_;
+ outer: while (true) {
+        // Skip empty lines.
+        // Special case: the first line may consist of only whitespace,
+        // because that whitespace may have been introduced by marker replacement.
+ if (lines.len > 0) {
+ if (str.indexOfNone(self.input[lines[0]..], " \t\r")) |idx| {
+ if (self.input[lines[0] + idx] == '\n') {
+ lines = lines[1..];
+ while (true) : (lines = lines[1..]) {
+ if (lines.len == 0) break :outer;
+ if (self.input[lines[0]] != '\n') break;
+ }
+ }
+ }
+ }
+ if (lines.len == 0) break :outer;
+
+ // Use first character to determine marker
+ const mode, const child = try self.parseBlockStart(gpa, lines[0]);
+ self.getNode(parent_idx).incrementNumChildren();
+
+ switch (mode) {
+ .paragraph => {
+ // take indented or non-block-marker lines
+ var num_lines: usize = 1;
+ for (lines[1..]) |line| {
+ if (self.input[line] == '\n') break;
+ if (block_specs[self.input[line]] != null) break;
+ num_lines += 1;
+ }
+
+ const paragraph_lines = lines[0..num_lines];
+ lines = lines[num_lines..];
+ try self.parseInlineBlock(gpa, paragraph_lines, child);
+ },
+ .indented_inline_block => {
+ // take indented or empty lines
+ var num_lines: usize = 1;
+ for (lines[1..]) |line| {
+ if (str.isNoneOf(self.input[line], " \t\r\n")) break;
+ num_lines += 1;
+ }
+
+ const inline_block_lines = try self.findIndentedColumn(gpa, lines[0..num_lines], child);
+ lines = lines[num_lines..];
+ try self.parseInlineBlock(gpa, inline_block_lines, child);
+ },
+ .indented_column => {
+ // take indented or empty lines
+ var num_lines: usize = 1;
+ for (lines[1..]) |line| {
+ if (str.isNoneOf(self.input[line], " \t\r\n")) break;
+ num_lines += 1;
+ }
+
+ const column_lines = try self.findIndentedColumn(gpa, lines[0..num_lines], child);
+ lines = lines[num_lines..];
+ try self.parseColumn(gpa, column_lines, child);
+ },
+ .no_children => {
+ lines = lines[1..];
+ },
+ else => unreachable,
+ }
+ }
+}
+
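+/// How the lines following a block marker should be parsed; `.raw` is not
+/// produced by any marker yet.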
+const ParseMode = union(enum) {
+ indented_column,
+ indented_inline_block,
+ paragraph,
+ raw: struct { fence: []u8 },
+ no_children,
+};
+
+const MarkerSpec = union(enum) {
+ exact: []const u8,
+ starts_with: []const u8,
+ starts_with_multi: struct {
+ marker_char: u8,
+ extra: []const []const u8 = &.{""}, // any extra characters to check after the marker
+ max_chars: ?u32 = null,
+ },
+};
+const BlockSpecCase = struct {
+ tag: Node.Tag,
+ marker: MarkerSpec,
+ mode: ParseMode,
+ store_marker_child: enum { store, no_store },
+};
+
+const BlockSpec = ?[]const BlockSpecCase;
+
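+/// Builds a 256-entry lookup table, indexed by a line's first byte, from the
+/// public decls of `spec`: each decl is keyed on the first character of its
+/// name and lists the marker cases to try for lines starting with that byte.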
+fn blockSpecs(comptime spec: type) [256]BlockSpec {
+ var arr: [256]BlockSpec = undefined;
+ for (0..256) |c| arr[c] = null;
+ for (@typeInfo(spec).@"struct".decls) |decl| {
+ const c = decl.name[0];
+ arr[c] = @field(spec, decl.name);
+ }
+ return arr;
+}
+
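+// Cases are tried in declaration order, so for '-' the task-item markers
+// ("- [ ]", "- [x]", "- [X]") are checked before the plain unordered-item marker.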
+const block_specs = blockSpecs(struct {
+ pub const @"*": BlockSpec = &.{
+ .{
+ .tag = .thematic_break,
+ .marker = .{ .exact = "***" },
+ .mode = .no_children,
+ .store_marker_child = .no_store,
+ },
+ };
+ pub const @"#": BlockSpec = &.{
+ .{
+ .tag = .heading,
+ .marker = .{ .starts_with_multi = .{ .marker_char = '#', .max_chars = 6 } },
+ .mode = .indented_inline_block,
+ .store_marker_child = .store,
+ },
+ };
+ pub const @"-": BlockSpec = &.{
+ .{
+ .tag = .task_item,
+ .marker = .{ .starts_with_multi = .{ .marker_char = '-', .extra = &.{ " [ ]", " [x]", " [X]" } } },
+ .mode = .indented_inline_block,
+ .store_marker_child = .store,
+ },
+ .{
+ .tag = .unordered_item,
+ .marker = .{ .starts_with_multi = .{ .marker_char = '-' } },
+ .mode = .indented_inline_block,
+ .store_marker_child = .store,
+ },
+ };
+ pub const @".": BlockSpec = &.{
+ .{
+ .tag = .ordered_item,
+ .marker = .{ .starts_with_multi = .{ .marker_char = '.' } },
+ .mode = .indented_inline_block,
+ .store_marker_child = .store,
+ },
+ };
+ pub const @":": BlockSpec = &.{
+ .{
+ .tag = .term_item,
+ .marker = .{ .starts_with_multi = .{ .marker_char = ':' } },
+ .mode = .indented_inline_block,
+ .store_marker_child = .store,
+ },
+ };
+ pub const @">": BlockSpec = &.{
+ .{
+ .tag = .quote,
+ .marker = .{ .starts_with = ">" },
+ .mode = .indented_column,
+ .store_marker_child = .no_store,
+ },
+ };
+ pub const @"+": BlockSpec = &.{
+ .{
+ .tag = .elaboration,
+ .marker = .{ .starts_with = "+" },
+ .mode = .indented_column,
+ .store_marker_child = .no_store,
+ },
+ };
+});
+
+/// Appends the suitable block node to the ast,
+/// then returns how parsing should proceed for the children of this block.
+/// Also returns the idx of the container node created.
+fn parseBlockStart(self: *AstGen, gpa: Allocator, line: Ast.StrOffset) !struct { ParseMode, Node.Idx } {
+ switch (self.input[line]) {
+ inline else => |c| {
+ const spec_or_null = block_specs[c];
+ if (spec_or_null) |spec| {
+ inline for (spec) |case| {
+ switch (case.marker) {
+ .exact, .starts_with => |marker| {
+ if (std.mem.startsWith(u8, self.input[line..], marker)) {
+ const node = if (case.mode == .no_children) try self.appendNode(gpa, @unionInit(Node.Tagged, @tagName(case.tag), @as(Node.Tagged.Leaf, .{
+ .off = line,
+ .len = marker.len,
+ }))) else try self.appendNode(gpa, @unionInit(Node.Tagged, @tagName(case.tag), @as(Node.Tagged.Container, .{
+ .off = line,
+ .num_children = if (case.store_marker_child == .store) 1 else 0,
+ })));
+ @memset(self.input[line .. line + marker.len], ' ');
+ if (case.store_marker_child == .store) {
+ _ = try self.appendNode(gpa, .{ .marker = .{
+ .off = line,
+                                .len = marker.len,
+ } });
+ }
+ return .{ case.mode, node };
+ }
+ },
+ .starts_with_multi => |marker_spec| {
+ var marker_len = str.indexOfNotChar(self.input[line..], marker_spec.marker_char) orelse str.indexOfChar(self.input[line..], '\n') orelse unreachable;
+
+ inline for (marker_spec.extra) |extra| {
+ if (std.mem.startsWith(u8, self.input[line + marker_len ..], extra)) {
+ marker_len += extra.len;
+
+ const node = try self.appendNode(gpa, @unionInit(Node.Tagged, @tagName(case.tag), @as(Node.Tagged.Container, .{
+ .off = line,
+ .num_children = if (case.store_marker_child == .store) 1 else 0,
+ })));
+
+ if (marker_spec.max_chars) |max|
+ if (marker_len > max)
+ try self.errors.append(gpa, .fromTagged(.{
+ .marker_too_long = .{
+ .idx = if (case.store_marker_child == .no_store)
+ self.lastNodeIdx()
+ else
+ self.nextNodeIdx(),
+ },
+ }));
+
+ @memset(self.input[line .. line + marker_len], ' ');
+ if (case.store_marker_child == .store) {
+ _ = try self.appendNode(gpa, .{ .marker = .{
+ .off = line,
+ .len = utils.safeIntCast(Ast.StrLen, marker_len),
+ } });
+ }
+ return .{ case.mode, node };
+ }
+ }
+ },
+ }
+ }
+ } else {
+ // Default behaviour is to parse a paragraph until the next newline or block character
+ return .{
+ .paragraph,
+ try self.appendNode(gpa, .{
+ .paragraph = .{
+ .off = line,
+ },
+ }),
+ };
+ }
+ },
+ }
+
+ // Line started with a special character, but it didn't match any markers
+ // Fallback to paragraph, but place a warning.
+ try self.errors.append(gpa, .fromTagged(.{
+ .invalid_marker = .{
+ .idx = self.nextNodeIdx(),
+ .off = line,
+ },
+ }));
+
+ return .{
+ .paragraph,
+ try self.appendNode(gpa, .{
+ .paragraph = .{
+ .off = line,
+ },
+ }),
+ };
+}
+
+test {
+ _ = @import("AstGen/test.zig");
+}
diff --git a/src/AstGen/test.zig b/src/AstGen/test.zig
@@ -0,0 +1,346 @@
+const std = @import("std");
+const parse = @import("../AstGen.zig").parse;
+const Ast = @import("../Ast.zig");
+
+const GeneralPurposeAllocator = std.heap.GeneralPurposeAllocator(.{});
+const ArenaAllocator = std.heap.ArenaAllocator;
+
+fn testParse(input: []const u8, expected: Ast.Tagged) !void {
+ var arena: ArenaAllocator = .init(std.testing.allocator);
+ defer arena.deinit();
+ const ast = try parse(std.testing.allocator, arena.allocator(), input);
+ const tagged_ast = try ast.toTagged(arena.allocator());
+ // try std.testing.expectEqualDeep(expected.nodes.len, tagged_ast.nodes.len);
+ try std.testing.expectEqualDeep(expected, tagged_ast);
+}
+
+test "Empty" {
+ try testParse("", .{
+ .nodes = &.{
+ .{ .document = .{ .num_children = 0 } },
+ },
+ .errors = &.{},
+ .extra = &.{},
+ });
+}
+
+test "Happy path paragraph" {
+ try testParse(
+ \\text
+ \\
+ \\text
+ \\text
+ \\
+ \\text
+ \\ text
+ \\
+ , .{
+ .nodes = &.{
+ .{ .document = .{ .num_children = 3 } },
+ .{ .paragraph = .{ .off = 0, .num_children = 1 } },
+ .{ .text = .{ .off = 0, .len = 4 } },
+ .{ .paragraph = .{ .off = 6, .num_children = 2 } },
+ .{ .text = .{ .off = 6, .len = 4 } },
+ .{ .space_text = .{ .off = 11, .len = 4 } },
+ .{ .paragraph = .{ .off = 17, .num_children = 2 } },
+ .{ .text = .{ .off = 17, .len = 4 } },
+ .{ .space_text = .{ .off = 22, .len = 7 } },
+ },
+ .errors = &.{},
+ .extra = &.{},
+ });
+}
+
+test "Happy path headings" {
+ try testParse(
+ \\# text
+ \\# text
+ \\# text
+ \\ text
+ \\
+ \\# text
+ \\
+ \\# text
+ \\ text
+ \\
+ \\# text
+ \\ text
+ \\
+ \\## text
+ \\## text
+ \\## text
+ \\ text
+ \\
+ \\## text
+ \\
+ \\## text
+ \\ text
+ \\
+ \\## text
+ \\ text
+ \\
+ , .{
+ .nodes = &.{
+ .{ .document = .{ .num_children = 12 } },
+ .{ .heading = .{ .off = 0, .num_children = 2 } },
+ .{ .marker = .{ .off = 0, .len = 1 } },
+ .{ .text = .{ .off = 2, .len = 4 } },
+ .{ .heading = .{ .off = 7, .num_children = 2 } },
+ .{ .marker = .{ .off = 7, .len = 1 } },
+ .{ .text = .{ .off = 9, .len = 4 } },
+ .{ .heading = .{ .off = 14, .num_children = 3 } },
+ .{ .marker = .{ .off = 14, .len = 1 } },
+ .{ .text = .{ .off = 16, .len = 4 } },
+ .{ .space_text = .{ .off = 23, .len = 4 } },
+ .{ .heading = .{ .off = 29, .num_children = 2 } },
+ .{ .marker = .{ .off = 29, .len = 1 } },
+ .{ .text = .{ .off = 31, .len = 4 } },
+ .{ .heading = .{ .off = 37, .num_children = 3 } },
+ .{ .marker = .{ .off = 37, .len = 1 } },
+ .{ .text = .{ .off = 39, .len = 4 } },
+ .{ .space_text = .{ .off = 46, .len = 4 } },
+ .{ .heading = .{ .off = 52, .num_children = 3 } },
+ .{ .marker = .{ .off = 52, .len = 1 } },
+ .{ .text = .{ .off = 54, .len = 4 } },
+ .{ .space_text = .{ .off = 61, .len = 6 } },
+ .{ .heading = .{ .off = 69, .num_children = 2 } },
+ .{ .marker = .{ .off = 69, .len = 2 } },
+ .{ .text = .{ .off = 72, .len = 4 } },
+ .{ .heading = .{ .off = 77, .num_children = 2 } },
+ .{ .marker = .{ .off = 77, .len = 2 } },
+ .{ .text = .{ .off = 80, .len = 4 } },
+ .{ .heading = .{ .off = 85, .num_children = 3 } },
+ .{ .marker = .{ .off = 85, .len = 2 } },
+ .{ .text = .{ .off = 88, .len = 4 } },
+ .{ .space_text = .{ .off = 96, .len = 4 } },
+ .{ .heading = .{ .off = 102, .num_children = 2 } },
+ .{ .marker = .{ .off = 102, .len = 2 } },
+ .{ .text = .{ .off = 105, .len = 4 } },
+ .{ .heading = .{ .off = 111, .num_children = 3 } },
+ .{ .marker = .{ .off = 111, .len = 2 } },
+ .{ .text = .{ .off = 114, .len = 4 } },
+ .{ .space_text = .{ .off = 122, .len = 4 } },
+ .{ .heading = .{ .off = 128, .num_children = 3 } },
+ .{ .marker = .{ .off = 128, .len = 2 } },
+ .{ .text = .{ .off = 131, .len = 4 } },
+ .{ .space_text = .{ .off = 139, .len = 6 } },
+ },
+ .errors = &.{},
+ .extra = &.{},
+ });
+}
+
+test "Happy path quote" {
+ try testParse(
+ \\> text
+ \\ text
+ \\
+ \\> text
+ \\ text
+ \\> text
+ \\> text
+ \\text
+ \\
+ , .{
+ .nodes = &.{
+ .{ .document = .{ .num_children = 5 } },
+ .{ .quote = .{ .off = 0, .num_children = 1 } },
+ .{ .paragraph = .{ .off = 2, .num_children = 2 } },
+ .{ .text = .{ .off = 2, .len = 4 } },
+ .{ .space_text = .{ .off = 9, .len = 4 } },
+ .{ .quote = .{ .off = 15, .num_children = 1 } },
+ .{ .paragraph = .{ .off = 17, .num_children = 2 } },
+ .{ .text = .{ .off = 17, .len = 4 } },
+ .{ .space_text = .{ .off = 24, .len = 6 } },
+ .{ .quote = .{ .off = 31, .num_children = 1 } },
+ .{ .paragraph = .{ .off = 33, .num_children = 1 } },
+ .{ .text = .{ .off = 33, .len = 4 } },
+ .{ .quote = .{ .off = 38, .num_children = 1 } },
+ .{ .paragraph = .{ .off = 40, .num_children = 1 } },
+ .{ .text = .{ .off = 40, .len = 4 } },
+ .{ .paragraph = .{ .off = 45, .num_children = 1 } },
+ .{ .text = .{ .off = 45, .len = 4 } },
+ },
+ .errors = &.{},
+ .extra = &.{},
+ });
+}
+
+test "Happy path list" {
+ try testParse(
+ \\- text
+ \\- [ ] text
+ \\. text
+ \\: text
+ \\-- text
+ \\-- [ ] text
+ \\.. text
+ \\:: text
+ \\
+ , .{
+ .nodes = &.{
+ .{ .document = .{ .num_children = 8 } },
+ .{ .unordered_item = .{ .off = 0, .num_children = 2 } },
+ .{ .marker = .{ .off = 0, .len = 1 } },
+ .{ .text = .{ .off = 2, .len = 4 } },
+ .{ .task_item = .{ .off = 7, .num_children = 2 } },
+ .{ .marker = .{ .off = 7, .len = 5 } },
+ .{ .text = .{ .off = 13, .len = 4 } },
+ .{ .ordered_item = .{ .off = 18, .num_children = 2 } },
+ .{ .marker = .{ .off = 18, .len = 1 } },
+ .{ .text = .{ .off = 20, .len = 4 } },
+ .{ .term_item = .{ .off = 25, .num_children = 2 } },
+ .{ .marker = .{ .off = 25, .len = 1 } },
+ .{ .text = .{ .off = 27, .len = 4 } },
+ .{ .unordered_item = .{ .off = 32, .num_children = 2 } },
+ .{ .marker = .{ .off = 32, .len = 2 } },
+ .{ .text = .{ .off = 35, .len = 4 } },
+ .{ .task_item = .{ .off = 40, .num_children = 2 } },
+ .{ .marker = .{ .off = 40, .len = 6 } },
+ .{ .text = .{ .off = 47, .len = 4 } },
+ .{ .ordered_item = .{ .off = 52, .num_children = 2 } },
+ .{ .marker = .{ .off = 52, .len = 2 } },
+ .{ .text = .{ .off = 55, .len = 4 } },
+ .{ .term_item = .{ .off = 60, .num_children = 2 } },
+ .{ .marker = .{ .off = 60, .len = 2 } },
+ .{ .text = .{ .off = 63, .len = 4 } },
+ },
+ .errors = &.{},
+ .extra = &.{},
+ });
+}
+
+test "Happy path list elaboration" {
+ try testParse(
+ \\- a
+ \\+ bb
+ \\
+ \\ ccc
+ \\
+ , .{
+ .nodes = &.{
+ .{ .document = .{ .num_children = 2 } },
+ .{ .unordered_item = .{ .off = 0, .num_children = 2 } },
+ .{ .marker = .{ .off = 0, .len = 1 } },
+ .{ .text = .{ .off = 2, .len = 1 } },
+ .{ .elaboration = .{ .off = 4, .num_children = 2 } },
+ .{ .paragraph = .{ .off = 6, .num_children = 1 } },
+ .{ .text = .{ .off = 6, .len = 2 } },
+ .{ .paragraph = .{ .off = 12, .num_children = 1 } },
+ .{ .text = .{ .off = 12, .len = 3 } },
+ },
+ .errors = &.{},
+ .extra = &.{},
+ });
+}
+
+test "Thematic break" {
+ try testParse(
+ \\a
+ \\***
+ \\b
+ \\*
+ \\c
+ \\
+ , .{
+ .nodes = &.{
+ .{ .document = .{ .num_children = 4 } },
+ .{ .paragraph = .{ .off = 0, .num_children = 1 } },
+ .{ .text = .{ .off = 0, .len = 1 } },
+ .{ .thematic_break = .{ .off = 2, .len = 3 } },
+ .{ .paragraph = .{ .off = 6, .num_children = 1 } },
+ .{ .text = .{ .off = 6, .len = 1 } },
+ .{ .paragraph = .{ .off = 8, .num_children = 2 } },
+ .{ .text = .{ .off = 8, .len = 1 } },
+ .{ .space_text = .{ .off = 10, .len = 1 } },
+ },
+ .errors = &.{
+ .{ .invalid_marker = .{ .idx = @enumFromInt(6), .off = 8 } },
+ },
+ .extra = &.{},
+ });
+}
+
+test "Mixed indentation" {
+ try testParse(
+ \\+ aaa
+ \\
+ \\
+ ++ "\tbbbbb\n", .{
+ .nodes = &.{
+ .{ .document = .{ .num_children = 1 } },
+ .{ .elaboration = .{ .off = 0, .num_children = 2 } },
+ .{ .paragraph = .{ .off = 2, .num_children = 1 } },
+ .{ .text = .{ .off = 2, .len = 3 } },
+ .{ .paragraph = .{ .off = 8, .num_children = 1 } },
+ .{ .text = .{ .off = 8, .len = 5 } },
+ },
+ .errors = &.{
+ .{ .inconsistent_indentation = .{ .idx = @enumFromInt(1), .off = 7 } },
+ },
+ .extra = &.{},
+ });
+}
+
+test "Empty line in heading" {
+ try testParse(
+ \\# heading
+ \\
+ \\ text
+ \\
+ \\text
+ \\
+ , .{
+ .nodes = &.{
+ .{ .document = .{ .num_children = 2 } },
+ .{ .heading = .{ .off = 0, .num_children = 3 } },
+ .{ .marker = .{ .off = 0, .len = 1 } },
+ .{ .text = .{ .off = 2, .len = 7 } },
+ .{ .space_text = .{ .off = 13, .len = 4 } },
+ .{ .paragraph = .{ .off = 19, .num_children = 1 } },
+ .{ .text = .{ .off = 19, .len = 4 } },
+ },
+ .errors = &.{
+ .{ .empty_line_in_inline_block = .{ .idx = @enumFromInt(4), .off = 10 } },
+ },
+ .extra = &.{},
+ });
+}
+
+// test "Super long line" {
+// const input = try std.testing.allocator.create([(1 << 24) * 4]u8);
+// defer std.testing.allocator.destroy(input);
+// @memset(input, 'a');
+// input[1] = '\n';
+// try testParse(input, .{
+// .nodes = &.{
+// .{ .document = .{ .num_children = 1 } },
+// .{ .paragraph = .{ .off = 0, .num_children = 2 } },
+// .{ .text = .{ .off = 0, .len = 1 } },
+// .{ .space_text = .{ .off = 2, .len = 16777215 } },
+// .{ .text = .{ .off = 2, .len = 16777215 } },
+// .{ .text = .{ .off = 2, .len = 16777215 } },
+// .{ .text = .{ .off = 2, .len = 16777215 } },
+// .{ .text = .{ .off = 2, .len = 2 } },
+// },
+// .errors = &.{},
+// .extra = &.{},
+// });
+// }
+
+// test "Many short lines" {
+// const input = try std.testing.allocator.create([(1 << 23) - 2][2]u8);
+// defer std.testing.allocator.destroy(input);
+// @memset(input, [2]u8{ 'a', '\n' });
+
+// var arena: ArenaAllocator = .init(std.testing.allocator);
+// defer arena.deinit();
+// const ast = try parse(std.testing.allocator, arena.allocator(), @as([*]u8, @ptrCast(input))[0 .. (1 << 23) * 2 - 4]);
+// try std.testing.expectEqual(1 << 23, ast.nodes.len);
+// try std.testing.expectEqual(@as(Ast.Node.Tagged, .{ .document = .{ .num_children = 1 } }), ast.nodes[0].toTagged());
+// try std.testing.expectEqual(@as(Ast.Node.Tagged, .{ .paragraph = .{ .off = 0, .num_children = (1 << 23) - 2 } }), ast.nodes[1].toTagged());
+// try std.testing.expectEqual(@as(Ast.Node.Tagged, .{ .text = .{ .off = 0, .len = 1 } }), ast.nodes[2].toTagged());
+// for (1..(1 << 23) - 2) |i| {
+// try std.testing.expectEqual(@as(Ast.Node.Tagged, .{ .space_text = .{ .off = @intCast(i * 2), .len = 1 } }), ast.nodes[i + 2].toTagged());
+// }
+// }
diff --git a/src/main.zig b/src/main.zig
@@ -0,0 +1,22 @@
+const std = @import("std");
+const mymarkdown = @import("mymarkdown");
+
+const GeneralPurposeAllocator = std.heap.GeneralPurposeAllocator(.{});
+const ArenaAllocator = std.heap.ArenaAllocator;
+
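+// Reads the whole document from stdin, parses it, and renders the resulting
+// AST as HTML to stdout.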
+pub fn main() !void {
+ var gpa: GeneralPurposeAllocator = .{};
+ var arena: ArenaAllocator = .init(gpa.allocator());
+ defer arena.deinit();
+
+ const input = try std.io.getStdIn().readToEndAlloc(arena.allocator(), std.math.maxInt(u32));
+
+ const ast = try mymarkdown.parse(gpa.allocator(), arena.allocator(), input);
+ // std.mem.doNotOptimizeAway(ast);
+
+ var bw = std.io.bufferedWriter(std.io.getStdOut().writer());
+ const stdout = bw.writer();
+ // try stdout.print("{}\n", .{ast});
+ _ = try ast.render(stdout, input, null);
+ try bw.flush();
+}
diff --git a/src/root.zig b/src/root.zig
@@ -0,0 +1,40 @@
+const std = @import("std");
+pub const Ast = @import("Ast.zig");
+pub const AstGen = @import("AstGen.zig");
+pub const parse = AstGen.parse;
+
+test {
+ _ = Ast;
+ _ = AstGen;
+}
+
+// test {
+// var arena: std.heap.ArenaAllocator = .init(std.testing.allocator);
+// defer arena.deinit();
+
+// // const input = try std.io.getStdIn().readToEndAlloc(arena.allocator(), std.math.maxInt(u32));
+// const input =
+// \\# heading
+// \\
+// \\blah
+// \\
+// \\blah
+// \\
+// ;
+// const ast = try parse(std.testing.allocator, arena.allocator(), input);
+// try std.testing.expectEqualDeep(6, ast.nodes.len);
+// // try std.testing.expectEqualDeep(Ast{
+// // .nodes = &.{
+// // .{ .document = .{ .num_children = 2 } },
+// // .{ .heading = .{ .off = 0, .level = .h1, .num_children = 2 } },
+// // .{ .text = .{ .off = 0, .len = 8 } },
+// // .{ .space_text = .{ .off = 11, .len = 5 } },
+// // .{ .paragraph = .{ .off = 19, .num_children = 1 } },
+// // .{ .text = .{ .off = 19, .len = 3 } },
+// // },
+// // .errors = &.{
+// // .{ .empty_line_in_inline_block = .{ .idx = @enumFromInt(3) } },
+// // },
+// // .extra = &.{},
+// // }, ast);
+// }
diff --git a/src/str.zig b/src/str.zig
@@ -0,0 +1,95 @@
+//! Utils for "strings", []u8 or []const u8 slices
+//!
+//! The only purpose of this file is to reduce typing.
+//! `std.mem.indexOfScalar(u8, s, c)` is very long, and
+//! this file lets you just type `str.indexOfChar(s, c)`.
+//!
+//! If I need any extra functionality I will also just put it here,
+//! so this file may have functions that are not in `std.mem`.
+const std = @import("std");
+const mem = std.mem;
+pub const Char = u8;
+pub const Str = []const u8;
+pub const Charset = []const u8;
+
+pub fn isAnyOf(c: Char, cs: Charset) bool {
+ return indexOfChar(cs, c) != null;
+}
+
+pub fn isNoneOf(c: Char, cs: Charset) bool {
+ return !isAnyOf(c, cs);
+}
+
+pub fn indexOfChar(s: Str, c: Char) ?usize {
+ return mem.indexOfScalar(u8, s, c);
+}
+
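+/// Returns the index of the first byte that differs from `value`, or null if
+/// every byte equals `value`. The SIMD fast path mirrors the one in
+/// `std.mem.indexOfScalar`.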
+pub fn indexOfNotChar(slice: Str, value: Char) ?usize {
+ var i: usize = 0;
+ if (switch (@import("builtin").zig_backend) {
+ .stage2_llvm, .stage2_c => true,
+ else => false,
+ } and
+ !std.debug.inValgrind() and // https://github.com/ziglang/zig/issues/17717
+ !@inComptime())
+ {
+ if (std.simd.suggestVectorLength(Char)) |block_len| {
+ // For Intel Nehalem (2009) and AMD Bulldozer (2012) or later, unaligned loads on aligned data result
+ // in the same execution as aligned loads. We ignore older arch's here and don't bother pre-aligning.
+ //
+ // Use `std.simd.suggestVectorLength(T)` to get the same alignment as used in this function
+ // however this usually isn't necessary unless your arch has a performance penalty due to this.
+ //
+ // This may differ for other arch's. Arm for example costs a cycle when loading across a cache
+ // line so explicit alignment prologues may be worth exploration.
+
+ // Unrolling here is ~10% improvement. We can then do one bounds check every 2 blocks
+ // instead of one which adds up.
+ const Block = @Vector(block_len, Char);
+ if (i + 2 * block_len < slice.len) {
+ const mask: Block = @splat(value);
+ while (true) {
+ inline for (0..2) |_| {
+ const block: Block = slice[i..][0..block_len].*;
+ const matches = block != mask;
+ if (@reduce(.Or, matches)) {
+ return i + std.simd.firstTrue(matches).?;
+ }
+ i += block_len;
+ }
+ if (i + 2 * block_len >= slice.len) break;
+ }
+ }
+
+ // {block_len, block_len / 2} check
+ inline for (0..2) |j| {
+ const block_x_len = block_len / (1 << j);
+ comptime if (block_x_len < 4) break;
+
+ const BlockX = @Vector(block_x_len, Char);
+ if (i + block_x_len < slice.len) {
+ const mask: BlockX = @splat(value);
+ const block: BlockX = slice[i..][0..block_x_len].*;
+ const matches = block != mask;
+ if (@reduce(.Or, matches)) {
+ return i + std.simd.firstTrue(matches).?;
+ }
+ i += block_x_len;
+ }
+ }
+ }
+ }
+
+ for (slice[i..], i..) |c, j| {
+ if (c != value) return j;
+ }
+ return null;
+}
+
+pub fn indexOfNone(s: Str, cs: Charset) ?usize {
+ return mem.indexOfNone(u8, s, cs);
+}
+
+pub fn lastIndexOfNone(s: Str, cs: Charset) ?usize {
+ return mem.lastIndexOfNone(u8, s, cs);
+}
diff --git a/src/utils.zig b/src/utils.zig
@@ -0,0 +1,126 @@
+const std = @import("std");
+const ziggy = @import("ziggy");
+
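+/// Wraps an integer in a distinct non-exhaustive enum so that different kinds
+/// of indices cannot be mixed up. `dummy_type_` exists only to make each
+/// instantiation a unique type.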
+pub fn NewType(comptime int_type: type, comptime dummy_type_: type) type {
+ return enum(int_type) {
+ _,
+
+ const Self = @This();
+
+ pub fn next(self: @This()) ?@This() {
+ if (@intFromEnum(self) == std.math.maxInt(int_type))
+ return null;
+ return @enumFromInt(@intFromEnum(self) + 1);
+ }
+
+ pub fn format(self: @This(), comptime _: []const u8, _: anytype, writer: anytype) !void {
+ try writer.print("@enumFromInt({})", .{@intFromEnum(self)});
+ }
+
+ pub const ziggy_options = struct {
+ const dummy_type = dummy_type_;
+ pub fn parse(
+ self: *ziggy.Parser,
+ first_tok: ziggy.Tokenizer.Token,
+ ) !Self {
+ return @enumFromInt(try self.parseValue(u32, first_tok));
+ }
+ pub fn stringify(
+ self: Self,
+ opts: ziggy.serializer.StringifyOptions,
+ indent_level: usize,
+ depth: usize,
+ writer: anytype,
+ ) !void {
+ const serialized: u32 = @intFromEnum(self);
+ return ziggy.serializer.stringifyInner(serialized, opts, indent_level, depth, writer);
+ }
+ };
+ };
+}
+
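+/// Stores a tagged union's payload and tag side by side in a packed struct
+/// (packed unions cannot carry a tag), with `fromTagged`/`toTagged` converting
+/// to and from the ordinary tagged representation.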
+pub fn Packed(comptime Tagged_: type) type {
+ return packed struct {
+ data: Data,
+ tag: Tag,
+
+ const Self = @This();
+ pub const Tagged = Tagged_;
+ pub const Tag = @typeInfo(Tagged_).@"union".tag_type.?;
+ pub const Data = @Type(.{ .@"union" = .{
+ .layout = .@"packed",
+ .tag_type = null,
+ .fields = @typeInfo(Tagged_).@"union".fields,
+ .decls = &.{},
+ } });
+
+ pub fn fromTagged(tagged: Tagged_) Self {
+ switch (@as(Tag, tagged)) {
+ inline else => |t| return .{
+ .tag = tagged,
+ .data = @unionInit(
+ Data,
+ @tagName(t),
+ @field(tagged, @tagName(t)),
+ ),
+ },
+ }
+ }
+
+ pub fn toTagged(self: Self) Tagged_ {
+ switch (self.tag) {
+ inline else => |t| return @unionInit(
+ Tagged_,
+ @tagName(t),
+ @field(self.data, @tagName(t)),
+ ),
+ }
+ }
+
+ pub const ziggy_options = struct {
+ pub fn parse(
+ self: *ziggy.Parser,
+ first_tok: ziggy.Tokenizer.Token,
+ ) !Self {
+                return .fromTagged(
+ try self.parseValue(Tagged_, first_tok),
+ );
+ }
+ pub fn stringify(
+ self: Self,
+ opts: ziggy.serializer.StringifyOptions,
+ indent_level: usize,
+ depth: usize,
+ writer: anytype,
+ ) !void {
+ return ziggy.serializer.stringifyInner(
+ self.toTagged(),
+ opts,
+ indent_level,
+ depth,
+ writer,
+ );
+ }
+ };
+
+ /// May not exist, but we can define it anyway thanks to lazy decl analysis.
+ pub const Idx = Tagged_.Idx;
+ /// May not exist, but we can define it anyway thanks to lazy decl analysis.
+ pub const HeadingLevel = Tagged_.HeadingLevel;
+ /// May not exist, but we can define it anyway thanks to lazy decl analysis.
+ pub const Leaf = Tagged_.Leaf;
+ // /// May not exist, but we can define it anyway thanks to lazy decl analysis.
+ // pub const format = Tagged_.format;
+ /// May not exist, but we can define it anyway thanks to lazy decl analysis.
+ pub const incrementNumChildren = Tagged_.incrementNumChildren;
+
+ pub fn format(self: @This(), comptime _: []const u8, _: anytype, writer: anytype) !void {
+ try writer.print("{}", .{self.toTagged()});
+ }
+ };
+}
+
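+/// `@intCast` with runtime safety checks forced on, even in release builds.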
+pub fn safeIntCast(comptime T: type, value: anytype) T {
+ @setRuntimeSafety(true);
+ return @intCast(value);
+}