mymarkdown

My markdown
git clone https://git.grace.moe/mymarkdown
Log | Files | Refs

Ast.zig (12110B)


      1 const std = @import("std");
      2 const assert = std.debug.assert;
      3 const panic = std.debug.panic;
      4 
      5 const tracy = @import("tracy");
      6 
      7 const str = @import("str.zig");
      8 const utils = @import("utils.zig");
      9 
     10 const Ast = @This();
     11 
     12 nodes: []const Node,
     13 errors: []const Error,
     14 
     15 pub const empty: Ast = .{ .nodes = &.{}, .errors = &.{} };
     16 
     17 pub fn deinit(self: Ast, gpa: std.mem.Allocator) void {
     18     gpa.free(self.nodes);
     19     gpa.free(self.errors);
     20 }
     21 
     22 pub const StrOffset = u32;
     23 pub const StrLen = u24;
     24 
     25 pub const Node = packed struct(u64) {
     26     /// Do not get or set directly
     27     _data: packed union {
     28         /// For container nodes, which have nodes nested within them
     29         container: packed struct { num_children: Idx.Int },
     30         /// For leaf nodes, which have no children but point to a range in the source (and hence has a length)
     31         leaf: packed struct { len: StrLen },
     32     },
     33 
     34     /// The type of node
     35     tag: Tag,
     36 
     37     /// Source location of the node
     38     off: StrOffset,
     39 
     40     // ================
     41     // pub fns
     42 
     43     // Getters
     44 
     45     pub fn numChildren(node: Node) ?Idx.Int {
     46         if (node.tag.layout() != .container) return null;
     47         return node._data.container.num_children;
     48     }
     49     pub fn numChildrenMut(node: *Node) ?*align(8:0:8) Idx.Int {
     50         if (node.tag.layout() != .container) return null;
     51         return &node._data.container.num_children;
     52     }
     53     pub fn len(node: Node) ?StrLen {
     54         if (node.tag.layout() != .leaf) return null;
     55         return node._data.leaf.len;
     56     }
     57 
     58     // Constructors
     59 
     60     /// The recommended way to construct a Node with .container layout.
     61     pub fn initContainerNode(comptime tag: Tag, off: StrOffset, num_children: Idx.Int) Node {
     62         comptime assert(tag.layout() == .container);
     63         return @call(.always_inline, initContainerNodeRuntime, .{ tag, off, num_children }).?;
     64     }
     65     /// The recommended way to construct a Node with .leaf layout.
     66     pub fn initLeafNode(comptime tag: Tag, off: StrOffset, len_: StrLen) Node {
     67         comptime assert(tag.layout() == .leaf);
     68         return @call(.always_inline, initLeafNodeRuntime, .{ tag, off, len_ }).?;
     69     }
     70 
     71     /// Construct a Node with .container layout when the tag is runtime known.
     72     pub fn initContainerNodeRuntime(tag: Tag, off: StrOffset, num_children: Idx.Int) ?Node {
     73         if (tag.layout() != .container) return null;
     74         return .{
     75             ._data = .{ .container = .{ .num_children = num_children } },
     76             .off = off,
     77             .tag = tag,
     78         };
     79     }
     80     /// Construct a Node with .leaf layout when the tag is runtime known.
     81     pub fn initLeafNodeRuntime(tag: Tag, off: StrOffset, len_: StrLen) ?Node {
     82         if (tag.layout() != .leaf) return null;
     83         return .{
     84             ._data = .{ .leaf = .{ .len = len_ } },
     85             .off = off,
     86             .tag = tag,
     87         };
     88     }
     89 
     90     // ================
     91     // pub types
     92     pub const Idx = utils.NewType(u24, opaque {});
     93 
     94     pub const Tag = enum(TagBacking.Int) {
     95         // Node with .container layout (has source location + num_children)
     96         document = @bitCast(TagBacking{ .tag = 0, .layout = .container }),
     97         list,
     98         heading,
     99         quote,
    100         paragraph,
    101         unordered_item,
    102         ordered_item,
    103         term_item,
    104         task_item,
    105         elaboration,
    106 
    107         // Node with .leaf layout (has source location + len)
    108         marker = @bitCast(TagBacking{ .tag = 0, .layout = .leaf }),
    109         thematic_break,
    110         text,
    111         space_text,
    112 
    113         pub fn fromMarker(comptime c: u8) Tag {
    114             return comptime .fromMarkerRuntime(c) orelse
    115                 panic("Invalid marker char '{c}' in fromMarker", .{c});
    116         }
    117         pub fn fromMarkerRuntime(c: u8) ?Tag {
    118             return switch (c) {
    119                 '#' => .heading,
    120                 '>' => .quote,
    121                 ';' => .paragraph,
    122                 '-' => .unordered_item,
    123                 '.' => .ordered_item,
    124                 ':' => .term_item,
    125                 '+' => .elaboration,
    126                 else => null,
    127             };
    128         }
    129 
    130         fn layout(tag: Tag) Layout {
    131             return @as(TagBacking, @bitCast(@intFromEnum(tag))).layout;
    132         }
    133     };
    134 
    135     // ================
    136     // Implementation detail
    137     const Layout = enum(u1) { container, leaf };
    138     const TagBacking = packed struct(u8) {
    139         tag: u7,
    140         layout: Layout,
    141 
    142         const Int = u8;
    143     };
    144 };
    145 
    146 pub const Error = packed struct(u64) {
    147     /// Do not get or set directly
    148     _data: packed union {
    149         /// For errors with a precise error position in the source
    150         point: packed struct { off: StrOffset },
    151         /// For errors that simply say something is wrong about the node
    152         node: void,
    153     },
    154 
    155     /// The idx of the node the error points to
    156     node_idx: Node.Idx,
    157 
    158     /// The type of error
    159     tag: Tag,
    160 
    161     // ================
    162     // pub fns
    163 
    164     // Getters
    165 
    166     pub fn off(err: Error) ?StrOffset {
    167         if (err.tag.layout() != .point) return null;
    168         return err._data.point.off;
    169     }
    170 
    171     // Constructors
    172 
    173     /// The recommended way to construct an Error with .point layout.
    174     pub fn initPointError(comptime tag: Tag, node_idx: Node.Idx, off_: StrOffset) Error {
    175         comptime assert(tag.layout() == .point);
    176         return @call(.always_inline, initPointErrorRuntime, .{ tag, node_idx, off_ }).?;
    177     }
    178     /// The recommended way to construct an Error with .node layout.
    179     pub fn initNodeError(comptime tag: Tag, node_idx: Node.Idx) Error {
    180         comptime assert(tag.layout() == .node);
    181         return @call(.always_inline, initNodeErrorRuntime, .{ tag, node_idx }).?;
    182     }
    183 
    184     /// Construct an Error with .point layout when the tag is runtime known.
    185     pub fn initPointErrorRuntime(tag: Tag, node_idx: Node.Idx, off_: StrOffset) ?Error {
    186         if (tag.layout() != .point) return null;
    187         return .{
    188             ._data = .{ .point = .{ .off = off_ } },
    189             .node_idx = node_idx,
    190             .tag = tag,
    191         };
    192     }
    193     /// Construct an Error with .node layout when the tag is runtime known.
    194     pub fn initNodeErrorRuntime(tag: Tag, node_idx: Node.Idx) ?Error {
    195         if (tag.layout() != .node) return null;
    196         return .{
    197             ._data = .{ .node = {} },
    198             .node_idx = node_idx,
    199             .tag = tag,
    200         };
    201     }
    202 
    203     // ================
    204     // pub types
    205     pub const Idx = utils.NewType(u24, opaque {});
    206 
    207     pub const Tag = enum(TagBacking.Int) {
    208         // Errors with .point layout (has node_idx and source location)
    209         invalid_marker = @bitCast(TagBacking{ .tag = 0, .layout = .point }),
    210         inconsistent_indentation,
    211 
    212         // Errors with .node layout (has node_idx)
    213         marker_too_long = @bitCast(TagBacking{ .tag = 0, .layout = .node }),
    214         unexpected_block_in_inline_context,
    215         elaboration_after_unelaboratable_node,
    216         incorrect_elaboration_marker,
    217 
    218         fn layout(tag: Tag) Layout {
    219             return @as(TagBacking, @bitCast(@intFromEnum(tag))).layout;
    220         }
    221     };
    222 
    223     // ================
    224     // Implementation detail
    225     const Layout = enum(u1) { point, node };
    226     const TagBacking = packed struct(u8) {
    227         tag: u7,
    228         layout: Layout,
    229 
    230         const Int = u8;
    231     };
    232 };
    233 
    234 comptime {
    235     assert(24 == @bitSizeOf(Node.Idx));
    236     assert(4 == @sizeOf(Node.Idx));
    237     assert(64 == @bitSizeOf(Node));
    238     assert(8 == @sizeOf(Node));
    239 
    240     assert(24 == @bitSizeOf(Error.Idx));
    241     assert(4 == @sizeOf(Error.Idx));
    242     assert(64 == @bitSizeOf(Error));
    243     assert(8 == @sizeOf(Error));
    244 }
    245 
    246 pub fn renderAst(self: Ast, writer: anytype, input: []const u8) !void {
    247     var renderer: AstRenderer(@TypeOf(writer)) = .{
    248         .self = self,
    249         .writer = writer,
    250         .input = input,
    251         .cached_offset = 0,
    252         .cached_pos = .{ .row = 1, .col = 1 },
    253     };
    254     const node_idx, const error_idx = try renderer.renderAstInner(0, 0, 0);
    255     assert(node_idx == self.nodes.len);
    256     assert(error_idx == self.errors.len);
    257 }
    258 
    259 fn AstRenderer(Writer: type) type {
    260     return struct {
    261         self: Ast,
    262         writer: Writer,
    263         input: []const u8,
    264         cached_offset: u32,
    265         cached_pos: Pos,
    266 
    267         const Pos = struct { row: u32, col: u32 };
    268         fn offsetToPos(renderer: *@This(), offset: u32) Pos {
    269             if (renderer.cached_offset == offset) {
    270                 return renderer.cached_pos;
    271             } else if (renderer.cached_offset <= offset) {
    272                 var row = renderer.cached_pos.row;
    273                 var col = renderer.cached_pos.col;
    274                 for (renderer.input[renderer.cached_offset..offset]) |c| {
    275                     if (c == '\n') {
    276                         row += 1;
    277                         col = 1;
    278                     } else {
    279                         col += 1;
    280                     }
    281                 }
    282                 renderer.cached_offset = offset;
    283                 renderer.cached_pos = .{ .row = row, .col = col };
    284                 return renderer.cached_pos;
    285             } else {
    286                 var row: u32 = 1;
    287                 var col: u32 = 1;
    288                 for (renderer.input[0..offset]) |c| {
    289                     if (c == '\n') {
    290                         row += 1;
    291                         col = 1;
    292                     } else {
    293                         col += 1;
    294                     }
    295                 }
    296                 renderer.cached_offset = offset;
    297                 renderer.cached_pos = .{ .row = row, .col = col };
    298                 return renderer.cached_pos;
    299             }
    300         }
    301 
    302         pub fn renderAstInner(
    303             renderer: *@This(),
    304             start_node: usize,
    305             start_error: usize,
    306             indent: usize,
    307         ) !struct { usize, usize } {
    308             const tracy_frame = tracy.trace(@src());
    309             defer tracy_frame.end();
    310             try renderer.writer.writeByteNTimes(' ', indent);
    311             try renderer.writer.writeByte('.');
    312             try renderer.writer.writeAll(@tagName(renderer.self.nodes[start_node].tag));
    313             try renderer.writer.writeByte('\n');
    314             var cur_node: usize = start_node + 1;
    315             var cur_error: usize = start_error;
    316             while (cur_error < renderer.self.errors.len and
    317                 @intFromEnum(renderer.self.errors[cur_error].node_idx) == start_node)
    318             {
    319                 try renderer.writer.writeByteNTimes(' ', indent + 2);
    320                 try renderer.writer.writeAll(".error .");
    321                 try renderer.writer.writeAll(@tagName(renderer.self.errors[cur_error].tag));
    322                 if (renderer.self.errors[cur_error].off()) |off|
    323                     try renderer.writer.print(" at {}:{}", renderer.offsetToPos(off));
    324                 try renderer.writer.writeByte('\n');
    325                 cur_error += 1;
    326             }
    327 
    328             if (renderer.self.nodes[start_node].numChildren()) |num_children| {
    329                 for (0..num_children) |_| {
    330                     assert(cur_node < renderer.self.nodes.len);
    331                     cur_node, cur_error = try renderer.renderAstInner(
    332                         cur_node,
    333                         cur_error,
    334                         indent + 2,
    335                     );
    336                     assert(cur_node <= renderer.self.nodes.len);
    337                 }
    338             }
    339 
    340             if (renderer.self.nodes[start_node].len()) |len| {
    341                 try renderer.writer.writeByteNTimes(' ', indent + 2);
    342                 // This was too slow!
    343                 // try renderer.writer.print("\"{}\"", .{
    344                 //     std.zig.fmtEscapes(renderer.input[data.off .. data.off + data.len]),
    345                 // });
    346                 try renderer.writer.writeByte('"');
    347                 try str.escapeStringForDoubleQuotedString(
    348                     renderer.writer,
    349                     renderer.input[renderer.self.nodes[start_node].off .. renderer.self.nodes[start_node].off + len],
    350                     .padded,
    351                 );
    352                 try renderer.writer.writeByte('"');
    353                 try renderer.writer.writeByte('\n');
    354             }
    355 
    356             return .{ cur_node, cur_error };
    357         }
    358     };
    359 }