commit a293f5b6aea00d2a3465b0aa4a513953b60f4bfa
parent a274d6f4377b2e6de289fcb2c7b2aa0f42d99903
Author: gracefu <81774659+gracefuu@users.noreply.github.com>
Date: Fri, 16 May 2025 05:37:57 +0800
v-1
Diffstat:
| M | src/AstGen.zig | | | 216 | ++++++++++++++++++++++++++++++++----------------------------------------------- |
1 file changed, 87 insertions(+), 129 deletions(-)
diff --git a/src/AstGen.zig b/src/AstGen.zig
@@ -10,7 +10,6 @@ const Node = Ast.Node;
const Error = Ast.Error;
input_base: [*]u8,
-input: []u8,
nodes: std.ArrayListUnmanaged(Node),
errors: std.ArrayListUnmanaged(Error),
extra: std.ArrayListUnmanaged(u32),
@@ -46,20 +45,14 @@ pub fn deinit(self: *AstGen, gpa: Allocator) void {
}
pub fn parse(gpa: Allocator, output_gpa: ?Allocator, input: []const u8) error{ InputTooLarge, OutOfMemory }!Ast {
- if (input.len > std.math.maxInt(u32) - 1) {
+ if (input.len > std.math.maxInt(u32)) {
return error.InputTooLarge;
}
- // const input_copy = input;
- // const input_copy = try gpa.dupe(u8, input);
- // defer gpa.free(input_copy);
- var input_copy_arraylist: std.ArrayListUnmanaged(u8) = .empty;
- defer input_copy_arraylist.deinit(gpa);
- try input_copy_arraylist.ensureTotalCapacityPrecise(gpa, input.len + 2);
-
+ const input_copy = try gpa.dupe(u8, input);
+ defer gpa.free(input_copy);
var ast: AstGen = .{
- .input_base = input_copy_arraylist.items.ptr,
- .input = undefined,
+ .input_base = input_copy.ptr,
.nodes = .empty,
.errors = .empty,
.extra = .empty,
@@ -67,30 +60,15 @@ pub fn parse(gpa: Allocator, output_gpa: ?Allocator, input: []const u8) error{ I
defer ast.deinit(gpa);
const root = try ast.appendNode(gpa, .{ .document = .{} });
- var lines: std.ArrayListUnmanaged(Ast.StrOffset) = .empty;
+ var lines: std.ArrayListUnmanaged([]u8) = .empty;
defer lines.deinit(gpa);
- // var lines: std.ArrayListUnmanaged([]u8) = .empty;
- // defer lines.deinit(gpa);
- var lines_it = std.mem.splitScalar(u8, input, '\n');
+ var lines_it = std.mem.splitScalar(u8, input_copy, '\n');
var maybe_line: ?[]u8 = @constCast(lines_it.first());
while (maybe_line) |line| : (maybe_line = @constCast(lines_it.next())) {
- if (str.lastIndexOfNone(line, " \t\r\n")) |idx| {
- const old_len = input_copy_arraylist.items.len;
- try lines.append(gpa, @intCast(old_len));
- input_copy_arraylist.appendSliceAssumeCapacity(line);
- input_copy_arraylist.appendAssumeCapacity('\n');
- input_copy_arraylist.items[old_len + idx + 1] = '\n';
- // try lines.append(gpa, input_copy_arraylist.items[old_len .. old_len + idx + 1]);
- } else {
- try lines.append(gpa, @intCast(input_copy_arraylist.items.len));
- input_copy_arraylist.appendAssumeCapacity('\n');
- // try lines.append(gpa, &.{});
- }
+ try lines.append(gpa, line);
}
- input_copy_arraylist.appendAssumeCapacity('\n');
- ast.input = input_copy_arraylist.items;
- // stripTrailingWhitespace(&lines.items);
+ stripTrailingWhitespace(&lines.items);
try ast.parseColumn(gpa, lines.items, root);
@@ -125,51 +103,47 @@ fn calcOffset(self: *AstGen, c: *u8) u32 {
return @intCast(c - self.input_base);
}
-fn findIndentedColumn(self: *AstGen, gpa: Allocator, lines_: []u32, node_idx: Node.Idx) ![]u32 {
+fn findIndentedColumn(self: *AstGen, gpa: Allocator, lines_: [][]u8, node_idx: Node.Idx) ![][]u8 {
var lines = lines_;
// empty lines at the start of the inline block are fine, just skip these
// special case: the first line consist of only whitespace
// because they may have been introduced via marker replacement
- if (lines.len > 0)
- if (str.indexOfNone(self.input[lines[0]..], " \t\r")) |idx|
- if (self.input[lines[0] + idx] == '\n') {
- lines = lines[1..];
- while (true) : (lines = lines[1..]) {
- if (lines.len == 0) return &.{};
- if (self.input[lines[0]] != '\n') break;
- }
- };
- if (lines.len == 0) return &.{};
+ if (lines.len > 0 and str.indexOfNone(lines[0], " \t\r\n") == null) lines = lines[1..];
+ while (true) : (lines = lines[1..]) {
+ if (lines.len == 0) return &.{};
+ if (lines[0].len != 0) break;
+ }
// determine indentation
- const indentation_idx = str.indexOfNone(self.input[lines[0]..], " \t\r") orelse unreachable;
+ const indentation_idx = str.indexOfNone(lines[0], " \t\r\n") orelse unreachable;
if (indentation_idx == 0) return &.{};
- const indentation = self.input[lines[0] .. lines[0] + indentation_idx];
+
+ const indentation = lines[0][0..indentation_idx];
// strip all lines of their indentation
- lines[0] += @truncate(indentation.len);
+ lines[0] = lines[0][indentation.len..];
for (lines[1..]) |*line| {
- if (self.input[line.*] == '\n') continue;
+ if (line.len == 0) continue;
- const diff_idx = std.mem.indexOfDiff(u8, self.input[line.*..], indentation) orelse unreachable;
- // std.debug.assert(diff_idx != line.len);
+ const diff_idx = std.mem.indexOfDiff(u8, line.*, indentation) orelse unreachable;
+ std.debug.assert(diff_idx != line.len);
if (diff_idx != indentation.len) {
try self.errors.append(gpa, .fromTagged(.{
- .inconsistent_indentation = .{ .idx = node_idx, .off = line.* },
+ .inconsistent_indentation = .{ .idx = node_idx, .off = self.calcOffset(&line.*[0]) },
}));
// Recover by stripping all whitespace on this line
- const recover_indentation_idx = std.mem.indexOfNone(u8, self.input[line.*..], " \t\r") orelse unreachable;
- line.* += @truncate(recover_indentation_idx);
+ const recover_indentation_idx = std.mem.indexOfNone(u8, line.*, " \t\r\n") orelse unreachable;
+ line.* = line.*[recover_indentation_idx..];
} else {
- line.* += @truncate(indentation.len);
+ line.* = line.*[indentation.len..];
}
}
return lines;
}
-fn parseInlineBlock(self: *AstGen, gpa: Allocator, lines_: []Ast.StrOffset, parent_idx: Node.Idx) !void {
+fn parseInlineBlock(self: *AstGen, gpa: Allocator, lines_: [][]u8, parent_idx: Node.Idx) !void {
var lines = lines_;
var empty_line_off: ?u32 = null;
@@ -177,46 +151,39 @@ fn parseInlineBlock(self: *AstGen, gpa: Allocator, lines_: []Ast.StrOffset, pare
// empty lines at the start of the inline block are fine, just skip these
// special case: the first line consist of only whitespace
// because they may have been introduced via marker replacement
- if (lines.len > 0)
- if (str.indexOfNone(self.input[lines[0]..], " \t\r")) |idx|
- if (self.input[lines[0] + idx] == '\n') {
- lines = lines[1..];
- while (true) : (lines = lines[1..]) {
- if (lines.len == 0) break :outer;
- if (self.input[lines[0]] != '\n') break;
- }
- };
- if (lines.len == 0) break :outer;
+ if (lines.len > 0 and str.indexOfNone(lines[0], " \t\r\n") == null) lines = lines[1..];
+ while (true) : (lines = lines[1..]) {
+ if (lines.len == 0) break :outer;
+ if (lines[0].len != 0) break;
+ }
self.getNode(parent_idx).incrementNumChildren();
// determine indentation
- const indentation_idx = str.indexOfNone(self.input[lines[0]..], " \t\r") orelse unreachable;
- const indentation = self.input[lines[0] .. lines[0] + indentation_idx];
+ const indentation_idx = str.indexOfNone(lines[0], " \t\r\n") orelse unreachable;
+ const indentation = lines[0][0..indentation_idx];
- lines[0] += @truncate(indentation.len);
- // lines[0] = lines[0][indentation.len..];
+ lines[0] = lines[0][indentation.len..];
- var len = str.indexOfChar(self.input[lines[0]..], '\n') orelse unreachable;
- if (len <= std.math.maxInt(Ast.StrLen)) {
+ if (lines[0].len <= std.math.maxInt(Ast.StrLen)) {
_ = try self.appendNode(gpa, .{
.text = .{
- .off = lines[0],
- .len = @truncate(len),
+ .off = self.calcOffset(&lines[0][0]),
+ .len = @intCast(lines[0].len),
},
});
} else {
@branchHint(.cold);
- while (len > 0) {
- const chunk_len = @min(len, std.math.maxInt(Ast.StrLen));
+ var line = lines[0];
+ while (line.len > 0) {
+ const len = @min(line.len, std.math.maxInt(Ast.StrLen));
_ = try self.appendNode(gpa, .{
.text = .{
- .off = lines[0],
- .len = chunk_len,
+ .off = self.calcOffset(&line[0]),
+ .len = @intCast(len),
},
});
- lines[0] += chunk_len;
- len -= chunk_len;
+ line = line[len..];
}
}
lines = lines[1..];
@@ -225,9 +192,9 @@ fn parseInlineBlock(self: *AstGen, gpa: Allocator, lines_: []Ast.StrOffset, pare
// Skip and error on empty lines
while (true) : (lines = lines[1..]) {
if (lines.len == 0) break :outer;
- if (self.input[lines[0]] != '\n') break;
+ if (lines[0].len != 0) break;
// empty line detected
- empty_line_off = lines[0];
+ empty_line_off = self.calcOffset(@ptrCast(lines[0].ptr));
}
if (empty_line_off) |off| {
@@ -236,49 +203,47 @@ fn parseInlineBlock(self: *AstGen, gpa: Allocator, lines_: []Ast.StrOffset, pare
}));
}
- const diff_idx = std.mem.indexOfDiff(u8, self.input[lines[0]..], indentation) orelse unreachable;
- // std.debug.assert(diff_idx != lines[0].len);
+ const diff_idx = std.mem.indexOfDiff(u8, lines[0], indentation) orelse unreachable;
+ std.debug.assert(diff_idx != lines[0].len);
if (diff_idx != indentation.len) {
try self.errors.append(gpa, .fromTagged(.{
- .inconsistent_indentation = .{ .idx = self.nextNodeIdx(), .off = lines[0] },
+ .inconsistent_indentation = .{ .idx = self.nextNodeIdx(), .off = self.calcOffset(&lines[0][0]) },
}));
// Recover by stripping all whitespace on this line
- const recover_indentation_idx = std.mem.indexOfNone(u8, self.input[lines[0]..], " \t\r\n") orelse unreachable;
- lines[0] += @truncate(recover_indentation_idx);
+ const recover_indentation_idx = std.mem.indexOfNone(u8, lines[0], " \t\r\n") orelse unreachable;
+ lines[0] = lines[0][recover_indentation_idx..];
} else {
- lines[0] += @truncate(indentation.len);
+ lines[0] = lines[0][indentation.len..];
}
self.getNode(parent_idx).incrementNumChildren();
- var len2 = str.indexOfChar(self.input[lines[0]..], '\n') orelse unreachable;
- if (len2 <= std.math.maxInt(Ast.StrLen)) {
+ if (lines[0].len <= std.math.maxInt(Ast.StrLen)) {
_ = try self.appendNode(gpa, .{
.space_text = .{
- .off = lines[0],
- .len = @truncate(len2),
+ .off = self.calcOffset(&lines[0][0]),
+ .len = @intCast(lines[0].len),
},
});
} else {
@branchHint(.cold);
+ var line = lines[0];
_ = try self.appendNode(gpa, .{
.space_text = .{
- .off = lines[0],
- .len = std.math.maxInt(Ast.StrLen),
+ .off = self.calcOffset(&lines[0][0]),
+ .len = @intCast(std.math.maxInt(Ast.StrLen)),
},
});
- len2 -= std.math.maxInt(Ast.StrLen);
- lines[0] += std.math.maxInt(Ast.StrLen);
- while (len2 > 0) {
- const chunk_len = @min(len2, std.math.maxInt(Ast.StrLen));
+ line = line[std.math.maxInt(Ast.StrLen)..];
+ while (line.len > 0) {
+ const len = @min(line.len, std.math.maxInt(Ast.StrLen));
_ = try self.appendNode(gpa, .{
.text = .{
- .off = lines[0],
- .len = chunk_len,
+ .off = self.calcOffset(&line[0]),
+ .len = @intCast(len),
},
});
- lines[0] += chunk_len;
- len2 -= chunk_len;
+ line = line[len..];
}
}
lines = lines[1..];
@@ -286,24 +251,17 @@ fn parseInlineBlock(self: *AstGen, gpa: Allocator, lines_: []Ast.StrOffset, pare
}
}
-fn parseColumn(self: *AstGen, gpa: Allocator, lines_: []Ast.StrOffset, parent_idx: Node.Idx) !void {
+fn parseColumn(self: *AstGen, gpa: Allocator, lines_: [][]u8, parent_idx: Node.Idx) !void {
var lines = lines_;
outer: while (true) {
// Skip empty lines
// special case: the first line consist of only whitespace
// because they may have been introduced via marker replacement
- if (lines.len > 0) {
- if (str.indexOfNone(self.input[lines[0]..], " \t\r")) |idx| {
- if (self.input[lines[0] + idx] == '\n') {
- lines = lines[1..];
- while (true) : (lines = lines[1..]) {
- if (lines.len == 0) break :outer;
- if (self.input[lines[0]] != '\n') break;
- }
- }
- }
+ if (lines.len > 0 and str.indexOfNone(lines[0], " \t\r\n") == null) lines = lines[1..];
+ while (true) : (lines = lines[1..]) {
+ if (lines.len == 0) break :outer;
+ if (lines[0].len != 0) break;
}
- if (lines.len == 0) break :outer;
// Use first character to determine marker
const mode, const child = try self.parseBlockStart(gpa, lines[0]);
@@ -314,8 +272,8 @@ fn parseColumn(self: *AstGen, gpa: Allocator, lines_: []Ast.StrOffset, parent_id
// take indented or non-block-marker lines
var num_lines: usize = 1;
for (lines[1..]) |line| {
- if (self.input[line] == '\n') break;
- if (block_specs[self.input[line]] != null) break;
+ if (line.len == 0) break;
+ if (block_specs[line[0]] != null) break;
num_lines += 1;
}
@@ -327,7 +285,7 @@ fn parseColumn(self: *AstGen, gpa: Allocator, lines_: []Ast.StrOffset, parent_id
// take indented or empty lines
var num_lines: usize = 1;
for (lines[1..]) |line| {
- if (str.isNoneOf(self.input[line], " \t\r\n")) break;
+ if (line.len != 0 and str.isNoneOf(line[0], " \t\r\n")) break;
num_lines += 1;
}
@@ -339,7 +297,7 @@ fn parseColumn(self: *AstGen, gpa: Allocator, lines_: []Ast.StrOffset, parent_id
// take indented or empty lines
var num_lines: usize = 1;
for (lines[1..]) |line| {
- if (str.isNoneOf(self.input[line], " \t\r\n")) break;
+ if (line.len != 0 and std.mem.indexOfScalar(u8, " \t\r\n", line[0]) == null) break;
num_lines += 1;
}
@@ -459,26 +417,26 @@ const block_specs = blockSpecs(struct {
/// Appends the suitable block node to the ast,
/// then returns how parsing should proceed for the children of this block.
/// Also returns the idx of the container node created.
-fn parseBlockStart(self: *AstGen, gpa: Allocator, line: Ast.StrOffset) !struct { ParseMode, Node.Idx } {
- switch (self.input[line]) {
+fn parseBlockStart(self: *AstGen, gpa: Allocator, line: []u8) !struct { ParseMode, Node.Idx } {
+ switch (line[0]) {
inline else => |c| {
const spec_or_null = block_specs[c];
if (spec_or_null) |spec| {
inline for (spec) |case| {
switch (case.marker) {
.exact, .starts_with => |marker| {
- if (std.mem.startsWith(u8, self.input[line..], marker)) {
+ if (std.mem.startsWith(u8, line, marker)) {
const node = if (case.mode == .no_children) try self.appendNode(gpa, @unionInit(Node.Tagged, @tagName(case.tag), @as(Node.Tagged.Leaf, .{
- .off = line,
+ .off = self.calcOffset(&line[0]),
.len = marker.len,
}))) else try self.appendNode(gpa, @unionInit(Node.Tagged, @tagName(case.tag), @as(Node.Tagged.Container, .{
- .off = line,
+ .off = self.calcOffset(&line[0]),
.num_children = if (case.store_marker_child == .store) 1 else 0,
})));
- @memset(self.input[line .. line + marker.len], ' ');
+ @memset(line[0..marker.len], ' ');
if (case.store_marker_child == .store) {
_ = try self.appendNode(gpa, .{ .marker = .{
- .off = line,
+ .off = self.calcOffset(&line[0]),
.len = case.marker.len,
} });
}
@@ -486,14 +444,14 @@ fn parseBlockStart(self: *AstGen, gpa: Allocator, line: Ast.StrOffset) !struct {
}
},
.starts_with_multi => |marker_spec| {
- var marker_len = str.indexOfNotChar(self.input[line..], marker_spec.marker_char) orelse str.indexOfChar(self.input[line..], '\n') orelse unreachable;
+ var marker_len = str.indexOfNotChar(line, marker_spec.marker_char) orelse line.len;
inline for (marker_spec.extra) |extra| {
- if (std.mem.startsWith(u8, self.input[line + marker_len ..], extra)) {
+ if (std.mem.startsWith(u8, line[marker_len..], extra)) {
marker_len += extra.len;
const node = try self.appendNode(gpa, @unionInit(Node.Tagged, @tagName(case.tag), @as(Node.Tagged.Container, .{
- .off = line,
+ .off = self.calcOffset(&line[0]),
.num_children = if (case.store_marker_child == .store) 1 else 0,
})));
@@ -508,10 +466,10 @@ fn parseBlockStart(self: *AstGen, gpa: Allocator, line: Ast.StrOffset) !struct {
},
}));
- @memset(self.input[line .. line + marker_len], ' ');
+ @memset(line[0..marker_len], ' ');
if (case.store_marker_child == .store) {
_ = try self.appendNode(gpa, .{ .marker = .{
- .off = line,
+ .off = self.calcOffset(&line[0]),
.len = utils.safeIntCast(Ast.StrLen, marker_len),
} });
}
@@ -527,7 +485,7 @@ fn parseBlockStart(self: *AstGen, gpa: Allocator, line: Ast.StrOffset) !struct {
.paragraph,
try self.appendNode(gpa, .{
.paragraph = .{
- .off = line,
+ .off = self.calcOffset(&line[0]),
},
}),
};
@@ -540,7 +498,7 @@ fn parseBlockStart(self: *AstGen, gpa: Allocator, line: Ast.StrOffset) !struct {
try self.errors.append(gpa, .fromTagged(.{
.invalid_marker = .{
.idx = self.nextNodeIdx(),
- .off = line,
+ .off = self.calcOffset(&line[0]),
},
}));
@@ -548,7 +506,7 @@ fn parseBlockStart(self: *AstGen, gpa: Allocator, line: Ast.StrOffset) !struct {
.paragraph,
try self.appendNode(gpa, .{
.paragraph = .{
- .off = line,
+ .off = self.calcOffset(&line[0]),
},
}),
};