-
-
Notifications
You must be signed in to change notification settings - Fork 2.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ZON #20271
base: master
Are you sure you want to change the base?
ZON #20271
Changes from all commits
8524dcb
3cf23e2
1e40033
787e7f6
b10457c
20f1f00
d02fc84
2752c8c
4e4db38
006800c
0476192
62dad41
b6b9b2d
46d2a4d
cfc59e7
279145b
4dfc1f5
0ca4bf5
531579a
622d85c
05f1fe3
18284f5
cc7105a
083e233
56b17fd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1581,7 +1581,8 @@ test parseInt { | |
try std.testing.expectEqual(@as(i5, -16), try std.fmt.parseInt(i5, "-10", 16)); | ||
} | ||
|
||
fn parseIntWithSign( | ||
/// Like `parseIntWithGenericCharacter`, but with a sign argument. | ||
pub fn parseIntWithSign( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is a generally useful function IMO, and I needed access to it from the ZON parser, so I made it public |
||
comptime Result: type, | ||
comptime Character: type, | ||
buf: []const Character, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -130,6 +130,8 @@ fn appendRefsAssumeCapacity(astgen: *AstGen, refs: []const Zir.Inst.Ref) void { | |
} | ||
|
||
pub fn generate(gpa: Allocator, tree: Ast) Allocator.Error!Zir { | ||
assert(tree.mode == .zig); | ||
|
||
var arena = std.heap.ArenaAllocator.init(gpa); | ||
defer arena.deinit(); | ||
|
||
|
@@ -8812,36 +8814,22 @@ fn numberLiteral(gz: *GenZir, ri: ResultInfo, node: Ast.Node.Index, source_node: | |
} | ||
} | ||
|
||
fn failWithNumberError(astgen: *AstGen, err: std.zig.number_literal.Error, token: Ast.TokenIndex, bytes: []const u8) InnerError { | ||
const is_float = std.mem.indexOfScalar(u8, bytes, '.') != null; | ||
switch (err) { | ||
.leading_zero => if (is_float) { | ||
return astgen.failTok(token, "number '{s}' has leading zero", .{bytes}); | ||
} else { | ||
return astgen.failTokNotes(token, "number '{s}' has leading zero", .{bytes}, &.{ | ||
try astgen.errNoteTok(token, "use '0o' prefix for octal literals", .{}), | ||
}); | ||
}, | ||
.digit_after_base => return astgen.failTok(token, "expected a digit after base prefix", .{}), | ||
.upper_case_base => |i| return astgen.failOff(token, @intCast(i), "base prefix must be lowercase", .{}), | ||
.invalid_float_base => |i| return astgen.failOff(token, @intCast(i), "invalid base for float literal", .{}), | ||
.repeated_underscore => |i| return astgen.failOff(token, @intCast(i), "repeated digit separator", .{}), | ||
.invalid_underscore_after_special => |i| return astgen.failOff(token, @intCast(i), "expected digit before digit separator", .{}), | ||
.invalid_digit => |info| return astgen.failOff(token, @intCast(info.i), "invalid digit '{c}' for {s} base", .{ bytes[info.i], @tagName(info.base) }), | ||
.invalid_digit_exponent => |i| return astgen.failOff(token, @intCast(i), "invalid digit '{c}' in exponent", .{bytes[i]}), | ||
.duplicate_exponent => |i| return astgen.failOff(token, @intCast(i), "duplicate exponent", .{}), | ||
.exponent_after_underscore => |i| return astgen.failOff(token, @intCast(i), "expected digit before exponent", .{}), | ||
.special_after_underscore => |i| return astgen.failOff(token, @intCast(i), "expected digit before '{c}'", .{bytes[i]}), | ||
.trailing_special => |i| return astgen.failOff(token, @intCast(i), "expected digit after '{c}'", .{bytes[i - 1]}), | ||
.trailing_underscore => |i| return astgen.failOff(token, @intCast(i), "trailing digit separator", .{}), | ||
.duplicate_period => unreachable, // Validated by tokenizer | ||
.invalid_character => unreachable, // Validated by tokenizer | ||
.invalid_exponent_sign => |i| { | ||
assert(bytes.len >= 2 and bytes[0] == '0' and bytes[1] == 'x'); // Validated by tokenizer | ||
return astgen.failOff(token, @intCast(i), "sign '{c}' cannot follow digit '{c}' in hex base", .{ bytes[i], bytes[i - 1] }); | ||
}, | ||
.period_after_exponent => |i| return astgen.failOff(token, @intCast(i), "unexpected period after exponent", .{}), | ||
} | ||
fn failWithNumberError( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There was a TODO on master to DRY these string/error functions with a duplicate in Manifest.zig. I also needed them, both in runtime and compile-time ZON parsing, so rather than duplicate it further I resolved the TODO. |
||
astgen: *AstGen, | ||
err: std.zig.number_literal.Error, | ||
token: Ast.TokenIndex, | ||
bytes: []const u8, | ||
) InnerError { | ||
const note = err.noteWithSource(bytes); | ||
const notes: []const u32 = if (note) |n| &.{try astgen.errNoteTok(token, "{s}", .{n})} else &.{}; | ||
try astgen.appendErrorTokNotesOff( | ||
token, | ||
@as(u32, @intCast(err.offset())), | ||
"{}", | ||
.{err.fmtWithSource(bytes)}, | ||
notes, | ||
); | ||
return error.AnalysisFail; | ||
} | ||
|
||
fn asmExpr( | ||
|
@@ -9336,7 +9324,18 @@ fn builtinCall( | |
} else if (str.len == 0) { | ||
return astgen.failTok(str_lit_token, "import path cannot be empty", .{}); | ||
} | ||
const result = try gz.addStrTok(.import, str.index, str_lit_token); | ||
const res_ty = try ri.rl.resultType(gz, node) orelse .none; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Imports now store result types. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A possible ZIR optimization here would now be to replace the |
||
const payload_index = try addExtra(gz.astgen, Zir.Inst.Import{ | ||
.res_ty = res_ty, | ||
.path = str.index, | ||
}); | ||
const result = try gz.add(.{ | ||
.tag = .import, | ||
.data = .{ .pl_tok = .{ | ||
.src_tok = gz.tokenIndexToRelative(str_lit_token), | ||
.payload_index = payload_index, | ||
} }, | ||
}); | ||
const gop = try astgen.imports.getOrPut(astgen.gpa, str.index); | ||
if (!gop.found_existing) { | ||
gop.value_ptr.* = str_lit_token; | ||
|
@@ -11422,85 +11421,20 @@ fn parseStrLit( | |
} | ||
} | ||
|
||
fn failWithStrLitError(astgen: *AstGen, err: std.zig.string_literal.Error, token: Ast.TokenIndex, bytes: []const u8, offset: u32) InnerError { | ||
fn failWithStrLitError( | ||
astgen: *AstGen, | ||
err: std.zig.string_literal.Error, | ||
token: Ast.TokenIndex, | ||
bytes: []const u8, | ||
offset: u32, | ||
) InnerError { | ||
const raw_string = bytes[offset..]; | ||
switch (err) { | ||
.invalid_escape_character => |bad_index| { | ||
return astgen.failOff( | ||
token, | ||
offset + @as(u32, @intCast(bad_index)), | ||
"invalid escape character: '{c}'", | ||
.{raw_string[bad_index]}, | ||
); | ||
}, | ||
.expected_hex_digit => |bad_index| { | ||
return astgen.failOff( | ||
token, | ||
offset + @as(u32, @intCast(bad_index)), | ||
"expected hex digit, found '{c}'", | ||
.{raw_string[bad_index]}, | ||
); | ||
}, | ||
.empty_unicode_escape_sequence => |bad_index| { | ||
return astgen.failOff( | ||
token, | ||
offset + @as(u32, @intCast(bad_index)), | ||
"empty unicode escape sequence", | ||
.{}, | ||
); | ||
}, | ||
.expected_hex_digit_or_rbrace => |bad_index| { | ||
return astgen.failOff( | ||
token, | ||
offset + @as(u32, @intCast(bad_index)), | ||
"expected hex digit or '}}', found '{c}'", | ||
.{raw_string[bad_index]}, | ||
); | ||
}, | ||
.invalid_unicode_codepoint => |bad_index| { | ||
return astgen.failOff( | ||
token, | ||
offset + @as(u32, @intCast(bad_index)), | ||
"unicode escape does not correspond to a valid unicode scalar value", | ||
.{}, | ||
); | ||
}, | ||
.expected_lbrace => |bad_index| { | ||
return astgen.failOff( | ||
token, | ||
offset + @as(u32, @intCast(bad_index)), | ||
"expected '{{', found '{c}", | ||
.{raw_string[bad_index]}, | ||
); | ||
}, | ||
.expected_rbrace => |bad_index| { | ||
return astgen.failOff( | ||
token, | ||
offset + @as(u32, @intCast(bad_index)), | ||
"expected '}}', found '{c}", | ||
.{raw_string[bad_index]}, | ||
); | ||
}, | ||
.expected_single_quote => |bad_index| { | ||
return astgen.failOff( | ||
token, | ||
offset + @as(u32, @intCast(bad_index)), | ||
"expected single quote ('), found '{c}", | ||
.{raw_string[bad_index]}, | ||
); | ||
}, | ||
.invalid_character => |bad_index| { | ||
return astgen.failOff( | ||
token, | ||
offset + @as(u32, @intCast(bad_index)), | ||
"invalid byte in string or character literal: '{c}'", | ||
.{raw_string[bad_index]}, | ||
); | ||
}, | ||
.empty_char_literal => { | ||
return astgen.failOff(token, offset, "empty character literal", .{}); | ||
}, | ||
} | ||
return astgen.failOff( | ||
token, | ||
offset + @as(u32, @intCast(err.offset())), | ||
"{}", | ||
.{err.fmtWithSource(raw_string)}, | ||
); | ||
} | ||
|
||
fn failNode( | ||
|
@@ -11618,7 +11552,7 @@ fn appendErrorTokNotesOff( | |
comptime format: []const u8, | ||
args: anytype, | ||
notes: []const u32, | ||
) !void { | ||
) Allocator.Error!void { | ||
@branchHint(.cold); | ||
const gpa = astgen.gpa; | ||
const string_bytes = &astgen.string_bytes; | ||
|
@@ -11814,32 +11748,17 @@ fn strLitAsString(astgen: *AstGen, str_lit_token: Ast.TokenIndex) !IndexSlice { | |
} | ||
|
||
fn strLitNodeAsString(astgen: *AstGen, node: Ast.Node.Index) !IndexSlice { | ||
const tree = astgen.tree; | ||
const node_datas = tree.nodes.items(.data); | ||
|
||
const start = node_datas[node].lhs; | ||
const end = node_datas[node].rhs; | ||
|
||
const gpa = astgen.gpa; | ||
const data = astgen.tree.nodes.items(.data); | ||
const string_bytes = &astgen.string_bytes; | ||
const str_index = string_bytes.items.len; | ||
|
||
// First line: do not append a newline. | ||
var tok_i = start; | ||
{ | ||
const slice = tree.tokenSlice(tok_i); | ||
const line_bytes = slice[2..]; | ||
try string_bytes.appendSlice(gpa, line_bytes); | ||
tok_i += 1; | ||
} | ||
// Following lines: each line prepends a newline. | ||
while (tok_i <= end) : (tok_i += 1) { | ||
const slice = tree.tokenSlice(tok_i); | ||
const line_bytes = slice[2..]; | ||
try string_bytes.ensureUnusedCapacity(gpa, line_bytes.len + 1); | ||
string_bytes.appendAssumeCapacity('\n'); | ||
string_bytes.appendSliceAssumeCapacity(line_bytes); | ||
var parser = std.zig.string_literal.multilineParser(string_bytes.writer(gpa)); | ||
var tok_i = data[node].lhs; | ||
while (tok_i <= data[node].rhs) : (tok_i += 1) { | ||
try parser.line(astgen.tree.tokenSlice(tok_i)); | ||
} | ||
|
||
const len = string_bytes.items.len - str_index; | ||
try string_bytes.append(gpa, 0); | ||
return IndexSlice{ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why are these excluded from formatting?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Auto format is good on code for readability, but normalizing the zon test data to standard formatting reduces coverage. (Tests cover invalid syntax and non standard formatting)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm... I think incorrect formatting is something we could better test with some kind of automated testing (something to jiggle around whitespace in all of the behavior tests and check they work the same), and I don't see any particularly compelling reason to treat the ZON behavior tests differently to the other Zig behavior tests.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I’m open to more thorough tests being added for this in the future, but in the meantime, normalizing test data strictly decreases the quality of the tests and provides no upsides
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I do see your point -- I'll defer to @andrewrk's opinion on this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think @mlugg's idea exactly describes fuzz testing combined with using these existing tests as an input corpus. If we had that sub-project already finished and fuzzing done regularly, then I think it would make sense to go with that suggestion. Until that is implemented, I think @MasonRemaley's stance makes more sense. I'm actively working on it.