Fix lexer bug with UTF-16 strings

This commit is contained in:
Jarred Sumner
2021-09-17 16:09:26 -07:00
parent 468c475359
commit e1ce0e8467
2 changed files with 18 additions and 14 deletions

View File

@@ -1 +1 @@
18
19

View File

@@ -455,13 +455,16 @@ pub const Lexer = struct {
else => {},
}
if (iter.c <= 0xFFFF) {
buf.append(@intCast(u16, c)) catch unreachable;
} else {
iter.c -= 0x10000;
buf.ensureUnusedCapacity(2) catch unreachable;
buf.appendAssumeCapacity(@intCast(u16, 0xD800 + ((iter.c >> 10) & 0x3FF)));
buf.appendAssumeCapacity(@intCast(u16, 0xDC00 + (iter.c & 0x3FF)));
switch (iter.c) {
0...0xFFFF => {
buf.append(@intCast(u16, iter.c)) catch unreachable;
},
else => {
iter.c -= 0x10000;
buf.ensureUnusedCapacity(2) catch unreachable;
buf.appendAssumeCapacity(@intCast(u16, 0xD800 + ((iter.c >> 10) & 0x3FF)));
buf.appendAssumeCapacity(@intCast(u16, 0xDC00 + (iter.c & 0x3FF)));
},
}
}
}
@@ -477,12 +480,6 @@ pub const Lexer = struct {
'\\' => {
try lexer.step();
// Skip slow path for a handful of common escaped characters that don't need UTF-16 handling
needs_slow_path = switch (lexer.code_point) {
'n', '`', '\'', '0', '"' => false,
else => true,
};
// Handle Windows CRLF
if (lexer.code_point == '\r' and lexer.json_options != null) {
try lexer.step();
@@ -491,6 +488,13 @@ pub const Lexer = struct {
}
continue :stringLiteral;
}
// Skip slow path for a handful of common escaped characters that don't need UTF-16 handling
needs_slow_path = switch (lexer.code_point) {
// if it was previously marked as needing slow path, then keep it
'n', '`', '\'', '0', '"' => needs_slow_path,
else => true,
};
},
// This indicates the end of the file