Make error position tracking explicit with Offset field on ParserError
Thread byte offset information through all error creation sites, eliminating the need for SubsliceOffset to recover position from pointer comparison.

Changes:
- Add Offset field to ParserError struct
- Add offset parameter to NewParserError
- Add Parser.offsetOf helper for suffix-length arithmetic
- Thread base offset through scanner functions (scanComment, scanBasicString, scanMultilineBasicString, scanLiteralString, scanMultilineLiteralString, scanWindowsNewline)
- Thread base offset through standalone functions (expect, hexToRune)
- Thread base offset through all decode functions (parseInteger, parseFloat, parseLocalDate, parseLocalTime, parseLocalDateTime, parseDateTime, checkAndRemoveUnderscores*)
- Update all unmarshaler call sites to pass value.Raw.Offset
- Update localtime.go UnmarshalText methods with base=0
- Update strict.go to populate Offset from key ranges
- Change wrapDecodeError to read de.Offset directly
- Change Utf8TomlValidAlreadyEscaped to return int index (-1 if valid) instead of a byte subslice
- Unexport SubsliceOffset (now only used internally by Range())

This makes error positions self-describing: each ParserError carries its own byte offset, so callers no longer need the original document slice and address arithmetic to determine where an error occurred.

Co-authored-by: Thomas Pelletier <thomas@pelletier.dev>
This commit is contained in:
+1
-4
@@ -90,10 +90,7 @@ type Range struct {
|
||||
Length uint32
|
||||
}
|
||||
|
||||
// SubsliceOffset returns the byte offset of subslice within data.
|
||||
// Subslice must be a subslice of data, meaning it must point into the
|
||||
// same backing array. Panics if subslice is not within data.
|
||||
func SubsliceOffset(data []byte, subslice []byte) int {
|
||||
func subsliceOffset(data []byte, subslice []byte) int {
|
||||
if len(subslice) == 0 {
|
||||
return len(data)
|
||||
}
|
||||
|
||||
@@ -35,7 +35,7 @@ func BenchmarkScanComments(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
_, _, _ = scanComment(input)
|
||||
_, _, _ = scanComment(input, 0)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
+63
-55
@@ -16,6 +16,7 @@ type ParserError struct {
|
||||
Highlight []byte
|
||||
Message string
|
||||
Key []string // optional
|
||||
Offset int
|
||||
}
|
||||
|
||||
// Error is the implementation of the error interface.
|
||||
@@ -27,9 +28,10 @@ func (e *ParserError) Error() string {
|
||||
//
|
||||
// Warning: Highlight needs to be a subslice of Parser.data, so only slices
|
||||
// returned by Parser.Raw are valid candidates.
|
||||
func NewParserError(highlight []byte, format string, args ...interface{}) error {
|
||||
func NewParserError(highlight []byte, offset int, format string, args ...interface{}) error {
|
||||
return &ParserError{
|
||||
Highlight: highlight,
|
||||
Offset: offset,
|
||||
Message: fmt.Errorf(format, args...).Error(),
|
||||
}
|
||||
}
|
||||
@@ -64,12 +66,18 @@ func (p *Parser) Data() []byte {
|
||||
return p.data
|
||||
}
|
||||
|
||||
func (p *Parser) offsetOf(b []byte) int {
|
||||
return len(p.data) - len(b)
|
||||
}
|
||||
|
||||
// Range returns a range description that corresponds to a given slice of the
|
||||
// input. If the argument is not a subslice of the parser input, this function
|
||||
// panics.
|
||||
//
|
||||
// Prefer using ParserError.Offset directly for error position information.
|
||||
func (p *Parser) Range(b []byte) Range {
|
||||
return Range{
|
||||
Offset: uint32(SubsliceOffset(p.data, b)), //nolint:gosec // TOML documents are small
|
||||
Offset: uint32(subsliceOffset(p.data, b)), //nolint:gosec // TOML documents are small
|
||||
Length: uint32(len(b)), //nolint:gosec // TOML documents are small
|
||||
}
|
||||
}
|
||||
@@ -191,16 +199,16 @@ func (p *Parser) parseNewline(b []byte) ([]byte, error) {
|
||||
}
|
||||
|
||||
if b[0] == '\r' {
|
||||
_, rest, err := scanWindowsNewline(b)
|
||||
_, rest, err := scanWindowsNewline(b, p.offsetOf(b))
|
||||
return rest, err
|
||||
}
|
||||
|
||||
return nil, NewParserError(b[0:1], "expected newline but got %#U", b[0])
|
||||
return nil, NewParserError(b[0:1], p.offsetOf(b), "expected newline but got %#U", b[0])
|
||||
}
|
||||
|
||||
func (p *Parser) parseComment(b []byte) (reference, []byte, error) {
|
||||
ref := invalidReference
|
||||
data, rest, err := scanComment(b)
|
||||
data, rest, err := scanComment(b, p.offsetOf(b))
|
||||
if p.KeepComments && err == nil {
|
||||
ref = p.builder.Push(Node{
|
||||
Kind: Comment,
|
||||
@@ -284,12 +292,12 @@ func (p *Parser) parseArrayTable(b []byte) (reference, []byte, error) {
|
||||
p.builder.AttachChild(ref, k)
|
||||
b = p.parseWhitespace(b)
|
||||
|
||||
b, err = expect(']', b)
|
||||
b, err = expect(']', b, p.offsetOf(b))
|
||||
if err != nil {
|
||||
return ref, nil, err
|
||||
}
|
||||
|
||||
b, err = expect(']', b)
|
||||
b, err = expect(']', b, p.offsetOf(b))
|
||||
|
||||
return ref, b, err
|
||||
}
|
||||
@@ -314,7 +322,7 @@ func (p *Parser) parseStdTable(b []byte) (reference, []byte, error) {
|
||||
|
||||
b = p.parseWhitespace(b)
|
||||
|
||||
b, err = expect(']', b)
|
||||
b, err = expect(']', b, p.offsetOf(b))
|
||||
|
||||
return ref, b, err
|
||||
}
|
||||
@@ -338,10 +346,10 @@ func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
|
||||
b = p.parseWhitespace(b)
|
||||
|
||||
if len(b) == 0 {
|
||||
return invalidReference, nil, NewParserError(startB[:len(startB)-len(b)], "expected = after a key, but the document ends there")
|
||||
return invalidReference, nil, NewParserError(startB[:len(startB)-len(b)], p.offsetOf(startB), "expected = after a key, but the document ends there")
|
||||
}
|
||||
|
||||
b, err = expect('=', b)
|
||||
b, err = expect('=', b, p.offsetOf(b))
|
||||
if err != nil {
|
||||
return invalidReference, nil, err
|
||||
}
|
||||
@@ -370,7 +378,7 @@ func (p *Parser) parseVal(b []byte) (reference, []byte, error) {
|
||||
ref := invalidReference
|
||||
|
||||
if len(b) == 0 {
|
||||
return ref, nil, NewParserError(b, "expected value, not eof")
|
||||
return ref, nil, NewParserError(b, p.offsetOf(b), "expected value, not eof")
|
||||
}
|
||||
|
||||
var err error
|
||||
@@ -415,7 +423,7 @@ func (p *Parser) parseVal(b []byte) (reference, []byte, error) {
|
||||
return ref, b, err
|
||||
case 't':
|
||||
if !scanFollowsTrue(b) {
|
||||
return ref, nil, NewParserError(atmost(b, 4), "expected 'true'")
|
||||
return ref, nil, NewParserError(atmost(b, 4), p.offsetOf(b), "expected 'true'")
|
||||
}
|
||||
|
||||
ref = p.builder.Push(Node{
|
||||
@@ -426,7 +434,7 @@ func (p *Parser) parseVal(b []byte) (reference, []byte, error) {
|
||||
return ref, b[4:], nil
|
||||
case 'f':
|
||||
if !scanFollowsFalse(b) {
|
||||
return ref, nil, NewParserError(atmost(b, 5), "expected 'false'")
|
||||
return ref, nil, NewParserError(atmost(b, 5), p.offsetOf(b), "expected 'false'")
|
||||
}
|
||||
|
||||
ref = p.builder.Push(Node{
|
||||
@@ -453,7 +461,7 @@ func atmost(b []byte, n int) []byte {
|
||||
}
|
||||
|
||||
func (p *Parser) parseLiteralString(b []byte) ([]byte, []byte, []byte, error) {
|
||||
v, rest, err := scanLiteralString(b)
|
||||
v, rest, err := scanLiteralString(b, p.offsetOf(b))
|
||||
if err != nil {
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
@@ -485,7 +493,7 @@ func (p *Parser) parseInlineTable(b []byte) (reference, []byte, error) {
|
||||
b = p.parseWhitespace(b)
|
||||
|
||||
if len(b) == 0 {
|
||||
return parent, nil, NewParserError(previousB[:1], "inline table is incomplete")
|
||||
return parent, nil, NewParserError(previousB[:1], p.offsetOf(previousB), "inline table is incomplete")
|
||||
}
|
||||
|
||||
if b[0] == '}' {
|
||||
@@ -493,7 +501,7 @@ func (p *Parser) parseInlineTable(b []byte) (reference, []byte, error) {
|
||||
}
|
||||
|
||||
if !first {
|
||||
b, err = expect(',', b)
|
||||
b, err = expect(',', b, p.offsetOf(b))
|
||||
if err != nil {
|
||||
return parent, nil, err
|
||||
}
|
||||
@@ -517,7 +525,7 @@ func (p *Parser) parseInlineTable(b []byte) (reference, []byte, error) {
|
||||
first = false
|
||||
}
|
||||
|
||||
rest, err := expect('}', b)
|
||||
rest, err := expect('}', b, p.offsetOf(b))
|
||||
|
||||
return parent, rest, err
|
||||
}
|
||||
@@ -566,7 +574,7 @@ func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
|
||||
}
|
||||
|
||||
if len(b) == 0 {
|
||||
return parent, nil, NewParserError(arrayStart[:1], "array is incomplete")
|
||||
return parent, nil, NewParserError(arrayStart[:1], p.offsetOf(arrayStart), "array is incomplete")
|
||||
}
|
||||
|
||||
if b[0] == ']' {
|
||||
@@ -575,7 +583,7 @@ func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
|
||||
|
||||
if b[0] == ',' {
|
||||
if first {
|
||||
return parent, nil, NewParserError(b[0:1], "array cannot start with comma")
|
||||
return parent, nil, NewParserError(b[0:1], p.offsetOf(b), "array cannot start with comma")
|
||||
}
|
||||
b = b[1:]
|
||||
|
||||
@@ -587,7 +595,7 @@ func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
|
||||
addChild(cref)
|
||||
}
|
||||
} else if !first {
|
||||
return parent, nil, NewParserError(b[0:1], "array elements must be separated by commas")
|
||||
return parent, nil, NewParserError(b[0:1], p.offsetOf(b), "array elements must be separated by commas")
|
||||
}
|
||||
|
||||
// TOML allows trailing commas in arrays.
|
||||
@@ -614,7 +622,7 @@ func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
|
||||
first = false
|
||||
}
|
||||
|
||||
rest, err := expect(']', b)
|
||||
rest, err := expect(']', b, p.offsetOf(b))
|
||||
|
||||
return parent, rest, err
|
||||
}
|
||||
@@ -669,7 +677,7 @@ func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) (reference, []b
|
||||
}
|
||||
|
||||
func (p *Parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) {
|
||||
token, rest, err := scanMultilineLiteralString(b)
|
||||
token, rest, err := scanMultilineLiteralString(b, p.offsetOf(b))
|
||||
if err != nil {
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
@@ -698,7 +706,7 @@ func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, er
|
||||
// mlb-quotes = 1*2quotation-mark
|
||||
// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
|
||||
// mlb-escaped-nl = escape ws newline *( wschar / newline )
|
||||
token, escaped, rest, err := scanMultilineBasicString(b)
|
||||
token, escaped, rest, err := scanMultilineBasicString(b, p.offsetOf(b))
|
||||
if err != nil {
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
@@ -715,14 +723,15 @@ func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, er
|
||||
// fast path
|
||||
startIdx := i
|
||||
endIdx := len(token) - len(`"""`)
|
||||
tokenBase := p.offsetOf(token)
|
||||
|
||||
if !escaped {
|
||||
str := token[startIdx:endIdx]
|
||||
highlight := characters.Utf8TomlValidAlreadyEscaped(str)
|
||||
if len(highlight) == 0 {
|
||||
invalidIdx := characters.Utf8TomlValidAlreadyEscaped(str)
|
||||
if invalidIdx < 0 {
|
||||
return token, str, rest, nil
|
||||
}
|
||||
return nil, nil, nil, NewParserError(highlight, "invalid UTF-8")
|
||||
return nil, nil, nil, NewParserError(str[invalidIdx:invalidIdx+1], tokenBase+startIdx+invalidIdx, "invalid UTF-8")
|
||||
}
|
||||
|
||||
var builder bytes.Buffer
|
||||
@@ -787,14 +796,14 @@ func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, er
|
||||
case 'e':
|
||||
builder.WriteByte(0x1B)
|
||||
case 'u':
|
||||
x, err := hexToRune(atmost(token[i+1:], 4), 4)
|
||||
x, err := hexToRune(atmost(token[i+1:], 4), tokenBase+i+1, 4)
|
||||
if err != nil {
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
builder.WriteRune(x)
|
||||
i += 4
|
||||
case 'U':
|
||||
x, err := hexToRune(atmost(token[i+1:], 8), 8)
|
||||
x, err := hexToRune(atmost(token[i+1:], 8), tokenBase+i+1, 8)
|
||||
if err != nil {
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
@@ -802,13 +811,13 @@ func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, er
|
||||
builder.WriteRune(x)
|
||||
i += 8
|
||||
default:
|
||||
return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
|
||||
return nil, nil, nil, NewParserError(token[i:i+1], tokenBase+i, "invalid escaped character %#U", c)
|
||||
}
|
||||
i++
|
||||
} else {
|
||||
size := characters.Utf8ValidNext(token[i:])
|
||||
if size == 0 {
|
||||
return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
|
||||
return nil, nil, nil, NewParserError(token[i:i+1], tokenBase+i, "invalid character %#U", c)
|
||||
}
|
||||
builder.Write(token[i : i+size])
|
||||
i += size
|
||||
@@ -863,12 +872,9 @@ func (p *Parser) parseKey(b []byte) (reference, []byte, error) {
|
||||
|
||||
func (p *Parser) parseSimpleKey(b []byte) (raw, key, rest []byte, err error) {
|
||||
if len(b) == 0 {
|
||||
return nil, nil, nil, NewParserError(b, "expected key but found none")
|
||||
return nil, nil, nil, NewParserError(b, p.offsetOf(b), "expected key but found none")
|
||||
}
|
||||
|
||||
// simple-key = quoted-key / unquoted-key
|
||||
// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
|
||||
// quoted-key = basic-string / literal-string
|
||||
switch {
|
||||
case b[0] == '\'':
|
||||
return p.parseLiteralString(b)
|
||||
@@ -878,7 +884,7 @@ func (p *Parser) parseSimpleKey(b []byte) (raw, key, rest []byte, err error) {
|
||||
key, rest = scanUnquotedKey(b)
|
||||
return key, key, rest, nil
|
||||
default:
|
||||
return nil, nil, nil, NewParserError(b[0:1], "invalid character at start of key: %c", b[0])
|
||||
return nil, nil, nil, NewParserError(b[0:1], p.offsetOf(b), "invalid character at start of key: %c", b[0])
|
||||
}
|
||||
}
|
||||
|
||||
@@ -898,7 +904,7 @@ func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
|
||||
// escape-seq-char =/ %x74 ; t tab U+0009
|
||||
// escape-seq-char =/ %x75 4HEXDIG ; uXXXX U+XXXX
|
||||
// escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX U+XXXXXXXX
|
||||
token, escaped, rest, err := scanBasicString(b)
|
||||
token, escaped, rest, err := scanBasicString(b, p.offsetOf(b))
|
||||
if err != nil {
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
@@ -909,13 +915,15 @@ func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
|
||||
// Fast path. If there is no escape sequence, the string should just be
|
||||
// an UTF-8 encoded string, which is the same as Go. In that case,
|
||||
// validate the string and return a direct reference to the buffer.
|
||||
tokenBase := p.offsetOf(token)
|
||||
|
||||
if !escaped {
|
||||
str := token[startIdx:endIdx]
|
||||
highlight := characters.Utf8TomlValidAlreadyEscaped(str)
|
||||
if len(highlight) == 0 {
|
||||
invalidIdx := characters.Utf8TomlValidAlreadyEscaped(str)
|
||||
if invalidIdx < 0 {
|
||||
return token, str, rest, nil
|
||||
}
|
||||
return nil, nil, nil, NewParserError(highlight, "invalid UTF-8")
|
||||
return nil, nil, nil, NewParserError(str[invalidIdx:invalidIdx+1], tokenBase+startIdx+invalidIdx, "invalid UTF-8")
|
||||
}
|
||||
|
||||
i := startIdx
|
||||
@@ -946,7 +954,7 @@ func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
|
||||
case 'e':
|
||||
builder.WriteByte(0x1B)
|
||||
case 'u':
|
||||
x, err := hexToRune(token[i+1:len(token)-1], 4)
|
||||
x, err := hexToRune(token[i+1:len(token)-1], tokenBase+i+1, 4)
|
||||
if err != nil {
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
@@ -954,7 +962,7 @@ func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
|
||||
builder.WriteRune(x)
|
||||
i += 4
|
||||
case 'U':
|
||||
x, err := hexToRune(token[i+1:len(token)-1], 8)
|
||||
x, err := hexToRune(token[i+1:len(token)-1], tokenBase+i+1, 8)
|
||||
if err != nil {
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
@@ -962,13 +970,13 @@ func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
|
||||
builder.WriteRune(x)
|
||||
i += 8
|
||||
default:
|
||||
return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
|
||||
return nil, nil, nil, NewParserError(token[i:i+1], tokenBase+i, "invalid escaped character %#U", c)
|
||||
}
|
||||
i++
|
||||
} else {
|
||||
size := characters.Utf8ValidNext(token[i:])
|
||||
if size == 0 {
|
||||
return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
|
||||
return nil, nil, nil, NewParserError(token[i:i+1], tokenBase+i, "invalid character %#U", c)
|
||||
}
|
||||
builder.Write(token[i : i+size])
|
||||
i += size
|
||||
@@ -978,9 +986,9 @@ func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
|
||||
return token, builder.Bytes(), rest, nil
|
||||
}
|
||||
|
||||
func hexToRune(b []byte, length int) (rune, error) {
|
||||
func hexToRune(b []byte, base int, length int) (rune, error) {
|
||||
if len(b) < length {
|
||||
return -1, NewParserError(b, "unicode point needs %d character, not %d", length, len(b))
|
||||
return -1, NewParserError(b, base, "unicode point needs %d character, not %d", length, len(b))
|
||||
}
|
||||
b = b[:length]
|
||||
|
||||
@@ -995,13 +1003,13 @@ func hexToRune(b []byte, length int) (rune, error) {
|
||||
case 'A' <= c && c <= 'F':
|
||||
d = uint32(c - 'A' + 10)
|
||||
default:
|
||||
return -1, NewParserError(b[i:i+1], "non-hex character")
|
||||
return -1, NewParserError(b[i:i+1], base+i, "non-hex character")
|
||||
}
|
||||
r = r*16 + d
|
||||
}
|
||||
|
||||
if r > unicode.MaxRune || 0xD800 <= r && r < 0xE000 {
|
||||
return -1, NewParserError(b, "escape sequence is invalid Unicode code point")
|
||||
return -1, NewParserError(b, base, "escape sequence is invalid Unicode code point")
|
||||
}
|
||||
|
||||
return rune(r), nil
|
||||
@@ -1021,7 +1029,7 @@ func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error)
|
||||
switch b[0] {
|
||||
case 'i':
|
||||
if !scanFollowsInf(b) {
|
||||
return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'inf'")
|
||||
return invalidReference, nil, NewParserError(atmost(b, 3), p.offsetOf(b), "expected 'inf'")
|
||||
}
|
||||
|
||||
return p.builder.Push(Node{
|
||||
@@ -1031,7 +1039,7 @@ func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error)
|
||||
}), b[3:], nil
|
||||
case 'n':
|
||||
if !scanFollowsNan(b) {
|
||||
return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'nan'")
|
||||
return invalidReference, nil, NewParserError(atmost(b, 3), p.offsetOf(b), "expected 'nan'")
|
||||
}
|
||||
|
||||
return p.builder.Push(Node{
|
||||
@@ -1190,7 +1198,7 @@ func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) {
|
||||
}), b[i+3:], nil
|
||||
}
|
||||
|
||||
return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'i' while scanning for a number")
|
||||
return invalidReference, nil, NewParserError(b[i:i+1], p.offsetOf(b)+i, "unexpected character 'i' while scanning for a number")
|
||||
}
|
||||
|
||||
if c == 'n' {
|
||||
@@ -1202,14 +1210,14 @@ func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) {
|
||||
}), b[i+3:], nil
|
||||
}
|
||||
|
||||
return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'n' while scanning for a number")
|
||||
return invalidReference, nil, NewParserError(b[i:i+1], p.offsetOf(b)+i, "unexpected character 'n' while scanning for a number")
|
||||
}
|
||||
|
||||
break
|
||||
}
|
||||
|
||||
if i == 0 {
|
||||
return invalidReference, b, NewParserError(b, "incomplete number")
|
||||
return invalidReference, b, NewParserError(b, p.offsetOf(b), "incomplete number")
|
||||
}
|
||||
|
||||
kind := Integer
|
||||
@@ -1246,13 +1254,13 @@ func isValidBinaryRune(r byte) bool {
|
||||
return r == '0' || r == '1' || r == '_'
|
||||
}
|
||||
|
||||
func expect(x byte, b []byte) ([]byte, error) {
|
||||
func expect(x byte, b []byte, base int) ([]byte, error) {
|
||||
if len(b) == 0 {
|
||||
return nil, NewParserError(b, "expected character %c but the document ended here", x)
|
||||
return nil, NewParserError(b, base, "expected character %c but the document ended here", x)
|
||||
}
|
||||
|
||||
if b[0] != x {
|
||||
return nil, NewParserError(b[0:1], "expected character %c", x)
|
||||
return nil, NewParserError(b[0:1], base, "expected character %c", x)
|
||||
}
|
||||
|
||||
return b[1:], nil
|
||||
|
||||
+27
-72
@@ -47,48 +47,31 @@ func isUnquotedKeyChar(r byte) bool {
|
||||
return (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' || r == '_'
|
||||
}
|
||||
|
||||
func scanLiteralString(b []byte) ([]byte, []byte, error) {
|
||||
// literal-string = apostrophe *literal-char apostrophe
|
||||
// apostrophe = %x27 ; ' apostrophe
|
||||
// literal-char = %x09 / %x20-26 / %x28-7E / non-ascii
|
||||
func scanLiteralString(b []byte, base int) ([]byte, []byte, error) {
|
||||
for i := 1; i < len(b); {
|
||||
switch b[i] {
|
||||
case '\'':
|
||||
return b[:i+1], b[i+1:], nil
|
||||
case '\n', '\r':
|
||||
return nil, nil, NewParserError(b[i:i+1], "literal strings cannot have new lines")
|
||||
return nil, nil, NewParserError(b[i:i+1], base+i, "literal strings cannot have new lines")
|
||||
}
|
||||
size := characters.Utf8ValidNext(b[i:])
|
||||
if size == 0 {
|
||||
return nil, nil, NewParserError(b[i:i+1], "invalid character")
|
||||
return nil, nil, NewParserError(b[i:i+1], base+i, "invalid character")
|
||||
}
|
||||
i += size
|
||||
}
|
||||
|
||||
return nil, nil, NewParserError(b[len(b):], "unterminated literal string")
|
||||
return nil, nil, NewParserError(b[len(b):], base+len(b), "unterminated literal string")
|
||||
}
|
||||
|
||||
func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) {
|
||||
// ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body
|
||||
// ml-literal-string-delim
|
||||
// ml-literal-string-delim = 3apostrophe
|
||||
// ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ]
|
||||
//
|
||||
// mll-content = mll-char / newline
|
||||
// mll-char = %x09 / %x20-26 / %x28-7E / non-ascii
|
||||
// mll-quotes = 1*2apostrophe
|
||||
func scanMultilineLiteralString(b []byte, base int) ([]byte, []byte, error) {
|
||||
for i := 3; i < len(b); {
|
||||
switch b[i] {
|
||||
case '\'':
|
||||
if scanFollowsMultilineLiteralStringDelimiter(b[i:]) {
|
||||
i += 3
|
||||
|
||||
// At that point we found 3 apostrophe, and i is the
|
||||
// index of the byte after the third one. The scanner
|
||||
// needs to be eager, because there can be an extra 2
|
||||
// apostrophe that can be accepted at the end of the
|
||||
// string.
|
||||
|
||||
if i >= len(b) || b[i] != '\'' {
|
||||
return b[:i], b[i:], nil
|
||||
}
|
||||
@@ -100,39 +83,39 @@ func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) {
|
||||
i++
|
||||
|
||||
if i < len(b) && b[i] == '\'' {
|
||||
return nil, nil, NewParserError(b[i-3:i+1], "''' not allowed in multiline literal string")
|
||||
return nil, nil, NewParserError(b[i-3:i+1], base+i-3, "''' not allowed in multiline literal string")
|
||||
}
|
||||
|
||||
return b[:i], b[i:], nil
|
||||
}
|
||||
case '\r':
|
||||
if len(b) < i+2 {
|
||||
return nil, nil, NewParserError(b[len(b):], `need a \n after \r`)
|
||||
return nil, nil, NewParserError(b[len(b):], base+len(b), `need a \n after \r`)
|
||||
}
|
||||
if b[i+1] != '\n' {
|
||||
return nil, nil, NewParserError(b[i:i+2], `need a \n after \r`)
|
||||
return nil, nil, NewParserError(b[i:i+2], base+i, `need a \n after \r`)
|
||||
}
|
||||
i += 2 // skip the \n
|
||||
i += 2
|
||||
continue
|
||||
}
|
||||
size := characters.Utf8ValidNext(b[i:])
|
||||
if size == 0 {
|
||||
return nil, nil, NewParserError(b[i:i+1], "invalid character")
|
||||
return nil, nil, NewParserError(b[i:i+1], base+i, "invalid character")
|
||||
}
|
||||
i += size
|
||||
}
|
||||
|
||||
return nil, nil, NewParserError(b[len(b):], `multiline literal string not terminated by '''`)
|
||||
return nil, nil, NewParserError(b[len(b):], base+len(b), `multiline literal string not terminated by '''`)
|
||||
}
|
||||
|
||||
func scanWindowsNewline(b []byte) ([]byte, []byte, error) {
|
||||
func scanWindowsNewline(b []byte, base int) ([]byte, []byte, error) {
|
||||
const lenCRLF = 2
|
||||
if len(b) < lenCRLF {
|
||||
return nil, nil, NewParserError(b, "windows new line expected")
|
||||
return nil, nil, NewParserError(b, base, "windows new line expected")
|
||||
}
|
||||
|
||||
if b[1] != '\n' {
|
||||
return nil, nil, NewParserError(b, `windows new line should be \r\n`)
|
||||
return nil, nil, NewParserError(b, base, `windows new line should be \r\n`)
|
||||
}
|
||||
|
||||
return b[:lenCRLF], b[lenCRLF:], nil
|
||||
@@ -151,13 +134,7 @@ func scanWhitespace(b []byte) ([]byte, []byte) {
|
||||
return b, b[len(b):]
|
||||
}
|
||||
|
||||
func scanComment(b []byte) ([]byte, []byte, error) {
|
||||
// comment-start-symbol = %x23 ; #
|
||||
// non-ascii = %x80-D7FF / %xE000-10FFFF
|
||||
// non-eol = %x09 / %x20-7F / non-ascii
|
||||
//
|
||||
// comment = comment-start-symbol *non-eol
|
||||
|
||||
func scanComment(b []byte, base int) ([]byte, []byte, error) {
|
||||
for i := 1; i < len(b); {
|
||||
if b[i] == '\n' {
|
||||
return b[:i], b[i:], nil
|
||||
@@ -166,11 +143,11 @@ func scanComment(b []byte) ([]byte, []byte, error) {
|
||||
if i+1 < len(b) && b[i+1] == '\n' {
|
||||
return b[:i+1], b[i+1:], nil
|
||||
}
|
||||
return nil, nil, NewParserError(b[i:i+1], "invalid character in comment")
|
||||
return nil, nil, NewParserError(b[i:i+1], base+i, "invalid character in comment")
|
||||
}
|
||||
size := characters.Utf8ValidNext(b[i:])
|
||||
if size == 0 {
|
||||
return nil, nil, NewParserError(b[i:i+1], "invalid character in comment")
|
||||
return nil, nil, NewParserError(b[i:i+1], base+i, "invalid character in comment")
|
||||
}
|
||||
|
||||
i += size
|
||||
@@ -179,12 +156,7 @@ func scanComment(b []byte) ([]byte, []byte, error) {
|
||||
return b, b[len(b):], nil
|
||||
}
|
||||
|
||||
func scanBasicString(b []byte) ([]byte, bool, []byte, error) {
|
||||
// basic-string = quotation-mark *basic-char quotation-mark
|
||||
// quotation-mark = %x22 ; "
|
||||
// basic-char = basic-unescaped / escaped
|
||||
// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
|
||||
// escaped = escape escape-seq-char
|
||||
func scanBasicString(b []byte, base int) ([]byte, bool, []byte, error) {
|
||||
escaped := false
|
||||
i := 1
|
||||
|
||||
@@ -193,31 +165,20 @@ func scanBasicString(b []byte) ([]byte, bool, []byte, error) {
|
||||
case '"':
|
||||
return b[:i+1], escaped, b[i+1:], nil
|
||||
case '\n', '\r':
|
||||
return nil, escaped, nil, NewParserError(b[i:i+1], "basic strings cannot have new lines")
|
||||
return nil, escaped, nil, NewParserError(b[i:i+1], base+i, "basic strings cannot have new lines")
|
||||
case '\\':
|
||||
if len(b) < i+2 {
|
||||
return nil, escaped, nil, NewParserError(b[i:i+1], "need a character after \\")
|
||||
return nil, escaped, nil, NewParserError(b[i:i+1], base+i, "need a character after \\")
|
||||
}
|
||||
escaped = true
|
||||
i++ // skip the next character
|
||||
}
|
||||
}
|
||||
|
||||
return nil, escaped, nil, NewParserError(b[len(b):], `basic string not terminated by "`)
|
||||
return nil, escaped, nil, NewParserError(b[len(b):], base+len(b), `basic string not terminated by "`)
|
||||
}
|
||||
|
||||
func scanMultilineBasicString(b []byte) ([]byte, bool, []byte, error) {
|
||||
// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
|
||||
// ml-basic-string-delim
|
||||
// ml-basic-string-delim = 3quotation-mark
|
||||
// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
|
||||
//
|
||||
// mlb-content = mlb-char / newline / mlb-escaped-nl
|
||||
// mlb-char = mlb-unescaped / escaped
|
||||
// mlb-quotes = 1*2quotation-mark
|
||||
// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
|
||||
// mlb-escaped-nl = escape ws newline *( wschar / newline )
|
||||
|
||||
func scanMultilineBasicString(b []byte, base int) ([]byte, bool, []byte, error) {
|
||||
escaped := false
|
||||
i := 3
|
||||
|
||||
@@ -227,12 +188,6 @@ func scanMultilineBasicString(b []byte) ([]byte, bool, []byte, error) {
|
||||
if scanFollowsMultilineBasicStringDelimiter(b[i:]) {
|
||||
i += 3
|
||||
|
||||
// At that point we found 3 apostrophe, and i is the
|
||||
// index of the byte after the third one. The scanner
|
||||
// needs to be eager, because there can be an extra 2
|
||||
// apostrophe that can be accepted at the end of the
|
||||
// string.
|
||||
|
||||
if i >= len(b) || b[i] != '"' {
|
||||
return b[:i], escaped, b[i:], nil
|
||||
}
|
||||
@@ -244,27 +199,27 @@ func scanMultilineBasicString(b []byte) ([]byte, bool, []byte, error) {
|
||||
i++
|
||||
|
||||
if i < len(b) && b[i] == '"' {
|
||||
return nil, escaped, nil, NewParserError(b[i-3:i+1], `""" not allowed in multiline basic string`)
|
||||
return nil, escaped, nil, NewParserError(b[i-3:i+1], base+i-3, `""" not allowed in multiline basic string`)
|
||||
}
|
||||
|
||||
return b[:i], escaped, b[i:], nil
|
||||
}
|
||||
case '\\':
|
||||
if len(b) < i+2 {
|
||||
return nil, escaped, nil, NewParserError(b[len(b):], "need a character after \\")
|
||||
return nil, escaped, nil, NewParserError(b[len(b):], base+len(b), "need a character after \\")
|
||||
}
|
||||
escaped = true
|
||||
i++ // skip the next character
|
||||
case '\r':
|
||||
if len(b) < i+2 {
|
||||
return nil, escaped, nil, NewParserError(b[len(b):], `need a \n after \r`)
|
||||
return nil, escaped, nil, NewParserError(b[len(b):], base+len(b), `need a \n after \r`)
|
||||
}
|
||||
if b[i+1] != '\n' {
|
||||
return nil, escaped, nil, NewParserError(b[i:i+2], `need a \n after \r`)
|
||||
return nil, escaped, nil, NewParserError(b[i:i+2], base+i, `need a \n after \r`)
|
||||
}
|
||||
i++ // skip the \n
|
||||
}
|
||||
}
|
||||
|
||||
return nil, escaped, nil, NewParserError(b[len(b):], `multiline basic string not terminated by """`)
|
||||
return nil, escaped, nil, NewParserError(b[len(b):], base+len(b), `multiline basic string not terminated by """`)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user