Compare commits

...

2 Commits
v2 ... raw

Author SHA1 Message Date
Thomas Pelletier b371733c67 Make all nodes contain Raw 2022-08-22 21:05:41 -04:00
Thomas Pelletier 64dcce07ea WIP 2022-08-22 23:04:44 +00:00
2 changed files with 103 additions and 62 deletions
+84 -43
View File
@@ -28,6 +28,11 @@ func (p *parser) Raw(raw ast.Range) []byte {
return p.data[raw.Offset : raw.Offset+raw.Length]
}
// SetRaw stores a raw range on the node identified by ref. The range is
// obtained by passing the result of danger.BytesRange(from, to) to p.Range.
// NOTE(review): presumably from and to are both sub-slices of the parser's
// input and the recorded span runs from the start of from up to the start of
// to; confirm against danger.BytesRange.
func (p *parser) SetRaw(ref ast.Reference, from []byte, to []byte) {
	// Compute the byte span first, then look up the node it is recorded on.
	span := danger.BytesRange(from, to)
	node := p.builder.NodeAt(ref)
	node.Raw = p.Range(span)
}
func (p *parser) Reset(b []byte) {
p.builder.Reset()
p.ref = ast.InvalidReference
@@ -152,12 +157,14 @@ func (p *parser) parseArrayTable(b []byte) (ast.Reference, []byte, error) {
ref := p.builder.Push(ast.Node{
Kind: ast.ArrayTable,
})
start := b
b = b[2:]
b = p.parseWhitespace(b)
k, b, err := p.parseKey(b)
if err != nil {
p.SetRaw(ref, start, b)
return ref, nil, err
}
@@ -166,11 +173,12 @@ func (p *parser) parseArrayTable(b []byte) (ast.Reference, []byte, error) {
b, err = expect(']', b)
if err != nil {
p.SetRaw(ref, start, b)
return ref, nil, err
}
b, err = expect(']', b)
p.SetRaw(ref, start, b)
return ref, b, err
}
@@ -181,12 +189,14 @@ func (p *parser) parseStdTable(b []byte) (ast.Reference, []byte, error) {
ref := p.builder.Push(ast.Node{
Kind: ast.Table,
})
start := b
b = b[1:]
b = p.parseWhitespace(b)
key, b, err := p.parseKey(b)
if err != nil {
p.SetRaw(ref, start, b)
return ref, nil, err
}
@@ -196,6 +206,7 @@ func (p *parser) parseStdTable(b []byte) (ast.Reference, []byte, error) {
b, err = expect(']', b)
p.SetRaw(ref, start, b)
return ref, b, err
}
@@ -204,10 +215,12 @@ func (p *parser) parseKeyval(b []byte) (ast.Reference, []byte, error) {
ref := p.builder.Push(ast.Node{
Kind: ast.KeyValue,
})
start := b
key, b, err := p.parseKey(b)
if err != nil {
return ast.InvalidReference, nil, err
p.SetRaw(ref, start, b)
return ast.InvalidReference, b, err
}
// keyval-sep = ws %x3D ws ; =
@@ -215,24 +228,28 @@ func (p *parser) parseKeyval(b []byte) (ast.Reference, []byte, error) {
b = p.parseWhitespace(b)
if len(b) == 0 {
return ast.InvalidReference, nil, newDecodeError(b, "expected = after a key, but the document ends there")
p.SetRaw(ref, start, b)
return ast.InvalidReference, b, newDecodeError(b, "expected = after a key, but the document ends there")
}
b, err = expect('=', b)
if err != nil {
return ast.InvalidReference, nil, err
p.SetRaw(ref, start, b)
return ast.InvalidReference, b, err
}
b = p.parseWhitespace(b)
valRef, b, err := p.parseVal(b)
if err != nil {
p.SetRaw(ref, start, b)
return ref, b, err
}
p.builder.Chain(valRef, key)
p.builder.AttachChild(ref, valRef)
p.SetRaw(ref, start, b)
return ref, b, err
}
@@ -242,7 +259,7 @@ func (p *parser) parseVal(b []byte) (ast.Reference, []byte, error) {
ref := ast.InvalidReference
if len(b) == 0 {
return ref, nil, newDecodeError(b, "expected value, not eof")
return ref, b, newDecodeError(b, "expected value, not eof")
}
var err error
@@ -287,23 +304,25 @@ func (p *parser) parseVal(b []byte) (ast.Reference, []byte, error) {
return ref, b, err
case 't':
if !scanFollowsTrue(b) {
return ref, nil, newDecodeError(atmost(b, 4), "expected 'true'")
return ref, b, newDecodeError(atmost(b, 4), "expected 'true'")
}
ref = p.builder.Push(ast.Node{
Kind: ast.Bool,
Data: b[:4],
Raw: p.Range(b[:4]),
})
return ref, b[4:], nil
case 'f':
if !scanFollowsFalse(b) {
return ref, nil, newDecodeError(atmost(b, 5), "expected 'false'")
return ref, b, newDecodeError(atmost(b, 5), "expected 'false'")
}
ref = p.builder.Push(ast.Node{
Kind: ast.Bool,
Data: b[:5],
Raw: p.Range(b[:5]),
})
return ref, b[5:], nil
@@ -327,7 +346,7 @@ func atmost(b []byte, n int) []byte {
func (p *parser) parseLiteralString(b []byte) ([]byte, []byte, []byte, error) {
v, rest, err := scanLiteralString(b)
if err != nil {
return nil, nil, nil, err
return nil, nil, rest, err
}
return v, v[1 : len(v)-1], rest, nil
@@ -342,6 +361,7 @@ func (p *parser) parseInlineTable(b []byte) (ast.Reference, []byte, error) {
parent := p.builder.Push(ast.Node{
Kind: ast.InlineTable,
})
start := b
first := true
@@ -356,7 +376,8 @@ func (p *parser) parseInlineTable(b []byte) (ast.Reference, []byte, error) {
b = p.parseWhitespace(b)
if len(b) == 0 {
return parent, nil, newDecodeError(previousB[:1], "inline table is incomplete")
p.SetRaw(parent, start, b)
return parent, b, newDecodeError(previousB[:1], "inline table is incomplete")
}
if b[0] == '}' {
@@ -366,7 +387,8 @@ func (p *parser) parseInlineTable(b []byte) (ast.Reference, []byte, error) {
if !first {
b, err = expect(',', b)
if err != nil {
return parent, nil, err
p.SetRaw(parent, start, b)
return parent, b, err
}
b = p.parseWhitespace(b)
}
@@ -375,7 +397,8 @@ func (p *parser) parseInlineTable(b []byte) (ast.Reference, []byte, error) {
kv, b, err = p.parseKeyval(b)
if err != nil {
return parent, nil, err
p.SetRaw(parent, start, b)
return parent, b, err
}
if first {
@@ -390,6 +413,7 @@ func (p *parser) parseInlineTable(b []byte) (ast.Reference, []byte, error) {
rest, err := expect('}', b)
p.SetRaw(parent, start, b)
return parent, rest, err
}
@@ -403,6 +427,7 @@ func (p *parser) parseValArray(b []byte) (ast.Reference, []byte, error) {
// array-sep = %x2C ; , Comma
// ws-comment-newline = *( wschar / [ comment ] newline )
arrayStart := b
start := b
b = b[1:]
parent := p.builder.Push(ast.Node{
@@ -417,11 +442,13 @@ func (p *parser) parseValArray(b []byte) (ast.Reference, []byte, error) {
for len(b) > 0 {
b, err = p.parseOptionalWhitespaceCommentNewline(b)
if err != nil {
return parent, nil, err
p.SetRaw(parent, start, b)
return parent, b, err
}
if len(b) == 0 {
return parent, nil, newDecodeError(arrayStart[:1], "array is incomplete")
p.SetRaw(parent, start, b)
return parent, b, newDecodeError(arrayStart[:1], "array is incomplete")
}
if b[0] == ']' {
@@ -430,16 +457,19 @@ func (p *parser) parseValArray(b []byte) (ast.Reference, []byte, error) {
if b[0] == ',' {
if first {
return parent, nil, newDecodeError(b[0:1], "array cannot start with comma")
p.SetRaw(parent, start, b)
return parent, b, newDecodeError(b[0:1], "array cannot start with comma")
}
b = b[1:]
b, err = p.parseOptionalWhitespaceCommentNewline(b)
if err != nil {
return parent, nil, err
p.SetRaw(parent, start, b)
return parent, b, err
}
} else if !first {
return parent, nil, newDecodeError(b[0:1], "array elements must be separated by commas")
p.SetRaw(parent, start, b)
return parent, b, newDecodeError(b[0:1], "array elements must be separated by commas")
}
// TOML allows trailing commas in arrays.
@@ -450,7 +480,8 @@ func (p *parser) parseValArray(b []byte) (ast.Reference, []byte, error) {
var valueRef ast.Reference
valueRef, b, err = p.parseVal(b)
if err != nil {
return parent, nil, err
p.SetRaw(parent, start, b)
return parent, b, err
}
if first {
@@ -462,13 +493,16 @@ func (p *parser) parseValArray(b []byte) (ast.Reference, []byte, error) {
b, err = p.parseOptionalWhitespaceCommentNewline(b)
if err != nil {
return parent, nil, err
p.SetRaw(parent, start, b)
return parent, b, err
}
first = false
}
rest, err := expect(']', b)
p.SetRaw(parent, start, rest)
return parent, rest, err
}
@@ -480,7 +514,7 @@ func (p *parser) parseOptionalWhitespaceCommentNewline(b []byte) ([]byte, error)
if len(b) > 0 && b[0] == '#' {
_, b, err = scanComment(b)
if err != nil {
return nil, err
return b, err
}
}
@@ -491,7 +525,7 @@ func (p *parser) parseOptionalWhitespaceCommentNewline(b []byte) ([]byte, error)
if b[0] == '\n' || b[0] == '\r' {
b, err = p.parseNewline(b)
if err != nil {
return nil, err
return b, err
}
} else {
break
@@ -504,7 +538,7 @@ func (p *parser) parseOptionalWhitespaceCommentNewline(b []byte) ([]byte, error)
func (p *parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) {
token, rest, err := scanMultilineLiteralString(b)
if err != nil {
return nil, nil, nil, err
return nil, nil, rest, err
}
i := 3
@@ -533,7 +567,7 @@ func (p *parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, er
// mlb-escaped-nl = escape ws newline *( wschar / newline )
token, escaped, rest, err := scanMultilineBasicString(b)
if err != nil {
return nil, nil, nil, err
return nil, nil, rest, err
}
i := 3
@@ -555,7 +589,7 @@ func (p *parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, er
if verr.Zero() {
return token, str, rest, nil
}
return nil, nil, nil, newDecodeError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
return nil, nil, rest, newDecodeError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
}
var builder bytes.Buffer
@@ -622,26 +656,26 @@ func (p *parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, er
case 'u':
x, err := hexToRune(atmost(token[i+1:], 4), 4)
if err != nil {
return nil, nil, nil, err
return nil, nil, rest, err
}
builder.WriteRune(x)
i += 4
case 'U':
x, err := hexToRune(atmost(token[i+1:], 8), 8)
if err != nil {
return nil, nil, nil, err
return nil, nil, rest, err
}
builder.WriteRune(x)
i += 8
default:
return nil, nil, nil, newDecodeError(token[i:i+1], "invalid escaped character %#U", c)
return nil, nil, rest, newDecodeError(token[i:i+1], "invalid escaped character %#U", c)
}
i++
} else {
size := utf8ValidNext(token[i:])
if size == 0 {
return nil, nil, nil, newDecodeError(token[i:i+1], "invalid character %#U", c)
return nil, nil, rest, newDecodeError(token[i:i+1], "invalid character %#U", c)
}
builder.Write(token[i : i+size])
i += size
@@ -662,7 +696,7 @@ func (p *parser) parseKey(b []byte) (ast.Reference, []byte, error) {
// dot-sep = ws %x2E ws ; . Period
raw, key, b, err := p.parseSimpleKey(b)
if err != nil {
return ast.InvalidReference, nil, err
return ast.InvalidReference, b, err
}
ref := p.builder.Push(ast.Node{
@@ -678,7 +712,7 @@ func (p *parser) parseKey(b []byte) (ast.Reference, []byte, error) {
raw, key, b, err = p.parseSimpleKey(b)
if err != nil {
return ref, nil, err
return ref, b, err
}
p.builder.PushAndChain(ast.Node{
@@ -696,7 +730,7 @@ func (p *parser) parseKey(b []byte) (ast.Reference, []byte, error) {
func (p *parser) parseSimpleKey(b []byte) (raw, key, rest []byte, err error) {
if len(b) == 0 {
return nil, nil, nil, newDecodeError(b, "expected key but found none")
return nil, nil, b, newDecodeError(b, "expected key but found none")
}
// simple-key = quoted-key / unquoted-key
@@ -711,7 +745,7 @@ func (p *parser) parseSimpleKey(b []byte) (raw, key, rest []byte, err error) {
key, rest = scanUnquotedKey(b)
return key, key, rest, nil
default:
return nil, nil, nil, newDecodeError(b[0:1], "invalid character at start of key: %c", b[0])
return nil, nil, b[1:], newDecodeError(b[0:1], "invalid character at start of key: %c", b[0])
}
}
@@ -733,7 +767,7 @@ func (p *parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
// escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX U+XXXXXXXX
token, escaped, rest, err := scanBasicString(b)
if err != nil {
return nil, nil, nil, err
return nil, nil, rest, err
}
startIdx := len(`"`)
@@ -748,7 +782,7 @@ func (p *parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
if verr.Zero() {
return token, str, rest, nil
}
return nil, nil, nil, newDecodeError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
return nil, nil, rest, newDecodeError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
}
i := startIdx
@@ -781,7 +815,7 @@ func (p *parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
case 'u':
x, err := hexToRune(token[i+1:len(token)-1], 4)
if err != nil {
return nil, nil, nil, err
return nil, nil, rest, err
}
builder.WriteRune(x)
@@ -789,19 +823,19 @@ func (p *parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
case 'U':
x, err := hexToRune(token[i+1:len(token)-1], 8)
if err != nil {
return nil, nil, nil, err
return nil, nil, rest, err
}
builder.WriteRune(x)
i += 8
default:
return nil, nil, nil, newDecodeError(token[i:i+1], "invalid escaped character %#U", c)
return nil, nil, rest, newDecodeError(token[i:i+1], "invalid escaped character %#U", c)
}
i++
} else {
size := utf8ValidNext(token[i:])
if size == 0 {
return nil, nil, nil, newDecodeError(token[i:i+1], "invalid character %#U", c)
return nil, nil, rest, newDecodeError(token[i:i+1], "invalid character %#U", c)
}
builder.Write(token[i : i+size])
i += size
@@ -854,21 +888,23 @@ func (p *parser) parseIntOrFloatOrDateTime(b []byte) (ast.Reference, []byte, err
switch b[0] {
case 'i':
if !scanFollowsInf(b) {
return ast.InvalidReference, nil, newDecodeError(atmost(b, 3), "expected 'inf'")
return ast.InvalidReference, b, newDecodeError(atmost(b, 3), "expected 'inf'")
}
return p.builder.Push(ast.Node{
Kind: ast.Float,
Data: b[:3],
Raw: p.Range(b[:3]),
}), b[3:], nil
case 'n':
if !scanFollowsNan(b) {
return ast.InvalidReference, nil, newDecodeError(atmost(b, 3), "expected 'nan'")
return ast.InvalidReference, b, newDecodeError(atmost(b, 3), "expected 'nan'")
}
return p.builder.Push(ast.Node{
Kind: ast.Float,
Data: b[:3],
Raw: p.Range(b[:3]),
}), b[3:], nil
case '+', '-':
return p.scanIntOrFloat(b)
@@ -960,6 +996,7 @@ byteLoop:
return p.builder.Push(ast.Node{
Kind: kind,
Data: b[:i],
Raw: p.Range(b[:i]),
}), b[i:], nil
}
@@ -993,6 +1030,7 @@ func (p *parser) scanIntOrFloat(b []byte) (ast.Reference, []byte, error) {
return p.builder.Push(ast.Node{
Kind: ast.Integer,
Data: b[:i],
Raw: p.Range(b[:i]),
}), b[i:], nil
}
@@ -1016,10 +1054,11 @@ func (p *parser) scanIntOrFloat(b []byte) (ast.Reference, []byte, error) {
return p.builder.Push(ast.Node{
Kind: ast.Float,
Data: b[:i+3],
Raw: p.Range(b[:i+3]),
}), b[i+3:], nil
}
return ast.InvalidReference, nil, newDecodeError(b[i:i+1], "unexpected character 'i' while scanning for a number")
return ast.InvalidReference, b[i:], newDecodeError(b[i:i+1], "unexpected character 'i' while scanning for a number")
}
if c == 'n' {
@@ -1027,10 +1066,11 @@ func (p *parser) scanIntOrFloat(b []byte) (ast.Reference, []byte, error) {
return p.builder.Push(ast.Node{
Kind: ast.Float,
Data: b[:i+3],
Raw: p.Range(b[:i+3]),
}), b[i+3:], nil
}
return ast.InvalidReference, nil, newDecodeError(b[i:i+1], "unexpected character 'n' while scanning for a number")
return ast.InvalidReference, b[i:], newDecodeError(b[i:i+1], "unexpected character 'n' while scanning for a number")
}
break
@@ -1049,6 +1089,7 @@ func (p *parser) scanIntOrFloat(b []byte) (ast.Reference, []byte, error) {
return p.builder.Push(ast.Node{
Kind: kind,
Data: b[:i],
Raw: p.Range(b[:i]),
}), b[i:], nil
}
@@ -1075,11 +1116,11 @@ func isValidBinaryRune(r byte) bool {
func expect(x byte, b []byte) ([]byte, error) {
if len(b) == 0 {
return nil, newDecodeError(b, "expected character %c but the document ended here", x)
return b, newDecodeError(b, "expected character %c but the document ended here", x)
}
if b[0] != x {
return nil, newDecodeError(b[0:1], "expected character %c", x)
return b, newDecodeError(b[0:1], "expected character %c", x)
}
return b[1:], nil
+19 -19
View File
@@ -54,16 +54,16 @@ func scanLiteralString(b []byte) ([]byte, []byte, error) {
case '\'':
return b[:i+1], b[i+1:], nil
case '\n', '\r':
return nil, nil, newDecodeError(b[i:i+1], "literal strings cannot have new lines")
return nil, b[i+1:], newDecodeError(b[i:i+1], "literal strings cannot have new lines")
}
size := utf8ValidNext(b[i:])
if size == 0 {
return nil, nil, newDecodeError(b[i:i+1], "invalid character")
return nil, b[i+1:], newDecodeError(b[i:i+1], "invalid character")
}
i += size
}
return nil, nil, newDecodeError(b[len(b):], "unterminated literal string")
return nil, b[len(b):], newDecodeError(b[len(b):], "unterminated literal string")
}
func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) {
@@ -98,39 +98,39 @@ func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) {
i++
if i < len(b) && b[i] == '\'' {
return nil, nil, newDecodeError(b[i-3:i+1], "''' not allowed in multiline literal string")
return nil, b[i:], newDecodeError(b[i-3:i+1], "''' not allowed in multiline literal string")
}
return b[:i], b[i:], nil
}
case '\r':
if len(b) < i+2 {
return nil, nil, newDecodeError(b[len(b):], `need a \n after \r`)
return nil, b[i:], newDecodeError(b[len(b):], `need a \n after \r`)
}
if b[i+1] != '\n' {
return nil, nil, newDecodeError(b[i:i+2], `need a \n after \r`)
return nil, b[i+2:], newDecodeError(b[i:i+2], `need a \n after \r`)
}
i += 2 // skip the \n
continue
}
size := utf8ValidNext(b[i:])
if size == 0 {
return nil, nil, newDecodeError(b[i:i+1], "invalid character")
return nil, b[i:], newDecodeError(b[i:i+1], "invalid character")
}
i += size
}
return nil, nil, newDecodeError(b[len(b):], `multiline literal string not terminated by '''`)
return nil, b[len(b):], newDecodeError(b[len(b):], `multiline literal string not terminated by '''`)
}
func scanWindowsNewline(b []byte) ([]byte, []byte, error) {
const lenCRLF = 2
if len(b) < lenCRLF {
return nil, nil, newDecodeError(b, "windows new line expected")
return nil, b, newDecodeError(b, "windows new line expected")
}
if b[1] != '\n' {
return nil, nil, newDecodeError(b, `windows new line should be \r\n`)
return nil, b[2:], newDecodeError(b, `windows new line should be \r\n`)
}
return b[:lenCRLF], b[lenCRLF:], nil
@@ -169,7 +169,7 @@ func scanComment(b []byte) ([]byte, []byte, error) {
}
size := utf8ValidNext(b[i:])
if size == 0 {
return nil, nil, newDecodeError(b[i:i+1], "invalid character in comment")
return nil, b[i+1:], newDecodeError(b[i:i+1], "invalid character in comment")
}
i += size
@@ -192,17 +192,17 @@ func scanBasicString(b []byte) ([]byte, bool, []byte, error) {
case '"':
return b[:i+1], escaped, b[i+1:], nil
case '\n', '\r':
return nil, escaped, nil, newDecodeError(b[i:i+1], "basic strings cannot have new lines")
return nil, escaped, b[i+1:], newDecodeError(b[i:i+1], "basic strings cannot have new lines")
case '\\':
if len(b) < i+2 {
return nil, escaped, nil, newDecodeError(b[i:i+1], "need a character after \\")
return nil, escaped, b[i+1:], newDecodeError(b[i:i+1], "need a character after \\")
}
escaped = true
i++ // skip the next character
}
}
return nil, escaped, nil, newDecodeError(b[len(b):], `basic string not terminated by "`)
return nil, escaped, b[len(b):], newDecodeError(b[len(b):], `basic string not terminated by "`)
}
func scanMultilineBasicString(b []byte) ([]byte, bool, []byte, error) {
@@ -243,27 +243,27 @@ func scanMultilineBasicString(b []byte) ([]byte, bool, []byte, error) {
i++
if i < len(b) && b[i] == '"' {
return nil, escaped, nil, newDecodeError(b[i-3:i+1], `""" not allowed in multiline basic string`)
return nil, escaped, b[i+1:], newDecodeError(b[i-3:i+1], `""" not allowed in multiline basic string`)
}
return b[:i], escaped, b[i:], nil
}
case '\\':
if len(b) < i+2 {
return nil, escaped, nil, newDecodeError(b[len(b):], "need a character after \\")
return nil, escaped, b[len(b):], newDecodeError(b[len(b):], "need a character after \\")
}
escaped = true
i++ // skip the next character
case '\r':
if len(b) < i+2 {
return nil, escaped, nil, newDecodeError(b[len(b):], `need a \n after \r`)
return nil, escaped, b[len(b):], newDecodeError(b[len(b):], `need a \n after \r`)
}
if b[i+1] != '\n' {
return nil, escaped, nil, newDecodeError(b[i:i+2], `need a \n after \r`)
return nil, escaped, b[i+2:], newDecodeError(b[i:i+2], `need a \n after \r`)
}
i++ // skip the \n
}
}
return nil, escaped, nil, newDecodeError(b[len(b):], `multiline basic string not terminated by """`)
return nil, escaped, b[len(b):], newDecodeError(b[len(b):], `multiline basic string not terminated by """`)
}