AST Tweaks (#551)
* Use pointers instead of copying around ast.Node

  Node is a 56B struct that is constantly in the hot path. Passing nodes around by copy had a cost that started to add up. This change replaces those copies with pointers. Using unsafe pointer arithmetic and converting sibling/child indexes to relative offsets, it removes the need to carry around a pointer to the root of the tree. This saves 8B per Node. This space will be used to store an extra []byte slice to provide contextual error handling on all nodes, including the ones whose data is different from the raw input (for example: strings with escaped characters), while staying under the size of a cache line.
* Remove conditional
* Add Raw to track range in data for parsed values
* Simplify reference tracking
This commit is contained in:
@@ -5,6 +5,7 @@ import (
|
||||
"strconv"
|
||||
|
||||
"github.com/pelletier/go-toml/v2/internal/ast"
|
||||
"github.com/pelletier/go-toml/v2/internal/danger"
|
||||
)
|
||||
|
||||
type parser struct {
|
||||
@@ -16,9 +17,20 @@ type parser struct {
|
||||
first bool
|
||||
}
|
||||
|
||||
func (p *parser) Range(b []byte) ast.Range {
|
||||
return ast.Range{
|
||||
Offset: uint32(danger.SubsliceOffset(p.data, b)),
|
||||
Length: uint32(len(b)),
|
||||
}
|
||||
}
|
||||
|
||||
func (p *parser) Raw(raw ast.Range) []byte {
|
||||
return p.data[raw.Offset : raw.Offset+raw.Length]
|
||||
}
|
||||
|
||||
func (p *parser) Reset(b []byte) {
|
||||
p.builder.Reset()
|
||||
p.ref = ast.Reference{}
|
||||
p.ref = ast.InvalidReference
|
||||
p.data = b
|
||||
p.left = b
|
||||
p.err = nil
|
||||
@@ -32,7 +44,7 @@ func (p *parser) NextExpression() bool {
|
||||
}
|
||||
|
||||
p.builder.Reset()
|
||||
p.ref = ast.Reference{}
|
||||
p.ref = ast.InvalidReference
|
||||
|
||||
for {
|
||||
if len(p.left) == 0 || p.err != nil {
|
||||
@@ -61,7 +73,7 @@ func (p *parser) NextExpression() bool {
|
||||
}
|
||||
}
|
||||
|
||||
func (p *parser) Expression() ast.Node {
|
||||
func (p *parser) Expression() *ast.Node {
|
||||
return p.builder.NodeAt(p.ref)
|
||||
}
|
||||
|
||||
@@ -86,7 +98,7 @@ func (p *parser) parseExpression(b []byte) (ast.Reference, []byte, error) {
|
||||
// expression = ws [ comment ]
|
||||
// expression =/ ws keyval ws [ comment ]
|
||||
// expression =/ ws table ws [ comment ]
|
||||
var ref ast.Reference
|
||||
ref := ast.InvalidReference
|
||||
|
||||
b = p.parseWhitespace(b)
|
||||
|
||||
@@ -197,7 +209,7 @@ func (p *parser) parseKeyval(b []byte) (ast.Reference, []byte, error) {
|
||||
|
||||
key, b, err := p.parseKey(b)
|
||||
if err != nil {
|
||||
return ast.Reference{}, nil, err
|
||||
return ast.InvalidReference, nil, err
|
||||
}
|
||||
|
||||
// keyval-sep = ws %x3D ws ; =
|
||||
@@ -205,12 +217,12 @@ func (p *parser) parseKeyval(b []byte) (ast.Reference, []byte, error) {
|
||||
b = p.parseWhitespace(b)
|
||||
|
||||
if len(b) == 0 {
|
||||
return ast.Reference{}, nil, newDecodeError(b, "expected = after a key, but the document ends there")
|
||||
return ast.InvalidReference, nil, newDecodeError(b, "expected = after a key, but the document ends there")
|
||||
}
|
||||
|
||||
b, err = expect('=', b)
|
||||
if err != nil {
|
||||
return ast.Reference{}, nil, err
|
||||
return ast.InvalidReference, nil, err
|
||||
}
|
||||
|
||||
b = p.parseWhitespace(b)
|
||||
@@ -229,7 +241,7 @@ func (p *parser) parseKeyval(b []byte) (ast.Reference, []byte, error) {
|
||||
//nolint:cyclop,funlen
|
||||
func (p *parser) parseVal(b []byte) (ast.Reference, []byte, error) {
|
||||
// val = string / boolean / array / inline-table / date-time / float / integer
|
||||
var ref ast.Reference
|
||||
ref := ast.InvalidReference
|
||||
|
||||
if len(b) == 0 {
|
||||
return ref, nil, newDecodeError(b, "expected value, not eof")
|
||||
@@ -240,32 +252,36 @@ func (p *parser) parseVal(b []byte) (ast.Reference, []byte, error) {
|
||||
|
||||
switch c {
|
||||
case '"':
|
||||
var raw []byte
|
||||
var v []byte
|
||||
if scanFollowsMultilineBasicStringDelimiter(b) {
|
||||
v, b, err = p.parseMultilineBasicString(b)
|
||||
raw, v, b, err = p.parseMultilineBasicString(b)
|
||||
} else {
|
||||
v, b, err = p.parseBasicString(b)
|
||||
raw, v, b, err = p.parseBasicString(b)
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
ref = p.builder.Push(ast.Node{
|
||||
Kind: ast.String,
|
||||
Raw: p.Range(raw),
|
||||
Data: v,
|
||||
})
|
||||
}
|
||||
|
||||
return ref, b, err
|
||||
case '\'':
|
||||
var raw []byte
|
||||
var v []byte
|
||||
if scanFollowsMultilineLiteralStringDelimiter(b) {
|
||||
v, b, err = p.parseMultilineLiteralString(b)
|
||||
raw, v, b, err = p.parseMultilineLiteralString(b)
|
||||
} else {
|
||||
v, b, err = p.parseLiteralString(b)
|
||||
raw, v, b, err = p.parseLiteralString(b)
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
ref = p.builder.Push(ast.Node{
|
||||
Kind: ast.String,
|
||||
Raw: p.Range(raw),
|
||||
Data: v,
|
||||
})
|
||||
}
|
||||
@@ -310,13 +326,13 @@ func atmost(b []byte, n int) []byte {
|
||||
return b[:n]
|
||||
}
|
||||
|
||||
func (p *parser) parseLiteralString(b []byte) ([]byte, []byte, error) {
|
||||
func (p *parser) parseLiteralString(b []byte) ([]byte, []byte, []byte, error) {
|
||||
v, rest, err := scanLiteralString(b)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
|
||||
return v[1 : len(v)-1], rest, nil
|
||||
return v, v[1 : len(v)-1], rest, nil
|
||||
}
|
||||
|
||||
func (p *parser) parseInlineTable(b []byte) (ast.Reference, []byte, error) {
|
||||
@@ -476,10 +492,10 @@ func (p *parser) parseOptionalWhitespaceCommentNewline(b []byte) ([]byte, error)
|
||||
return b, nil
|
||||
}
|
||||
|
||||
func (p *parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, error) {
|
||||
func (p *parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) {
|
||||
token, rest, err := scanMultilineLiteralString(b)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
|
||||
i := 3
|
||||
@@ -491,11 +507,11 @@ func (p *parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, error) {
|
||||
i += 2
|
||||
}
|
||||
|
||||
return token[i : len(token)-3], rest, err
|
||||
return token, token[i : len(token)-3], rest, err
|
||||
}
|
||||
|
||||
//nolint:funlen,gocognit,cyclop
|
||||
func (p *parser) parseMultilineBasicString(b []byte) ([]byte, []byte, error) {
|
||||
func (p *parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, error) {
|
||||
// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
|
||||
// ml-basic-string-delim
|
||||
// ml-basic-string-delim = 3quotation-mark
|
||||
@@ -508,7 +524,7 @@ func (p *parser) parseMultilineBasicString(b []byte) ([]byte, []byte, error) {
|
||||
// mlb-escaped-nl = escape ws newline *( wschar / newline )
|
||||
token, rest, err := scanMultilineBasicString(b)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
|
||||
i := 3
|
||||
@@ -529,7 +545,7 @@ func (p *parser) parseMultilineBasicString(b []byte) ([]byte, []byte, error) {
|
||||
}
|
||||
}
|
||||
if i == endIdx {
|
||||
return token[startIdx:endIdx], rest, nil
|
||||
return token, token[startIdx:endIdx], rest, nil
|
||||
}
|
||||
|
||||
var builder bytes.Buffer
|
||||
@@ -579,7 +595,7 @@ func (p *parser) parseMultilineBasicString(b []byte) ([]byte, []byte, error) {
|
||||
case 'u':
|
||||
x, err := hexToString(atmost(token[i+1:], 4), 4)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
|
||||
builder.WriteString(x)
|
||||
@@ -587,20 +603,20 @@ func (p *parser) parseMultilineBasicString(b []byte) ([]byte, []byte, error) {
|
||||
case 'U':
|
||||
x, err := hexToString(atmost(token[i+1:], 8), 8)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
|
||||
builder.WriteString(x)
|
||||
i += 8
|
||||
default:
|
||||
return nil, nil, newDecodeError(token[i:i+1], "invalid escaped character %#U", c)
|
||||
return nil, nil, nil, newDecodeError(token[i:i+1], "invalid escaped character %#U", c)
|
||||
}
|
||||
} else {
|
||||
builder.WriteByte(c)
|
||||
}
|
||||
}
|
||||
|
||||
return builder.Bytes(), rest, nil
|
||||
return token, builder.Bytes(), rest, nil
|
||||
}
|
||||
|
||||
func (p *parser) parseKey(b []byte) (ast.Reference, []byte, error) {
|
||||
@@ -612,13 +628,14 @@ func (p *parser) parseKey(b []byte) (ast.Reference, []byte, error) {
|
||||
// dotted-key = simple-key 1*( dot-sep simple-key )
|
||||
//
|
||||
// dot-sep = ws %x2E ws ; . Period
|
||||
key, b, err := p.parseSimpleKey(b)
|
||||
raw, key, b, err := p.parseSimpleKey(b)
|
||||
if err != nil {
|
||||
return ast.Reference{}, nil, err
|
||||
return ast.InvalidReference, nil, err
|
||||
}
|
||||
|
||||
ref := p.builder.Push(ast.Node{
|
||||
Kind: ast.Key,
|
||||
Raw: p.Range(raw),
|
||||
Data: key,
|
||||
})
|
||||
|
||||
@@ -627,13 +644,14 @@ func (p *parser) parseKey(b []byte) (ast.Reference, []byte, error) {
|
||||
if len(b) > 0 && b[0] == '.' {
|
||||
b = p.parseWhitespace(b[1:])
|
||||
|
||||
key, b, err = p.parseSimpleKey(b)
|
||||
raw, key, b, err = p.parseSimpleKey(b)
|
||||
if err != nil {
|
||||
return ref, nil, err
|
||||
}
|
||||
|
||||
p.builder.PushAndChain(ast.Node{
|
||||
Kind: ast.Key,
|
||||
Raw: p.Range(raw),
|
||||
Data: key,
|
||||
})
|
||||
} else {
|
||||
@@ -644,12 +662,12 @@ func (p *parser) parseKey(b []byte) (ast.Reference, []byte, error) {
|
||||
return ref, b, nil
|
||||
}
|
||||
|
||||
func (p *parser) parseSimpleKey(b []byte) (key, rest []byte, err error) {
|
||||
func (p *parser) parseSimpleKey(b []byte) (raw, key, rest []byte, err error) {
|
||||
// simple-key = quoted-key / unquoted-key
|
||||
// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
|
||||
// quoted-key = basic-string / literal-string
|
||||
if len(b) == 0 {
|
||||
return nil, nil, newDecodeError(b, "key is incomplete")
|
||||
return nil, nil, nil, newDecodeError(b, "key is incomplete")
|
||||
}
|
||||
|
||||
switch {
|
||||
@@ -659,14 +677,14 @@ func (p *parser) parseSimpleKey(b []byte) (key, rest []byte, err error) {
|
||||
return p.parseBasicString(b)
|
||||
case isUnquotedKeyChar(b[0]):
|
||||
key, rest = scanUnquotedKey(b)
|
||||
return key, rest, nil
|
||||
return key, key, rest, nil
|
||||
default:
|
||||
return nil, nil, newDecodeError(b[0:1], "invalid character at start of key: %c", b[0])
|
||||
return nil, nil, nil, newDecodeError(b[0:1], "invalid character at start of key: %c", b[0])
|
||||
}
|
||||
}
|
||||
|
||||
//nolint:funlen,cyclop
|
||||
func (p *parser) parseBasicString(b []byte) ([]byte, []byte, error) {
|
||||
func (p *parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
|
||||
// basic-string = quotation-mark *basic-char quotation-mark
|
||||
// quotation-mark = %x22 ; "
|
||||
// basic-char = basic-unescaped / escaped
|
||||
@@ -683,7 +701,7 @@ func (p *parser) parseBasicString(b []byte) ([]byte, []byte, error) {
|
||||
// escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX U+XXXXXXXX
|
||||
token, rest, err := scanBasicString(b)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
|
||||
// fast path
|
||||
@@ -696,7 +714,7 @@ func (p *parser) parseBasicString(b []byte) ([]byte, []byte, error) {
|
||||
}
|
||||
}
|
||||
if i == endIdx {
|
||||
return token[startIdx:endIdx], rest, nil
|
||||
return token, token[startIdx:endIdx], rest, nil
|
||||
}
|
||||
|
||||
var builder bytes.Buffer
|
||||
@@ -726,7 +744,7 @@ func (p *parser) parseBasicString(b []byte) ([]byte, []byte, error) {
|
||||
case 'u':
|
||||
x, err := hexToString(token[i+1:len(token)-1], 4)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
|
||||
builder.WriteString(x)
|
||||
@@ -734,20 +752,20 @@ func (p *parser) parseBasicString(b []byte) ([]byte, []byte, error) {
|
||||
case 'U':
|
||||
x, err := hexToString(token[i+1:len(token)-1], 8)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
|
||||
builder.WriteString(x)
|
||||
i += 8
|
||||
default:
|
||||
return nil, nil, newDecodeError(token[i:i+1], "invalid escaped character %#U", c)
|
||||
return nil, nil, nil, newDecodeError(token[i:i+1], "invalid escaped character %#U", c)
|
||||
}
|
||||
} else {
|
||||
builder.WriteByte(c)
|
||||
}
|
||||
}
|
||||
|
||||
return builder.Bytes(), rest, nil
|
||||
return token, builder.Bytes(), rest, nil
|
||||
}
|
||||
|
||||
func hexToString(b []byte, length int) (string, error) {
|
||||
@@ -780,7 +798,7 @@ func (p *parser) parseIntOrFloatOrDateTime(b []byte) (ast.Reference, []byte, err
|
||||
switch b[0] {
|
||||
case 'i':
|
||||
if !scanFollowsInf(b) {
|
||||
return ast.Reference{}, nil, newDecodeError(atmost(b, 3), "expected 'inf'")
|
||||
return ast.InvalidReference, nil, newDecodeError(atmost(b, 3), "expected 'inf'")
|
||||
}
|
||||
|
||||
return p.builder.Push(ast.Node{
|
||||
@@ -789,7 +807,7 @@ func (p *parser) parseIntOrFloatOrDateTime(b []byte) (ast.Reference, []byte, err
|
||||
}), b[3:], nil
|
||||
case 'n':
|
||||
if !scanFollowsNan(b) {
|
||||
return ast.Reference{}, nil, newDecodeError(atmost(b, 3), "expected 'nan'")
|
||||
return ast.InvalidReference, nil, newDecodeError(atmost(b, 3), "expected 'nan'")
|
||||
}
|
||||
|
||||
return p.builder.Push(ast.Node{
|
||||
@@ -945,7 +963,7 @@ func (p *parser) scanIntOrFloat(b []byte) (ast.Reference, []byte, error) {
|
||||
}), b[i+3:], nil
|
||||
}
|
||||
|
||||
return ast.Reference{}, nil, newDecodeError(b[i:i+1], "unexpected character 'i' while scanning for a number")
|
||||
return ast.InvalidReference, nil, newDecodeError(b[i:i+1], "unexpected character 'i' while scanning for a number")
|
||||
}
|
||||
|
||||
if c == 'n' {
|
||||
@@ -956,14 +974,14 @@ func (p *parser) scanIntOrFloat(b []byte) (ast.Reference, []byte, error) {
|
||||
}), b[i+3:], nil
|
||||
}
|
||||
|
||||
return ast.Reference{}, nil, newDecodeError(b[i:i+1], "unexpected character 'n' while scanning for a number")
|
||||
return ast.InvalidReference, nil, newDecodeError(b[i:i+1], "unexpected character 'n' while scanning for a number")
|
||||
}
|
||||
|
||||
break
|
||||
}
|
||||
|
||||
if i == 0 {
|
||||
return ast.Reference{}, b, newDecodeError(b, "incomplete number")
|
||||
return ast.InvalidReference, b, newDecodeError(b, "incomplete number")
|
||||
}
|
||||
|
||||
kind := ast.Integer
|
||||
|
||||
Reference in New Issue
Block a user