517ceb4eb8
TOML v1.1.0 relaxes inline table syntax to allow newlines, comments, and trailing commas, matching the existing behavior of arrays.
1307 lines
28 KiB
Go
1307 lines
28 KiB
Go
package unstable
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"unicode"
|
|
|
|
"github.com/pelletier/go-toml/v2/internal/characters"
|
|
)
|
|
|
|
// ParserError reports a problem found at a specific place in the document.
//
// The Highlight slice points into the parser's input, so a ParserError must
// not outlive the Parser that produced it; using it after the parser has been
// reset may panic.
type ParserError struct {
	// Highlight is the portion of the input the error refers to.
	Highlight []byte
	// Message is the human-readable description returned by Error.
	Message string
	// Key is optional.
	Key []string
}
|
|
|
|
// Error is the implementation of the error interface.
|
|
func (e *ParserError) Error() string {
|
|
return e.Message
|
|
}
|
|
|
|
// NewParserError is a convenience function to create a ParserError
|
|
//
|
|
// Warning: Highlight needs to be a subslice of Parser.data, so only slices
|
|
// returned by Parser.Raw are valid candidates.
|
|
func NewParserError(highlight []byte, format string, args ...interface{}) error {
|
|
return &ParserError{
|
|
Highlight: highlight,
|
|
Message: fmt.Errorf(format, args...).Error(),
|
|
}
|
|
}
|
|
|
|
// Parser scans over a TOML-encoded document and generates an iterative AST.
|
|
//
|
|
// To prime the Parser, first reset it with the contents of a TOML document.
|
|
// Then, process all top-level expressions sequentially. See Example.
|
|
//
|
|
// Don't forget to check Error() after you're done parsing.
|
|
//
|
|
// Each top-level expression needs to be fully processed before calling
|
|
// NextExpression() again. Otherwise, calls to various Node methods may panic if
|
|
// the parser has moved on the next expression.
|
|
//
|
|
// For performance reasons, go-toml doesn't make a copy of the input bytes to
|
|
// the parser. Make sure to copy all the bytes you need to outlive the slice
|
|
// given to the parser.
|
|
type Parser struct {
|
|
data []byte
|
|
builder builder
|
|
ref reference
|
|
left []byte
|
|
err error
|
|
first bool
|
|
|
|
KeepComments bool
|
|
}
|
|
|
|
// Data returns the slice provided to the last call to Reset.
|
|
func (p *Parser) Data() []byte {
|
|
return p.data
|
|
}
|
|
|
|
// Range returns a range description that corresponds to a given slice of the
|
|
// input. If the argument is not a subslice of the parser input, this function
|
|
// panics.
|
|
func (p *Parser) Range(b []byte) Range {
|
|
return Range{
|
|
Offset: uint32(p.subsliceOffset(b)), //nolint:gosec // TOML documents are small
|
|
Length: uint32(len(b)), //nolint:gosec // TOML documents are small
|
|
}
|
|
}
|
|
|
|
// rangeOfToken computes the Range of a token given the remaining bytes after the token.
|
|
// This is used when the token was extracted from the beginning of some position,
|
|
// and 'rest' is what remains after the token.
|
|
func (p *Parser) rangeOfToken(token, rest []byte) Range {
|
|
offset := len(p.data) - len(token) - len(rest)
|
|
return Range{Offset: uint32(offset), Length: uint32(len(token))} //nolint:gosec // TOML documents are small
|
|
}
|
|
|
|
// subsliceOffset returns the byte offset of subslice b within p.data.
|
|
// b must be a suffix (tail) of p.data.
|
|
func (p *Parser) subsliceOffset(b []byte) int {
|
|
// b is a suffix of p.data, so its offset is len(p.data) - len(b)
|
|
return len(p.data) - len(b)
|
|
}
|
|
|
|
// Raw returns the slice corresponding to the bytes in the given range.
|
|
func (p *Parser) Raw(raw Range) []byte {
|
|
return p.data[raw.Offset : raw.Offset+raw.Length]
|
|
}
|
|
|
|
// Reset brings the parser to its initial state for a given input. It wipes an
|
|
// reuses internal storage to reduce allocation.
|
|
func (p *Parser) Reset(b []byte) {
|
|
p.builder.Reset()
|
|
p.ref = invalidReference
|
|
p.data = b
|
|
p.left = b
|
|
p.err = nil
|
|
p.first = true
|
|
}
|
|
|
|
// NextExpression parses the next top-level expression. If an expression was
|
|
// successfully parsed, it returns true. If the parser is at the end of the
|
|
// document or an error occurred, it returns false.
|
|
//
|
|
// Retrieve the parsed expression with Expression().
|
|
func (p *Parser) NextExpression() bool {
|
|
if len(p.left) == 0 || p.err != nil {
|
|
return false
|
|
}
|
|
|
|
p.builder.Reset()
|
|
p.ref = invalidReference
|
|
|
|
for {
|
|
if len(p.left) == 0 || p.err != nil {
|
|
return false
|
|
}
|
|
|
|
if !p.first {
|
|
p.left, p.err = p.parseNewline(p.left)
|
|
}
|
|
|
|
if len(p.left) == 0 || p.err != nil {
|
|
return false
|
|
}
|
|
|
|
p.ref, p.left, p.err = p.parseExpression(p.left)
|
|
|
|
if p.err != nil {
|
|
return false
|
|
}
|
|
|
|
p.first = false
|
|
|
|
if p.ref.Valid() {
|
|
return true
|
|
}
|
|
}
|
|
}
|
|
|
|
// Expression returns a pointer to the node representing the last successfully
|
|
// parsed expression.
|
|
func (p *Parser) Expression() *Node {
|
|
return p.builder.NodeAt(p.ref)
|
|
}
|
|
|
|
// Error returns any error that has occurred during parsing.
|
|
func (p *Parser) Error() error {
|
|
return p.err
|
|
}
|
|
|
|
// Position identifies a single point in the input.
type Position struct {
	// Offset is the number of bytes from the beginning of the input.
	Offset int
	// Line is the line number, starting at 1.
	Line int
	// Column is the column number, starting at 1.
	Column int
}
|
|
|
|
// Shape describes the position of a range in the input.
|
|
type Shape struct {
|
|
Start Position
|
|
End Position
|
|
}
|
|
|
|
// Shape returns the shape of the given range in the input. Will
|
|
// panic if the range is not a subslice of the input.
|
|
func (p *Parser) Shape(r Range) Shape {
|
|
return Shape{
|
|
Start: p.positionAt(int(r.Offset)),
|
|
End: p.positionAt(int(r.Offset + r.Length)),
|
|
}
|
|
}
|
|
|
|
// positionAt returns the position at the given byte offset in the document.
|
|
func (p *Parser) positionAt(offset int) Position {
|
|
lead := p.data[:offset]
|
|
|
|
return Position{
|
|
Offset: offset,
|
|
Line: bytes.Count(lead, []byte{'\n'}) + 1,
|
|
Column: len(lead) - bytes.LastIndex(lead, []byte{'\n'}),
|
|
}
|
|
}
|
|
|
|
func (p *Parser) parseNewline(b []byte) ([]byte, error) {
|
|
if b[0] == '\n' {
|
|
return b[1:], nil
|
|
}
|
|
|
|
if b[0] == '\r' {
|
|
_, rest, err := scanWindowsNewline(b)
|
|
return rest, err
|
|
}
|
|
|
|
return nil, NewParserError(b[0:1], "expected newline but got %#U", b[0])
|
|
}
|
|
|
|
func (p *Parser) parseComment(b []byte) (reference, []byte, error) {
|
|
ref := invalidReference
|
|
data, rest, err := scanComment(b)
|
|
if p.KeepComments && err == nil {
|
|
ref = p.builder.Push(Node{
|
|
Kind: Comment,
|
|
Raw: p.rangeOfToken(data, rest),
|
|
Data: data,
|
|
})
|
|
}
|
|
return ref, rest, err
|
|
}
|
|
|
|
func (p *Parser) parseExpression(b []byte) (reference, []byte, error) {
|
|
// expression = ws [ comment ]
|
|
// expression =/ ws keyval ws [ comment ]
|
|
// expression =/ ws table ws [ comment ]
|
|
ref := invalidReference
|
|
|
|
b = p.parseWhitespace(b)
|
|
|
|
if len(b) == 0 {
|
|
return ref, b, nil
|
|
}
|
|
|
|
if b[0] == '#' {
|
|
ref, rest, err := p.parseComment(b)
|
|
return ref, rest, err
|
|
}
|
|
|
|
if b[0] == '\n' || b[0] == '\r' {
|
|
return ref, b, nil
|
|
}
|
|
|
|
var err error
|
|
if b[0] == '[' {
|
|
ref, b, err = p.parseTable(b)
|
|
} else {
|
|
ref, b, err = p.parseKeyval(b)
|
|
}
|
|
|
|
if err != nil {
|
|
return ref, nil, err
|
|
}
|
|
|
|
b = p.parseWhitespace(b)
|
|
|
|
if len(b) > 0 && b[0] == '#' {
|
|
cref, rest, err := p.parseComment(b)
|
|
if cref != invalidReference {
|
|
p.builder.Chain(ref, cref)
|
|
}
|
|
return ref, rest, err
|
|
}
|
|
|
|
return ref, b, nil
|
|
}
|
|
|
|
func (p *Parser) parseTable(b []byte) (reference, []byte, error) {
|
|
// table = std-table / array-table
|
|
if len(b) > 1 && b[1] == '[' {
|
|
return p.parseArrayTable(b)
|
|
}
|
|
|
|
return p.parseStdTable(b)
|
|
}
|
|
|
|
func (p *Parser) parseArrayTable(b []byte) (reference, []byte, error) {
|
|
// array-table = array-table-open key array-table-close
|
|
// array-table-open = %x5B.5B ws ; [[ Double left square bracket
|
|
// array-table-close = ws %x5D.5D ; ]] Double right square bracket
|
|
ref := p.builder.Push(Node{
|
|
Kind: ArrayTable,
|
|
})
|
|
|
|
b = b[2:]
|
|
b = p.parseWhitespace(b)
|
|
|
|
k, b, err := p.parseKey(b)
|
|
if err != nil {
|
|
return ref, nil, err
|
|
}
|
|
|
|
p.builder.AttachChild(ref, k)
|
|
b = p.parseWhitespace(b)
|
|
|
|
b, err = expect(']', b)
|
|
if err != nil {
|
|
return ref, nil, err
|
|
}
|
|
|
|
b, err = expect(']', b)
|
|
|
|
return ref, b, err
|
|
}
|
|
|
|
func (p *Parser) parseStdTable(b []byte) (reference, []byte, error) {
|
|
// std-table = std-table-open key std-table-close
|
|
// std-table-open = %x5B ws ; [ Left square bracket
|
|
// std-table-close = ws %x5D ; ] Right square bracket
|
|
ref := p.builder.Push(Node{
|
|
Kind: Table,
|
|
})
|
|
|
|
b = b[1:]
|
|
b = p.parseWhitespace(b)
|
|
|
|
key, b, err := p.parseKey(b)
|
|
if err != nil {
|
|
return ref, nil, err
|
|
}
|
|
|
|
p.builder.AttachChild(ref, key)
|
|
|
|
b = p.parseWhitespace(b)
|
|
|
|
b, err = expect(']', b)
|
|
|
|
return ref, b, err
|
|
}
|
|
|
|
func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
|
|
// keyval = key keyval-sep val
|
|
ref := p.builder.Push(Node{
|
|
Kind: KeyValue,
|
|
})
|
|
|
|
key, b, err := p.parseKey(b)
|
|
if err != nil {
|
|
return invalidReference, nil, err
|
|
}
|
|
|
|
// keyval-sep = ws %x3D ws ; =
|
|
|
|
b = p.parseWhitespace(b)
|
|
|
|
if len(b) == 0 {
|
|
return invalidReference, nil, NewParserError(b, "expected = after a key, but the document ends there")
|
|
}
|
|
|
|
b, err = expect('=', b)
|
|
if err != nil {
|
|
return invalidReference, nil, err
|
|
}
|
|
|
|
b = p.parseWhitespace(b)
|
|
|
|
valRef, b, err := p.parseVal(b)
|
|
if err != nil {
|
|
return ref, b, err
|
|
}
|
|
|
|
p.builder.Chain(valRef, key)
|
|
p.builder.AttachChild(ref, valRef)
|
|
|
|
return ref, b, err
|
|
}
|
|
|
|
//nolint:cyclop,funlen
|
|
func (p *Parser) parseVal(b []byte) (reference, []byte, error) {
|
|
// val = string / boolean / array / inline-table / date-time / float / integer
|
|
ref := invalidReference
|
|
|
|
if len(b) == 0 {
|
|
return ref, nil, NewParserError(b, "expected value, not eof")
|
|
}
|
|
|
|
var err error
|
|
c := b[0]
|
|
|
|
switch c {
|
|
case '"':
|
|
var raw []byte
|
|
var v []byte
|
|
if scanFollowsMultilineBasicStringDelimiter(b) {
|
|
raw, v, b, err = p.parseMultilineBasicString(b)
|
|
} else {
|
|
raw, v, b, err = p.parseBasicString(b)
|
|
}
|
|
|
|
if err == nil {
|
|
ref = p.builder.Push(Node{
|
|
Kind: String,
|
|
Raw: p.rangeOfToken(raw, b),
|
|
Data: v,
|
|
})
|
|
}
|
|
|
|
return ref, b, err
|
|
case '\'':
|
|
var raw []byte
|
|
var v []byte
|
|
if scanFollowsMultilineLiteralStringDelimiter(b) {
|
|
raw, v, b, err = p.parseMultilineLiteralString(b)
|
|
} else {
|
|
raw, v, b, err = p.parseLiteralString(b)
|
|
}
|
|
|
|
if err == nil {
|
|
ref = p.builder.Push(Node{
|
|
Kind: String,
|
|
Raw: p.rangeOfToken(raw, b),
|
|
Data: v,
|
|
})
|
|
}
|
|
|
|
return ref, b, err
|
|
case 't':
|
|
if !scanFollowsTrue(b) {
|
|
return ref, nil, NewParserError(atmost(b, 4), "expected 'true'")
|
|
}
|
|
|
|
ref = p.builder.Push(Node{
|
|
Kind: Bool,
|
|
Data: b[:4],
|
|
})
|
|
|
|
return ref, b[4:], nil
|
|
case 'f':
|
|
if !scanFollowsFalse(b) {
|
|
return ref, nil, NewParserError(atmost(b, 5), "expected 'false'")
|
|
}
|
|
|
|
ref = p.builder.Push(Node{
|
|
Kind: Bool,
|
|
Data: b[:5],
|
|
})
|
|
|
|
return ref, b[5:], nil
|
|
case '[':
|
|
return p.parseValArray(b)
|
|
case '{':
|
|
return p.parseInlineTable(b)
|
|
default:
|
|
return p.parseIntOrFloatOrDateTime(b)
|
|
}
|
|
}
|
|
|
|
// atmost returns the first n bytes of b, or all of b when it holds n bytes or
// fewer.
func atmost(b []byte, n int) []byte {
	if len(b) > n {
		return b[:n]
	}

	return b
}
|
|
|
|
func (p *Parser) parseLiteralString(b []byte) ([]byte, []byte, []byte, error) {
|
|
v, rest, err := scanLiteralString(b)
|
|
if err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
|
|
return v, v[1 : len(v)-1], rest, nil
|
|
}
|
|
|
|
//nolint:funlen,cyclop
|
|
func (p *Parser) parseInlineTable(b []byte) (reference, []byte, error) {
|
|
// inline-table = inline-table-open [ inline-table-keyvals ] inline-table-close
|
|
// inline-table-open = %x7B ws ; {
|
|
// inline-table-close = ws %x7D ; }
|
|
// inline-table-sep = ws %x2C ws ; , Comma
|
|
// inline-table-keyvals = keyval [ inline-table-sep inline-table-keyvals ]
|
|
tableStart := b
|
|
parent := p.builder.Push(Node{
|
|
Kind: InlineTable,
|
|
Raw: p.rangeOfToken(b[:1], b[1:]),
|
|
})
|
|
|
|
first := true
|
|
|
|
lastChild := invalidReference
|
|
|
|
addChild := func(ref reference) {
|
|
if lastChild == invalidReference {
|
|
p.builder.AttachChild(parent, ref)
|
|
} else {
|
|
p.builder.Chain(lastChild, ref)
|
|
}
|
|
lastChild = ref
|
|
}
|
|
|
|
b = b[1:]
|
|
|
|
var err error
|
|
|
|
for len(b) > 0 {
|
|
var cref reference
|
|
cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
|
|
if err != nil {
|
|
return parent, nil, err
|
|
}
|
|
|
|
if cref != invalidReference {
|
|
addChild(cref)
|
|
}
|
|
|
|
if len(b) == 0 {
|
|
return parent, nil, NewParserError(tableStart[:1], "inline table is incomplete")
|
|
}
|
|
|
|
if b[0] == '}' {
|
|
break
|
|
}
|
|
|
|
if b[0] == ',' {
|
|
if first {
|
|
return parent, nil, NewParserError(b[0:1], "inline table cannot start with comma")
|
|
}
|
|
b = b[1:]
|
|
|
|
cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
|
|
if err != nil {
|
|
return parent, nil, err
|
|
}
|
|
if cref != invalidReference {
|
|
addChild(cref)
|
|
}
|
|
} else if !first {
|
|
return parent, nil, NewParserError(b[0:1], "inline table entries must be separated by commas")
|
|
}
|
|
|
|
// trailing comma: if '}' follows, stop
|
|
if len(b) > 0 && b[0] == '}' {
|
|
break
|
|
}
|
|
|
|
var kv reference
|
|
kv, b, err = p.parseKeyval(b)
|
|
if err != nil {
|
|
return parent, nil, err
|
|
}
|
|
|
|
addChild(kv)
|
|
|
|
cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
|
|
if err != nil {
|
|
return parent, nil, err
|
|
}
|
|
if cref != invalidReference {
|
|
addChild(cref)
|
|
}
|
|
|
|
first = false
|
|
}
|
|
|
|
rest, err := expect('}', b)
|
|
|
|
return parent, rest, err
|
|
}
|
|
|
|
//nolint:funlen,cyclop
|
|
func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
|
|
// array = array-open [ array-values ] ws-comment-newline array-close
|
|
// array-open = %x5B ; [
|
|
// array-close = %x5D ; ]
|
|
// array-values = ws-comment-newline val ws-comment-newline array-sep array-values
|
|
// array-values =/ ws-comment-newline val ws-comment-newline [ array-sep ]
|
|
// array-sep = %x2C ; , Comma
|
|
// ws-comment-newline = *( wschar / [ comment ] newline )
|
|
arrayStart := b
|
|
b = b[1:]
|
|
|
|
parent := p.builder.Push(Node{
|
|
Kind: Array,
|
|
})
|
|
|
|
// First indicates whether the parser is looking for the first element
|
|
// (non-comment) of the array.
|
|
first := true
|
|
|
|
lastChild := invalidReference
|
|
|
|
addChild := func(valueRef reference) {
|
|
if lastChild == invalidReference {
|
|
p.builder.AttachChild(parent, valueRef)
|
|
} else {
|
|
p.builder.Chain(lastChild, valueRef)
|
|
}
|
|
lastChild = valueRef
|
|
}
|
|
|
|
var err error
|
|
for len(b) > 0 {
|
|
var cref reference
|
|
cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
|
|
if err != nil {
|
|
return parent, nil, err
|
|
}
|
|
|
|
if cref != invalidReference {
|
|
addChild(cref)
|
|
}
|
|
|
|
if len(b) == 0 {
|
|
return parent, nil, NewParserError(arrayStart[:1], "array is incomplete")
|
|
}
|
|
|
|
if b[0] == ']' {
|
|
break
|
|
}
|
|
|
|
if b[0] == ',' {
|
|
if first {
|
|
return parent, nil, NewParserError(b[0:1], "array cannot start with comma")
|
|
}
|
|
b = b[1:]
|
|
|
|
cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
|
|
if err != nil {
|
|
return parent, nil, err
|
|
}
|
|
if cref != invalidReference {
|
|
addChild(cref)
|
|
}
|
|
} else if !first {
|
|
return parent, nil, NewParserError(b[0:1], "array elements must be separated by commas")
|
|
}
|
|
|
|
// TOML allows trailing commas in arrays.
|
|
if len(b) > 0 && b[0] == ']' {
|
|
break
|
|
}
|
|
|
|
var valueRef reference
|
|
valueRef, b, err = p.parseVal(b)
|
|
if err != nil {
|
|
return parent, nil, err
|
|
}
|
|
|
|
addChild(valueRef)
|
|
|
|
cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
|
|
if err != nil {
|
|
return parent, nil, err
|
|
}
|
|
if cref != invalidReference {
|
|
addChild(cref)
|
|
}
|
|
|
|
first = false
|
|
}
|
|
|
|
rest, err := expect(']', b)
|
|
|
|
return parent, rest, err
|
|
}
|
|
|
|
func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) (reference, []byte, error) {
|
|
rootCommentRef := invalidReference
|
|
latestCommentRef := invalidReference
|
|
|
|
addComment := func(ref reference) {
|
|
switch {
|
|
case rootCommentRef == invalidReference:
|
|
rootCommentRef = ref
|
|
case latestCommentRef == invalidReference:
|
|
p.builder.AttachChild(rootCommentRef, ref)
|
|
latestCommentRef = ref
|
|
default:
|
|
p.builder.Chain(latestCommentRef, ref)
|
|
latestCommentRef = ref
|
|
}
|
|
}
|
|
|
|
for len(b) > 0 {
|
|
var err error
|
|
b = p.parseWhitespace(b)
|
|
|
|
if len(b) > 0 && b[0] == '#' {
|
|
var ref reference
|
|
ref, b, err = p.parseComment(b)
|
|
if err != nil {
|
|
return invalidReference, nil, err
|
|
}
|
|
if ref != invalidReference {
|
|
addComment(ref)
|
|
}
|
|
}
|
|
|
|
if len(b) == 0 {
|
|
break
|
|
}
|
|
|
|
if b[0] == '\n' || b[0] == '\r' {
|
|
b, err = p.parseNewline(b)
|
|
if err != nil {
|
|
return invalidReference, nil, err
|
|
}
|
|
} else {
|
|
break
|
|
}
|
|
}
|
|
|
|
return rootCommentRef, b, nil
|
|
}
|
|
|
|
func (p *Parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) {
|
|
token, rest, err := scanMultilineLiteralString(b)
|
|
if err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
|
|
i := 3
|
|
|
|
// skip the immediate new line
|
|
if token[i] == '\n' {
|
|
i++
|
|
} else if token[i] == '\r' && token[i+1] == '\n' {
|
|
i += 2
|
|
}
|
|
|
|
return token, token[i : len(token)-3], rest, err
|
|
}
|
|
|
|
//nolint:funlen,gocognit,cyclop
|
|
func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, error) {
|
|
// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
|
|
// ml-basic-string-delim
|
|
// ml-basic-string-delim = 3quotation-mark
|
|
// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
|
|
//
|
|
// mlb-content = mlb-char / newline / mlb-escaped-nl
|
|
// mlb-char = mlb-unescaped / escaped
|
|
// mlb-quotes = 1*2quotation-mark
|
|
// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
|
|
// mlb-escaped-nl = escape ws newline *( wschar / newline )
|
|
token, escaped, rest, err := scanMultilineBasicString(b)
|
|
if err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
|
|
i := 3
|
|
|
|
// skip the immediate new line
|
|
if token[i] == '\n' {
|
|
i++
|
|
} else if token[i] == '\r' && token[i+1] == '\n' {
|
|
i += 2
|
|
}
|
|
|
|
// fast path
|
|
startIdx := i
|
|
endIdx := len(token) - len(`"""`)
|
|
|
|
if !escaped {
|
|
str := token[startIdx:endIdx]
|
|
highlight := characters.Utf8TomlValidAlreadyEscaped(str)
|
|
if len(highlight) == 0 {
|
|
return token, str, rest, nil
|
|
}
|
|
return nil, nil, nil, NewParserError(highlight, "invalid UTF-8")
|
|
}
|
|
|
|
var builder bytes.Buffer
|
|
|
|
// The scanner ensures that the token starts and ends with quotes and that
|
|
// escapes are balanced.
|
|
for i < len(token)-3 {
|
|
c := token[i]
|
|
|
|
//nolint:nestif
|
|
if c == '\\' {
|
|
// When the last non-whitespace character on a line is an unescaped \,
|
|
// it will be trimmed along with all whitespace (including newlines) up
|
|
// to the next non-whitespace character or closing delimiter.
|
|
|
|
isLastNonWhitespaceOnLine := false
|
|
j := 1
|
|
findEOLLoop:
|
|
for ; j < len(token)-3-i; j++ {
|
|
switch token[i+j] {
|
|
case ' ', '\t':
|
|
continue
|
|
case '\r':
|
|
if token[i+j+1] == '\n' {
|
|
continue
|
|
}
|
|
case '\n':
|
|
isLastNonWhitespaceOnLine = true
|
|
}
|
|
break findEOLLoop
|
|
}
|
|
if isLastNonWhitespaceOnLine {
|
|
i += j
|
|
for ; i < len(token)-3; i++ {
|
|
c := token[i]
|
|
if c != '\n' && c != '\r' && c != ' ' && c != '\t' {
|
|
i--
|
|
break
|
|
}
|
|
}
|
|
i++
|
|
continue
|
|
}
|
|
|
|
// handle escaping
|
|
i++
|
|
c = token[i]
|
|
|
|
switch c {
|
|
case '"', '\\':
|
|
builder.WriteByte(c)
|
|
case 'b':
|
|
builder.WriteByte('\b')
|
|
case 'f':
|
|
builder.WriteByte('\f')
|
|
case 'n':
|
|
builder.WriteByte('\n')
|
|
case 'r':
|
|
builder.WriteByte('\r')
|
|
case 't':
|
|
builder.WriteByte('\t')
|
|
case 'e':
|
|
builder.WriteByte(0x1B)
|
|
case 'x':
|
|
x, err := hexToRune(atmost(token[i+1:], 2), 2)
|
|
if err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
builder.WriteRune(x)
|
|
i += 2
|
|
case 'u':
|
|
x, err := hexToRune(atmost(token[i+1:], 4), 4)
|
|
if err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
builder.WriteRune(x)
|
|
i += 4
|
|
case 'U':
|
|
x, err := hexToRune(atmost(token[i+1:], 8), 8)
|
|
if err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
|
|
builder.WriteRune(x)
|
|
i += 8
|
|
default:
|
|
return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
|
|
}
|
|
i++
|
|
} else {
|
|
size := characters.Utf8ValidNext(token[i:])
|
|
if size == 0 {
|
|
return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
|
|
}
|
|
builder.Write(token[i : i+size])
|
|
i += size
|
|
}
|
|
}
|
|
|
|
return token, builder.Bytes(), rest, nil
|
|
}
|
|
|
|
func (p *Parser) parseKey(b []byte) (reference, []byte, error) {
|
|
// key = simple-key / dotted-key
|
|
// simple-key = quoted-key / unquoted-key
|
|
//
|
|
// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
|
|
// quoted-key = basic-string / literal-string
|
|
// dotted-key = simple-key 1*( dot-sep simple-key )
|
|
//
|
|
// dot-sep = ws %x2E ws ; . Period
|
|
raw, key, b, err := p.parseSimpleKey(b)
|
|
if err != nil {
|
|
return invalidReference, nil, err
|
|
}
|
|
|
|
ref := p.builder.Push(Node{
|
|
Kind: Key,
|
|
Raw: p.rangeOfToken(raw, b),
|
|
Data: key,
|
|
})
|
|
|
|
for {
|
|
b = p.parseWhitespace(b)
|
|
if len(b) > 0 && b[0] == '.' {
|
|
b = p.parseWhitespace(b[1:])
|
|
|
|
raw, key, b, err = p.parseSimpleKey(b)
|
|
if err != nil {
|
|
return ref, nil, err
|
|
}
|
|
|
|
p.builder.PushAndChain(Node{
|
|
Kind: Key,
|
|
Raw: p.rangeOfToken(raw, b),
|
|
Data: key,
|
|
})
|
|
} else {
|
|
break
|
|
}
|
|
}
|
|
|
|
return ref, b, nil
|
|
}
|
|
|
|
func (p *Parser) parseSimpleKey(b []byte) (raw, key, rest []byte, err error) {
|
|
if len(b) == 0 {
|
|
return nil, nil, nil, NewParserError(b, "expected key but found none")
|
|
}
|
|
|
|
// simple-key = quoted-key / unquoted-key
|
|
// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
|
|
// quoted-key = basic-string / literal-string
|
|
switch {
|
|
case b[0] == '\'':
|
|
return p.parseLiteralString(b)
|
|
case b[0] == '"':
|
|
return p.parseBasicString(b)
|
|
case isUnquotedKeyChar(b[0]):
|
|
key, rest = scanUnquotedKey(b)
|
|
return key, key, rest, nil
|
|
default:
|
|
return nil, nil, nil, NewParserError(b[0:1], "invalid character at start of key: %c", b[0])
|
|
}
|
|
}
|
|
|
|
//nolint:funlen,cyclop
|
|
func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
|
|
// basic-string = quotation-mark *basic-char quotation-mark
|
|
// quotation-mark = %x22 ; "
|
|
// basic-char = basic-unescaped / escaped
|
|
// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
|
|
// escaped = escape escape-seq-char
|
|
// escape-seq-char = %x22 ; " quotation mark U+0022
|
|
// escape-seq-char =/ %x5C ; \ reverse solidus U+005C
|
|
// escape-seq-char =/ %x62 ; b backspace U+0008
|
|
// escape-seq-char =/ %x66 ; f form feed U+000C
|
|
// escape-seq-char =/ %x6E ; n line feed U+000A
|
|
// escape-seq-char =/ %x72 ; r carriage return U+000D
|
|
// escape-seq-char =/ %x74 ; t tab U+0009
|
|
// escape-seq-char =/ %x75 4HEXDIG ; uXXXX U+XXXX
|
|
// escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX U+XXXXXXXX
|
|
token, escaped, rest, err := scanBasicString(b)
|
|
if err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
|
|
startIdx := len(`"`)
|
|
endIdx := len(token) - len(`"`)
|
|
|
|
// Fast path. If there is no escape sequence, the string should just be
|
|
// an UTF-8 encoded string, which is the same as Go. In that case,
|
|
// validate the string and return a direct reference to the buffer.
|
|
if !escaped {
|
|
str := token[startIdx:endIdx]
|
|
highlight := characters.Utf8TomlValidAlreadyEscaped(str)
|
|
if len(highlight) == 0 {
|
|
return token, str, rest, nil
|
|
}
|
|
return nil, nil, nil, NewParserError(highlight, "invalid UTF-8")
|
|
}
|
|
|
|
i := startIdx
|
|
|
|
var builder bytes.Buffer
|
|
|
|
// The scanner ensures that the token starts and ends with quotes and that
|
|
// escapes are balanced.
|
|
for i < len(token)-1 {
|
|
c := token[i]
|
|
if c == '\\' {
|
|
i++
|
|
c = token[i]
|
|
|
|
switch c {
|
|
case '"', '\\':
|
|
builder.WriteByte(c)
|
|
case 'b':
|
|
builder.WriteByte('\b')
|
|
case 'f':
|
|
builder.WriteByte('\f')
|
|
case 'n':
|
|
builder.WriteByte('\n')
|
|
case 'r':
|
|
builder.WriteByte('\r')
|
|
case 't':
|
|
builder.WriteByte('\t')
|
|
case 'e':
|
|
builder.WriteByte(0x1B)
|
|
case 'x':
|
|
x, err := hexToRune(token[i+1:len(token)-1], 2)
|
|
if err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
builder.WriteRune(x)
|
|
i += 2
|
|
case 'u':
|
|
x, err := hexToRune(token[i+1:len(token)-1], 4)
|
|
if err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
|
|
builder.WriteRune(x)
|
|
i += 4
|
|
case 'U':
|
|
x, err := hexToRune(token[i+1:len(token)-1], 8)
|
|
if err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
|
|
builder.WriteRune(x)
|
|
i += 8
|
|
default:
|
|
return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
|
|
}
|
|
i++
|
|
} else {
|
|
size := characters.Utf8ValidNext(token[i:])
|
|
if size == 0 {
|
|
return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
|
|
}
|
|
builder.Write(token[i : i+size])
|
|
i += size
|
|
}
|
|
}
|
|
|
|
return token, builder.Bytes(), rest, nil
|
|
}
|
|
|
|
func hexToRune(b []byte, length int) (rune, error) {
|
|
if len(b) < length {
|
|
return -1, NewParserError(b, "unicode point needs %d character, not %d", length, len(b))
|
|
}
|
|
b = b[:length]
|
|
|
|
var r uint32
|
|
for i, c := range b {
|
|
var d uint32
|
|
switch {
|
|
case '0' <= c && c <= '9':
|
|
d = uint32(c - '0')
|
|
case 'a' <= c && c <= 'f':
|
|
d = uint32(c - 'a' + 10)
|
|
case 'A' <= c && c <= 'F':
|
|
d = uint32(c - 'A' + 10)
|
|
default:
|
|
return -1, NewParserError(b[i:i+1], "non-hex character")
|
|
}
|
|
r = r*16 + d
|
|
}
|
|
|
|
if r > unicode.MaxRune || 0xD800 <= r && r < 0xE000 {
|
|
return -1, NewParserError(b, "escape sequence is invalid Unicode code point")
|
|
}
|
|
|
|
return rune(r), nil
|
|
}
|
|
|
|
func (p *Parser) parseWhitespace(b []byte) []byte {
|
|
// ws = *wschar
|
|
// wschar = %x20 ; Space
|
|
// wschar =/ %x09 ; Horizontal tab
|
|
_, rest := scanWhitespace(b)
|
|
|
|
return rest
|
|
}
|
|
|
|
//nolint:cyclop
|
|
func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error) {
|
|
switch b[0] {
|
|
case 'i':
|
|
if !scanFollowsInf(b) {
|
|
return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'inf'")
|
|
}
|
|
|
|
return p.builder.Push(Node{
|
|
Kind: Float,
|
|
Data: b[:3],
|
|
Raw: p.rangeOfToken(b[:3], b[3:]),
|
|
}), b[3:], nil
|
|
case 'n':
|
|
if !scanFollowsNan(b) {
|
|
return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'nan'")
|
|
}
|
|
|
|
return p.builder.Push(Node{
|
|
Kind: Float,
|
|
Data: b[:3],
|
|
Raw: p.rangeOfToken(b[:3], b[3:]),
|
|
}), b[3:], nil
|
|
case '+', '-':
|
|
return p.scanIntOrFloat(b)
|
|
}
|
|
|
|
if len(b) < 3 {
|
|
return p.scanIntOrFloat(b)
|
|
}
|
|
|
|
s := 5
|
|
if len(b) < s {
|
|
s = len(b)
|
|
}
|
|
|
|
for idx, c := range b[:s] {
|
|
if isDigit(c) {
|
|
continue
|
|
}
|
|
|
|
if idx == 2 && c == ':' || (idx == 4 && c == '-') {
|
|
return p.scanDateTime(b)
|
|
}
|
|
|
|
break
|
|
}
|
|
|
|
return p.scanIntOrFloat(b)
|
|
}
|
|
|
|
func (p *Parser) scanDateTime(b []byte) (reference, []byte, error) {
|
|
// scans for contiguous characters in [0-9T:Z.+-], and up to one space if
|
|
// followed by a digit.
|
|
hasDate := false
|
|
hasTime := false
|
|
hasTz := false
|
|
seenSpace := false
|
|
|
|
i := 0
|
|
byteLoop:
|
|
for ; i < len(b); i++ {
|
|
c := b[i]
|
|
|
|
switch {
|
|
case isDigit(c):
|
|
case c == '-':
|
|
hasDate = true
|
|
const minOffsetOfTz = 8
|
|
if i >= minOffsetOfTz {
|
|
hasTz = true
|
|
}
|
|
case c == 'T' || c == 't' || c == ':' || c == '.':
|
|
hasTime = true
|
|
case c == '+' || c == 'Z' || c == 'z':
|
|
hasTz = true
|
|
case c == ' ':
|
|
if !seenSpace && i+1 < len(b) && isDigit(b[i+1]) {
|
|
i += 2
|
|
// Avoid reaching past the end of the document in case the time
|
|
// is malformed. See TestIssue585.
|
|
if i >= len(b) {
|
|
i--
|
|
}
|
|
seenSpace = true
|
|
hasTime = true
|
|
} else {
|
|
break byteLoop
|
|
}
|
|
default:
|
|
break byteLoop
|
|
}
|
|
}
|
|
|
|
var kind Kind
|
|
|
|
if hasTime {
|
|
if hasDate {
|
|
if hasTz {
|
|
kind = DateTime
|
|
} else {
|
|
kind = LocalDateTime
|
|
}
|
|
} else {
|
|
kind = LocalTime
|
|
}
|
|
} else {
|
|
kind = LocalDate
|
|
}
|
|
|
|
return p.builder.Push(Node{
|
|
Kind: kind,
|
|
Data: b[:i],
|
|
}), b[i:], nil
|
|
}
|
|
|
|
//nolint:funlen,gocognit,cyclop
|
|
func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) {
|
|
i := 0
|
|
|
|
if len(b) > 2 && b[0] == '0' && b[1] != '.' && b[1] != 'e' && b[1] != 'E' {
|
|
var isValidRune validRuneFn
|
|
|
|
switch b[1] {
|
|
case 'x':
|
|
isValidRune = isValidHexRune
|
|
case 'o':
|
|
isValidRune = isValidOctalRune
|
|
case 'b':
|
|
isValidRune = isValidBinaryRune
|
|
default:
|
|
i++
|
|
}
|
|
|
|
if isValidRune != nil {
|
|
i += 2
|
|
for ; i < len(b); i++ {
|
|
if !isValidRune(b[i]) {
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
return p.builder.Push(Node{
|
|
Kind: Integer,
|
|
Data: b[:i],
|
|
Raw: p.rangeOfToken(b[:i], b[i:]),
|
|
}), b[i:], nil
|
|
}
|
|
|
|
isFloat := false
|
|
|
|
for ; i < len(b); i++ {
|
|
c := b[i]
|
|
|
|
if c >= '0' && c <= '9' || c == '+' || c == '-' || c == '_' {
|
|
continue
|
|
}
|
|
|
|
if c == '.' || c == 'e' || c == 'E' {
|
|
isFloat = true
|
|
|
|
continue
|
|
}
|
|
|
|
if c == 'i' {
|
|
if scanFollowsInf(b[i:]) {
|
|
return p.builder.Push(Node{
|
|
Kind: Float,
|
|
Data: b[:i+3],
|
|
Raw: p.rangeOfToken(b[:i+3], b[i+3:]),
|
|
}), b[i+3:], nil
|
|
}
|
|
|
|
return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'i' while scanning for a number")
|
|
}
|
|
|
|
if c == 'n' {
|
|
if scanFollowsNan(b[i:]) {
|
|
return p.builder.Push(Node{
|
|
Kind: Float,
|
|
Data: b[:i+3],
|
|
Raw: p.rangeOfToken(b[:i+3], b[i+3:]),
|
|
}), b[i+3:], nil
|
|
}
|
|
|
|
return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'n' while scanning for a number")
|
|
}
|
|
|
|
break
|
|
}
|
|
|
|
if i == 0 {
|
|
return invalidReference, b, NewParserError(b, "incomplete number")
|
|
}
|
|
|
|
kind := Integer
|
|
|
|
if isFloat {
|
|
kind = Float
|
|
}
|
|
|
|
return p.builder.Push(Node{
|
|
Kind: kind,
|
|
Data: b[:i],
|
|
Raw: p.rangeOfToken(b[:i], b[i:]),
|
|
}), b[i:], nil
|
|
}
|
|
|
|
// isDigit reports whether r is an ASCII decimal digit.
func isDigit(r byte) bool {
	// Byte subtraction wraps around, so anything below '0' becomes a large
	// value and fails the comparison as well.
	return r-'0' <= 9
}
|
|
|
|
// validRuneFn is a predicate telling whether a byte may appear in a number
// written in a particular base.
type validRuneFn func(r byte) bool
|
|
|
|
// isValidHexRune reports whether r may appear in a hexadecimal integer: a
// hex digit of either case, or the '_' separator.
func isValidHexRune(r byte) bool {
	switch {
	case r == '_':
		return true
	case '0' <= r && r <= '9':
		return true
	case 'a' <= r && r <= 'f':
		return true
	case 'A' <= r && r <= 'F':
		return true
	}

	return false
}
|
|
|
|
// isValidOctalRune reports whether r may appear in an octal integer: a digit
// from 0 to 7, or the '_' separator.
func isValidOctalRune(r byte) bool {
	if r == '_' {
		return true
	}

	return '0' <= r && r <= '7'
}
|
|
|
|
// isValidBinaryRune reports whether r may appear in a binary integer: '0',
// '1', or the '_' separator.
func isValidBinaryRune(r byte) bool {
	switch r {
	case '0', '1', '_':
		return true
	default:
		return false
	}
}
|
|
|
|
func expect(x byte, b []byte) ([]byte, error) {
|
|
if len(b) == 0 {
|
|
return nil, NewParserError(b, "expected character %c but the document ended here", x)
|
|
}
|
|
|
|
if b[0] != x {
|
|
return nil, NewParserError(b[0:1], "expected character %c", x)
|
|
}
|
|
|
|
return b[1:], nil
|
|
}
|