Replace branch with AST version

This commit is contained in:
Thomas Pelletier
2021-03-15 08:46:35 -04:00
parent b8df31de84
commit 00b2f776a9
11 changed files with 197 additions and 2257 deletions
File diff suppressed because it is too large Load Diff
-168
View File
@@ -1,168 +0,0 @@
package unmarshaler
import "fmt"
// scanFollows returns a predicate that reports whether its argument starts
// with pattern. Inputs shorter than pattern never match.
func scanFollows(pattern []byte) func(b []byte) bool {
	n := len(pattern)
	return func(b []byte) bool {
		// The length guard keeps the prefix slice in bounds.
		return len(b) >= n && string(b[:n]) == string(pattern)
	}
}
// Prefix matchers for the fixed tokens the scanner has to recognize.
var (
	// Delimiters that open and close multiline strings.
	scanFollowsMultilineBasicStringDelimiter   = scanFollows([]byte(`"""`))
	scanFollowsMultilineLiteralStringDelimiter = scanFollows([]byte(`'''`))

	// Keyword values.
	scanFollowsTrue  = scanFollows([]byte("true"))
	scanFollowsFalse = scanFollows([]byte("false"))
	scanFollowsInf   = scanFollows([]byte("inf"))
	scanFollowsNan   = scanFollows([]byte("nan"))
)
// scanUnquotedKey splits b into the longest leading run of unquoted-key
// characters and whatever follows it. When every byte of b belongs to the
// key, the remainder is nil. The returned error is always nil.
func scanUnquotedKey(b []byte) ([]byte, []byte, error) {
	//unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
	for end, c := range b {
		if !isUnquotedKeyChar(c) {
			return b[:end], b[end:], nil
		}
	}
	return b, nil, nil
}
// isUnquotedKeyChar reports whether r may appear in an unquoted key:
// an ASCII letter, an ASCII digit, '-' or '_'.
func isUnquotedKeyChar(r byte) bool {
	switch {
	case 'A' <= r && r <= 'Z', 'a' <= r && r <= 'z', '0' <= r && r <= '9':
		return true
	case r == '-', r == '_':
		return true
	}
	return false
}
// scanLiteralString scans a single-line literal string at the start of b.
// The first byte is taken to be the opening apostrophe and is not checked.
// It returns the token with both apostrophes included, followed by the rest
// of the input. A newline before the closing apostrophe, or running out of
// input, is an error.
func scanLiteralString(b []byte) ([]byte, []byte, error) {
	//literal-string = apostrophe *literal-char apostrophe
	//apostrophe = %x27 ; ' apostrophe
	//literal-char = %x09 / %x20-26 / %x28-7E / non-ascii
	for pos := 1; pos < len(b); pos++ {
		if b[pos] == '\n' {
			return nil, nil, fmt.Errorf("literal strings cannot have new lines")
		}
		if b[pos] == '\'' {
			end := pos + 1
			return b[:end], b[end:], nil
		}
	}
	return nil, nil, fmt.Errorf("unterminated literal string")
}
// scanMultilineLiteralString scans a multiline literal string at the start
// of b. The first three bytes are taken to be the opening ''' delimiter and
// are skipped without being checked.
//
// It returns the token including both delimiters, then the input that
// follows the closing delimiter. An error is returned when no closing '''
// is found.
func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) {
	//ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body
	//ml-literal-string-delim
	//ml-literal-string-delim = 3apostrophe
	//ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ]
	//
	//mll-content = mll-char / newline
	//mll-char = %x09 / %x20-26 / %x28-7E / non-ascii
	//mll-quotes = 1*2apostrophe
	for i := 3; i < len(b); i++ {
		// The closing delimiter is three consecutive apostrophes.
		if b[i] == '\'' && i+3 <= len(b) && string(b[i:i+3]) == `'''` {
			end := i + 3
			// The remainder starts after the delimiter; returning the
			// token again here would make the caller re-read it.
			return b[:end], b[end:], nil
		}
	}
	return nil, nil, fmt.Errorf(`multiline literal string not terminated by '''`)
}
// scanWindowsNewline consumes a two-byte line ending at the start of b.
// Only the second byte is checked (it must be '\n'); the first byte is
// presumably a '\r' already seen by the caller. It returns the two bytes
// and the rest of the input.
func scanWindowsNewline(b []byte) ([]byte, []byte, error) {
	switch {
	case len(b) < 2:
		return nil, nil, fmt.Errorf(`windows new line missing \n`)
	case b[1] != '\n':
		return nil, nil, fmt.Errorf(`windows new line should be \r\n`)
	}
	return b[:2], b[2:], nil
}
// scanWhitespace splits b into its leading run of spaces and tabs and the
// remaining input. If b consists only of whitespace the remainder is nil.
func scanWhitespace(b []byte) ([]byte, []byte) {
	for end, c := range b {
		if c != ' ' && c != '\t' {
			return b[:end], b[end:]
		}
	}
	return b, nil
}
// scanComment consumes a comment at the start of b. The first byte is
// skipped unchecked (the caller is expected to have seen the '#'). The
// comment runs up to, but not including, the next '\n'. If there is no
// newline the whole input is the comment and the remainder is nil. The
// returned error is always nil.
func scanComment(b []byte) ([]byte, []byte, error) {
	//;; Comment
	//
	//comment-start-symbol = %x23 ; #
	//non-ascii = %x80-D7FF / %xE000-10FFFF
	//non-eol = %x09 / %x20-7F / non-ascii
	//
	//comment = comment-start-symbol *non-eol
	end := 1
	for end < len(b) && b[end] != '\n' {
		end++
	}
	if end < len(b) {
		return b[:end], b[end:], nil
	}
	return b, nil, nil
}
// scanBasicString scans a single-line basic string at the start of b. The
// first byte is taken to be the opening quotation mark and is not checked.
// It returns the token with both quotes included and the input after it.
// A backslash makes the following byte part of the string, so an escaped
// quote does not end the token. Escape sequences are not validated here.
//
// TODO perform validation on the string?
func scanBasicString(b []byte) ([]byte, []byte, error) {
	//basic-string = quotation-mark *basic-char quotation-mark
	//quotation-mark = %x22 ; "
	//basic-char = basic-unescaped / escaped
	//basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
	//escaped = escape escape-seq-char
	for pos := 1; pos < len(b); pos++ {
		c := b[pos]
		if c == '"' {
			end := pos + 1
			return b[:end], b[end:], nil
		}
		if c == '\n' {
			return nil, nil, fmt.Errorf("basic strings cannot have new lines")
		}
		if c == '\\' {
			if pos+2 > len(b) {
				return nil, nil, fmt.Errorf("need a character after \\")
			}
			pos++ // step over the escaped byte
		}
	}
	return nil, nil, fmt.Errorf(`basic string not terminated by "`)
}
// scanMultilineBasicString scans a multiline basic string at the start of
// b. The first three bytes are taken to be the opening """ delimiter and
// are skipped unchecked. It returns the token including both delimiters and
// the input after the closing delimiter. A backslash makes the following
// byte part of the string. Escape sequences are not validated here.
//
// TODO perform validation on the string?
func scanMultilineBasicString(b []byte) ([]byte, []byte, error) {
	//ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
	//ml-basic-string-delim
	//ml-basic-string-delim = 3quotation-mark
	//ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
	//
	//mlb-content = mlb-char / newline / mlb-escaped-nl
	//mlb-char = mlb-unescaped / escaped
	//mlb-quotes = 1*2quotation-mark
	//mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
	//mlb-escaped-nl = escape ws newline *( wschar / newline )
	for pos := 3; pos < len(b); pos++ {
		if b[pos] == '\\' {
			if pos+2 > len(b) {
				return nil, nil, fmt.Errorf("need a character after \\")
			}
			pos++ // step over the escaped byte
			continue
		}
		if b[pos] == '"' && scanFollowsMultilineBasicStringDelimiter(b[pos:]) {
			end := pos + 3
			return b[:end], b[end:], nil
		}
	}
	return nil, nil, fmt.Errorf(`multiline basic string not terminated by """`)
}
+190 -335
View File
@@ -3,44 +3,17 @@ package toml
import (
"bytes"
"encoding/hex"
"errors"
"fmt"
"math"
"strconv"
"strings"
"time"
"github.com/pelletier/go-toml/v2/internal/ast"
)
type builder interface {
SimpleKey(v []byte)
StandardTableBegin()
StandardTableEnd()
ArrayTableBegin()
ArrayTableEnd()
KeyValBegin()
KeyValEnd()
ArrayBegin()
ArrayEnd()
Assignation()
InlineTableBegin()
InlineTableEnd()
StringValue(v []byte)
BoolValue(b bool)
FloatValue(n float64)
IntValue(n int64)
LocalDateValue(date LocalDate)
LocalDateTimeValue(dt LocalDateTime)
DateTimeValue(dt time.Time)
LocalTimeValue(localTime LocalTime)
}
type parser struct {
builder builder
tree ast.Root
}
func (p parser) parse(b []byte) error {
func (p *parser) parse(b []byte) error {
b, err := p.parseExpression(b)
if err != nil {
return err
@@ -59,7 +32,7 @@ func (p parser) parse(b []byte) error {
return nil
}
func (p parser) parseNewline(b []byte) ([]byte, error) {
func (p *parser) parseNewline(b []byte) ([]byte, error) {
if b[0] == '\n' {
return b[1:], nil
}
@@ -70,7 +43,7 @@ func (p parser) parseNewline(b []byte) ([]byte, error) {
return nil, fmt.Errorf("expected newline but got %#U", b[0])
}
func (p parser) parseExpression(b []byte) ([]byte, error) {
func (p *parser) parseExpression(b []byte) ([]byte, error) {
//expression = ws [ comment ]
//expression =/ ws keyval ws [ comment ]
//expression =/ ws table ws [ comment ]
@@ -90,10 +63,11 @@ func (p parser) parseExpression(b []byte) ([]byte, error) {
}
var err error
var node ast.Node
if b[0] == '[' {
b, err = p.parseTable(b)
node, b, err = p.parseTable(b)
} else {
b, err = p.parseKeyval(b)
node, b, err = p.parseKeyval(b)
}
if err != nil {
return nil, err
@@ -106,10 +80,12 @@ func (p parser) parseExpression(b []byte) ([]byte, error) {
return rest, err
}
p.tree = append(p.tree, node)
return b, nil
}
func (p parser) parseTable(b []byte) ([]byte, error) {
func (p *parser) parseTable(b []byte) (ast.Node, []byte, error) {
//table = std-table / array-table
if len(b) > 1 && b[1] == '[' {
return p.parseArrayTable(b)
@@ -117,82 +93,91 @@ func (p parser) parseTable(b []byte) ([]byte, error) {
return p.parseStdTable(b)
}
func (p parser) parseArrayTable(b []byte) ([]byte, error) {
func (p *parser) parseArrayTable(b []byte) (ast.Node, []byte, error) {
//array-table = array-table-open key array-table-close
//array-table-open = %x5B.5B ws ; [[ Double left square bracket
//array-table-close = ws %x5D.5D ; ]] Double right square bracket
p.builder.ArrayTableBegin()
defer p.builder.ArrayTableEnd()
// TODO
//b = b[2:]
//b = p.parseWhitespace(b)
//b, err := p.parseKey(b)
//if err != nil {
// return nil, err
//}
//b = p.parseWhitespace(b)
//b, err = expect(']', b)
//if err != nil {
// return nil, err
//}
//return expect(']', b)
b = b[2:]
b = p.parseWhitespace(b)
b, err := p.parseKey(b)
if err != nil {
return nil, err
}
b = p.parseWhitespace(b)
b, err = expect(']', b)
if err != nil {
return nil, err
}
return expect(']', b)
return ast.NoNode, nil, nil
}
func (p parser) parseStdTable(b []byte) ([]byte, error) {
func (p *parser) parseStdTable(b []byte) (ast.Node, []byte, error) {
//std-table = std-table-open key std-table-close
//std-table-open = %x5B ws ; [ Left square bracket
//std-table-close = ws %x5D ; ] Right square bracket
p.builder.StandardTableBegin()
defer p.builder.StandardTableEnd()
node := ast.Node{
Kind: ast.Table,
}
b = b[1:]
b = p.parseWhitespace(b)
b, err := p.parseKey(b)
key, b, err := p.parseKey(b)
if err != nil {
return nil, err
return ast.NoNode, nil, err
}
node.Children = key
b = p.parseWhitespace(b)
return expect(']', b)
b, err = expect(']', b)
return node, b, err
}
func (p parser) parseKeyval(b []byte) ([]byte, error) {
func (p *parser) parseKeyval(b []byte) (ast.Node, []byte, error) {
//keyval = key keyval-sep val
p.builder.KeyValBegin()
defer p.builder.KeyValEnd()
b, err := p.parseKey(b)
if err != nil {
return nil, err
node := ast.Node{
Kind: ast.KeyValue,
}
key, b, err := p.parseKey(b)
if err != nil {
return ast.NoNode, nil, err
}
node.Children = append(node.Children, key...)
//keyval-sep = ws %x3D ws ; =
b = p.parseWhitespace(b)
b, err = expect('=', b)
if err != nil {
return nil, err
return ast.NoNode, nil, err
}
p.builder.Assignation()
b = p.parseWhitespace(b)
return p.parseVal(b)
valNode, b, err := p.parseVal(b)
if err == nil {
node.Children = append(node.Children, valNode)
}
return node, b, err
}
func (p parser) parseVal(b []byte) ([]byte, error) {
func (p *parser) parseVal(b []byte) (ast.Node, []byte, error) {
// val = string / boolean / array / inline-table / date-time / float / integer
if len(b) == 0 {
return nil, fmt.Errorf("expected value, not eof")
return ast.NoNode, nil, fmt.Errorf("expected value, not eof")
}
node := ast.Node{}
var err error
c := b[0]
switch c {
// strings
case '"':
var v []byte
if scanFollowsMultilineBasicStringDelimiter(b) {
@@ -201,9 +186,10 @@ func (p parser) parseVal(b []byte) ([]byte, error) {
v, b, err = p.parseBasicString(b)
}
if err == nil {
p.builder.StringValue(v)
node.Kind = ast.String
node.Data = v
}
return b, err
return node, b, err
case '\'':
var v []byte
if scanFollowsMultilineLiteralStringDelimiter(b) {
@@ -212,31 +198,39 @@ func (p parser) parseVal(b []byte) ([]byte, error) {
v, b, err = p.parseLiteralString(b)
}
if err == nil {
p.builder.StringValue(v)
node.Kind = ast.String
node.Data = v
}
return b, err
return node, b, err
case 't':
if !scanFollowsTrue(b) {
return nil, fmt.Errorf("expected 'true'")
return node, nil, fmt.Errorf("expected 'true'")
}
p.builder.BoolValue(true)
return b[4:], nil
node.Kind = ast.Bool
node.Data = b[:4]
return node, b[4:], nil
case 'f':
if !scanFollowsFalse(b) {
return nil, fmt.Errorf("expected 'false'")
return node, nil, fmt.Errorf("expected 'false'")
}
p.builder.BoolValue(false)
return b[5:], nil
node.Kind = ast.Bool
node.Data = b[:5]
return node, b[5:], nil
case '[':
return p.parseValArray(b)
node.Kind = ast.Array
b, err := p.parseValArray(&node, b)
return node, b, err
case '{':
return p.parseInlineTable(b)
node.Kind = ast.InlineTable
b, err := p.parseInlineTable(&node, b)
return node, b, err
default:
return p.parseIntOrFloatOrDateTime(b)
b, err = p.parseIntOrFloatOrDateTime(&node, b)
return node, b, err
}
}
func (p parser) parseLiteralString(b []byte) ([]byte, []byte, error) {
func (p *parser) parseLiteralString(b []byte) ([]byte, []byte, error) {
v, rest, err := scanLiteralString(b)
if err != nil {
return nil, nil, err
@@ -244,16 +238,13 @@ func (p parser) parseLiteralString(b []byte) ([]byte, []byte, error) {
return v[1 : len(v)-1], rest, nil
}
func (p parser) parseInlineTable(b []byte) ([]byte, error) {
func (p *parser) parseInlineTable(node *ast.Node, b []byte) ([]byte, error) {
//inline-table = inline-table-open [ inline-table-keyvals ] inline-table-close
//inline-table-open = %x7B ws ; {
//inline-table-close = ws %x7D ; }
//inline-table-sep = ws %x2C ws ; , Comma
//inline-table-keyvals = keyval [ inline-table-sep inline-table-keyvals ]
p.builder.InlineTableBegin()
defer p.builder.InlineTableEnd()
b = b[1:]
first := true
@@ -271,10 +262,12 @@ func (p parser) parseInlineTable(b []byte) ([]byte, error) {
}
b = p.parseWhitespace(b)
}
b, err = p.parseKeyval(b)
var kv ast.Node
kv, b, err = p.parseKeyval(b)
if err != nil {
return nil, err
}
node.Children = append(node.Children, kv)
first = false
}
@@ -282,7 +275,7 @@ func (p parser) parseInlineTable(b []byte) ([]byte, error) {
return expect('}', b)
}
func (p parser) parseValArray(b []byte) ([]byte, error) {
func (p *parser) parseValArray(node *ast.Node, b []byte) ([]byte, error) {
//array = array-open [ array-values ] ws-comment-newline array-close
//array-open = %x5B ; [
//array-close = %x5D ; ]
@@ -291,9 +284,6 @@ func (p parser) parseValArray(b []byte) ([]byte, error) {
//array-sep = %x2C ; , Comma
//ws-comment-newline = *( wschar / [ comment ] newline )
p.builder.ArrayBegin()
defer p.builder.ArrayEnd()
b = b[1:]
first := true
@@ -322,10 +312,12 @@ func (p parser) parseValArray(b []byte) ([]byte, error) {
}
}
b, err = p.parseVal(b)
var valueNode ast.Node
valueNode, b, err = p.parseVal(b)
if err != nil {
return nil, err
}
node.Children = append(node.Children, valueNode)
b, err = p.parseOptionalWhitespaceCommentNewline(b)
if err != nil {
return nil, err
@@ -336,7 +328,7 @@ func (p parser) parseValArray(b []byte) ([]byte, error) {
return expect(']', b)
}
func (p parser) parseOptionalWhitespaceCommentNewline(b []byte) ([]byte, error) {
func (p *parser) parseOptionalWhitespaceCommentNewline(b []byte) ([]byte, error) {
var err error
b = p.parseWhitespace(b)
if len(b) > 0 && b[0] == '#' {
@@ -354,7 +346,7 @@ func (p parser) parseOptionalWhitespaceCommentNewline(b []byte) ([]byte, error)
return b, nil
}
func (p parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, error) {
func (p *parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, error) {
token, rest, err := scanMultilineLiteralString(b)
if err != nil {
return nil, nil, err
@@ -372,7 +364,7 @@ func (p parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, error) {
return token[i : len(b)-3], rest, err
}
func (p parser) parseMultilineBasicString(b []byte) ([]byte, []byte, error) {
func (p *parser) parseMultilineBasicString(b []byte) ([]byte, []byte, error) {
//ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
//ml-basic-string-delim
//ml-basic-string-delim = 3quotation-mark
@@ -459,7 +451,7 @@ func (p parser) parseMultilineBasicString(b []byte) ([]byte, []byte, error) {
return builder.Bytes(), rest, nil
}
func (p parser) parseKey(b []byte) ([]byte, error) {
func (p *parser) parseKey(b []byte) ([]ast.Node, []byte, error) {
//key = simple-key / dotted-key
//simple-key = quoted-key / unquoted-key
//
@@ -469,55 +461,64 @@ func (p parser) parseKey(b []byte) ([]byte, error) {
//
//dot-sep = ws %x2E ws ; . Period
b, err := p.parseSimpleKey(b)
var nodes []ast.Node
key, b, err := p.parseSimpleKey(b)
if err != nil {
return nil, err
return nodes, nil, err
}
nodes = append(nodes, ast.Node{
Kind: ast.Key,
Data: key,
})
for {
b = p.parseWhitespace(b)
if len(b) > 0 && b[0] == '.' {
b, err = expect('.', b)
if err != nil {
return nil, err
return nodes, nil, err
}
b = p.parseWhitespace(b)
b, err = p.parseSimpleKey(b)
key, b, err = p.parseSimpleKey(b)
if err != nil {
return nil, err
return nodes, nil, err
}
nodes = append(nodes, ast.Node{
Kind: ast.Key,
Data: key,
})
} else {
break
}
}
return b, nil
return nodes, b, nil
}
func (p parser) parseSimpleKey(b []byte) (rest []byte, err error) {
func (p *parser) parseSimpleKey(b []byte) (key, rest []byte, err error) {
//simple-key = quoted-key / unquoted-key
//unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
//quoted-key = basic-string / literal-string
if len(b) == 0 {
return nil, unexpectedCharacter{b: b}
return nil, nil, unexpectedCharacter{b: b}
}
var v []byte
if b[0] == '\'' {
v, rest, err = scanLiteralString(b)
key, rest, err = scanLiteralString(b)
} else if b[0] == '"' {
v, rest, err = p.parseBasicString(b)
key, rest, err = p.parseBasicString(b)
} else if isUnquotedKeyChar(b[0]) {
v, rest, err = scanUnquotedKey(b)
key, rest, err = scanUnquotedKey(b)
} else {
return nil, unexpectedCharacter{b: b}
err = unexpectedCharacter{b: b}
}
p.builder.SimpleKey(v)
return
}
func (p parser) parseBasicString(b []byte) ([]byte, []byte, error) {
func (p *parser) parseBasicString(b []byte) ([]byte, []byte, error) {
//basic-string = quotation-mark *basic-char quotation-mark
//quotation-mark = %x22 ; "
//basic-char = basic-unescaped / escaped
@@ -596,7 +597,7 @@ func hexToString(b []byte, length int) (string, error) {
return string(b), nil
}
func (p parser) parseWhitespace(b []byte) []byte {
func (p *parser) parseWhitespace(b []byte) []byte {
//ws = *wschar
//wschar = %x20 ; Space
//wschar =/ %x09 ; Horizontal tab
@@ -605,26 +606,28 @@ func (p parser) parseWhitespace(b []byte) []byte {
return rest
}
func (p parser) parseIntOrFloatOrDateTime(b []byte) ([]byte, error) {
func (p *parser) parseIntOrFloatOrDateTime(node *ast.Node, b []byte) ([]byte, error) {
switch b[0] {
case 'i':
if !scanFollowsInf(b) {
return nil, fmt.Errorf("expected 'inf'")
}
p.builder.FloatValue(math.Inf(1))
node.Kind = ast.Float
node.Data = b[:3]
return b[3:], nil
case 'n':
if !scanFollowsNan(b) {
return nil, fmt.Errorf("expected 'nan'")
}
p.builder.FloatValue(math.NaN())
node.Kind = ast.Float
node.Data = b[:3]
return b[3:], nil
case '+', '-':
return p.parseIntOrFloat(b)
return p.scanIntOrFloat(node, b)
}
if len(b) < 3 {
return p.parseIntOrFloat(b)
return p.scanIntOrFloat(node, b)
}
s := 5
if len(b) < s {
@@ -641,7 +644,7 @@ func (p parser) parseIntOrFloatOrDateTime(b []byte) ([]byte, error) {
return p.parseDateTime(b)
}
}
return p.parseIntOrFloat(b)
return p.scanIntOrFloat(node, b)
}
func digitsToInt(b []byte) int {
@@ -653,7 +656,7 @@ func digitsToInt(b []byte) int {
return x
}
func (p parser) parseDateTime(b []byte) ([]byte, error) {
func (p *parser) parseDateTime(b []byte) ([]byte, error) {
// we know the first 2 are digits.
if b[2] == ':' {
return p.parseTime(b)
@@ -709,16 +712,19 @@ func (p parser) parseDateTime(b []byte) ([]byte, error) {
idx++
if idx >= len(b) {
p.builder.LocalDateValue(localDate)
//p.builder.LocalDateValue(localDate)
// TODO
return nil, nil
} else if b[idx] != ' ' && b[idx] != 'T' {
p.builder.LocalDateValue(localDate)
//p.builder.LocalDateValue(localDate)
// TODO
return b[idx:], nil
}
// check if there is a chance there is anything useful after
if b[idx] == ' ' && (((idx + 2) >= len(b)) || !isDigit(b[idx+1]) || !isDigit(b[idx+2])) {
p.builder.LocalDateValue(localDate)
//p.builder.LocalDateValue(localDate)
// TODO
return b[idx:], nil
}
@@ -792,7 +798,9 @@ func (p parser) parseDateTime(b []byte) ([]byte, error) {
Date: localDate,
Time: localTime,
}
p.builder.LocalDateTimeValue(dt)
//p.builder.LocalDateTimeValue(dt)
// TODO
dt = dt
return b[idx:], nil
}
@@ -838,11 +846,13 @@ func (p parser) parseDateTime(b []byte) ([]byte, error) {
loc = time.FixedZone(string(b[start:idx]), offset)
}
dt := time.Date(localDate.Year, localDate.Month, localDate.Day, localTime.Hour, localTime.Minute, localTime.Second, localTime.Nanosecond, loc)
p.builder.DateTimeValue(dt)
//p.builder.DateTimeValue(dt)
// TODO
dt = dt
return b[idx:], nil
}
func (p parser) parseTime(b []byte) ([]byte, error) {
func (p *parser) parseTime(b []byte) ([]byte, error) {
localTime := LocalTime{}
idx := 0
@@ -902,230 +912,84 @@ func (p parser) parseTime(b []byte) ([]byte, error) {
}
}
p.builder.LocalTimeValue(localTime)
//p.builder.LocalTimeValue(localTime)
// TODO
return b[idx:], nil
}
func (p parser) parseIntOrFloat(b []byte) ([]byte, error) {
func (p *parser) scanIntOrFloat(node *ast.Node, b []byte) ([]byte, error) {
i := 0
r := b[0]
if r == '0' {
if len(b) >= 2 {
var isValidRune validRuneFn
var parseFn func([]byte) (int64, error)
switch b[1] {
case 'x':
isValidRune = isValidHexRune
parseFn = parseIntHex
case 'o':
isValidRune = isValidOctalRune
parseFn = parseIntOct
case 'b':
isValidRune = isValidBinaryRune
parseFn = parseIntBin
default:
if b[1] >= 'a' && b[1] <= 'z' || b[1] >= 'A' && b[1] <= 'Z' {
return nil, fmt.Errorf("unknown number base: %s. possible options are x (hex) o (octal) b (binary)", string(b[1]))
}
parseFn = parseIntDec
}
if isValidRune != nil {
i = 2
digitSeen := false
for {
if !isValidRune(b[i]) {
break
}
digitSeen = true
i++
}
if len(b) > 2 && b[0] == '0' {
var isValidRune validRuneFn
switch b[1] {
case 'x':
isValidRune = isValidHexRune
case 'o':
isValidRune = isValidOctalRune
case 'b':
isValidRune = isValidBinaryRune
default:
return b, fmt.Errorf("unknown number base: %c. possible options are x (hex) o (octal) b (binary)", b[1])
}
if !digitSeen {
return nil, fmt.Errorf("number needs at least one digit")
}
v, err := parseFn(b[:i])
if err != nil {
return nil, err
}
p.builder.IntValue(v)
i += 2
for ; i < len(b); i++ {
if !isValidRune(b[i]) {
node.Kind = ast.Integer
node.Data = b[:i]
return b[i:], nil
}
}
}
if r == '+' || r == '-' {
b = b[1:]
if scanFollowsInf(b) {
if r == '+' {
p.builder.FloatValue(plusInf)
} else {
p.builder.FloatValue(minusInf)
isFloat := false
for ; i < len(b); i++ {
c := b[i]
if c >= '0' && c <= '9' || c == '+' || c == '-' || c == '_' {
continue
}
if c == '.' || c == 'e' || c == 'E' {
isFloat = true
continue
}
if c == 'i' {
if scanFollowsInf(b[i:]) {
node.Kind = ast.Float
node.Data = b[:i+3]
return b[i+3:], nil
}
return b, nil
return nil, fmt.Errorf("unexpected character i while scanning for a number")
}
if scanFollowsNan(b) {
p.builder.FloatValue(nan)
return b, nil
if c == 'n' {
if scanFollowsNan(b[i:]) {
node.Kind = ast.Float
node.Data = b[:i+3]
return b[i+3:], nil
}
return nil, fmt.Errorf("unexpected character n while scanning for a number")
}
break
}
pointSeen := false
expSeen := false
digitSeen := false
for i < len(b) {
next := b[i]
if next == '.' {
if pointSeen {
return nil, fmt.Errorf("cannot have two dots in one float")
}
i++
if i < len(b) && !isDigit(b[i]) {
return nil, fmt.Errorf("float cannot end with a dot")
}
pointSeen = true
} else if next == 'e' || next == 'E' {
expSeen = true
i++
if i >= len(b) {
break
}
if b[i] == '+' || b[i] == '-' {
i++
}
} else if isDigit(next) {
digitSeen = true
i++
} else if next == '_' {
i++
} else {
break
}
if pointSeen && !digitSeen {
return nil, fmt.Errorf("cannot start float with a dot")
}
}
if !digitSeen {
return nil, fmt.Errorf("no digit in that number")
}
if pointSeen || expSeen {
f, err := parseFloat(b[:i])
if err != nil {
return nil, err
}
p.builder.FloatValue(f)
if isFloat {
node.Kind = ast.Float
} else {
v, err := parseIntDec(b[:i])
if err != nil {
return nil, err
}
p.builder.IntValue(v)
node.Kind = ast.Integer
}
node.Data = b[:i]
return b[i:], nil
}
// parseFloat converts a float token to a float64. The token is checked for
// misplaced underscores first, then the underscores are removed and the
// result is handed to strconv.ParseFloat.
func parseFloat(b []byte) (float64, error) {
	// TODO: inefficient
	raw := string(b)
	if err := numberContainsInvalidUnderscore(raw); err != nil {
		return 0, err
	}
	return strconv.ParseFloat(cleanupNumberToken(raw), 64)
}
func parseIntHex(b []byte) (int64, error) {
tok := string(b)
cleanedVal := cleanupNumberToken(tok)
err := hexNumberContainsInvalidUnderscore(cleanedVal)
if err != nil {
return 0, nil
}
return strconv.ParseInt(cleanedVal[2:], 16, 64)
}
func parseIntOct(b []byte) (int64, error) {
tok := string(b)
cleanedVal := cleanupNumberToken(tok)
err := numberContainsInvalidUnderscore(cleanedVal)
if err != nil {
return 0, err
}
return strconv.ParseInt(cleanedVal[2:], 8, 64)
}
func parseIntBin(b []byte) (int64, error) {
tok := string(b)
cleanedVal := cleanupNumberToken(tok)
err := numberContainsInvalidUnderscore(cleanedVal)
if err != nil {
return 0, err
}
return strconv.ParseInt(cleanedVal[2:], 2, 64)
}
func parseIntDec(b []byte) (int64, error) {
tok := string(b)
cleanedVal := cleanupNumberToken(tok)
err := numberContainsInvalidUnderscore(cleanedVal)
if err != nil {
return 0, err
}
return strconv.ParseInt(cleanedVal, 10, 64)
}
// numberContainsInvalidUnderscore returns errInvalidUnderscore when value
// contains an underscore that is not preceded by a decimal digit, or that
// is the last character of value. It returns nil otherwise.
func numberContainsInvalidUnderscore(value string) error {
	// For large numbers, you may use underscores between digits to enhance
	// readability. Each underscore must be surrounded by at least one digit on
	// each side.
	lastIdx := len(value) - 1
	prevIsDigit := false
	for idx, r := range value {
		// An underscore can neither follow a non-digit nor end the token.
		if r == '_' && (!prevIsDigit || idx >= lastIdx) {
			return errInvalidUnderscore
		}
		prevIsDigit = isDigitRune(r)
	}
	return nil
}
// hexNumberContainsInvalidUnderscore returns errInvalidUnderscoreHex when
// value contains an underscore that is not preceded by a hexadecimal digit,
// or that is the last character of value. It returns nil otherwise.
func hexNumberContainsInvalidUnderscore(value string) error {
	lastIdx := len(value) - 1
	prevIsHex := false
	for idx, r := range value {
		// An underscore can neither follow a non-digit nor end the token.
		if r == '_' && (!prevIsHex || idx >= lastIdx) {
			return errInvalidUnderscoreHex
		}
		prevIsHex = isHexDigit(r)
	}
	return nil
}
// cleanupNumberToken returns value with every underscore removed.
func cleanupNumberToken(value string) string {
	return strings.ReplaceAll(value, "_", "")
}
// isDigit reports whether r is an ASCII decimal digit.
func isDigit(r byte) bool {
	return '0' <= r && r <= '9'
}
// isDigitRune reports whether r is an ASCII decimal digit.
func isDigitRune(r rune) bool {
	return '0' <= r && r <= '9'
}
// Precomputed special float values.
var (
	plusInf  = math.Inf(1)
	minusInf = math.Inf(-1)
	nan      = math.NaN()
)
type validRuneFn func(r byte) bool
func isValidHexRune(r byte) bool {
@@ -1135,12 +999,6 @@ func isValidHexRune(r byte) bool {
r == '_'
}
// isHexDigit reports whether r is an ASCII hexadecimal digit
// (0-9, a-f or A-F).
func isHexDigit(r rune) bool {
	if isDigitRune(r) {
		return true
	}
	return ('a' <= r && r <= 'f') || ('A' <= r && r <= 'F')
}
// isValidOctalRune reports whether r may appear in the digits of an octal
// literal: an octal digit or the '_' separator.
func isValidOctalRune(r byte) bool {
	if r == '_' {
		return true
	}
	return '0' <= r && r <= '7'
}
@@ -1168,6 +1026,3 @@ func (u unexpectedCharacter) Error() string {
}
return fmt.Sprintf("expected %#U, not %#U", u.r, u.b[0])
}
var errInvalidUnderscore = errors.New("invalid use of _ in number")
var errInvalidUnderscoreHex = errors.New("invalid use of _ in hex number")
@@ -1,4 +1,4 @@
package unmarshaler
package toml
import (
"testing"
+2
View File
@@ -113,6 +113,7 @@ func scanComment(b []byte) ([]byte, []byte, error) {
return b, nil, nil
}
// TODO perform validation on the string?
func scanBasicString(b []byte) ([]byte, []byte, error) {
//basic-string = quotation-mark *basic-char quotation-mark
//quotation-mark = %x22 ; "
@@ -136,6 +137,7 @@ func scanBasicString(b []byte) ([]byte, []byte, error) {
return nil, nil, fmt.Errorf(`basic string not terminated by "`)
}
// TODO perform validation on the string?
func scanMultilineBasicString(b []byte) ([]byte, []byte, error) {
//ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
//ml-basic-string-delim
@@ -1,4 +1,4 @@
package unmarshaler
package toml
import (
"fmt"
@@ -1,4 +1,4 @@
package unmarshaler
package toml
import (
"reflect"
-359
View File
@@ -1,359 +0,0 @@
package toml
import (
"fmt"
"reflect"
"time"
"github.com/pelletier/go-toml/v2/internal/reflectbuild"
)
// Unmarshal parses the TOML document in data and stores the result in v.
//
// A reflectbuild builder is created for v, then the parser drives the
// unmarshaler callbacks. A parse error takes precedence; otherwise the first
// error recorded while building the value is returned.
func Unmarshal(data []byte, v interface{}) error {
	u := &unmarshaler{}
	if u.builder, u.err = reflectbuild.NewBuilder("toml", v); u.err != nil {
		return u.err
	}
	if parseErr := (parser{builder: u}).parse(data); parseErr != nil {
		return parseErr
	}
	return u.err
}
// unmarshaler receives the parser's callbacks and applies them to the
// target value through a reflectbuild.Builder.
type unmarshaler struct {
	builder reflectbuild.Builder
	// First error that appeared during the construction of the object.
	// When set all callbacks are no-ops.
	err error
	// State that indicates the parser is processing a [[table-array]] name.
	// If false keys are interpreted as part of a key-value or [table].
	parsingTableArray bool
	// Table Arrays need a buffer of keys because we need to know which one is
	// the last one, as it may result in creating a new element in the array.
	arrayTableKey [][]byte
	// Flag to indicate that the next value is an assignment.
	// Assignments are when the builder already points to the value, and should
	// be directly assigned. This is used to distinguish between assigning or
	// appending to arrays.
	assign bool
	// State that indicates the parser is processing a [table] name.
	// Used to know whether the whole table should be skipped or just the
	// keyval if a field is missing.
	parsingTable bool
	// Counters that indicate that we are skipping TOML expressions. It happens
	// when the document contains values that are not in the target struct.
	// TODO: signal the parser that it can just scan to avoid processing the
	// unused data.
	skipKeyValCount uint
	skipTable bool
}
// skipping reports whether callbacks should currently be ignored, either
// because a whole table is being skipped or because a key-value is.
func (u *unmarshaler) skipping() bool {
	if u.skipTable {
		return true
	}
	return u.skipKeyValCount > 0
}
// Assignation marks the next value as an assignment. It does nothing while
// skipping or after an error has been recorded.
func (u *unmarshaler) Assignation() {
	if u.err != nil || u.skipping() {
		return
	}
	u.assign = true
	// NOTE(review): this writes to stdout on every assignment; looks like
	// leftover debug output.
	fmt.Println("ASSIGN: TRUE!")
}
// ArrayBegin handles the start of an array value: it saves the builder
// state, makes sure the builder points at a slice, and clears the assign
// flag. It does nothing while skipping or after an error has been recorded.
func (u *unmarshaler) ArrayBegin() {
	if u.err != nil || u.skipping() {
		return
	}

	u.builder.Save()
	if u.err = u.builder.EnsureSlice(); u.err != nil {
		return
	}

	// NOTE(review): this writes to stdout; looks like leftover debug output.
	fmt.Println("ARRAY BEGIN ASSIGN =", u.assign)
	if !u.assign {
		//u.err = u.builder.SliceNewSlice()
		// TODO
	}
	u.assign = false
}
// ArrayEnd handles the end of an array value by calling Load on the
// builder, the counterpart of the Save done in ArrayBegin. It does nothing
// while skipping or after an error has been recorded.
func (u *unmarshaler) ArrayEnd() {
	if u.err == nil && !u.skipping() {
		u.builder.Load()
	}
}
// ArrayTableBegin records that the keys that follow belong to a
// [[table-array]] header. It does nothing while skipping or after an error
// has been recorded.
func (u *unmarshaler) ArrayTableBegin() {
	if u.err == nil && !u.skipping() {
		u.parsingTableArray = true
	}
}
// ArrayTableEnd applies the keys buffered while reading a [[table-array]]
// header. Starting from a reset builder, every key but the last is dug into
// and followed by SliceLastOrCreate; the last key is dug into and followed
// by SliceNewElem. The first error encountered is stored in u.err and stops
// the walk.
//
// It does nothing while skipping or after an error has been recorded.
func (u *unmarshaler) ArrayTableEnd() {
	if u.skipping() || u.err != nil {
		return
	}

	if len(u.arrayTableKey) == 0 {
		// No key was buffered, e.g. the parser failed before reporting
		// one. Slicing [:len-1] below would panic, so just leave the
		// header state and let the parser report its own error.
		u.parsingTableArray = false
		return
	}

	u.builder.Reset()

	last := len(u.arrayTableKey) - 1
	for _, v := range u.arrayTableKey[:last] {
		if u.err = u.builder.DigField(string(v)); u.err != nil {
			return
		}
		// Check this error too: it would otherwise be overwritten by the
		// next DigField call and silently lost.
		if u.err = u.builder.SliceLastOrCreate(); u.err != nil {
			return
		}
	}

	if u.err = u.builder.DigField(string(u.arrayTableKey[last])); u.err != nil {
		return
	}
	u.err = u.builder.SliceNewElem()

	u.parsingTableArray = false
	u.arrayTableKey = u.arrayTableKey[:0]
}
// InlineTableBegin handles the start of an inline table. After saving the
// builder state it either adds a new element (when the builder reports a
// slice or pointer) or makes sure the builder points at a struct or map.
// The assign flag is cleared. It does nothing while skipping or after an
// error has been recorded.
func (u *unmarshaler) InlineTableBegin() {
	if u.err != nil || u.skipping() {
		return
	}

	u.builder.Save()
	switch {
	case u.builder.IsSliceOrPtr():
		u.err = u.builder.SliceNewElem()
	default:
		u.err = u.builder.EnsureStructOrMap()
	}
	u.assign = false
}
// InlineTableEnd handles the end of an inline table by calling Load on the
// builder, the counterpart of the Save done in InlineTableBegin. It does
// nothing while skipping or after an error has been recorded.
func (u *unmarshaler) InlineTableEnd() {
	if u.err == nil && !u.skipping() {
		u.builder.Load()
	}
}
// KeyValBegin handles the start of a key-value expression. While a
// key-value is being skipped it only bumps the nesting counter; otherwise,
// unless a table is being skipped or an error was recorded, it saves the
// builder state.
func (u *unmarshaler) KeyValBegin() {
	switch {
	case u.skipKeyValCount > 0:
		// Nested key-value inside a skipped one: track the depth.
		u.skipKeyValCount++
	case u.skipping() || u.err != nil:
		// Nothing to do.
	default:
		u.builder.Save()
	}
}
// KeyValEnd handles the end of a key-value expression. While a key-value is
// being skipped it only decrements the nesting counter; otherwise, unless a
// table is being skipped or an error was recorded, it calls Load on the
// builder to match the Save done in KeyValBegin.
func (u *unmarshaler) KeyValEnd() {
	switch {
	case u.skipKeyValCount > 0:
		// Leaving one level of a skipped key-value.
		u.skipKeyValCount--
	case u.skipping() || u.err != nil:
		// Nothing to do.
	default:
		u.builder.Load()
	}
}
// StringValue stores a string value. When the builder reports a slice or
// pointer the value is appended between a Save/Load pair; otherwise it is
// set directly. On success the assign flag is cleared. It does nothing
// while skipping or after an error has been recorded.
func (u *unmarshaler) StringValue(v []byte) {
	if u.err != nil || u.skipping() {
		return
	}

	s := string(v)
	val := reflect.ValueOf(&s)
	if !u.builder.IsSliceOrPtr() {
		u.err = u.builder.Set(val)
		u.assign = false
		return
	}

	u.builder.Save()
	if u.err = u.builder.SliceAppend(val); u.err != nil {
		return
	}
	u.builder.Load()
	u.assign = false
}
// BoolValue stores a boolean value. When the builder reports a slice or
// pointer the value is appended between a Save/Load pair; otherwise it is
// set with SetBool. On success the assign flag is cleared. It does nothing
// while skipping or after an error has been recorded.
func (u *unmarshaler) BoolValue(b bool) {
	if u.err != nil || u.skipping() {
		return
	}

	if !u.builder.IsSliceOrPtr() {
		u.err = u.builder.SetBool(b)
		u.assign = false
		return
	}

	u.builder.Save()
	if u.err = u.builder.SliceAppend(reflect.ValueOf(&b)); u.err != nil {
		return
	}
	u.builder.Load()
	u.assign = false
}
// FloatValue stores a float value. When the builder reports a slice or
// pointer the value is appended between a Save/Load pair; otherwise it is
// set directly. On success the assign flag is cleared. It does nothing
// while skipping or after an error has been recorded.
func (u *unmarshaler) FloatValue(n float64) {
	if u.err != nil || u.skipping() {
		return
	}

	val := reflect.ValueOf(&n)
	if !u.builder.IsSliceOrPtr() {
		u.err = u.builder.Set(val)
		//u.err = u.builder.SetFloat(n)
		u.assign = false
		return
	}

	u.builder.Save()
	if u.err = u.builder.SliceAppend(val); u.err != nil {
		return
	}
	u.builder.Load()
	u.assign = false
}
// IntValue stores an integer value. When the builder reports a slice or
// pointer the value is appended between a Save/Load pair; otherwise it is
// set directly. On success the assign flag is cleared. It does nothing
// while skipping or after an error has been recorded.
func (u *unmarshaler) IntValue(n int64) {
	if u.err != nil || u.skipping() {
		return
	}

	val := reflect.ValueOf(&n)
	if !u.builder.IsSliceOrPtr() {
		u.err = u.builder.Set(val)
		u.assign = false
		return
	}

	u.builder.Save()
	if u.err = u.builder.SliceAppend(val); u.err != nil {
		return
	}
	u.builder.Load()
	u.assign = false
}
// LocalDateValue stores a local date value. When the builder reports a
// slice or pointer the value is appended between a Save/Load pair;
// otherwise it is set directly. On success the assign flag is cleared. It
// does nothing while skipping or after an error has been recorded.
func (u *unmarshaler) LocalDateValue(date LocalDate) {
	if u.err != nil || u.skipping() {
		return
	}

	val := reflect.ValueOf(&date)
	if !u.builder.IsSliceOrPtr() {
		u.err = u.builder.Set(val)
		u.assign = false
		return
	}

	u.builder.Save()
	if u.err = u.builder.SliceAppend(val); u.err != nil {
		return
	}
	u.builder.Load()
	u.assign = false
}
// LocalDateTimeValue stores a local date-time value. When the builder
// reports a slice or pointer the value is appended between a Save/Load
// pair; otherwise it is set directly. On success the assign flag is
// cleared. It does nothing while skipping or after an error has been
// recorded.
func (u *unmarshaler) LocalDateTimeValue(dt LocalDateTime) {
	if u.err != nil || u.skipping() {
		return
	}

	val := reflect.ValueOf(&dt)
	if !u.builder.IsSliceOrPtr() {
		u.err = u.builder.Set(val)
		u.assign = false
		return
	}

	u.builder.Save()
	if u.err = u.builder.SliceAppend(val); u.err != nil {
		return
	}
	u.builder.Load()
	u.assign = false
}
// DateTimeValue stores a time.Time value. When the builder reports a slice
// or pointer the value is appended between a Save/Load pair; otherwise it
// is set directly. On success the assign flag is cleared. It does nothing
// while skipping or after an error has been recorded.
func (u *unmarshaler) DateTimeValue(dt time.Time) {
	if u.err != nil || u.skipping() {
		return
	}

	val := reflect.ValueOf(&dt)
	if !u.builder.IsSliceOrPtr() {
		u.err = u.builder.Set(val)
		u.assign = false
		return
	}

	u.builder.Save()
	if u.err = u.builder.SliceAppend(val); u.err != nil {
		return
	}
	u.builder.Load()
	u.assign = false
}
// LocalTimeValue stores a local time value. When the builder reports a
// slice or pointer the value is appended between a Save/Load pair;
// otherwise it is set directly. On success the assign flag is cleared. It
// does nothing while skipping or after an error has been recorded.
func (u *unmarshaler) LocalTimeValue(localTime LocalTime) {
	if u.err != nil || u.skipping() {
		return
	}

	val := reflect.ValueOf(&localTime)
	if !u.builder.IsSliceOrPtr() {
		u.err = u.builder.Set(val)
		u.assign = false
		return
	}

	u.builder.Save()
	if u.err = u.builder.SliceAppend(val); u.err != nil {
		return
	}
	u.builder.Load()
	u.assign = false
}
// SimpleKey handles one key part v.
//
// Nothing happens when the unmarshaler is skipping or already holds an error.
// While an array table header is being parsed the part is only appended to
// u.arrayTableKey. Otherwise, if the builder cursor is a slice,
// SliceLastOrCreate is called first, and then the builder digs into the field
// named by v.
//
// A reflectbuild.FieldNotFoundError from DigField is not reported: the error
// is cleared and either the whole table (when parsing a table header) or the
// next key/value (u.skipKeyValCount = 1) is marked to be skipped. Any other
// error stays in u.err.
func (u *unmarshaler) SimpleKey(v []byte) {
	if u.skipping() || u.err != nil {
		return
	}

	if u.parsingTableArray {
		u.arrayTableKey = append(u.arrayTableKey, v)
		return
	}

	if u.builder.Cursor().Kind() == reflect.Slice {
		if u.err = u.builder.SliceLastOrCreate(); u.err != nil {
			return
		}
	}

	u.err = u.builder.DigField(string(v))

	// The comma-ok assertion is false for a nil error as well as for any
	// other error type; in both cases u.err is left exactly as DigField
	// returned it.
	// TODO: figure out what to do with unexported fields
	if _, missing := u.err.(reflectbuild.FieldNotFoundError); !missing {
		return
	}

	u.err = nil
	if u.parsingTable {
		u.skipTable = true
		return
	}
	u.skipKeyValCount = 1
}
// StandardTableBegin marks the start of a standard table header: it clears
// u.skipTable, sets u.parsingTable, and, unless the unmarshaler is skipping or
// already holds an error, resets the builder.
func (u *unmarshaler) StandardTableBegin() {
	u.skipTable, u.parsingTable = false, true

	if !u.skipping() && u.err == nil {
		// tables are only top-level
		u.builder.Reset()
	}
}
// StandardTableEnd marks the end of a standard table header: it clears
// u.parsingTable and, unless the unmarshaler is skipping or already holds an
// error, asks the builder to check that its current target is a struct or a
// map.
//
// The result of that check is recorded in u.err, matching how every other
// builder error is handled in this type, instead of being discarded.
// NOTE(review): this assumes EnsureStructOrMap returns an error, as the
// previous "TODO: handle error" comment indicated; confirm against the
// builder implementation.
func (u *unmarshaler) StandardTableEnd() {
	u.parsingTable = false
	if u.skipping() || u.err != nil {
		return
	}
	u.err = u.builder.EnsureStructOrMap()
}
-361
View File
@@ -1,361 +0,0 @@
package toml_test
import (
"testing"
"github.com/pelletier/go-toml/v2"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestUnmarshalSimple(t *testing.T) {
x := struct{ Foo string }{}
err := toml.Unmarshal([]byte(`Foo = "hello"`), &x)
require.NoError(t, err)
assert.Equal(t, "hello", x.Foo)
}
func TestUnmarshalInt(t *testing.T) {
x := struct{ Foo int }{}
err := toml.Unmarshal([]byte(`Foo = 42`), &x)
require.NoError(t, err)
assert.Equal(t, 42, x.Foo)
}
func TestUnmarshalNestedStructs(t *testing.T) {
x := struct{ Foo struct{ Bar string } }{}
err := toml.Unmarshal([]byte(`Foo.Bar = "hello"`), &x)
require.NoError(t, err)
assert.Equal(t, "hello", x.Foo.Bar)
}
func TestUnmarshalNestedStructsMultipleExpressions(t *testing.T) {
x := struct {
A struct{ B string }
C string
}{}
err := toml.Unmarshal([]byte(`A.B = "hello"
C = "test"`), &x)
require.NoError(t, err)
assert.Equal(t, "hello", x.A.B)
assert.Equal(t, "test", x.C)
}
func TestUnmarshalTable(t *testing.T) {
x := struct {
Foo struct {
A string
B string
C string
}
Bar struct {
D string
}
E string
}{}
err := toml.Unmarshal([]byte(`
E = "E"
Foo.C = "C"
[Foo]
A = "A"
B = 'B'
[Bar]
D = "D"
`), &x)
require.NoError(t, err)
assert.Equal(t, "A", x.Foo.A)
assert.Equal(t, "B", x.Foo.B)
assert.Equal(t, "C", x.Foo.C)
assert.Equal(t, "D", x.Bar.D)
assert.Equal(t, "E", x.E)
}
func TestUnmarshalDoesNotEraseBaseStruct(t *testing.T) {
x := struct {
A string
B string
}{
A: "preset",
}
err := toml.Unmarshal([]byte(`B = "data"`), &x)
require.NoError(t, err)
assert.Equal(t, "preset", x.A)
assert.Equal(t, "data", x.B)
}
func TestArrayTableSimple(t *testing.T) {
doc := `
[[Products]]
Name = "Hammer"
[[Products]]
Name = "Nail"
`
type Product struct {
Name string
}
type Data struct {
Products []Product
}
x := Data{}
err := toml.Unmarshal([]byte(doc), &x)
require.NoError(t, err)
expected := Data{
Products: []Product{
{
Name: "Hammer",
},
{
Name: "Nail",
},
},
}
assert.Equal(t, expected, x)
}
func TestUnmarshalArrayTablesMultiple(t *testing.T) {
doc := `
[[Products]]
Name = "Hammer"
Sku = "738594937"
[[Products]] # empty table within the array
[[Products]]
Name = "Nail"
Sku = "284758393"
Color = "gray"
`
type Product struct {
Name string
Sku string
Color string
}
type Data struct {
Products []Product
}
x := Data{}
err := toml.Unmarshal([]byte(doc), &x)
require.NoError(t, err)
expected := Data{
Products: []Product{
{
Name: "Hammer",
Sku: "738594937",
},
{},
{
Name: "Nail",
Sku: "284758393",
Color: "gray",
},
},
}
assert.Equal(t, expected, x)
}
func TestUnmarshalArrayTablesNested(t *testing.T) {
doc := `
[[Fruits]]
Name = "apple"
[Fruits.Physical] # subtable
Color = "red"
Shape = "round"
[[Fruits.Varieties]] # nested array of tables
Name = "red delicious"
[[Fruits.Varieties]]
Name = "granny smith"
[[Fruits]]
Name = "banana"
[[Fruits.Varieties]]
Name = "plantain"
`
type Variety struct {
Name string
}
type Physical struct {
Color string
Shape string
}
type Fruit struct {
Name string
Physical Physical
Varieties []Variety
}
type Doc struct {
Fruits []Fruit
}
x := Doc{}
err := toml.Unmarshal([]byte(doc), &x)
require.NoError(t, err)
expected := Doc{
Fruits: []Fruit{
{
Name: "apple",
Physical: Physical{
Color: "red",
Shape: "round",
},
Varieties: []Variety{
{Name: "red delicious"},
{Name: "granny smith"},
},
},
{
Name: "banana",
Varieties: []Variety{
{Name: "plantain"},
},
},
},
}
assert.Equal(t, expected, x)
}
func TestUnmarshalArraySimple(t *testing.T) {
x := struct {
Foo []string
}{}
doc := `Foo = ["hello", "world"]`
err := toml.Unmarshal([]byte(doc), &x)
require.NoError(t, err)
assert.Equal(t, []string{"hello", "world"}, x.Foo)
}
func TestUnmarshalArrayNestedInTable(t *testing.T) {
x := struct {
Wrapper struct {
Foo []string
}
}{}
doc := `[Wrapper]
Foo = ["hello", "world"]`
err := toml.Unmarshal([]byte(doc), &x)
require.NoError(t, err)
assert.Equal(t, []string{"hello", "world"}, x.Wrapper.Foo)
}
func TestUnmarshalArrayMixed(t *testing.T) {
x := struct {
Wrapper struct {
Foo []interface{}
}
}{}
doc := `[Wrapper]
Foo = ["hello", true]`
err := toml.Unmarshal([]byte(doc), &x)
require.NoError(t, err)
assert.Equal(t, []interface{}{"hello", true}, x.Wrapper.Foo)
}
func TestUnmarshalArrayNested(t *testing.T) {
x := struct {
Foo [][]string
}{}
doc := `Foo = [["hello", "world"], ["a"], []]`
err := toml.Unmarshal([]byte(doc), &x)
require.NoError(t, err)
assert.Equal(t, [][]string{{"hello", "world"}, {"a"}, nil}, x.Foo)
}
func TestUnmarshalBool(t *testing.T) {
x := struct {
Truthy bool
Falsey bool
}{Falsey: true}
doc := `Truthy = true
Falsey = false`
err := toml.Unmarshal([]byte(doc), &x)
require.NoError(t, err)
assert.Equal(t, true, x.Truthy)
assert.Equal(t, false, x.Falsey)
}
func TestUnmarshalBoolArray(t *testing.T) {
x := struct{ Bits []bool }{}
doc := `Bits = [true, false, true, true]`
err := toml.Unmarshal([]byte(doc), &x)
require.NoError(t, err)
assert.Equal(t, []bool{true, false, true, true}, x.Bits)
}
func TestUnmarshalInlineTable(t *testing.T) {
doc := `
Name = { First = "Tom", Last = "Preston-Werner" }
Point = { X = "1", Y = "2" }
Animal = { Type.Name = "pug" }`
type Name struct {
First string
Last string
}
type Point struct {
X string
Y string
}
type Type struct {
Name string
}
type Animal struct {
Type Type
}
type Doc struct {
Name Name
Point Point
Animal Animal
}
x := Doc{}
err := toml.Unmarshal([]byte(doc), &x)
require.NoError(t, err)
expected := Doc{
Name: Name{
First: "Tom",
Last: "Preston-Werner",
},
Point: Point{
X: "1",
Y: "2",
},
Animal: Animal{
Type: Type{
Name: "pug",
},
},
}
assert.Equal(t, expected, x)
}
@@ -1,4 +1,4 @@
package unmarshaler
package toml
import (
"fmt"
@@ -1,4 +1,4 @@
package unmarshaler
package toml
import (
"math"