Files
go-toml/parser.go
T
Thomas Pelletier 861c4734ac Support for hex, oct, and bin integers (#205)
Add support for non-decimal integers. At the time of writing, this is an
unreleased backward-compatible feature of TOML:

```
  Non-negative integer values may also be expressed in hexadecimal, octal, or
  binary. In these formats, leading zeros are allowed (after the prefix). Hex
  values are case insensitive. Underscores are allowed between digits (but
  not between the prefix and the value).

  # hexadecimal with prefix `0x`
  hex1 = 0xDEADBEEF
  hex2 = 0xdeadbeef
  hex3 = 0xdead_beef

  # octal with prefix `0o`
  oct1 = 0o01234567
  oct2 = 0o755 # useful for Unix file permissions

  # binary with prefix `0b`
  bin1 = 0b11010110
```

Fixes #204
2017-12-22 12:24:26 +01:00

426 lines
10 KiB
Go

// TOML Parser.
package toml
import (
"errors"
"fmt"
"reflect"
"regexp"
"strconv"
"strings"
"time"
)
type tomlParser struct {
flowIdx int
flow []token
tree *Tree
currentTable []string
seenTableKeys []string
}
type tomlParserStateFn func() tomlParserStateFn
// Formats and panics an error message based on a token
func (p *tomlParser) raiseError(tok *token, msg string, args ...interface{}) {
panic(tok.Position.String() + ": " + fmt.Sprintf(msg, args...))
}
func (p *tomlParser) run() {
for state := p.parseStart; state != nil; {
state = state()
}
}
func (p *tomlParser) peek() *token {
if p.flowIdx >= len(p.flow) {
return nil
}
return &p.flow[p.flowIdx]
}
func (p *tomlParser) assume(typ tokenType) {
tok := p.getToken()
if tok == nil {
p.raiseError(tok, "was expecting token %s, but token stream is empty", tok)
}
if tok.typ != typ {
p.raiseError(tok, "was expecting token %s, but got %s instead", typ, tok)
}
}
func (p *tomlParser) getToken() *token {
tok := p.peek()
if tok == nil {
return nil
}
p.flowIdx++
return tok
}
func (p *tomlParser) parseStart() tomlParserStateFn {
tok := p.peek()
// end of stream, parsing is finished
if tok == nil {
return nil
}
switch tok.typ {
case tokenDoubleLeftBracket:
return p.parseGroupArray
case tokenLeftBracket:
return p.parseGroup
case tokenKey:
return p.parseAssign
case tokenEOF:
return nil
default:
p.raiseError(tok, "unexpected token")
}
return nil
}
func (p *tomlParser) parseGroupArray() tomlParserStateFn {
startToken := p.getToken() // discard the [[
key := p.getToken()
if key.typ != tokenKeyGroupArray {
p.raiseError(key, "unexpected token %s, was expecting a table array key", key)
}
// get or create table array element at the indicated part in the path
keys, err := parseKey(key.val)
if err != nil {
p.raiseError(key, "invalid table array key: %s", err)
}
p.tree.createSubTree(keys[:len(keys)-1], startToken.Position) // create parent entries
destTree := p.tree.GetPath(keys)
var array []*Tree
if destTree == nil {
array = make([]*Tree, 0)
} else if target, ok := destTree.([]*Tree); ok && target != nil {
array = destTree.([]*Tree)
} else {
p.raiseError(key, "key %s is already assigned and not of type table array", key)
}
p.currentTable = keys
// add a new tree to the end of the table array
newTree := newTree()
newTree.position = startToken.Position
array = append(array, newTree)
p.tree.SetPath(p.currentTable, "", false, array)
// remove all keys that were children of this table array
prefix := key.val + "."
found := false
for ii := 0; ii < len(p.seenTableKeys); {
tableKey := p.seenTableKeys[ii]
if strings.HasPrefix(tableKey, prefix) {
p.seenTableKeys = append(p.seenTableKeys[:ii], p.seenTableKeys[ii+1:]...)
} else {
found = (tableKey == key.val)
ii++
}
}
// keep this key name from use by other kinds of assignments
if !found {
p.seenTableKeys = append(p.seenTableKeys, key.val)
}
// move to next parser state
p.assume(tokenDoubleRightBracket)
return p.parseStart
}
func (p *tomlParser) parseGroup() tomlParserStateFn {
startToken := p.getToken() // discard the [
key := p.getToken()
if key.typ != tokenKeyGroup {
p.raiseError(key, "unexpected token %s, was expecting a table key", key)
}
for _, item := range p.seenTableKeys {
if item == key.val {
p.raiseError(key, "duplicated tables")
}
}
p.seenTableKeys = append(p.seenTableKeys, key.val)
keys, err := parseKey(key.val)
if err != nil {
p.raiseError(key, "invalid table array key: %s", err)
}
if err := p.tree.createSubTree(keys, startToken.Position); err != nil {
p.raiseError(key, "%s", err)
}
p.assume(tokenRightBracket)
p.currentTable = keys
return p.parseStart
}
func (p *tomlParser) parseAssign() tomlParserStateFn {
key := p.getToken()
p.assume(tokenEqual)
value := p.parseRvalue()
var tableKey []string
if len(p.currentTable) > 0 {
tableKey = p.currentTable
} else {
tableKey = []string{}
}
// find the table to assign, looking out for arrays of tables
var targetNode *Tree
switch node := p.tree.GetPath(tableKey).(type) {
case []*Tree:
targetNode = node[len(node)-1]
case *Tree:
targetNode = node
default:
p.raiseError(key, "Unknown table type for path: %s",
strings.Join(tableKey, "."))
}
// assign value to the found table
keyVals, err := parseKey(key.val)
if err != nil {
p.raiseError(key, "%s", err)
}
if len(keyVals) != 1 {
p.raiseError(key, "Invalid key")
}
keyVal := keyVals[0]
localKey := []string{keyVal}
finalKey := append(tableKey, keyVal)
if targetNode.GetPath(localKey) != nil {
p.raiseError(key, "The following key was defined twice: %s",
strings.Join(finalKey, "."))
}
var toInsert interface{}
switch value.(type) {
case *Tree, []*Tree:
toInsert = value
default:
toInsert = &tomlValue{value: value, position: key.Position}
}
targetNode.values[keyVal] = toInsert
return p.parseStart
}
var numberUnderscoreInvalidRegexp *regexp.Regexp
var hexNumberUnderscoreInvalidRegexp *regexp.Regexp
func numberContainsInvalidUnderscore(value string) error {
if numberUnderscoreInvalidRegexp.MatchString(value) {
return errors.New("invalid use of _ in number")
}
return nil
}
func hexNumberContainsInvalidUnderscore(value string) error {
if hexNumberUnderscoreInvalidRegexp.MatchString(value) {
return errors.New("invalid use of _ in hex number")
}
return nil
}
func cleanupNumberToken(value string) string {
cleanedVal := strings.Replace(value, "_", "", -1)
return cleanedVal
}
func (p *tomlParser) parseRvalue() interface{} {
tok := p.getToken()
if tok == nil || tok.typ == tokenEOF {
p.raiseError(tok, "expecting a value")
}
switch tok.typ {
case tokenString:
return tok.val
case tokenTrue:
return true
case tokenFalse:
return false
case tokenInteger:
cleanedVal := cleanupNumberToken(tok.val)
var err error
var val int64
if len(cleanedVal) >= 3 && cleanedVal[0] == '0' {
switch cleanedVal[1] {
case 'x':
err = hexNumberContainsInvalidUnderscore(tok.val)
if err != nil {
p.raiseError(tok, "%s", err)
}
val, err = strconv.ParseInt(cleanedVal[2:], 16, 64)
case 'o':
err = numberContainsInvalidUnderscore(tok.val)
if err != nil {
p.raiseError(tok, "%s", err)
}
val, err = strconv.ParseInt(cleanedVal[2:], 8, 64)
case 'b':
err = numberContainsInvalidUnderscore(tok.val)
if err != nil {
p.raiseError(tok, "%s", err)
}
val, err = strconv.ParseInt(cleanedVal[2:], 2, 64)
default:
panic("invalid base") // the lexer should catch this first
}
} else {
err = numberContainsInvalidUnderscore(tok.val)
if err != nil {
p.raiseError(tok, "%s", err)
}
val, err = strconv.ParseInt(cleanedVal, 10, 64)
}
if err != nil {
p.raiseError(tok, "%s", err)
}
return val
case tokenFloat:
err := numberContainsInvalidUnderscore(tok.val)
if err != nil {
p.raiseError(tok, "%s", err)
}
cleanedVal := cleanupNumberToken(tok.val)
val, err := strconv.ParseFloat(cleanedVal, 64)
if err != nil {
p.raiseError(tok, "%s", err)
}
return val
case tokenDate:
val, err := time.ParseInLocation(time.RFC3339Nano, tok.val, time.UTC)
if err != nil {
p.raiseError(tok, "%s", err)
}
return val
case tokenLeftBracket:
return p.parseArray()
case tokenLeftCurlyBrace:
return p.parseInlineTable()
case tokenEqual:
p.raiseError(tok, "cannot have multiple equals for the same key")
case tokenError:
p.raiseError(tok, "%s", tok)
}
p.raiseError(tok, "never reached")
return nil
}
func tokenIsComma(t *token) bool {
return t != nil && t.typ == tokenComma
}
func (p *tomlParser) parseInlineTable() *Tree {
tree := newTree()
var previous *token
Loop:
for {
follow := p.peek()
if follow == nil || follow.typ == tokenEOF {
p.raiseError(follow, "unterminated inline table")
}
switch follow.typ {
case tokenRightCurlyBrace:
p.getToken()
break Loop
case tokenKey:
if !tokenIsComma(previous) && previous != nil {
p.raiseError(follow, "comma expected between fields in inline table")
}
key := p.getToken()
p.assume(tokenEqual)
value := p.parseRvalue()
tree.Set(key.val, "", false, value)
case tokenComma:
if previous == nil {
p.raiseError(follow, "inline table cannot start with a comma")
}
if tokenIsComma(previous) {
p.raiseError(follow, "need field between two commas in inline table")
}
p.getToken()
default:
p.raiseError(follow, "unexpected token type in inline table: %s", follow.typ.String())
}
previous = follow
}
if tokenIsComma(previous) {
p.raiseError(previous, "trailing comma at the end of inline table")
}
return tree
}
func (p *tomlParser) parseArray() interface{} {
var array []interface{}
arrayType := reflect.TypeOf(nil)
for {
follow := p.peek()
if follow == nil || follow.typ == tokenEOF {
p.raiseError(follow, "unterminated array")
}
if follow.typ == tokenRightBracket {
p.getToken()
break
}
val := p.parseRvalue()
if arrayType == nil {
arrayType = reflect.TypeOf(val)
}
if reflect.TypeOf(val) != arrayType {
p.raiseError(follow, "mixed types in array")
}
array = append(array, val)
follow = p.peek()
if follow == nil || follow.typ == tokenEOF {
p.raiseError(follow, "unterminated array")
}
if follow.typ != tokenRightBracket && follow.typ != tokenComma {
p.raiseError(follow, "missing comma")
}
if follow.typ == tokenComma {
p.getToken()
}
}
// An array of Trees is actually an array of inline
// tables, which is a shorthand for a table array. If the
// array was not converted from []interface{} to []*Tree,
// the two notations would not be equivalent.
if arrayType == reflect.TypeOf(newTree()) {
tomlArray := make([]*Tree, len(array))
for i, v := range array {
tomlArray[i] = v.(*Tree)
}
return tomlArray
}
return array
}
func parseToml(flow []token) *Tree {
result := newTree()
result.position = Position{1, 1}
parser := &tomlParser{
flowIdx: 0,
flow: flow,
tree: result,
currentTable: make([]string, 0),
seenTableKeys: make([]string, 0),
}
parser.run()
return result
}
func init() {
numberUnderscoreInvalidRegexp = regexp.MustCompile(`([^\d]_|_[^\d])|_$|^_`)
hexNumberUnderscoreInvalidRegexp = regexp.MustCompile(`(^0x_)|([^\da-f]_|_[^\da-f])|_$|^_`)
}