Fix parsing of single quoted keys (#201)
Patch #193 doesn't work correctly: escape sequences must be handled by the lexer, so `parseKey()` must not process them. Ref #61
This commit is contained in:
committed by
Thomas Pelletier
parent
9bf0212445
commit
4874e8477b
+14
-90
@@ -6,36 +6,16 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strconv"
|
|
||||||
"unicode"
|
"unicode"
|
||||||
)
|
)
|
||||||
|
|
||||||
var escapeSequenceMap = map[rune]rune{
|
// Convert the bare key group string to an array.
|
||||||
'b': '\b',
|
// The input supports double quotation to allow "." inside the key name,
|
||||||
't': '\t',
|
// but escape sequences are not supported. Lexers must unescape them beforehand.
|
||||||
'n': '\n',
|
|
||||||
'f': '\f',
|
|
||||||
'r': '\r',
|
|
||||||
'"': '"',
|
|
||||||
'\\': '\\',
|
|
||||||
}
|
|
||||||
|
|
||||||
type parseKeyState int
|
|
||||||
|
|
||||||
const (
|
|
||||||
bare parseKeyState = iota
|
|
||||||
basic
|
|
||||||
literal
|
|
||||||
esc
|
|
||||||
unicode4
|
|
||||||
unicode8
|
|
||||||
)
|
|
||||||
|
|
||||||
func parseKey(key string) ([]string, error) {
|
func parseKey(key string) ([]string, error) {
|
||||||
groups := []string{}
|
groups := []string{}
|
||||||
var buffer bytes.Buffer
|
var buffer bytes.Buffer
|
||||||
var hex bytes.Buffer
|
inQuotes := false
|
||||||
state := bare
|
|
||||||
wasInQuotes := false
|
wasInQuotes := false
|
||||||
ignoreSpace := true
|
ignoreSpace := true
|
||||||
expectDot := false
|
expectDot := false
|
||||||
@@ -47,67 +27,17 @@ func parseKey(key string) ([]string, error) {
|
|||||||
}
|
}
|
||||||
ignoreSpace = false
|
ignoreSpace = false
|
||||||
}
|
}
|
||||||
|
|
||||||
if state == esc {
|
|
||||||
if char == 'u' {
|
|
||||||
state = unicode4
|
|
||||||
hex.Reset()
|
|
||||||
} else if char == 'U' {
|
|
||||||
state = unicode8
|
|
||||||
hex.Reset()
|
|
||||||
} else if newChar, ok := escapeSequenceMap[char]; ok {
|
|
||||||
buffer.WriteRune(newChar)
|
|
||||||
state = basic
|
|
||||||
} else {
|
|
||||||
return nil, fmt.Errorf(`invalid escape sequence \%c`, char)
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if state == unicode4 || state == unicode8 {
|
|
||||||
if isHexDigit(char) {
|
|
||||||
hex.WriteRune(char)
|
|
||||||
}
|
|
||||||
if (state == unicode4 && hex.Len() == 4) || (state == unicode8 && hex.Len() == 8) {
|
|
||||||
if value, err := strconv.ParseInt(hex.String(), 16, 32); err == nil {
|
|
||||||
buffer.WriteRune(rune(value))
|
|
||||||
} else {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
state = basic
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
switch char {
|
switch char {
|
||||||
case '\\':
|
|
||||||
if state == basic {
|
|
||||||
state = esc
|
|
||||||
} else if state == literal {
|
|
||||||
buffer.WriteRune(char)
|
|
||||||
}
|
|
||||||
case '\'':
|
|
||||||
if state == bare {
|
|
||||||
state = literal
|
|
||||||
} else if state == literal {
|
|
||||||
groups = append(groups, buffer.String())
|
|
||||||
buffer.Reset()
|
|
||||||
wasInQuotes = true
|
|
||||||
state = bare
|
|
||||||
}
|
|
||||||
expectDot = false
|
|
||||||
case '"':
|
case '"':
|
||||||
if state == bare {
|
if inQuotes {
|
||||||
state = basic
|
|
||||||
} else if state == basic {
|
|
||||||
groups = append(groups, buffer.String())
|
groups = append(groups, buffer.String())
|
||||||
buffer.Reset()
|
buffer.Reset()
|
||||||
state = bare
|
|
||||||
wasInQuotes = true
|
wasInQuotes = true
|
||||||
}
|
}
|
||||||
|
inQuotes = !inQuotes
|
||||||
expectDot = false
|
expectDot = false
|
||||||
case '.':
|
case '.':
|
||||||
if state != bare {
|
if inQuotes {
|
||||||
buffer.WriteRune(char)
|
buffer.WriteRune(char)
|
||||||
} else {
|
} else {
|
||||||
if !wasInQuotes {
|
if !wasInQuotes {
|
||||||
@@ -122,31 +52,25 @@ func parseKey(key string) ([]string, error) {
|
|||||||
wasInQuotes = false
|
wasInQuotes = false
|
||||||
}
|
}
|
||||||
case ' ':
|
case ' ':
|
||||||
if state == basic {
|
if inQuotes {
|
||||||
buffer.WriteRune(char)
|
buffer.WriteRune(char)
|
||||||
} else {
|
} else {
|
||||||
expectDot = true
|
expectDot = true
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
if state == bare {
|
if !inQuotes && !isValidBareChar(char) {
|
||||||
if !isValidBareChar(char) {
|
return nil, fmt.Errorf("invalid bare character: %c", char)
|
||||||
return nil, fmt.Errorf("invalid bare character: %c", char)
|
}
|
||||||
} else if expectDot {
|
if !inQuotes && expectDot {
|
||||||
return nil, errors.New("what?")
|
return nil, errors.New("what?")
|
||||||
}
|
|
||||||
}
|
}
|
||||||
buffer.WriteRune(char)
|
buffer.WriteRune(char)
|
||||||
expectDot = false
|
expectDot = false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if inQuotes {
|
||||||
// state must be bare at the end
|
|
||||||
if state == esc {
|
|
||||||
return nil, errors.New("unfinished escape sequence")
|
|
||||||
} else if state != bare {
|
|
||||||
return nil, errors.New("mismatched quotes")
|
return nil, errors.New("mismatched quotes")
|
||||||
}
|
}
|
||||||
|
|
||||||
if buffer.Len() > 0 {
|
if buffer.Len() > 0 {
|
||||||
groups = append(groups, buffer.String())
|
groups = append(groups, buffer.String())
|
||||||
}
|
}
|
||||||
|
|||||||
+3
-10
@@ -50,17 +50,10 @@ func TestBaseKeyPound(t *testing.T) {
|
|||||||
func TestQuotedKeys(t *testing.T) {
|
func TestQuotedKeys(t *testing.T) {
|
||||||
testResult(t, `hello."foo".bar`, []string{"hello", "foo", "bar"})
|
testResult(t, `hello."foo".bar`, []string{"hello", "foo", "bar"})
|
||||||
testResult(t, `"hello!"`, []string{"hello!"})
|
testResult(t, `"hello!"`, []string{"hello!"})
|
||||||
testResult(t, `"hello\tworld"`, []string{"hello\tworld"})
|
testResult(t, `foo."ba.r".baz`, []string{"foo", "ba.r", "baz"})
|
||||||
testResult(t, `"\U0001F914"`, []string{"\U0001F914"})
|
|
||||||
testResult(t, `"\u2764"`, []string{"\u2764"})
|
|
||||||
|
|
||||||
testResult(t, `hello.'foo'.bar`, []string{"hello", "foo", "bar"})
|
// escape sequences must not be converted
|
||||||
testResult(t, `'hello!'`, []string{"hello!"})
|
testResult(t, `"hello\tworld"`, []string{`hello\tworld`})
|
||||||
testResult(t, `'hello\tworld'`, []string{`hello\tworld`})
|
|
||||||
|
|
||||||
testError(t, `"\w"`, `invalid escape sequence \w`)
|
|
||||||
testError(t, `"\`, `unfinished escape sequence`)
|
|
||||||
testError(t, `"\t`, `mismatched quotes`)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestEmptyKey(t *testing.T) {
|
func TestEmptyKey(t *testing.T) {
|
||||||
|
|||||||
@@ -277,6 +277,8 @@ func (l *tomlLexer) lexComma() tomlLexStateFn {
|
|||||||
return l.lexRvalue
|
return l.lexRvalue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parses the key and emits its value without escape sequences.
|
||||||
|
// Bare keys, basic string keys and literal string keys are supported.
|
||||||
func (l *tomlLexer) lexKey() tomlLexStateFn {
|
func (l *tomlLexer) lexKey() tomlLexStateFn {
|
||||||
growingString := ""
|
growingString := ""
|
||||||
|
|
||||||
@@ -287,7 +289,16 @@ func (l *tomlLexer) lexKey() tomlLexStateFn {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return l.errorf(err.Error())
|
return l.errorf(err.Error())
|
||||||
}
|
}
|
||||||
growingString += `"` + str + `"`
|
growingString += str
|
||||||
|
l.next()
|
||||||
|
continue
|
||||||
|
} else if r == '\'' {
|
||||||
|
l.next()
|
||||||
|
str, err := l.lexLiteralStringAsString(`'`, false)
|
||||||
|
if err != nil {
|
||||||
|
return l.errorf(err.Error())
|
||||||
|
}
|
||||||
|
growingString += str
|
||||||
l.next()
|
l.next()
|
||||||
continue
|
continue
|
||||||
} else if r == '\n' {
|
} else if r == '\n' {
|
||||||
@@ -527,6 +538,7 @@ func (l *tomlLexer) lexTableKey() tomlLexStateFn {
|
|||||||
return l.lexInsideTableKey
|
return l.lexInsideTableKey
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parse the key till "]]", but only bare keys are supported
|
||||||
func (l *tomlLexer) lexInsideTableArrayKey() tomlLexStateFn {
|
func (l *tomlLexer) lexInsideTableArrayKey() tomlLexStateFn {
|
||||||
for r := l.peek(); r != eof; r = l.peek() {
|
for r := l.peek(); r != eof; r = l.peek() {
|
||||||
switch r {
|
switch r {
|
||||||
@@ -550,6 +562,7 @@ func (l *tomlLexer) lexInsideTableArrayKey() tomlLexStateFn {
|
|||||||
return l.errorf("unclosed table array key")
|
return l.errorf("unclosed table array key")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parse the key till "]" but only bare keys are supported
|
||||||
func (l *tomlLexer) lexInsideTableKey() tomlLexStateFn {
|
func (l *tomlLexer) lexInsideTableKey() tomlLexStateFn {
|
||||||
for r := l.peek(); r != eof; r = l.peek() {
|
for r := l.peek(); r != eof; r = l.peek() {
|
||||||
switch r {
|
switch r {
|
||||||
|
|||||||
+1
-1
@@ -690,7 +690,7 @@ func TestKeyGroupArray(t *testing.T) {
|
|||||||
|
|
||||||
func TestQuotedKey(t *testing.T) {
|
func TestQuotedKey(t *testing.T) {
|
||||||
testFlow(t, "\"a b\" = 42", []token{
|
testFlow(t, "\"a b\" = 42", []token{
|
||||||
{Position{1, 1}, tokenKey, "\"a b\""},
|
{Position{1, 1}, tokenKey, "a b"},
|
||||||
{Position{1, 7}, tokenEqual, "="},
|
{Position{1, 7}, tokenEqual, "="},
|
||||||
{Position{1, 9}, tokenInteger, "42"},
|
{Position{1, 9}, tokenInteger, "42"},
|
||||||
{Position{1, 11}, tokenEOF, ""},
|
{Position{1, 11}, tokenEOF, ""},
|
||||||
|
|||||||
@@ -185,10 +185,7 @@ func (p *tomlParser) parseAssign() tomlParserStateFn {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// assign value to the found table
|
// assign value to the found table
|
||||||
keyVals, err := parseKey(key.val)
|
keyVals := []string{key.val}
|
||||||
if err != nil {
|
|
||||||
p.raiseError(key, "%s", err)
|
|
||||||
}
|
|
||||||
if len(keyVals) != 1 {
|
if len(keyVals) != 1 {
|
||||||
p.raiseError(key, "Invalid key")
|
p.raiseError(key, "Invalid key")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -72,6 +72,17 @@ func TestNumberInKey(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestIncorrectKeyExtraSquareBracket(t *testing.T) {
|
||||||
|
_, err := Load(`[a]b]
|
||||||
|
zyx = 42`)
|
||||||
|
if err == nil {
|
||||||
|
t.Error("Error should have been returned.")
|
||||||
|
}
|
||||||
|
if err.Error() != "(1, 4): unexpected token" {
|
||||||
|
t.Error("Bad error message:", err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestSimpleNumbers(t *testing.T) {
|
func TestSimpleNumbers(t *testing.T) {
|
||||||
tree, err := Load("a = +42\nb = -21\nc = +4.2\nd = -2.1")
|
tree, err := Load("a = +42\nb = -21\nc = +4.2\nd = -2.1")
|
||||||
assertTree(t, tree, err, map[string]interface{}{
|
assertTree(t, tree, err, map[string]interface{}{
|
||||||
@@ -208,6 +219,36 @@ func TestSpaceKey(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestDoubleQuotedKey(t *testing.T) {
|
||||||
|
tree, err := Load(`
|
||||||
|
"key" = "a"
|
||||||
|
"\t" = "b"
|
||||||
|
"\U0001F914" = "c"
|
||||||
|
"\u2764" = "d"
|
||||||
|
`)
|
||||||
|
assertTree(t, tree, err, map[string]interface{}{
|
||||||
|
"key": "a",
|
||||||
|
"\t": "b",
|
||||||
|
"\U0001F914": "c",
|
||||||
|
"\u2764": "d",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSingleQuotedKey(t *testing.T) {
|
||||||
|
tree, err := Load(`
|
||||||
|
'key' = "a"
|
||||||
|
'\t' = "b"
|
||||||
|
'\U0001F914' = "c"
|
||||||
|
'\u2764' = "d"
|
||||||
|
`)
|
||||||
|
assertTree(t, tree, err, map[string]interface{}{
|
||||||
|
`key`: "a",
|
||||||
|
`\t`: "b",
|
||||||
|
`\U0001F914`: "c",
|
||||||
|
`\u2764`: "d",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
func TestStringEscapables(t *testing.T) {
|
func TestStringEscapables(t *testing.T) {
|
||||||
tree, err := Load("a = \"a \\n b\"")
|
tree, err := Load("a = \"a \\n b\"")
|
||||||
assertTree(t, tree, err, map[string]interface{}{
|
assertTree(t, tree, err, map[string]interface{}{
|
||||||
|
|||||||
@@ -71,18 +71,15 @@ func (t *Tree) Keys() []string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Get the value at key in the Tree.
|
// Get the value at key in the Tree.
|
||||||
// Key is a dot-separated path (e.g. a.b.c).
|
// Key is a dot-separated path (e.g. a.b.c) without single/double quoted strings.
|
||||||
|
// If you need to retrieve non-bare keys, use GetPath.
|
||||||
// Returns nil if the path does not exist in the tree.
|
// Returns nil if the path does not exist in the tree.
|
||||||
// If keys is of length zero, the current tree is returned.
|
// If keys is of length zero, the current tree is returned.
|
||||||
func (t *Tree) Get(key string) interface{} {
|
func (t *Tree) Get(key string) interface{} {
|
||||||
if key == "" {
|
if key == "" {
|
||||||
return t
|
return t
|
||||||
}
|
}
|
||||||
comps, err := parseKey(key)
|
return t.GetPath(strings.Split(key, "."))
|
||||||
if err != nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return t.GetPath(comps)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetPath returns the element in the tree indicated by 'keys'.
|
// GetPath returns the element in the tree indicated by 'keys'.
|
||||||
|
|||||||
Reference in New Issue
Block a user