Fix parsing of single quoted keys (#201)
Patch #193 doesn't work correctly because escape sequences must be handled by the lexer; `parseKey()` must not handle them. Ref #61
This commit is contained in:
committed by
Thomas Pelletier
parent
9bf0212445
commit
4874e8477b
+14
-90
@@ -6,36 +6,16 @@ import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
var escapeSequenceMap = map[rune]rune{
|
||||
'b': '\b',
|
||||
't': '\t',
|
||||
'n': '\n',
|
||||
'f': '\f',
|
||||
'r': '\r',
|
||||
'"': '"',
|
||||
'\\': '\\',
|
||||
}
|
||||
|
||||
type parseKeyState int
|
||||
|
||||
const (
|
||||
bare parseKeyState = iota
|
||||
basic
|
||||
literal
|
||||
esc
|
||||
unicode4
|
||||
unicode8
|
||||
)
|
||||
|
||||
// Convert the bare key group string to an array.
|
||||
// The input supports double quotation to allow "." inside the key name,
|
||||
// but escape sequences are not supported. Lexers must unescape them beforehand.
|
||||
func parseKey(key string) ([]string, error) {
|
||||
groups := []string{}
|
||||
var buffer bytes.Buffer
|
||||
var hex bytes.Buffer
|
||||
state := bare
|
||||
inQuotes := false
|
||||
wasInQuotes := false
|
||||
ignoreSpace := true
|
||||
expectDot := false
|
||||
@@ -47,67 +27,17 @@ func parseKey(key string) ([]string, error) {
|
||||
}
|
||||
ignoreSpace = false
|
||||
}
|
||||
|
||||
if state == esc {
|
||||
if char == 'u' {
|
||||
state = unicode4
|
||||
hex.Reset()
|
||||
} else if char == 'U' {
|
||||
state = unicode8
|
||||
hex.Reset()
|
||||
} else if newChar, ok := escapeSequenceMap[char]; ok {
|
||||
buffer.WriteRune(newChar)
|
||||
state = basic
|
||||
} else {
|
||||
return nil, fmt.Errorf(`invalid escape sequence \%c`, char)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if state == unicode4 || state == unicode8 {
|
||||
if isHexDigit(char) {
|
||||
hex.WriteRune(char)
|
||||
}
|
||||
if (state == unicode4 && hex.Len() == 4) || (state == unicode8 && hex.Len() == 8) {
|
||||
if value, err := strconv.ParseInt(hex.String(), 16, 32); err == nil {
|
||||
buffer.WriteRune(rune(value))
|
||||
} else {
|
||||
return nil, err
|
||||
}
|
||||
state = basic
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
switch char {
|
||||
case '\\':
|
||||
if state == basic {
|
||||
state = esc
|
||||
} else if state == literal {
|
||||
buffer.WriteRune(char)
|
||||
}
|
||||
case '\'':
|
||||
if state == bare {
|
||||
state = literal
|
||||
} else if state == literal {
|
||||
groups = append(groups, buffer.String())
|
||||
buffer.Reset()
|
||||
wasInQuotes = true
|
||||
state = bare
|
||||
}
|
||||
expectDot = false
|
||||
case '"':
|
||||
if state == bare {
|
||||
state = basic
|
||||
} else if state == basic {
|
||||
if inQuotes {
|
||||
groups = append(groups, buffer.String())
|
||||
buffer.Reset()
|
||||
state = bare
|
||||
wasInQuotes = true
|
||||
}
|
||||
inQuotes = !inQuotes
|
||||
expectDot = false
|
||||
case '.':
|
||||
if state != bare {
|
||||
if inQuotes {
|
||||
buffer.WriteRune(char)
|
||||
} else {
|
||||
if !wasInQuotes {
|
||||
@@ -122,31 +52,25 @@ func parseKey(key string) ([]string, error) {
|
||||
wasInQuotes = false
|
||||
}
|
||||
case ' ':
|
||||
if state == basic {
|
||||
if inQuotes {
|
||||
buffer.WriteRune(char)
|
||||
} else {
|
||||
expectDot = true
|
||||
}
|
||||
default:
|
||||
if state == bare {
|
||||
if !isValidBareChar(char) {
|
||||
return nil, fmt.Errorf("invalid bare character: %c", char)
|
||||
} else if expectDot {
|
||||
return nil, errors.New("what?")
|
||||
}
|
||||
if !inQuotes && !isValidBareChar(char) {
|
||||
return nil, fmt.Errorf("invalid bare character: %c", char)
|
||||
}
|
||||
if !inQuotes && expectDot {
|
||||
return nil, errors.New("what?")
|
||||
}
|
||||
buffer.WriteRune(char)
|
||||
expectDot = false
|
||||
}
|
||||
}
|
||||
|
||||
// state must be bare at the end
|
||||
if state == esc {
|
||||
return nil, errors.New("unfinished escape sequence")
|
||||
} else if state != bare {
|
||||
if inQuotes {
|
||||
return nil, errors.New("mismatched quotes")
|
||||
}
|
||||
|
||||
if buffer.Len() > 0 {
|
||||
groups = append(groups, buffer.String())
|
||||
}
|
||||
|
||||
+3
-10
@@ -50,17 +50,10 @@ func TestBaseKeyPound(t *testing.T) {
|
||||
func TestQuotedKeys(t *testing.T) {
|
||||
testResult(t, `hello."foo".bar`, []string{"hello", "foo", "bar"})
|
||||
testResult(t, `"hello!"`, []string{"hello!"})
|
||||
testResult(t, `"hello\tworld"`, []string{"hello\tworld"})
|
||||
testResult(t, `"\U0001F914"`, []string{"\U0001F914"})
|
||||
testResult(t, `"\u2764"`, []string{"\u2764"})
|
||||
testResult(t, `foo."ba.r".baz`, []string{"foo", "ba.r", "baz"})
|
||||
|
||||
testResult(t, `hello.'foo'.bar`, []string{"hello", "foo", "bar"})
|
||||
testResult(t, `'hello!'`, []string{"hello!"})
|
||||
testResult(t, `'hello\tworld'`, []string{`hello\tworld`})
|
||||
|
||||
testError(t, `"\w"`, `invalid escape sequence \w`)
|
||||
testError(t, `"\`, `unfinished escape sequence`)
|
||||
testError(t, `"\t`, `mismatched quotes`)
|
||||
// escape sequences must not be converted
|
||||
testResult(t, `"hello\tworld"`, []string{`hello\tworld`})
|
||||
}
|
||||
|
||||
func TestEmptyKey(t *testing.T) {
|
||||
|
||||
@@ -277,6 +277,8 @@ func (l *tomlLexer) lexComma() tomlLexStateFn {
|
||||
return l.lexRvalue
|
||||
}
|
||||
|
||||
// Parses the key and emits its value without escape sequences.
|
||||
// Bare keys, basic string keys and literal string keys are supported.
|
||||
func (l *tomlLexer) lexKey() tomlLexStateFn {
|
||||
growingString := ""
|
||||
|
||||
@@ -287,7 +289,16 @@ func (l *tomlLexer) lexKey() tomlLexStateFn {
|
||||
if err != nil {
|
||||
return l.errorf(err.Error())
|
||||
}
|
||||
growingString += `"` + str + `"`
|
||||
growingString += str
|
||||
l.next()
|
||||
continue
|
||||
} else if r == '\'' {
|
||||
l.next()
|
||||
str, err := l.lexLiteralStringAsString(`'`, false)
|
||||
if err != nil {
|
||||
return l.errorf(err.Error())
|
||||
}
|
||||
growingString += str
|
||||
l.next()
|
||||
continue
|
||||
} else if r == '\n' {
|
||||
@@ -527,6 +538,7 @@ func (l *tomlLexer) lexTableKey() tomlLexStateFn {
|
||||
return l.lexInsideTableKey
|
||||
}
|
||||
|
||||
// Parse the key till "]]", but only bare keys are supported
|
||||
func (l *tomlLexer) lexInsideTableArrayKey() tomlLexStateFn {
|
||||
for r := l.peek(); r != eof; r = l.peek() {
|
||||
switch r {
|
||||
@@ -550,6 +562,7 @@ func (l *tomlLexer) lexInsideTableArrayKey() tomlLexStateFn {
|
||||
return l.errorf("unclosed table array key")
|
||||
}
|
||||
|
||||
// Parse the key till "]" but only bare keys are supported
|
||||
func (l *tomlLexer) lexInsideTableKey() tomlLexStateFn {
|
||||
for r := l.peek(); r != eof; r = l.peek() {
|
||||
switch r {
|
||||
|
||||
+1
-1
@@ -690,7 +690,7 @@ func TestKeyGroupArray(t *testing.T) {
|
||||
|
||||
func TestQuotedKey(t *testing.T) {
|
||||
testFlow(t, "\"a b\" = 42", []token{
|
||||
{Position{1, 1}, tokenKey, "\"a b\""},
|
||||
{Position{1, 1}, tokenKey, "a b"},
|
||||
{Position{1, 7}, tokenEqual, "="},
|
||||
{Position{1, 9}, tokenInteger, "42"},
|
||||
{Position{1, 11}, tokenEOF, ""},
|
||||
|
||||
@@ -185,10 +185,7 @@ func (p *tomlParser) parseAssign() tomlParserStateFn {
|
||||
}
|
||||
|
||||
// assign value to the found table
|
||||
keyVals, err := parseKey(key.val)
|
||||
if err != nil {
|
||||
p.raiseError(key, "%s", err)
|
||||
}
|
||||
keyVals := []string{key.val}
|
||||
if len(keyVals) != 1 {
|
||||
p.raiseError(key, "Invalid key")
|
||||
}
|
||||
|
||||
@@ -72,6 +72,17 @@ func TestNumberInKey(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
func TestIncorrectKeyExtraSquareBracket(t *testing.T) {
|
||||
_, err := Load(`[a]b]
|
||||
zyx = 42`)
|
||||
if err == nil {
|
||||
t.Error("Error should have been returned.")
|
||||
}
|
||||
if err.Error() != "(1, 4): unexpected token" {
|
||||
t.Error("Bad error message:", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
func TestSimpleNumbers(t *testing.T) {
|
||||
tree, err := Load("a = +42\nb = -21\nc = +4.2\nd = -2.1")
|
||||
assertTree(t, tree, err, map[string]interface{}{
|
||||
@@ -208,6 +219,36 @@ func TestSpaceKey(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
func TestDoubleQuotedKey(t *testing.T) {
|
||||
tree, err := Load(`
|
||||
"key" = "a"
|
||||
"\t" = "b"
|
||||
"\U0001F914" = "c"
|
||||
"\u2764" = "d"
|
||||
`)
|
||||
assertTree(t, tree, err, map[string]interface{}{
|
||||
"key": "a",
|
||||
"\t": "b",
|
||||
"\U0001F914": "c",
|
||||
"\u2764": "d",
|
||||
})
|
||||
}
|
||||
|
||||
func TestSingleQuotedKey(t *testing.T) {
|
||||
tree, err := Load(`
|
||||
'key' = "a"
|
||||
'\t' = "b"
|
||||
'\U0001F914' = "c"
|
||||
'\u2764' = "d"
|
||||
`)
|
||||
assertTree(t, tree, err, map[string]interface{}{
|
||||
`key`: "a",
|
||||
`\t`: "b",
|
||||
`\U0001F914`: "c",
|
||||
`\u2764`: "d",
|
||||
})
|
||||
}
|
||||
|
||||
func TestStringEscapables(t *testing.T) {
|
||||
tree, err := Load("a = \"a \\n b\"")
|
||||
assertTree(t, tree, err, map[string]interface{}{
|
||||
|
||||
@@ -71,18 +71,15 @@ func (t *Tree) Keys() []string {
|
||||
}
|
||||
|
||||
// Get the value at key in the Tree.
|
||||
// Key is a dot-separated path (e.g. a.b.c).
|
||||
// Key is a dot-separated path (e.g. a.b.c) without single/double quoted strings.
|
||||
// If you need to retrieve non-bare keys, use GetPath.
|
||||
// Returns nil if the path does not exist in the tree.
|
||||
// If keys is of length zero, the current tree is returned.
|
||||
func (t *Tree) Get(key string) interface{} {
|
||||
if key == "" {
|
||||
return t
|
||||
}
|
||||
comps, err := parseKey(key)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
return t.GetPath(comps)
|
||||
return t.GetPath(strings.Split(key, "."))
|
||||
}
|
||||
|
||||
// GetPath returns the element in the tree indicated by 'keys'.
|
||||
|
||||
Reference in New Issue
Block a user