Fix parsing of single quoted keys (#201)

Patch #193 doesn't work correctly: escape sequences must be handled by the
lexer, and `parseKey()` must not handle them.

Ref #61
This commit is contained in:
Kazuyoshi Kato
2017-10-26 23:26:39 -07:00
committed by Thomas Pelletier
parent 9bf0212445
commit 4874e8477b
7 changed files with 77 additions and 112 deletions
+14 -90
View File
@@ -6,36 +6,16 @@ import (
"bytes"
"errors"
"fmt"
"strconv"
"unicode"
)
var escapeSequenceMap = map[rune]rune{
'b': '\b',
't': '\t',
'n': '\n',
'f': '\f',
'r': '\r',
'"': '"',
'\\': '\\',
}
type parseKeyState int
const (
bare parseKeyState = iota
basic
literal
esc
unicode4
unicode8
)
// Convert the bare key group string to an array.
// The input supports double quotation to allow "." inside the key name,
// but escape sequences are not supported. Lexers must unescape them beforehand.
func parseKey(key string) ([]string, error) {
groups := []string{}
var buffer bytes.Buffer
var hex bytes.Buffer
state := bare
inQuotes := false
wasInQuotes := false
ignoreSpace := true
expectDot := false
@@ -47,67 +27,17 @@ func parseKey(key string) ([]string, error) {
}
ignoreSpace = false
}
if state == esc {
if char == 'u' {
state = unicode4
hex.Reset()
} else if char == 'U' {
state = unicode8
hex.Reset()
} else if newChar, ok := escapeSequenceMap[char]; ok {
buffer.WriteRune(newChar)
state = basic
} else {
return nil, fmt.Errorf(`invalid escape sequence \%c`, char)
}
continue
}
if state == unicode4 || state == unicode8 {
if isHexDigit(char) {
hex.WriteRune(char)
}
if (state == unicode4 && hex.Len() == 4) || (state == unicode8 && hex.Len() == 8) {
if value, err := strconv.ParseInt(hex.String(), 16, 32); err == nil {
buffer.WriteRune(rune(value))
} else {
return nil, err
}
state = basic
}
continue
}
switch char {
case '\\':
if state == basic {
state = esc
} else if state == literal {
buffer.WriteRune(char)
}
case '\'':
if state == bare {
state = literal
} else if state == literal {
groups = append(groups, buffer.String())
buffer.Reset()
wasInQuotes = true
state = bare
}
expectDot = false
case '"':
if state == bare {
state = basic
} else if state == basic {
if inQuotes {
groups = append(groups, buffer.String())
buffer.Reset()
state = bare
wasInQuotes = true
}
inQuotes = !inQuotes
expectDot = false
case '.':
if state != bare {
if inQuotes {
buffer.WriteRune(char)
} else {
if !wasInQuotes {
@@ -122,31 +52,25 @@ func parseKey(key string) ([]string, error) {
wasInQuotes = false
}
case ' ':
if state == basic {
if inQuotes {
buffer.WriteRune(char)
} else {
expectDot = true
}
default:
if state == bare {
if !isValidBareChar(char) {
return nil, fmt.Errorf("invalid bare character: %c", char)
} else if expectDot {
return nil, errors.New("what?")
}
if !inQuotes && !isValidBareChar(char) {
return nil, fmt.Errorf("invalid bare character: %c", char)
}
if !inQuotes && expectDot {
return nil, errors.New("what?")
}
buffer.WriteRune(char)
expectDot = false
}
}
// state must be bare at the end
if state == esc {
return nil, errors.New("unfinished escape sequence")
} else if state != bare {
if inQuotes {
return nil, errors.New("mismatched quotes")
}
if buffer.Len() > 0 {
groups = append(groups, buffer.String())
}
+3 -10
View File
@@ -50,17 +50,10 @@ func TestBaseKeyPound(t *testing.T) {
func TestQuotedKeys(t *testing.T) {
testResult(t, `hello."foo".bar`, []string{"hello", "foo", "bar"})
testResult(t, `"hello!"`, []string{"hello!"})
testResult(t, `"hello\tworld"`, []string{"hello\tworld"})
testResult(t, `"\U0001F914"`, []string{"\U0001F914"})
testResult(t, `"\u2764"`, []string{"\u2764"})
testResult(t, `foo."ba.r".baz`, []string{"foo", "ba.r", "baz"})
testResult(t, `hello.'foo'.bar`, []string{"hello", "foo", "bar"})
testResult(t, `'hello!'`, []string{"hello!"})
testResult(t, `'hello\tworld'`, []string{`hello\tworld`})
testError(t, `"\w"`, `invalid escape sequence \w`)
testError(t, `"\`, `unfinished escape sequence`)
testError(t, `"\t`, `mismatched quotes`)
// escape sequences must not be converted
testResult(t, `"hello\tworld"`, []string{`hello\tworld`})
}
func TestEmptyKey(t *testing.T) {
+14 -1
View File
@@ -277,6 +277,8 @@ func (l *tomlLexer) lexComma() tomlLexStateFn {
return l.lexRvalue
}
// Parses the key and emits its value without escape sequences.
// Bare keys, basic string keys and literal string keys are supported.
func (l *tomlLexer) lexKey() tomlLexStateFn {
growingString := ""
@@ -287,7 +289,16 @@ func (l *tomlLexer) lexKey() tomlLexStateFn {
if err != nil {
return l.errorf(err.Error())
}
growingString += `"` + str + `"`
growingString += str
l.next()
continue
} else if r == '\'' {
l.next()
str, err := l.lexLiteralStringAsString(`'`, false)
if err != nil {
return l.errorf(err.Error())
}
growingString += str
l.next()
continue
} else if r == '\n' {
@@ -527,6 +538,7 @@ func (l *tomlLexer) lexTableKey() tomlLexStateFn {
return l.lexInsideTableKey
}
// Parses the key up to "]]"; only bare keys are supported.
func (l *tomlLexer) lexInsideTableArrayKey() tomlLexStateFn {
for r := l.peek(); r != eof; r = l.peek() {
switch r {
@@ -550,6 +562,7 @@ func (l *tomlLexer) lexInsideTableArrayKey() tomlLexStateFn {
return l.errorf("unclosed table array key")
}
// Parses the key up to "]"; only bare keys are supported.
func (l *tomlLexer) lexInsideTableKey() tomlLexStateFn {
for r := l.peek(); r != eof; r = l.peek() {
switch r {
+1 -1
View File
@@ -690,7 +690,7 @@ func TestKeyGroupArray(t *testing.T) {
func TestQuotedKey(t *testing.T) {
testFlow(t, "\"a b\" = 42", []token{
{Position{1, 1}, tokenKey, "\"a b\""},
{Position{1, 1}, tokenKey, "a b"},
{Position{1, 7}, tokenEqual, "="},
{Position{1, 9}, tokenInteger, "42"},
{Position{1, 11}, tokenEOF, ""},
+1 -4
View File
@@ -185,10 +185,7 @@ func (p *tomlParser) parseAssign() tomlParserStateFn {
}
// assign value to the found table
keyVals, err := parseKey(key.val)
if err != nil {
p.raiseError(key, "%s", err)
}
keyVals := []string{key.val}
if len(keyVals) != 1 {
p.raiseError(key, "Invalid key")
}
+41
View File
@@ -72,6 +72,17 @@ func TestNumberInKey(t *testing.T) {
})
}
// TestIncorrectKeyExtraSquareBracket checks that a stray "]" following a table
// header makes Load fail with a positioned "unexpected token" error.
func TestIncorrectKeyExtraSquareBracket(t *testing.T) {
	_, err := Load(`[a]b]
zyx = 42`)
	if err == nil {
		// Fatal rather than Error: continuing would call Error() on a nil
		// error below and panic instead of reporting a clean test failure.
		t.Fatal("Error should have been returned.")
	}
	if err.Error() != "(1, 4): unexpected token" {
		t.Error("Bad error message:", err.Error())
	}
}
func TestSimpleNumbers(t *testing.T) {
tree, err := Load("a = +42\nb = -21\nc = +4.2\nd = -2.1")
assertTree(t, tree, err, map[string]interface{}{
@@ -208,6 +219,36 @@ func TestSpaceKey(t *testing.T) {
})
}
// TestDoubleQuotedKey checks that basic (double-quoted) keys are loaded with
// their escape sequences decoded: the expected map keys are the interpreted
// forms of the sequences written in the document.
func TestDoubleQuotedKey(t *testing.T) {
	want := map[string]interface{}{
		"key":        "a",
		"\t":         "b",
		"\U0001F914": "c",
		"\u2764":     "d",
	}
	doc, loadErr := Load(`
"key" = "a"
"\t" = "b"
"\U0001F914" = "c"
"\u2764" = "d"
`)
	assertTree(t, doc, loadErr, want)
}
// TestSingleQuotedKey checks that literal (single-quoted) keys are loaded
// verbatim: backslash sequences inside them are expected to stay as written,
// so the expected map keys are raw strings.
func TestSingleQuotedKey(t *testing.T) {
	want := map[string]interface{}{
		`key`:        "a",
		`\t`:         "b",
		`\U0001F914`: "c",
		`\u2764`:     "d",
	}
	doc, loadErr := Load(`
'key' = "a"
'\t' = "b"
'\U0001F914' = "c"
'\u2764' = "d"
`)
	assertTree(t, doc, loadErr, want)
}
func TestStringEscapables(t *testing.T) {
tree, err := Load("a = \"a \\n b\"")
assertTree(t, tree, err, map[string]interface{}{
+3 -6
View File
@@ -71,18 +71,15 @@ func (t *Tree) Keys() []string {
}
// Get the value at key in the Tree.
// Key is a dot-separated path (e.g. a.b.c).
// Key is a dot-separated path (e.g. a.b.c) without single/double quoted strings.
// If you need to retrieve non-bare keys, use GetPath.
// Returns nil if the path does not exist in the tree.
// If keys is of length zero, the current tree is returned.
func (t *Tree) Get(key string) interface{} {
if key == "" {
return t
}
comps, err := parseKey(key)
if err != nil {
return nil
}
return t.GetPath(comps)
return t.GetPath(strings.Split(key, "."))
}
// GetPath returns the element in the tree indicated by 'keys'.