Decoding error position tracking

This commit is contained in:
Thomas Pelletier
2021-03-30 21:43:57 -04:00
parent 18d45c446b
commit 32da85ab11
8 changed files with 150 additions and 34 deletions
+1 -1
View File
@@ -17,7 +17,7 @@ Development branch. Probably does not work.
- [x] Benchmark! - [x] Benchmark!
- [x] Abstract AST. - [x] Abstract AST.
- [x] Original go-toml testgen tests pass. - [x] Original go-toml testgen tests pass.
- [ ] Track file position (line, column) for errors. - [x] Track file position (line, column) for errors.
- [ ] Attach comments to AST (gated by parser flag). - [ ] Attach comments to AST (gated by parser flag).
- [ ] Benchmark again! - [ ] Benchmark again!
+6 -6
View File
@@ -34,7 +34,7 @@ func parseLocalDate(b []byte) (LocalDate, error) {
date := LocalDate{} date := LocalDate{}
if len(b) != 10 || b[4] != '-' || b[7] != '-' { if len(b) != 10 || b[4] != '-' || b[7] != '-' {
return date, fmt.Errorf("dates are expected to have the format YYYY-MM-DD") return date, newDecodeError(b, "dates are expected to have the format YYYY-MM-DD")
} }
var err error var err error
@@ -89,7 +89,7 @@ func parseDateTime(b []byte) (time.Time, error) {
zone = time.UTC zone = time.UTC
} else { } else {
if len(b) != 6 { if len(b) != 6 {
return time.Time{}, fmt.Errorf("invalid date-time timezone") return time.Time{}, newDecodeError(b, "invalid date-time timezone")
} }
direction := 1 direction := 1
switch b[0] { switch b[0] {
@@ -97,7 +97,7 @@ func parseDateTime(b []byte) (time.Time, error) {
case '-': case '-':
direction = -1 direction = -1
default: default:
return time.Time{}, fmt.Errorf("invalid timezone offset character") return time.Time{}, newDecodeError(b[0:1], "invalid timezone offset character")
} }
hours := digitsToInt(b[1:3]) hours := digitsToInt(b[1:3])
@@ -107,7 +107,7 @@ func parseDateTime(b []byte) (time.Time, error) {
} }
if len(b) > 0 { if len(b) > 0 {
return time.Time{}, fmt.Errorf("extra bytes at the end of the timezone") return time.Time{}, newDecodeError(b, "extra bytes at the end of the timezone")
} }
t := time.Date( t := time.Date(
@@ -166,14 +166,14 @@ func parseLocalTime(b []byte) (LocalTime, []byte, error) {
return t, nil, err return t, nil, err
} }
if b[2] != ':' { if b[2] != ':' {
return t, nil, fmt.Errorf("expecting colon between hours and minutes") return t, nil, newDecodeError(b[2:3], "expecting colon between hours and minutes")
} }
t.Minute, err = parseDecimalDigits(b[3:5]) t.Minute, err = parseDecimalDigits(b[3:5])
if err != nil { if err != nil {
return t, nil, err return t, nil, err
} }
if b[5] != ':' { if b[5] != ':' {
return t, nil, fmt.Errorf("expecting colon between minutes and seconds") return t, nil, newDecodeError(b[5:6], "expecting colon between minutes and seconds")
} }
t.Second, err = parseDecimalDigits(b[6:8]) t.Second, err = parseDecimalDigits(b[6:8])
if err != nil { if err != nil {
+9 -1
View File
@@ -129,8 +129,16 @@ func formatLineNumber(line int, width int) string {
func linesOfContext(document []byte, highlight []byte, offset int, linesAround int) ([][]byte, [][]byte) { func linesOfContext(document []byte, highlight []byte, offset int, linesAround int) ([][]byte, [][]byte) {
var beforeLines [][]byte var beforeLines [][]byte
for beforeOffset, lastOffset := offset, offset; beforeOffset >= 0 && len(beforeLines) <= linesAround; beforeOffset-- { for beforeOffset, lastOffset := offset, offset; beforeOffset >= 0 && len(beforeLines) <= linesAround; beforeOffset-- {
if beforeOffset == len(document) {
beforeLines = append(beforeLines, []byte{})
continue
}
if document[beforeOffset] == '\n' { if document[beforeOffset] == '\n' {
beforeLines = append(beforeLines, document[beforeOffset+1:lastOffset]) if beforeOffset == lastOffset {
beforeLines = append(beforeLines, []byte{})
} else {
beforeLines = append(beforeLines, document[beforeOffset+1:lastOffset])
}
lastOffset = beforeOffset lastOffset = beforeOffset
} else if beforeOffset == 0 && beforeOffset != lastOffset { } else if beforeOffset == 0 && beforeOffset != lastOffset {
beforeLines = append(beforeLines, document[beforeOffset:lastOffset]) beforeLines = append(beforeLines, document[beforeOffset:lastOffset])
+1 -1
View File
@@ -23,7 +23,7 @@ func SubsliceOffset(data []byte, subslice []byte) int {
intoffset := int(offset) intoffset := int(offset)
if intoffset >= datap.Len { if intoffset > datap.Len {
panic(fmt.Errorf("slice offset (%d) is farther than data length (%d)", intoffset, datap.Len)) panic(fmt.Errorf("slice offset (%d) is farther than data length (%d)", intoffset, datap.Len))
} }
+8 -5
View File
@@ -363,7 +363,7 @@ func (p *parser) parseValArray(b []byte) (ast.Reference, []byte, error) {
} }
if len(b) == 0 { if len(b) == 0 {
return parent, nil, unexpectedCharacter{b: b} return parent, nil, unexpectedCharacter{b: b} // TODO: should be unexpected EOF
} }
if b[0] == ']' { if b[0] == ']' {
@@ -590,7 +590,7 @@ func (p *parser) parseSimpleKey(b []byte) (key, rest []byte, err error) {
//quoted-key = basic-string / literal-string //quoted-key = basic-string / literal-string
if len(b) == 0 { if len(b) == 0 {
return nil, nil, unexpectedCharacter{b: b} return nil, nil, unexpectedCharacter{b: b} // TODO: should be unexpected EOF
} }
if b[0] == '\'' { if b[0] == '\'' {
@@ -600,7 +600,7 @@ func (p *parser) parseSimpleKey(b []byte) (key, rest []byte, err error) {
} else if isUnquotedKeyChar(b[0]) { } else if isUnquotedKeyChar(b[0]) {
key, rest, err = scanUnquotedKey(b) key, rest, err = scanUnquotedKey(b)
} else { } else {
err = unexpectedCharacter{b: b} err = unexpectedCharacter{b: b} // TODO: should contain expected characters
} }
return return
} }
@@ -1158,8 +1158,11 @@ func isValidBinaryRune(r byte) bool {
} }
func expect(x byte, b []byte) ([]byte, error) { func expect(x byte, b []byte) ([]byte, error) {
if len(b) == 0 || b[0] != x { if len(b) == 0 {
return nil, unexpectedCharacter{r: x, b: b} return nil, newDecodeError(b[:0], "expecting %#U", x)
}
if b[0] != x {
return nil, newDecodeError(b[0:1], "expected character %U", x)
} }
return b[1:], nil return b[1:], nil
} }
+9 -9
View File
@@ -30,7 +30,7 @@ func scanUnquotedKey(b []byte) ([]byte, []byte, error) {
return b[:i], b[i:], nil return b[:i], b[i:], nil
} }
} }
return b, nil, nil return b, b[len(b):], nil
} }
func isUnquotedKeyChar(r byte) bool { func isUnquotedKeyChar(r byte) bool {
@@ -46,10 +46,10 @@ func scanLiteralString(b []byte) ([]byte, []byte, error) {
case '\'': case '\'':
return b[:i+1], b[i+1:], nil return b[:i+1], b[i+1:], nil
case '\n': case '\n':
return nil, nil, fmt.Errorf("literal strings cannot have new lines") return nil, nil, newDecodeError(b[i:i+1], "literal strings cannot have new lines")
} }
} }
return nil, nil, fmt.Errorf("unterminated literal string") return nil, nil, newDecodeError(b[len(b):], "unterminated literal string")
} }
func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) { func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) {
@@ -70,7 +70,7 @@ func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) {
} }
} }
return nil, nil, fmt.Errorf(`multiline literal string not terminated by '''`) return nil, nil, newDecodeError(b[len(b):], `multiline literal string not terminated by '''`)
} }
func scanWindowsNewline(b []byte) ([]byte, []byte, error) { func scanWindowsNewline(b []byte) ([]byte, []byte, error) {
@@ -92,7 +92,7 @@ func scanWhitespace(b []byte) ([]byte, []byte) {
return b[:i], b[i:] return b[:i], b[i:]
} }
} }
return b, nil return b, b[len(b):]
} }
func scanComment(b []byte) ([]byte, []byte, error) { func scanComment(b []byte) ([]byte, []byte, error) {
@@ -125,10 +125,10 @@ func scanBasicString(b []byte) ([]byte, []byte, error) {
case '"': case '"':
return b[:i+1], b[i+1:], nil return b[:i+1], b[i+1:], nil
case '\n': case '\n':
return nil, nil, fmt.Errorf("basic strings cannot have new lines") return nil, nil, newDecodeError(b[i:i+1], "basic strings cannot have new lines")
case '\\': case '\\':
if len(b) < i+2 { if len(b) < i+2 {
return nil, nil, fmt.Errorf("need a character after \\") return nil, nil, newDecodeError(b[i:i+1], "need a character after \\")
} }
i++ // skip the next character i++ // skip the next character
} }
@@ -158,11 +158,11 @@ func scanMultilineBasicString(b []byte) ([]byte, []byte, error) {
} }
case '\\': case '\\':
if len(b) < i+2 { if len(b) < i+2 {
return nil, nil, fmt.Errorf("need a character after \\") return nil, nil, newDecodeError(b[len(b):], "need a character after \\")
} }
i++ // skip the next character i++ // skip the next character
} }
} }
return nil, nil, fmt.Errorf(`multiline basic string not terminated by """`) return nil, nil, newDecodeError(b[len(b):], `multiline basic string not terminated by """`)
} }
+16 -1
View File
@@ -268,11 +268,22 @@ func (d *decoder) unmarshalValue(x target, node ast.Node) error {
return unmarshalLocalDateTime(x, node) return unmarshalLocalDateTime(x, node)
case ast.DateTime: case ast.DateTime:
return unmarshalDateTime(x, node) return unmarshalDateTime(x, node)
case ast.LocalDate:
return unmarshalLocalDate(x, node)
default: default:
panic(fmt.Errorf("unhandled unmarshalValue kind %s", node.Kind)) panic(fmt.Errorf("unhandled unmarshalValue kind %s", node.Kind))
} }
} }
// unmarshalLocalDate decodes an ast.LocalDate node into the target x.
//
// The node is first passed to assertNode together with the expected kind,
// then its raw Data bytes are parsed with parseLocalDate. A parse error is
// returned unchanged; otherwise the parsed date is stored through setDate
// and that call's error (if any) is returned.
func unmarshalLocalDate(x target, node ast.Node) error {
	assertNode(ast.LocalDate, node)

	date, parseErr := parseLocalDate(node.Data)
	if parseErr == nil {
		return setDate(x, date)
	}
	return parseErr
}
func unmarshalLocalDateTime(x target, node ast.Node) error { func unmarshalLocalDateTime(x target, node ast.Node) error {
assertNode(ast.LocalDateTime, node) assertNode(ast.LocalDateTime, node)
v, rest, err := parseLocalDateTime(node.Data) v, rest, err := parseLocalDateTime(node.Data)
@@ -280,7 +291,7 @@ func unmarshalLocalDateTime(x target, node ast.Node) error {
return err return err
} }
if len(rest) > 0 { if len(rest) > 0 {
return fmt.Errorf("extra characters at the end of a local date time") return newDecodeError(rest, "extra characters at the end of a local date time")
} }
return setLocalDateTime(x, v) return setLocalDateTime(x, v)
} }
@@ -302,6 +313,10 @@ func setDateTime(x target, v time.Time) error {
return x.set(reflect.ValueOf(v)) return x.set(reflect.ValueOf(v))
} }
// setDate stores the LocalDate v into the target x. The value is wrapped in
// a reflect.Value and handed to x.set, whose error is returned as-is.
func setDate(x target, v LocalDate) error {
	wrapped := reflect.ValueOf(v)
	return x.set(wrapped)
}
func unmarshalString(x target, node ast.Node) error { func unmarshalString(x target, node ast.Node) error {
assertNode(ast.String, node) assertNode(ast.String, node)
return setString(x, string(node.Data)) return setString(x, string(node.Data))
+100 -10
View File
@@ -200,10 +200,10 @@ func TestUnmarshal(t *testing.T) {
gen: func() test { gen: func() test {
m := map[string]interface{}{} m := map[string]interface{}{}
return test{ return test{
target: &m, target: &m,
expected: &map[string]interface{}{ expected: &map[string]interface{}{
"fruit": map[string]interface{}{ "fruit": map[string]interface{}{
"color": "yellow", "color": "yellow",
"flavor": "banana", "flavor": "banana",
}, },
}, },
@@ -217,7 +217,7 @@ func TestUnmarshal(t *testing.T) {
gen: func() test { gen: func() test {
m := map[string]interface{}{} m := map[string]interface{}{}
return test{ return test{
target: &m, target: &m,
expected: &map[string]interface{}{ expected: &map[string]interface{}{
`"a"`: int64(1), `"a"`: int64(1),
`"b"`: int64(2), `"b"`: int64(2),
@@ -226,7 +226,7 @@ func TestUnmarshal(t *testing.T) {
}, },
}, },
{ {
desc: "multiline basic string", desc: "multiline basic string",
input: `A = """\ input: `A = """\
Test"""`, Test"""`,
gen: func() test { gen: func() test {
@@ -705,7 +705,6 @@ B = "data"`,
} }
} }
type Integer484 struct { type Integer484 struct {
Value int Value int
} }
@@ -726,7 +725,7 @@ type Config484 struct {
Integers []Integer484 `toml:"integers"` Integers []Integer484 `toml:"integers"`
} }
func TestIssue484(t *testing.T) { func TestIssue484(t *testing.T) {
raw := []byte(`integers = ["1","2","3","100"]`) raw := []byte(`integers = ["1","2","3","100"]`)
var cfg Config484 var cfg Config484
err := toml.Unmarshal(raw, &cfg) err := toml.Unmarshal(raw, &cfg)
@@ -753,10 +752,101 @@ version = "0.1.0"`)
require.NoError(t, err) require.NoError(t, err)
a := m.A("package") a := m.A("package")
expected := Slice458{ expected := Slice458{
map[string]interface {}{ map[string]interface{}{
"dependencies": []interface {}{"regex"}, "dependencies": []interface{}{"regex"},
"name":"decode", "name": "decode",
"version":"0.1.0"}, "version": "0.1.0"},
} }
assert.Equal(t, expected, a) assert.Equal(t, expected, a)
} }
// TestUnmarshalDecodeErrors feeds malformed TOML documents to toml.Unmarshal
// and checks that every one of them fails with a *toml.DecodeError. When a
// case provides msg, the error text must match it exactly; cases without msg
// only check the error type.
func TestUnmarshalDecodeErrors(t *testing.T) {
	type example struct {
		desc string // sub-test name
		data string // TOML document expected to fail decoding
		msg  string // expected Error() text; empty skips the message check
	}

	cases := []example{
		{
			desc: "int with wrong base",
			data: `a = 0f2`,
		},
		{
			desc: "literal string with new lines",
			data: `a = 'hello
world'`,
			msg: `literal strings cannot have new lines`,
		},
		{
			desc: "unterminated literal string",
			data: `a = 'hello`,
			msg:  `unterminated literal string`,
		},
		{
			desc: "unterminated multiline literal string",
			data: `a = '''hello`,
			msg:  `multiline literal string not terminated by '''`,
		},
		{
			desc: "basic string with new lines",
			data: `a = "hello
"`,
			msg: `basic strings cannot have new lines`,
		},
		{
			desc: "basic string with unfinished escape",
			data: `a = "hello \`,
			msg:  `need a character after \`,
		},
		{
			desc: "basic unfinished multiline string",
			data: `a = """hello`,
			msg:  `multiline basic string not terminated by """`,
		},
		{
			desc: "basic unfinished escape in multiline string",
			data: `a = """hello \`,
			msg:  `need a character after \`,
		},
		{
			desc: "malformed local date",
			data: `a = 2021-033-0`,
			msg:  `dates are expected to have the format YYYY-MM-DD`,
		},
		{
			desc: "malformed tz",
			data: `a = 2021-03-30 21:31:00+1`,
			msg:  `invalid date-time timezone`,
		},
		{
			desc: "malformed tz first char",
			data: `a = 2021-03-30 21:31:00:1`,
			msg:  `extra characters at the end of a local date time`,
		},
		{
			desc: "bad char between hours and minutes",
			data: `a = 2021-03-30 213:1:00`,
			msg:  `expecting colon between hours and minutes`,
		},
		{
			desc: "bad char between minutes and seconds",
			data: `a = 2021-03-30 21:312:0`,
			msg:  `expecting colon between minutes and seconds`,
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.desc, func(t *testing.T) {
			out := map[string]interface{}{}
			err := toml.Unmarshal([]byte(tc.data), &out)
			require.Error(t, err)

			decodeErr, isDecodeErr := err.(*toml.DecodeError)
			if !isDecodeErr {
				t.Fatalf("err should have been a *toml.DecodeError, but got %s (%T)", err, err)
			}

			// Cases without an expected message only verify the error type.
			if tc.msg == "" {
				return
			}
			t.Log("\n" + decodeErr.String())
			require.Equal(t, tc.msg, decodeErr.Error())
		})
	}
}