From 80189ba4490ee03d5a46d3452b3351e8059a0bba Mon Sep 17 00:00:00 2001
From: Cursor Agent
Date: Sun, 12 Apr 2026 12:26:03 +0000
Subject: [PATCH] fix(unstable): correct Parser.Range for non-suffix highlights

Parser.Range used len(data)-len(highlight), which only matches suffix
slices. Single-byte highlights like b[0:1] are subslices of the
remaining buffer, so the wrong offset pointed at the end of the document
(issue #1047). Use pointer-based subslice offset like wrapDecodeError.

Add regression tests for unstable.Parser and toml.Unmarshal error
positions and human-readable output.

Co-authored-by: Thomas Pelletier
---
 errors_test.go                | 32 +++++++++++++++++++++++++++++++
 unstable/parser.go            | 25 +++++++++++++++++-------
 unstable/parser_range_test.go | 36 +++++++++++++++++++++++++++++++++++
 3 files changed, 86 insertions(+), 7 deletions(-)
 create mode 100644 unstable/parser_range_test.go

diff --git a/errors_test.go b/errors_test.go
index 3703bd0..91e612b 100644
--- a/errors_test.go
+++ b/errors_test.go
@@ -202,6 +202,38 @@ func TestDecodeError_Accessors(t *testing.T) {
 	assert.Equal(t, "bar", e.String())
 }
 
+func TestDecodeError_InvalidKeyStartAfterComment(t *testing.T) {
+	// Regression for https://github.com/pelletier/go-toml/issues/1047: the "="
+	// that starts an invalid keyval must be reported on line 2, column 1, with
+	// the human-readable context pointing at that byte (not the document end).
+	doc := "# comment\n= \"value\""
+
+	var v map[string]any
+	err := Unmarshal([]byte(doc), &v)
+	if err == nil {
+		t.Fatal("expected an error")
+	}
+
+	var derr *DecodeError
+	if !errors.As(err, &derr) {
+		t.Fatalf("expected *DecodeError, got %T", err)
+	}
+
+	row, col := derr.Position()
+	if row != 2 || col != 1 {
+		t.Errorf("Position(): got row %d col %d, want row 2 col 1", row, col)
+	}
+
+	human := derr.String()
+	if !strings.Contains(human, `2| = "value"`) {
+		t.Errorf("human output should show the error line; got:\n%s", human)
+	}
+	// The caret line is padded to the line-number column width, so only the "| ~" part is stable here.
+	if !strings.Contains(human, "| ~ invalid character at start of key") {
+		t.Errorf("human output should underline '=' and include the parser message; got:\n%s", human)
+	}
+}
+
 func TestDecodeError_DuplicateContent(t *testing.T) {
 	// This test verifies that when the same content appears multiple times
 	// in the document, the error correctly points to the actual location
diff --git a/unstable/parser.go b/unstable/parser.go
index e7c68dc..f162932 100644
--- a/unstable/parser.go
+++ b/unstable/parser.go
@@ -3,6 +3,7 @@ package unstable
 import (
 	"bytes"
 	"fmt"
+	"reflect"
 	"unicode"
 
 	"github.com/pelletier/go-toml/v2/internal/characters"
@@ -69,8 +70,8 @@ func (p *Parser) Data() []byte {
 // panics.
 func (p *Parser) Range(b []byte) Range {
 	return Range{
-		Offset: uint32(p.subsliceOffset(b)), //nolint:gosec // TOML documents are small
-		Length: uint32(len(b)),              //nolint:gosec // TOML documents are small
+		Offset: uint32(subsliceOffset(p.data, b)), //nolint:gosec // TOML documents are small
+		Length: uint32(len(b)),                    //nolint:gosec // TOML documents are small
 	}
 }
 
@@ -82,11 +83,21 @@ func (p *Parser) rangeOfToken(token, rest []byte) Range {
 	return Range{Offset: uint32(offset), Length: uint32(len(token))} //nolint:gosec // TOML documents are small
 }
 
-// subsliceOffset returns the byte offset of subslice b within p.data.
-// b must be a suffix (tail) of p.data.
-func (p *Parser) subsliceOffset(b []byte) int {
-	// b is a suffix of p.data, so its offset is len(p.data) - len(b)
-	return len(p.data) - len(b)
+// subsliceOffset returns the byte offset at which b starts within data (0 if
+// b is empty). It panics unless a non-empty b lies entirely inside data.
+func subsliceOffset(data, b []byte) int {
+	if len(b) == 0 {
+		return 0
+	}
+
+	dataPtr := reflect.ValueOf(data).Pointer()
+	bPtr := reflect.ValueOf(b).Pointer()
+	end := dataPtr + uintptr(len(data))
+	// Check both ends: a start inside data does not imply the tail is too.
+	if bPtr < dataPtr || bPtr+uintptr(len(b)) > end {
+		panic("subslice is not within data")
+	}
+	return int(bPtr - dataPtr)
 }
 
 // Raw returns the slice corresponding to the bytes in the given range.
diff --git a/unstable/parser_range_test.go b/unstable/parser_range_test.go
new file mode 100644
index 0000000..7237339
--- /dev/null
+++ b/unstable/parser_range_test.go
@@ -0,0 +1,36 @@
+package unstable
+
+import (
+	"errors"
+	"testing"
+)
+
+// Regression test for https://github.com/pelletier/go-toml/issues/1047:
+// Parser.Range must use the real slice offset, not len(data)-len(slice).
+func TestParser_Range_HighlightAfterComment(t *testing.T) {
+	input := []byte("# comment\n= \"value\"")
+
+	var p Parser
+	p.Reset(input)
+	for p.NextExpression() {
+	}
+	err := p.Error()
+	if err == nil {
+		t.Fatal("expected an error")
+	}
+
+	var perr *ParserError
+	if !errors.As(err, &perr) {
+		t.Fatalf("expected *ParserError, got %T", err)
+	}
+
+	r := p.Range(perr.Highlight)
+	shape := p.Shape(r)
+
+	if r.Offset != 10 {
+		t.Errorf("Range offset: got %d, want 10", r.Offset)
+	}
+	if shape.Start.Line != 2 || shape.Start.Column != 1 {
+		t.Errorf("position: got %d:%d, want 2:1", shape.Start.Line, shape.Start.Column)
+	}
+}