From 80189ba4490ee03d5a46d3452b3351e8059a0bba Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Sun, 12 Apr 2026 12:26:03 +0000
Subject: [PATCH] fix(unstable): correct Parser.Range for non-suffix highlights

Parser.Range computed the offset as len(data)-len(highlight), which is
only correct when the highlight is a suffix of the document. Single-byte
highlights like b[0:1] are prefixes of the remaining buffer, not
suffixes, so the wrong offset pointed at the end of the document
(issue #1047). Compute the offset from the slice pointers instead, as
wrapDecodeError does.

Add regression tests for unstable.Parser and toml.Unmarshal error
positions and human-readable output.

Co-authored-by: Thomas Pelletier <thomas@pelletier.dev>
---
 errors_test.go                | 32 +++++++++++++++++++++++++++++++
 unstable/parser.go            | 25 +++++++++++++++++-------
 unstable/parser_range_test.go | 36 +++++++++++++++++++++++++++++++++++
 3 files changed, 86 insertions(+), 7 deletions(-)
 create mode 100644 unstable/parser_range_test.go

diff --git a/errors_test.go b/errors_test.go
index 3703bd0..91e612b 100644
--- a/errors_test.go
+++ b/errors_test.go
@@ -202,6 +202,38 @@ func TestDecodeError_Accessors(t *testing.T) {
 	assert.Equal(t, "bar", e.String())
 }
 
+func TestDecodeError_InvalidKeyStartAfterComment(t *testing.T) {
+	// Regression for https://github.com/pelletier/go-toml/issues/1047. A bare
+	// "=" opening the second line is an invalid key start: the error must be
+	// located at line 2, column 1, and the rendered context must mark that
+	// byte rather than the end of the document.
+	const input = "# comment\n= \"value\""
+
+	var decoded map[string]any
+	unmarshalErr := Unmarshal([]byte(input), &decoded)
+	if unmarshalErr == nil {
+		t.Fatal("expected an error")
+	}
+	var decodeErr *DecodeError
+	if ok := errors.As(unmarshalErr, &decodeErr); !ok {
+		t.Fatalf("expected *DecodeError, got %T", unmarshalErr)
+	}
+
+	if line, column := decodeErr.Position(); line != 2 || column != 1 {
+		t.Errorf("Position(): got row %d col %d, want row 2 col 1", line, column)
+	}
+
+	rendered := decodeErr.String()
+	if !strings.Contains(rendered, `2| = "value"`) {
+		t.Errorf("human output should show the error line; got:\n%s", rendered)
+	}
+	// The caret line's left padding depends on the line-number column width,
+	// so only the portion from "| ~" onward is matched.
+	if !strings.Contains(rendered, "| ~ invalid character at start of key") {
+		t.Errorf("human output should underline '=' and include the parser message; got:\n%s", rendered)
+	}
+}
+
 func TestDecodeError_DuplicateContent(t *testing.T) {
 	// This test verifies that when the same content appears multiple times
 	// in the document, the error correctly points to the actual location
diff --git a/unstable/parser.go b/unstable/parser.go
index e7c68dc..f162932 100644
--- a/unstable/parser.go
+++ b/unstable/parser.go
@@ -3,6 +3,7 @@ package unstable
 import (
 	"bytes"
 	"fmt"
+	"reflect"
 	"unicode"
 
 	"github.com/pelletier/go-toml/v2/internal/characters"
@@ -69,8 +70,8 @@ func (p *Parser) Data() []byte {
 // panics.
 func (p *Parser) Range(b []byte) Range {
 	return Range{
-		Offset: uint32(p.subsliceOffset(b)), //nolint:gosec // TOML documents are small
-		Length: uint32(len(b)),              //nolint:gosec // TOML documents are small
+		Offset: uint32(subsliceOffset(p.data, b)), //nolint:gosec // TOML documents are small
+		Length: uint32(len(b)),                    //nolint:gosec // TOML documents are small
 	}
 }
 
@@ -82,11 +83,21 @@ func (p *Parser) rangeOfToken(token, rest []byte) Range {
 	return Range{Offset: uint32(offset), Length: uint32(len(token))} //nolint:gosec // TOML documents are small
 }
 
-// subsliceOffset returns the byte offset of subslice b within p.data.
-// b must be a suffix (tail) of p.data.
-func (p *Parser) subsliceOffset(b []byte) int {
-	// b is a suffix of p.data, so its offset is len(p.data) - len(b)
-	return len(p.data) - len(b)
+// subsliceOffset returns the byte offset at which b starts within data. An
+// empty b yields 0; it panics unless b lies entirely inside data's length.
+func subsliceOffset(data, b []byte) int {
+	if len(b) == 0 {
+		return 0 // nothing to locate for an empty slice
+	}
+
+	dataPtr := reflect.ValueOf(data).Pointer()
+	bPtr := reflect.ValueOf(b).Pointer()
+
+	offset := int(bPtr - dataPtr)
+	if offset < 0 || offset > len(data)-len(b) { // b must also end inside data
+		panic("subslice is not within data")
+	}
+	return offset
 }
 
 // Raw returns the slice corresponding to the bytes in the given range.
diff --git a/unstable/parser_range_test.go b/unstable/parser_range_test.go
new file mode 100644
index 0000000..7237339
--- /dev/null
+++ b/unstable/parser_range_test.go
@@ -0,0 +1,36 @@
+package unstable
+
+import (
+	"errors"
+	"testing"
+)
+
+// Guards against https://github.com/pelletier/go-toml/issues/1047: the range
+// of a highlight must reflect its real offset, not len(data)-len(slice).
+func TestParser_Range_HighlightAfterComment(t *testing.T) {
+	var parser Parser
+	parser.Reset([]byte("# comment\n= \"value\""))
+	for parser.NextExpression() {
+		// Keep advancing until the parser reports no further expression.
+	}
+
+	parseErr := parser.Error()
+	if parseErr == nil {
+		t.Fatal("expected an error")
+	}
+	var target *ParserError
+	if ok := errors.As(parseErr, &target); !ok {
+		t.Fatalf("expected *ParserError, got %T", parseErr)
+	}
+
+	// Offset 10 is the "=" right after the 10-byte "# comment\n" line.
+	highlighted := parser.Range(target.Highlight)
+	start := parser.Shape(highlighted).Start
+
+	if highlighted.Offset != 10 {
+		t.Errorf("Range offset: got %d, want 10", highlighted.Offset)
+	}
+	if start.Line != 2 || start.Column != 1 {
+		t.Errorf("position: got %d:%d, want 2:1", start.Line, start.Column)
+	}
+}