Cache error offset in ParserError for safer position tracking

Instead of requiring downstream consumers to re-derive the byte offset from pointer arithmetic on the Highlight slice, compute and cache the offset inside the parser at error-capture time via setErrOffset().

This is safer because:
- The parser is the one place where the backing-array guarantee is known to hold (Highlight is always a subslice of the parse buffer).
- Downstream consumers (wrapDecodeError) can use the cached offset directly, avoiding the need for pointer comparison.
- Errors created outside the parser (strict.go) set the offset from existing Raw ranges, which are already correct by construction.

Add ParserError.SetOffset/Offset methods for setting and retrieving the cached offset. Update wrapDecodeError to prefer the cached offset when available, falling back to subsliceOffset for backward compatibility.

Co-authored-by: Thomas Pelletier <thomas@pelletier.dev>
2026-04-12 13:00:38 +00:00
parent d528d3c6b4
commit 154d80392f
4 changed files with 106 additions and 11 deletions
@@ -100,7 +100,12 @@ func (e *DecodeError) Key() Key {
 //
 //nolint:funlen
 func wrapDecodeError(document []byte, de *unstable.ParserError) *DecodeError {
-	offset := subsliceOffset(document, de.Highlight)
+	var offset int
+	if o, ok := de.Offset(); ok {
+		offset = o
+	} else {
+		offset = subsliceOffset(document, de.Highlight)
+	}

 	errMessage := de.Error()
 	errLine, errColumn := positionAtEnd(document[:offset])
@@ -54,11 +54,14 @@ func (s *strict) MissingTable(node *unstable.Node) {
 		return
 	}

-	s.missing = append(s.missing, unstable.ParserError{
-		Highlight: s.keyLocation(node),
+	highlight, offset := s.keyLocation(node)
+	pe := unstable.ParserError{
+		Highlight: highlight,
 		Message:   "missing table",
 		Key:       s.key.Key(),
-	})
+	}
+	pe.SetOffset(offset)
+	s.missing = append(s.missing, pe)
 }

 func (s *strict) MissingField(node *unstable.Node) {
@@ -66,11 +69,14 @@ func (s *strict) MissingField(node *unstable.Node) {
 		return
 	}

-	s.missing = append(s.missing, unstable.ParserError{
-		Highlight: s.keyLocation(node),
+	highlight, offset := s.keyLocation(node)
+	pe := unstable.ParserError{
+		Highlight: highlight,
 		Message:   "missing field",
 		Key:       s.key.Key(),
-	})
+	}
+	pe.SetOffset(offset)
+	s.missing = append(s.missing, pe)
 }

 func (s *strict) Error(doc []byte) error {
@@ -90,7 +96,7 @@ func (s *strict) Error(doc []byte) error {
 	return err
 }

-func (s *strict) keyLocation(node *unstable.Node) []byte {
+func (s *strict) keyLocation(node *unstable.Node) ([]byte, int) {
 	k := node.Key()

 	hasOne := k.Next()
@@ -98,7 +104,6 @@ func (s *strict) keyLocation(node *unstable.Node) []byte {
 		panic("should not be called with empty key")
 	}

-	// Get the range from the first key to the last key.
 	firstRaw := k.Node().Raw
 	lastRaw := firstRaw

@@ -106,9 +111,8 @@ func (s *strict) keyLocation(node *unstable.Node) []byte {
 		lastRaw = k.Node().Raw
 	}

-	// Compute the slice from the document using the ranges.
 	start := firstRaw.Offset
 	end := lastRaw.Offset + lastRaw.Length

-	return s.doc[start:end]
+	return s.doc[start:end], int(start)
 }
@@ -2,6 +2,7 @@ package unstable

 import (
 	"bytes"
+	"errors"
 	"fmt"
 	"reflect"
 	"unicode"
@@ -17,6 +18,9 @@ type ParserError struct {
 	Highlight []byte
 	Message   string
 	Key       []string // optional
+
+	offset      int
+	offsetValid bool
 }

 // Error is the implementation of the error interface.
@@ -24,6 +28,21 @@ func (e *ParserError) Error() string {
 	return e.Message
 }

+// SetOffset records the byte offset of the error highlight within the
+// document. The parser and other ParserError producers call it so that
+// downstream consumers don't need to re-derive the offset from pointers.
+func (e *ParserError) SetOffset(offset int) {
+	e.offset = offset
+	e.offsetValid = true
+}
+
+// Offset returns the byte offset of the error highlight within the
+// document, if it was previously recorded with SetOffset. The boolean
+// reports whether an offset was recorded; when false, ignore the int.
+func (e *ParserError) Offset() (int, bool) {
+	return e.offset, e.offsetValid
+}
+
 // NewParserError is a convenience function to create a ParserError
 //
 // Warning: Highlight needs to be a subslice of Parser.data, so only slices
@@ -137,12 +156,14 @@ func (p *Parser) NextExpression() bool {
 		}

 		if len(p.left) == 0 || p.err != nil {
+			p.setErrOffset()
 			return false
 		}

 		p.ref, p.left, p.err = p.parseExpression(p.left)

 		if p.err != nil {
+			p.setErrOffset()
 			return false
 		}

@@ -165,6 +186,23 @@ func (p *Parser) Error() error {
 	return p.err
 }

+// setErrOffset caches the byte offset of the error's highlight within
+// p.data so consumers need no pointer arithmetic. It is a no-op if p.err is
+// nil, is not a *ParserError, already has an offset, or has no Highlight.
+func (p *Parser) setErrOffset() {
+	if p.err == nil {
+		return
+	}
+	var perr *ParserError
+	if !errors.As(p.err, &perr) {
+		return
+	}
+	if perr.offsetValid || len(perr.Highlight) == 0 {
+		return
+	}
+	perr.SetOffset(p.subsliceOffset(perr.Highlight))
+}
+
 // Position describes a position in the input.
 type Position struct {
 	// Number of bytes from the beginning of the input.
@@ -766,6 +766,54 @@ func TestErrorHighlightPositions(t *testing.T) {
 	}
 }

+func TestParserError_CachedOffset(t *testing.T) {
+	examples := []struct {
+		desc       string
+		input      string
+		wantOffset int
+	}{
+		{
+			desc:       "error after comment",
+			input:      "# comment\n= \"value\"",
+			wantOffset: 10,
+		},
+		{
+			desc:       "error on first line",
+			input:      "= \"value\"",
+			wantOffset: 0,
+		},
+		{
+			desc:       "error after two lines",
+			input:      "a = 1\n= \"value\"",
+			wantOffset: 6,
+		},
+	}
+
+	for _, e := range examples {
+		t.Run(e.desc, func(t *testing.T) {
+			p := Parser{}
+			p.Reset([]byte(e.input))
+			for p.NextExpression() {
+			}
+			err := p.Error()
+			if err == nil {
+				t.Fatal("expected an error")
+			}
+			var perr *ParserError
+			if !errors.As(err, &perr) {
+				t.Fatalf("expected ParserError, got %T", err)
+			}
+			offset, ok := perr.Offset()
+			if !ok {
+				t.Fatal("expected offset to be set")
+			}
+			if offset != e.wantOffset {
+				t.Errorf("cached offset: got %d, want %d", offset, e.wantOffset)
+			}
+		})
+	}
+}
+
 func ExampleParser() {
 	doc := `
 	hello = "world"