Remove cap tricks, use address comparison for subslice offset

Replace cap(parent) - cap(subslice) with a straightforward scan that compares element addresses: &data[i] == &subslice[0]. This is a well-defined Go pointer comparison on elements of the same backing array, with no dependency on capacity semantics, reflect, or unsafe. An empty subslice has no first element to compare, so its offset is reported as len(data); a non-empty subslice that does not point into data causes a panic. The scan is O(n) but only runs on error paths, and TOML documents are small per the project's design constraints. Also remove the Offset field from ParserError and the setErrOffset machinery — the offset is computed at the point of consumption (wrapDecodeError, Parser.Range) rather than cached on the error.

Co-authored-by: Thomas Pelletier <thomas@pelletier.dev>
2026-04-12 18:17:55 +00:00
parent 96ac48eb74
commit 19174a4293
5 changed files with 31 additions and 82 deletions
@@ -99,7 +99,7 @@ func (e *DecodeError) Key() Key {
 //
 //nolint:funlen
 func wrapDecodeError(document []byte, de *unstable.ParserError) *DecodeError {
-	offset := cap(document) - cap(de.Highlight)
+	offset := subsliceOffset(document, de.Highlight)

 	errMessage := de.Error()
 	errLine, errColumn := positionAtEnd(document[:offset])
@@ -261,3 +261,17 @@ func positionAtEnd(b []byte) (row int, column int) {

 	return row, column
 }
+
+// subsliceOffset returns the index i where &data[i] == &subslice[0].
+// An empty subslice yields len(data); it panics if no element matches.
+func subsliceOffset(data []byte, subslice []byte) int {
+	if len(subslice) == 0 {
+		return len(data)
+	}
+	for i := range data {
+		if &data[i] == &subslice[0] {
+			return i
+		}
+	}
+	panic("subslice is not within data")
+}
@@ -172,7 +172,6 @@ line 5`,
 			err := wrapDecodeError(doc, &unstable.ParserError{
 				Highlight: hl,
 				Message:   e.msg,
-				Offset:    start,
 			})

 			var derr *DecodeError
@@ -54,12 +54,10 @@ func (s *strict) MissingTable(node *unstable.Node) {
 		return
 	}

-	highlight, offset := s.keyLocation(node)
 	s.missing = append(s.missing, unstable.ParserError{
-		Highlight: highlight,
+		Highlight: s.keyLocation(node),
 		Message:   "missing table",
 		Key:       s.key.Key(),
-		Offset:    offset,
 	})
 }

@@ -68,12 +66,10 @@ func (s *strict) MissingField(node *unstable.Node) {
 		return
 	}

-	highlight, offset := s.keyLocation(node)
 	s.missing = append(s.missing, unstable.ParserError{
-		Highlight: highlight,
+		Highlight: s.keyLocation(node),
 		Message:   "missing field",
 		Key:       s.key.Key(),
-		Offset:    offset,
 	})
 }

@@ -94,7 +90,7 @@ func (s *strict) Error(doc []byte) error {
 	return err
 }

-func (s *strict) keyLocation(node *unstable.Node) ([]byte, int) {
+func (s *strict) keyLocation(node *unstable.Node) []byte {
 	k := node.Key()

 	hasOne := k.Next()
@@ -112,5 +108,5 @@ func (s *strict) keyLocation(node *unstable.Node) ([]byte, int) {
 	start := firstRaw.Offset
 	end := lastRaw.Offset + lastRaw.Length

-	return s.doc[start:end], int(start)
+	return s.doc[start:end]
 }
@@ -2,7 +2,6 @@ package unstable

 import (
 	"bytes"
-	"errors"
 	"fmt"
 	"unicode"

@@ -17,10 +16,6 @@ type ParserError struct {
 	Highlight []byte
 	Message   string
 	Key       []string // optional
-
-	// Offset is the byte offset of Highlight within the document.
-	// Set by the parser when the error is captured.
-	Offset int
 }

 // Error is the implementation of the error interface.
@@ -87,10 +82,18 @@ func (p *Parser) rangeOfToken(token, rest []byte) Range {
 	return Range{Offset: uint32(offset), Length: uint32(len(token))} //nolint:gosec // TOML documents are small
 }

-// subsliceOffset returns the byte offset of subslice b within p.data.
-// b must share the same backing array as p.data.
+// subsliceOffset returns the index i where &p.data[i] == &b[0].
+// An empty b yields len(p.data); it panics if no element matches.
 func (p *Parser) subsliceOffset(b []byte) int {
-	return cap(p.data) - cap(b)
+	if len(b) == 0 {
+		return len(p.data)
+	}
+	for i := range p.data {
+		if &p.data[i] == &b[0] {
+			return i
+		}
+	}
+	panic("subslice is not within parser data")
 }

 // Raw returns the slice corresponding to the bytes in the given range.
@@ -131,19 +134,13 @@ func (p *Parser) NextExpression() bool {
 			p.left, p.err = p.parseNewline(p.left)
 		}

-		if p.err != nil {
-			p.setErrOffset()
-			return false
-		}
-
-		if len(p.left) == 0 {
+		if len(p.left) == 0 || p.err != nil {
 			return false
 		}

 		p.ref, p.left, p.err = p.parseExpression(p.left)

 		if p.err != nil {
-			p.setErrOffset()
 			return false
 		}

@@ -166,19 +163,6 @@ func (p *Parser) Error() error {
 	return p.err
 }

-// setErrOffset sets the byte offset on the parser error from the
-// highlight's position within p.data.
-func (p *Parser) setErrOffset() {
-	if p.err == nil {
-		return
-	}
-	var perr *ParserError
-	if !errors.As(p.err, &perr) {
-		return
-	}
-	perr.Offset = p.subsliceOffset(perr.Highlight)
-}
-
 // Position describes a position in the input.
 type Position struct {
 	// Number of bytes from the beginning of the input.
@@ -766,50 +766,6 @@ func TestErrorHighlightPositions(t *testing.T) {
 	}
 }

-func TestParserError_Offset(t *testing.T) {
-	examples := []struct {
-		desc       string
-		input      string
-		wantOffset int
-	}{
-		{
-			desc:       "error after comment",
-			input:      "# comment\n= \"value\"",
-			wantOffset: 10,
-		},
-		{
-			desc:       "error on first line",
-			input:      "= \"value\"",
-			wantOffset: 0,
-		},
-		{
-			desc:       "error after two lines",
-			input:      "a = 1\n= \"value\"",
-			wantOffset: 6,
-		},
-	}
-
-	for _, e := range examples {
-		t.Run(e.desc, func(t *testing.T) {
-			p := Parser{}
-			p.Reset([]byte(e.input))
-			for p.NextExpression() {
-			}
-			err := p.Error()
-			if err == nil {
-				t.Fatal("expected an error")
-			}
-			var perr *ParserError
-			if !errors.As(err, &perr) {
-				t.Fatalf("expected ParserError, got %T", err)
-			}
-			if perr.Offset != e.wantOffset {
-				t.Errorf("offset: got %d, want %d", perr.Offset, e.wantOffset)
-			}
-		})
-	}
-}
-
 func ExampleParser() {
 	doc := `
 	hello = "world"