Remove cap tricks, use address comparison for subslice offset

Replace cap(parent) - cap(subslice) with a straightforward scan
that compares element addresses: &data[i] == &subslice[0]. This is a
well-defined Go pointer comparison on elements of the same backing
array, with no dependency on capacity semantics, reflect, or unsafe.

The scan is O(n) but only runs on error paths, and TOML documents
are small per the project's design constraints.

Also remove the Offset field from ParserError and the setErrOffset
machinery — the offset is computed at the point of consumption
(wrapDecodeError, Parser.Range) rather than cached on the error.

Co-authored-by: Thomas Pelletier <thomas@pelletier.dev>
This commit is contained in:
Cursor Agent
2026-04-12 18:17:55 +00:00
parent 96ac48eb74
commit 19174a4293
5 changed files with 31 additions and 82 deletions
+15 -1
View File
@@ -99,7 +99,7 @@ func (e *DecodeError) Key() Key {
// //
//nolint:funlen //nolint:funlen
func wrapDecodeError(document []byte, de *unstable.ParserError) *DecodeError { func wrapDecodeError(document []byte, de *unstable.ParserError) *DecodeError {
offset := cap(document) - cap(de.Highlight) offset := subsliceOffset(document, de.Highlight)
errMessage := de.Error() errMessage := de.Error()
errLine, errColumn := positionAtEnd(document[:offset]) errLine, errColumn := positionAtEnd(document[:offset])
@@ -261,3 +261,17 @@ func positionAtEnd(b []byte) (row int, column int) {
return row, column return row, column
} }
// subsliceOffset reports the index in data of the element whose address
// equals that of subslice's first element.
//
// An empty subslice has no first element to locate; in that case the
// function returns len(data). If no element of data has the same address
// as subslice[0], the function panics.
func subsliceOffset(data []byte, subslice []byte) int {
	if len(subslice) == 0 {
		return len(data)
	}
	// Take the target address once; each element of data is compared to it.
	first := &subslice[0]
	for i := 0; i < len(data); i++ {
		if &data[i] == first {
			return i
		}
	}
	panic("subslice is not within data")
}
-1
View File
@@ -172,7 +172,6 @@ line 5`,
err := wrapDecodeError(doc, &unstable.ParserError{ err := wrapDecodeError(doc, &unstable.ParserError{
Highlight: hl, Highlight: hl,
Message: e.msg, Message: e.msg,
Offset: start,
}) })
var derr *DecodeError var derr *DecodeError
+4 -8
View File
@@ -54,12 +54,10 @@ func (s *strict) MissingTable(node *unstable.Node) {
return return
} }
highlight, offset := s.keyLocation(node)
s.missing = append(s.missing, unstable.ParserError{ s.missing = append(s.missing, unstable.ParserError{
Highlight: highlight, Highlight: s.keyLocation(node),
Message: "missing table", Message: "missing table",
Key: s.key.Key(), Key: s.key.Key(),
Offset: offset,
}) })
} }
@@ -68,12 +66,10 @@ func (s *strict) MissingField(node *unstable.Node) {
return return
} }
highlight, offset := s.keyLocation(node)
s.missing = append(s.missing, unstable.ParserError{ s.missing = append(s.missing, unstable.ParserError{
Highlight: highlight, Highlight: s.keyLocation(node),
Message: "missing field", Message: "missing field",
Key: s.key.Key(), Key: s.key.Key(),
Offset: offset,
}) })
} }
@@ -94,7 +90,7 @@ func (s *strict) Error(doc []byte) error {
return err return err
} }
func (s *strict) keyLocation(node *unstable.Node) ([]byte, int) { func (s *strict) keyLocation(node *unstable.Node) []byte {
k := node.Key() k := node.Key()
hasOne := k.Next() hasOne := k.Next()
@@ -112,5 +108,5 @@ func (s *strict) keyLocation(node *unstable.Node) ([]byte, int) {
start := firstRaw.Offset start := firstRaw.Offset
end := lastRaw.Offset + lastRaw.Length end := lastRaw.Offset + lastRaw.Length
return s.doc[start:end], int(start) return s.doc[start:end]
} }
+12 -28
View File
@@ -2,7 +2,6 @@ package unstable
import ( import (
"bytes" "bytes"
"errors"
"fmt" "fmt"
"unicode" "unicode"
@@ -17,10 +16,6 @@ type ParserError struct {
Highlight []byte Highlight []byte
Message string Message string
Key []string // optional Key []string // optional
// Offset is the byte offset of Highlight within the document.
// Set by the parser when the error is captured.
Offset int
} }
// Error is the implementation of the error interface. // Error is the implementation of the error interface.
@@ -87,10 +82,18 @@ func (p *Parser) rangeOfToken(token, rest []byte) Range {
return Range{Offset: uint32(offset), Length: uint32(len(token))} //nolint:gosec // TOML documents are small return Range{Offset: uint32(offset), Length: uint32(len(token))} //nolint:gosec // TOML documents are small
} }
// subsliceOffset returns the byte offset of subslice b within p.data. // subsliceOffset finds the byte offset of subslice b within p.data
// b must share the same backing array as p.data. // by scanning for the matching element address.
func (p *Parser) subsliceOffset(b []byte) int { func (p *Parser) subsliceOffset(b []byte) int {
return cap(p.data) - cap(b) if len(b) == 0 {
return len(p.data)
}
for i := range p.data {
if &p.data[i] == &b[0] {
return i
}
}
panic("subslice is not within parser data")
} }
// Raw returns the slice corresponding to the bytes in the given range. // Raw returns the slice corresponding to the bytes in the given range.
@@ -131,19 +134,13 @@ func (p *Parser) NextExpression() bool {
p.left, p.err = p.parseNewline(p.left) p.left, p.err = p.parseNewline(p.left)
} }
if p.err != nil { if len(p.left) == 0 || p.err != nil {
p.setErrOffset()
return false
}
if len(p.left) == 0 {
return false return false
} }
p.ref, p.left, p.err = p.parseExpression(p.left) p.ref, p.left, p.err = p.parseExpression(p.left)
if p.err != nil { if p.err != nil {
p.setErrOffset()
return false return false
} }
@@ -166,19 +163,6 @@ func (p *Parser) Error() error {
return p.err return p.err
} }
// setErrOffset records, on the parser's current error, the byte offset of
// that error's Highlight within p.data. It does nothing when there is no
// error or when the error is not (and does not wrap) a *ParserError.
func (p *Parser) setErrOffset() {
	var perr *ParserError
	if p.err != nil && errors.As(p.err, &perr) {
		perr.Offset = p.subsliceOffset(perr.Highlight)
	}
}
// Position describes a position in the input. // Position describes a position in the input.
type Position struct { type Position struct {
// Number of bytes from the beginning of the input. // Number of bytes from the beginning of the input.
-44
View File
@@ -766,50 +766,6 @@ func TestErrorHighlightPositions(t *testing.T) {
} }
} }
// TestParserError_Offset feeds each malformed input to a Parser, drains
// NextExpression until it returns false, and checks that the resulting
// *ParserError carries the expected Offset.
func TestParserError_Offset(t *testing.T) {
	type testCase struct {
		desc       string
		input      string
		wantOffset int
	}

	cases := []testCase{
		{
			desc:       "error after comment",
			input:      "# comment\n= \"value\"",
			wantOffset: 10,
		},
		{
			desc:       "error on first line",
			input:      "= \"value\"",
			wantOffset: 0,
		},
		{
			desc:       "error after two lines",
			input:      "a = 1\n= \"value\"",
			wantOffset: 6,
		},
	}

	for _, tc := range cases {
		t.Run(tc.desc, func(t *testing.T) {
			var p Parser
			p.Reset([]byte(tc.input))

			for p.NextExpression() {
				// Nothing to do per expression; iterate until the parser stops.
			}

			err := p.Error()
			if err == nil {
				t.Fatal("expected an error")
			}

			var perr *ParserError
			if ok := errors.As(err, &perr); !ok {
				t.Fatalf("expected ParserError, got %T", err)
			}

			if got := perr.Offset; got != tc.wantOffset {
				t.Errorf("offset: got %d, want %d", got, tc.wantOffset)
			}
		})
	}
}
func ExampleParser() { func ExampleParser() {
doc := ` doc := `
hello = "world" hello = "world"