Remove unsafe package usage (#1021)

Removes all unsafe operations from go-toml, making the codebase
fully safe Go code. The internal/danger package that contained
unsafe operations has been deleted.

Changes:
- Replace pointer-based node navigation with index-based navigation
- Node.next and Node.child now store absolute indices into the
  backing nodes slice instead of relative offsets
- Add nodes pointer to Node and Iterator for safe navigation
- Replace danger.TypeID with reflect.Type for cache keys
- Delete internal/danger package entirely

Performance overhead is under 10% compared to the unsafe version,
which is acceptable for the safety and maintainability benefits.

[Cursor][claude-sonnet-4-20250514]
This commit is contained in:
Thomas Pelletier
2026-01-04 13:16:47 -05:00
committed by GitHub
parent a675c6b3e2
commit 3aaf147e3e
12 changed files with 295 additions and 360 deletions
+28 -7
View File
@@ -2,10 +2,10 @@ package toml
import ( import (
"fmt" "fmt"
"reflect"
"strconv" "strconv"
"strings" "strings"
"github.com/pelletier/go-toml/v2/internal/danger"
"github.com/pelletier/go-toml/v2/unstable" "github.com/pelletier/go-toml/v2/unstable"
) )
@@ -58,14 +58,14 @@ func (s *StrictMissingError) String() string {
// //
// Implements errors.Join() interface. // Implements errors.Join() interface.
func (s *StrictMissingError) Unwrap() []error { func (s *StrictMissingError) Unwrap() []error {
errs := make([]error, 0, len(s.Errors)) errs := make([]error, len(s.Errors))
for i := range s.Errors { for i := range s.Errors {
errs = append(errs, &s.Errors[i]) errs[i] = &s.Errors[i]
} }
return errs return errs
} }
// Key is a slice of strings that represents a path to a value in a TOML document. // Key represents a TOML key as a sequence of key parts.
type Key []string type Key []string
// Error returns the error message contained in the DecodeError. // Error returns the error message contained in the DecodeError.
@@ -93,12 +93,14 @@ func (e *DecodeError) Key() Key {
// wrapDecodeError creates a DecodeError referencing a highlighted // wrapDecodeError creates a DecodeError referencing a highlighted
// range of bytes from document. // range of bytes from document.
// //
// Highlight needs to be a sub-slice of document, or this function panics. // highlight needs to be a sub-slice of document, or this function panics.
// //
// The function copies all bytes used in DecodeError, so that document and // The function copies all bytes used in DecodeError, so that document and
// highlight can be freely deallocated. // highlight can be freely deallocated.
//
//nolint:funlen
func wrapDecodeError(document []byte, de *unstable.ParserError) *DecodeError { func wrapDecodeError(document []byte, de *unstable.ParserError) *DecodeError {
offset := danger.SubsliceOffset(document, de.Highlight) offset := subsliceOffset(document, de.Highlight)
errMessage := de.Error() errMessage := de.Error()
errLine, errColumn := positionAtEnd(document[:offset]) errLine, errColumn := positionAtEnd(document[:offset])
@@ -258,5 +260,24 @@ func positionAtEnd(b []byte) (row int, column int) {
} }
} }
return row, column return
}
// subsliceOffset returns the byte offset at which subslice starts within data.
//
// subslice must be a view into the same backing array as data and must lie
// entirely inside data; otherwise the function panics. An empty (or nil)
// subslice is reported at offset 0.
func subsliceOffset(data []byte, subslice []byte) int {
	if len(subslice) == 0 {
		return 0
	}

	// reflect.Value.Pointer yields the address of the first element of a
	// slice without resorting to package unsafe. The addresses are only
	// compared and subtracted, never dereferenced.
	dataPtr := reflect.ValueOf(data).Pointer()
	subPtr := reflect.ValueOf(subslice).Pointer()

	// Compare before subtracting: uintptr arithmetic wraps around, so the
	// difference is only meaningful once subPtr is known to be >= dataPtr.
	if subPtr < dataPtr || subPtr-dataPtr > uintptr(len(data)) {
		panic("subslice is not within data")
	}
	offset := int(subPtr - dataPtr)

	// A valid start is not enough: the subslice must also end inside data.
	// Written as a subtraction so the check itself cannot overflow.
	if len(subslice) > len(data)-offset {
		panic("subslice is not within data")
	}

	return offset
}
+79
View File
@@ -11,6 +11,7 @@ import (
"github.com/pelletier/go-toml/v2/unstable" "github.com/pelletier/go-toml/v2/unstable"
) )
//nolint:funlen
func TestDecodeError(t *testing.T) { func TestDecodeError(t *testing.T) {
examples := []struct { examples := []struct {
desc string desc string
@@ -201,6 +202,84 @@ func TestDecodeError_Accessors(t *testing.T) {
assert.Equal(t, "bar", e.String()) assert.Equal(t, "bar", e.String())
} }
// TestDecodeError_DuplicateContent checks that a decode error located after
// the first line is reported at its own position.
//
// The document holds "a = 1" on line 1 and "b = 3__4" on line 2. Only the
// value on line 2 is expected to be rejected (because of the "__"), so the
// reported position must be on row 2, not on line 1.
func TestDecodeError_DuplicateContent(t *testing.T) {
	doc := `a = 1
b = 3__4`

	var v map[string]int
	err := Unmarshal([]byte(doc), &v)

	var derr *DecodeError
	if !errors.As(err, &derr) {
		// Report what was actually returned so a failure is diagnosable.
		t.Fatalf("error not in expected format: got %T: %v", err, err)
	}

	row, col := derr.Position()
	// The error should be on line 2 where "3__4" is.
	if row != 2 {
		t.Errorf("expected error on row 2, got row %d", row)
	}
	// Column should point to the "__" part (after "3").
	if col < 5 {
		t.Errorf("expected error at column >= 5, got column %d", col)
	}
}
// TestDecodeError_Position feeds documents whose invalid value sits on the
// first, second and third line, and checks the row (exactly) and the column
// (as a lower bound) reported by the resulting DecodeError.
func TestDecodeError_Position(t *testing.T) {
	type testCase struct {
		name    string
		doc     string
		wantRow int
		minCol  int
	}

	cases := []testCase{
		{name: "error on first line", doc: "a = 1__2", wantRow: 1, minCol: 5},
		{name: "error on second line", doc: "a = 1\nb = 2__3", wantRow: 2, minCol: 5},
		{name: "error on third line", doc: "a = 1\nb = 2\nc = 3__4", wantRow: 3, minCol: 5},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			var target map[string]int
			unmarshalErr := Unmarshal([]byte(tc.doc), &target)

			var decodeErr *DecodeError
			if !errors.As(unmarshalErr, &decodeErr) {
				t.Fatal("error not in expected format")
			}

			gotRow, gotCol := decodeErr.Position()
			assert.Equal(t, tc.wantRow, gotRow)
			if gotCol < tc.minCol {
				t.Errorf("expected column >= %d, got %d", tc.minCol, gotCol)
			}
		})
	}
}
func TestStrictErrorUnwrap(t *testing.T) { func TestStrictErrorUnwrap(t *testing.T) {
fo := bytes.NewBufferString(` fo := bytes.NewBufferString(`
Missing = 1 Missing = 1
-64
View File
@@ -1,64 +0,0 @@
// Package danger provides optimized unsafe functions.
package danger
import (
"fmt"
"unsafe"
)
// maxInt is the largest value an int can hold, expressed as a uintptr so it
// can be compared against address differences.
const maxInt = uintptr(int(^uint(0) >> 1))

// SubsliceOffset returns the offset of subslice from the start of data,
// computed from the addresses of their first elements.
//
// It panics when subslice starts before data, when the distance does not fit
// in an int, or when subslice starts or ends beyond len(data).
func SubsliceOffset(data []byte, subslice []byte) int {
	base := uintptr(unsafe.Pointer(unsafe.SliceData(data)))      // #nosec G103
	start := uintptr(unsafe.Pointer(unsafe.SliceData(subslice))) // #nosec G103

	if start < base {
		panic(fmt.Errorf("subslice address (%d) is before data address (%d)", start, base))
	}

	distance := start - base
	if distance > maxInt {
		panic(fmt.Errorf("slice offset larger than int (%d)", distance))
	}

	pos := int(distance)
	switch {
	case pos > len(data):
		panic(fmt.Errorf("slice offset (%d) is farther than data length (%d)", pos, len(data)))
	case pos+len(subslice) > len(data):
		panic(fmt.Errorf("slice ends (%d+%d) is farther than data length (%d)", pos, len(subslice), len(data)))
	}

	return pos
}
// BytesRange returns start re-sliced so that it extends to whichever is
// farther: its own end or the end of the end slice. The position of end is
// derived from the address of its first element relative to start.
//
// It panics when either argument is nil, when end begins at a lower address
// than start, or when the resulting length exceeds cap(start).
func BytesRange(start []byte, end []byte) []byte {
	if start == nil || end == nil {
		panic("cannot call BytesRange with nil")
	}

	first := uintptr(unsafe.Pointer(unsafe.SliceData(start))) // #nosec G103
	last := uintptr(unsafe.Pointer(unsafe.SliceData(end)))    // #nosec G103
	if first > last {
		panic(fmt.Errorf("start pointer address (%d) is after end pointer address (%d)", first, last))
	}

	// Length needed to reach the end of the end slice; never shorter than
	// start itself (end may be fully contained in start).
	span := int(last-first) + len(end)
	if span < len(start) {
		span = len(start)
	}

	if span > cap(start) {
		panic("range length is larger than capacity")
	}

	return start[:span]
}
// Stride returns ptr advanced by offset elements of the given size. A
// negative offset moves the pointer backward.
func Stride(ptr unsafe.Pointer, size uintptr, offset int) unsafe.Pointer {
	delta := size * uintptr(offset)
	return unsafe.Add(ptr, delta)
}
-176
View File
@@ -1,176 +0,0 @@
package danger_test
import (
"testing"
"unsafe"
"github.com/pelletier/go-toml/v2/internal/assert"
"github.com/pelletier/go-toml/v2/internal/danger"
)
// TestSubsliceOffsetValid checks the offset reported for slice pairs that
// share a backing array.
func TestSubsliceOffsetValid(t *testing.T) {
	type testCase struct {
		desc   string
		test   func() ([]byte, []byte)
		offset int
	}

	cases := []testCase{
		{
			desc: "simple",
			test: func() ([]byte, []byte) {
				data := []byte("hello")
				return data, data[1:]
			},
			offset: 1,
		},
	}

	for _, tc := range cases {
		t.Run(tc.desc, func(t *testing.T) {
			data, sub := tc.test()
			got := danger.SubsliceOffset(data, sub)
			assert.Equal(t, tc.offset, got)
		})
	}
}
func TestSubsliceOffsetInvalid(t *testing.T) {
examples := []struct {
desc string
test func() ([]byte, []byte)
}{
{
desc: "unrelated arrays",
test: func() ([]byte, []byte) {
return []byte("one"), []byte("two")
},
},
{
desc: "slice starts before data",
test: func() ([]byte, []byte) {
full := []byte("hello world")
return full[5:], full[1:]
},
},
{
desc: "slice starts after data",
test: func() ([]byte, []byte) {
full := []byte("hello world")
return full[:3], full[5:]
},
},
{
desc: "slice ends after data",
test: func() ([]byte, []byte) {
full := []byte("hello world")
return full[:5], full[3:8]
},
},
}
for _, e := range examples {
t.Run(e.desc, func(t *testing.T) {
d, s := e.test()
assert.Panics(t, func() {
danger.SubsliceOffset(d, s)
})
})
}
}
// TestStride checks that Stride moves a byte pointer one element forward and
// one element backward within the same array.
func TestStride(t *testing.T) {
	buf := []byte{1, 2, 3, 4}
	x := &buf[1]
	step := unsafe.Sizeof(byte(0))

	forward := (*byte)(danger.Stride(unsafe.Pointer(x), step, 1))
	assert.Equal(t, &buf[2], forward)

	backward := (*byte)(danger.Stride(unsafe.Pointer(x), step, -1))
	assert.Equal(t, &buf[0], backward)
}
// TestBytesRange runs BytesRange over a table of start/end slice pairs.
//
// Cases that leave expected unset (nil) are required to panic; all other
// cases must return exactly the expected bytes.
func TestBytesRange(t *testing.T) {
	type fn = func() ([]byte, []byte)
	examples := []struct {
		desc     string
		test     fn
		expected []byte // nil means the call must panic
	}{
		{
			desc: "simple",
			test: func() ([]byte, []byte) {
				full := []byte("hello world")
				return full[1:3], full[6:8]
			},
			expected: []byte("ello wo"),
		},
		{
			desc: "full",
			test: func() ([]byte, []byte) {
				full := []byte("hello world")
				return full[0:1], full[len(full)-1:]
			},
			expected: []byte("hello world"),
		},
		{
			// No expected value: must panic.
			desc: "end before start",
			test: func() ([]byte, []byte) {
				full := []byte("hello world")
				return full[len(full)-1:], full[0:1]
			},
		},
		{
			// No expected value: must panic.
			desc: "nils",
			test: func() ([]byte, []byte) {
				return nil, nil
			},
		},
		{
			// No expected value: must panic.
			desc: "nils start",
			test: func() ([]byte, []byte) {
				return nil, []byte("foo")
			},
		},
		{
			// No expected value: must panic.
			desc: "nils end",
			test: func() ([]byte, []byte) {
				return []byte("foo"), nil
			},
		},
		{
			desc: "start is end",
			test: func() ([]byte, []byte) {
				full := []byte("hello world")
				return full[1:3], full[1:3]
			},
			expected: []byte("el"),
		},
		{
			desc: "end contained in start",
			test: func() ([]byte, []byte) {
				full := []byte("hello world")
				return full[1:7], full[2:4]
			},
			expected: []byte("ello w"),
		},
		{
			// No expected value: must panic.
			desc: "different backing arrays",
			test: func() ([]byte, []byte) {
				one := []byte("hello world")
				two := []byte("hello world")
				return one, two
			},
		},
	}
	for _, e := range examples {
		t.Run(e.desc, func(t *testing.T) {
			start, end := e.test()
			// A nil expected value marks a case that must panic.
			if e.expected == nil {
				assert.Panics(t, func() {
					danger.BytesRange(start, end)
				})
			} else {
				res := danger.BytesRange(start, end)
				assert.Equal(t, e.expected, res)
			}
		})
	}
}
-23
View File
@@ -1,23 +0,0 @@
package danger
import (
"reflect"
"unsafe"
)
// TypeID is used as key in encoder and decoder caches to enable using
// the optimized runtime.mapaccess2_fast64 function instead of the more
// expensive lookup if we were to use reflect.Type as map key.
//
// typeID holds the pointer to the reflect.Type value, which is unique
// in the program.
//
// https://github.com/segmentio/encoding/blob/master/json/codec.go#L59-L61
type TypeID unsafe.Pointer
// MakeTypeID returns the TypeID for t.
//
// It reinterprets the interface value t as a pair of pointers and returns
// the second one.
// NOTE(review): this depends on the in-memory layout of interface values
// described in the comment below — confirm it still holds for the targeted
// Go versions.
func MakeTypeID(t reflect.Type) TypeID {
	// reflect.Type has the fields:
	// typ unsafe.Pointer
	// ptr unsafe.Pointer
	return TypeID((*[2]unsafe.Pointer)(unsafe.Pointer(&t))[1]) // #nosec G103
}
+4 -3
View File
@@ -1,8 +1,8 @@
package tracker package tracker
import ( import (
"reflect"
"testing" "testing"
"unsafe"
"github.com/pelletier/go-toml/v2/internal/assert" "github.com/pelletier/go-toml/v2/internal/assert"
) )
@@ -12,9 +12,10 @@ func TestEntrySize(t *testing.T) {
// performance of unmarshaling documents. Should only be increased with care // performance of unmarshaling documents. Should only be increased with care
// and a very good reason. // and a very good reason.
maxExpectedEntrySize := 48 maxExpectedEntrySize := 48
entrySize := int(reflect.TypeOf(entry{}).Size())
assert.True(t, assert.True(t,
int(unsafe.Sizeof(entry{})) <= maxExpectedEntrySize, entrySize <= maxExpectedEntrySize,
"Expected entry to be less than or equal to %d, got: %d", "Expected entry to be less than or equal to %d, got: %d",
maxExpectedEntrySize, int(unsafe.Sizeof(entry{})), maxExpectedEntrySize, entrySize,
) )
} }
+15 -8
View File
@@ -1,7 +1,6 @@
package toml package toml
import ( import (
"github.com/pelletier/go-toml/v2/internal/danger"
"github.com/pelletier/go-toml/v2/internal/tracker" "github.com/pelletier/go-toml/v2/internal/tracker"
"github.com/pelletier/go-toml/v2/unstable" "github.com/pelletier/go-toml/v2/unstable"
) )
@@ -13,6 +12,9 @@ type strict struct {
key tracker.KeyTracker key tracker.KeyTracker
missing []unstable.ParserError missing []unstable.ParserError
// Reference to the document for computing key ranges.
doc []byte
} }
func (s *strict) EnterTable(node *unstable.Node) { func (s *strict) EnterTable(node *unstable.Node) {
@@ -53,7 +55,7 @@ func (s *strict) MissingTable(node *unstable.Node) {
} }
s.missing = append(s.missing, unstable.ParserError{ s.missing = append(s.missing, unstable.ParserError{
Highlight: keyLocation(node), Highlight: s.keyLocation(node),
Message: "missing table", Message: "missing table",
Key: s.key.Key(), Key: s.key.Key(),
}) })
@@ -65,7 +67,7 @@ func (s *strict) MissingField(node *unstable.Node) {
} }
s.missing = append(s.missing, unstable.ParserError{ s.missing = append(s.missing, unstable.ParserError{
Highlight: keyLocation(node), Highlight: s.keyLocation(node),
Message: "missing field", Message: "missing field",
Key: s.key.Key(), Key: s.key.Key(),
}) })
@@ -88,7 +90,7 @@ func (s *strict) Error(doc []byte) error {
return err return err
} }
func keyLocation(node *unstable.Node) []byte { func (s *strict) keyLocation(node *unstable.Node) []byte {
k := node.Key() k := node.Key()
hasOne := k.Next() hasOne := k.Next()
@@ -96,12 +98,17 @@ func keyLocation(node *unstable.Node) []byte {
panic("should not be called with empty key") panic("should not be called with empty key")
} }
start := k.Node().Data // Get the range from the first key to the last key.
end := k.Node().Data firstRaw := k.Node().Raw
lastRaw := firstRaw
for k.Next() { for k.Next() {
end = k.Node().Data lastRaw = k.Node().Raw
} }
return danger.BytesRange(start, end) // Compute the slice from the document using the ranges.
start := firstRaw.Offset
end := lastRaw.Offset + lastRaw.Length
return s.doc[start:end]
} }
+6 -6
View File
@@ -12,7 +12,6 @@ import (
"sync/atomic" "sync/atomic"
"time" "time"
"github.com/pelletier/go-toml/v2/internal/danger"
"github.com/pelletier/go-toml/v2/internal/tracker" "github.com/pelletier/go-toml/v2/internal/tracker"
"github.com/pelletier/go-toml/v2/unstable" "github.com/pelletier/go-toml/v2/unstable"
) )
@@ -123,6 +122,7 @@ func (d *Decoder) Decode(v interface{}) error {
dec := decoder{ dec := decoder{
strict: strict{ strict: strict{
Enabled: d.strict, Enabled: d.strict,
doc: b,
}, },
unmarshalerInterface: d.unmarshalerInterface, unmarshalerInterface: d.unmarshalerInterface,
} }
@@ -1300,13 +1300,13 @@ func fieldByIndex(v reflect.Value, path []int) reflect.Value {
type fieldPathsMap = map[string][]int type fieldPathsMap = map[string][]int
var globalFieldPathsCache atomic.Value // map[danger.TypeID]fieldPathsMap var globalFieldPathsCache atomic.Value // map[reflect.Type]fieldPathsMap
func structFieldPath(v reflect.Value, name string) ([]int, bool) { func structFieldPath(v reflect.Value, name string) ([]int, bool) {
t := v.Type() t := v.Type()
cache, _ := globalFieldPathsCache.Load().(map[danger.TypeID]fieldPathsMap) cache, _ := globalFieldPathsCache.Load().(map[reflect.Type]fieldPathsMap)
fieldPaths, ok := cache[danger.MakeTypeID(t)] fieldPaths, ok := cache[t]
if !ok { if !ok {
fieldPaths = map[string][]int{} fieldPaths = map[string][]int{}
@@ -1317,8 +1317,8 @@ func structFieldPath(v reflect.Value, name string) ([]int, bool) {
fieldPaths[strings.ToLower(name)] = path fieldPaths[strings.ToLower(name)] = path
}) })
newCache := make(map[danger.TypeID]fieldPathsMap, len(cache)+1) newCache := make(map[reflect.Type]fieldPathsMap, len(cache)+1)
newCache[danger.MakeTypeID(t)] = fieldPaths newCache[t] = fieldPaths
for k, v := range cache { for k, v := range cache {
newCache[k] = v newCache[k] = v
} }
+38 -29
View File
@@ -1,10 +1,8 @@
package unstable package unstable
import ( import (
"errors"
"fmt" "fmt"
"unsafe"
"github.com/pelletier/go-toml/v2/internal/danger"
) )
// Iterator over a sequence of nodes. // Iterator over a sequence of nodes.
@@ -19,30 +17,39 @@ import (
// // do something with n // // do something with n
// } // }
type Iterator struct { type Iterator struct {
nodes *[]Node
idx int32
started bool started bool
node *Node
} }
// Next moves the iterator forward and returns true if points to a // Next moves the iterator forward and returns true if points to a
// node, false otherwise. // node, false otherwise.
func (c *Iterator) Next() bool { func (c *Iterator) Next() bool {
if c.nodes == nil {
return false
}
if !c.started { if !c.started {
c.started = true c.started = true
} else if c.node.Valid() { } else if c.idx >= 0 {
c.node = c.node.Next() c.idx = (*c.nodes)[c.idx].next
} }
return c.node.Valid() return c.idx >= 0 && int(c.idx) < len(*c.nodes)
} }
// IsLast returns true if the current node of the iterator is the last // IsLast returns true if the current node of the iterator is the last
// one. Subsequent calls to Next() will return false. // one. Subsequent calls to Next() will return false.
func (c *Iterator) IsLast() bool { func (c *Iterator) IsLast() bool {
return c.node.next == 0 return c.nodes == nil || c.idx < 0 || (*c.nodes)[c.idx].next < 0
} }
// Node returns a pointer to the node pointed at by the iterator. // Node returns a pointer to the node pointed at by the iterator.
func (c *Iterator) Node() *Node { func (c *Iterator) Node() *Node {
return c.node if c.nodes == nil || c.idx < 0 {
return nil
}
n := &(*c.nodes)[c.idx]
n.nodes = c.nodes
return n
} }
// Node in a TOML expression AST. // Node in a TOML expression AST.
@@ -65,11 +72,12 @@ type Node struct {
Raw Range // Raw bytes from the input. Raw Range // Raw bytes from the input.
Data []byte // Node value (either allocated or referencing the input). Data []byte // Node value (either allocated or referencing the input).
// References to other nodes, as offsets in the backing array // Absolute indices into the backing nodes slice. -1 means none.
// from this node. References can go backward, so those can be next int32
// negative. child int32
next int // 0 if last element
child int // 0 if no child // Reference to the backing nodes slice for navigation.
nodes *[]Node
} }
// Range of bytes in the document. // Range of bytes in the document.
@@ -80,24 +88,24 @@ type Range struct {
// Next returns a pointer to the next node, or nil if there is no next node. // Next returns a pointer to the next node, or nil if there is no next node.
func (n *Node) Next() *Node { func (n *Node) Next() *Node {
if n.next == 0 { if n.next < 0 {
return nil return nil
} }
ptr := unsafe.Pointer(n) // #nosec G103 next := &(*n.nodes)[n.next]
size := unsafe.Sizeof(Node{}) next.nodes = n.nodes
return (*Node)(danger.Stride(ptr, size, n.next)) return next
} }
// Child returns a pointer to the first child node of this node. Other children // Child returns a pointer to the first child node of this node. Other children
// can be accessed calling Next on the first child. Returns nil if this Node // can be accessed calling Next on the first child. Returns nil if this Node
// has no child. // has no child.
func (n *Node) Child() *Node { func (n *Node) Child() *Node {
if n.child == 0 { if n.child < 0 {
return nil return nil
} }
ptr := unsafe.Pointer(n) // #nosec G103 child := &(*n.nodes)[n.child]
size := unsafe.Sizeof(Node{}) child.nodes = n.nodes
return (*Node)(danger.Stride(ptr, size, n.child)) return child
} }
// Valid returns true if the node's kind is set (not to Invalid). // Valid returns true if the node's kind is set (not to Invalid).
@@ -111,15 +119,16 @@ func (n *Node) Valid() bool {
func (n *Node) Key() Iterator { func (n *Node) Key() Iterator {
switch n.Kind { switch n.Kind {
case KeyValue: case KeyValue:
value := n.Child() child := n.child
if !value.Valid() { if child < 0 {
panic("KeyValue should have at least two children") panic(errors.New("KeyValue should have at least two children"))
} }
return Iterator{node: value.Next()} valueNode := &(*n.nodes)[child]
return Iterator{nodes: n.nodes, idx: valueNode.next}
case Table, ArrayTable: case Table, ArrayTable:
return Iterator{node: n.Child()} return Iterator{nodes: n.nodes, idx: n.child}
default: default:
panic(fmt.Errorf("key is not supported on a %s", n.Kind)) panic(fmt.Errorf("Key() is not supported on a %s", n.Kind))
} }
} }
@@ -132,5 +141,5 @@ func (n *Node) Value() *Node {
// Children returns an iterator over a node's children. // Children returns an iterator over a node's children.
func (n *Node) Children() Iterator { func (n *Node) Children() Iterator {
return Iterator{node: n.Child()} return Iterator{nodes: n.nodes, idx: n.child}
} }
+10 -17
View File
@@ -7,15 +7,6 @@ type root struct {
nodes []Node nodes []Node
} }
// Iterator returns an iterator positioned on the first top-level node. When
// the tree holds no nodes, the zero Iterator is returned.
func (r *root) Iterator() Iterator {
	if len(r.nodes) == 0 {
		return Iterator{}
	}
	return Iterator{node: &r.nodes[0]}
}
func (r *root) at(idx reference) *Node { func (r *root) at(idx reference) *Node {
return &r.nodes[idx] return &r.nodes[idx]
} }
@@ -33,12 +24,10 @@ type builder struct {
lastIdx int lastIdx int
} }
// Tree returns a pointer to the tree held by the builder.
func (b *builder) Tree() *root {
	tree := &b.tree
	return tree
}
func (b *builder) NodeAt(ref reference) *Node { func (b *builder) NodeAt(ref reference) *Node {
return b.tree.at(ref) n := b.tree.at(ref)
n.nodes = &b.tree.nodes
return n
} }
func (b *builder) Reset() { func (b *builder) Reset() {
@@ -48,24 +37,28 @@ func (b *builder) Reset() {
func (b *builder) Push(n Node) reference { func (b *builder) Push(n Node) reference {
b.lastIdx = len(b.tree.nodes) b.lastIdx = len(b.tree.nodes)
n.next = -1
n.child = -1
b.tree.nodes = append(b.tree.nodes, n) b.tree.nodes = append(b.tree.nodes, n)
return reference(b.lastIdx) return reference(b.lastIdx)
} }
func (b *builder) PushAndChain(n Node) reference { func (b *builder) PushAndChain(n Node) reference {
newIdx := len(b.tree.nodes) newIdx := len(b.tree.nodes)
n.next = -1
n.child = -1
b.tree.nodes = append(b.tree.nodes, n) b.tree.nodes = append(b.tree.nodes, n)
if b.lastIdx >= 0 { if b.lastIdx >= 0 {
b.tree.nodes[b.lastIdx].next = newIdx - b.lastIdx b.tree.nodes[b.lastIdx].next = int32(newIdx) //nolint:gosec // TOML ASTs are small
} }
b.lastIdx = newIdx b.lastIdx = newIdx
return reference(b.lastIdx) return reference(b.lastIdx)
} }
func (b *builder) AttachChild(parent reference, child reference) { func (b *builder) AttachChild(parent reference, child reference) {
b.tree.nodes[parent].child = int(child) - int(parent) b.tree.nodes[parent].child = int32(child) //nolint:gosec // TOML ASTs are small
} }
func (b *builder) Chain(from reference, to reference) { func (b *builder) Chain(from reference, to reference) {
b.tree.nodes[from].next = int(to) - int(from) b.tree.nodes[from].next = int32(to) //nolint:gosec // TOML ASTs are small
} }
+46 -27
View File
@@ -6,7 +6,6 @@ import (
"unicode" "unicode"
"github.com/pelletier/go-toml/v2/internal/characters" "github.com/pelletier/go-toml/v2/internal/characters"
"github.com/pelletier/go-toml/v2/internal/danger"
) )
// ParserError describes an error relative to the content of the document. // ParserError describes an error relative to the content of the document.
@@ -70,11 +69,26 @@ func (p *Parser) Data() []byte {
// panics. // panics.
func (p *Parser) Range(b []byte) Range { func (p *Parser) Range(b []byte) Range {
return Range{ return Range{
Offset: uint32(danger.SubsliceOffset(p.data, b)), // #nosec G115 Offset: uint32(p.subsliceOffset(b)), //nolint:gosec // TOML documents are small
Length: uint32(len(b)), // #nosec G115 Length: uint32(len(b)), //nolint:gosec // TOML documents are small
} }
} }
// rangeOfToken returns the Range covered by token, given rest, the bytes
// that remain after it. The offset is derived purely from lengths: whatever
// in p.data is not accounted for by token and rest precedes the token.
func (p *Parser) rangeOfToken(token, rest []byte) Range {
	remaining := len(token) + len(rest)
	start := len(p.data) - remaining
	return Range{
		Offset: uint32(start),      //nolint:gosec // TOML documents are small
		Length: uint32(len(token)), //nolint:gosec // TOML documents are small
	}
}
// subsliceOffset returns the byte offset of subslice b within p.data.
//
// b must be a suffix (tail) of p.data: the offset is computed from lengths
// only, as len(p.data) - len(b). It panics when b is longer than p.data,
// since the result would be negative and would otherwise be silently
// wrapped by callers converting it to an unsigned offset.
func (p *Parser) subsliceOffset(b []byte) int {
	if len(b) > len(p.data) {
		panic("subslice is longer than the parser data")
	}
	// b is a suffix of p.data, so its offset is len(p.data) - len(b)
	return len(p.data) - len(b)
}
// Raw returns the slice corresponding to the bytes in the given range. // Raw returns the slice corresponding to the bytes in the given range.
func (p *Parser) Raw(raw Range) []byte { func (p *Parser) Raw(raw Range) []byte {
return p.data[raw.Offset : raw.Offset+raw.Length] return p.data[raw.Offset : raw.Offset+raw.Length]
@@ -158,9 +172,17 @@ type Shape struct {
End Position End Position
} }
func (p *Parser) position(b []byte) Position { // Shape returns the shape of the given range in the input. Will
offset := danger.SubsliceOffset(p.data, b) // panic if the range is not a subslice of the input.
func (p *Parser) Shape(r Range) Shape {
	// Byte offsets of the first byte of the range and of the byte just
	// past its end.
	from := int(r.Offset)
	to := int(r.Offset + r.Length)

	var s Shape
	s.Start = p.positionAt(from)
	s.End = p.positionAt(to)
	return s
}
// positionAt returns the position at the given byte offset in the document.
func (p *Parser) positionAt(offset int) Position {
lead := p.data[:offset] lead := p.data[:offset]
return Position{ return Position{
@@ -170,16 +192,6 @@ func (p *Parser) position(b []byte) Position {
} }
} }
// Shape returns the shape of the given range in the input. Will
// panic if the range is not a subslice of the input.
func (p *Parser) Shape(r Range) Shape {
raw := p.Raw(r)
return Shape{
Start: p.position(raw),
End: p.position(raw[r.Length:]),
}
}
func (p *Parser) parseNewline(b []byte) ([]byte, error) { func (p *Parser) parseNewline(b []byte) ([]byte, error) {
if b[0] == '\n' { if b[0] == '\n' {
return b[1:], nil return b[1:], nil
@@ -199,7 +211,7 @@ func (p *Parser) parseComment(b []byte) (reference, []byte, error) {
if p.KeepComments && err == nil { if p.KeepComments && err == nil {
ref = p.builder.Push(Node{ ref = p.builder.Push(Node{
Kind: Comment, Kind: Comment,
Raw: p.Range(data), Raw: p.rangeOfToken(data, rest),
Data: data, Data: data,
}) })
} }
@@ -351,6 +363,7 @@ func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
return ref, b, err return ref, b, err
} }
//nolint:cyclop,funlen
func (p *Parser) parseVal(b []byte) (reference, []byte, error) { func (p *Parser) parseVal(b []byte) (reference, []byte, error) {
// val = string / boolean / array / inline-table / date-time / float / integer // val = string / boolean / array / inline-table / date-time / float / integer
ref := invalidReference ref := invalidReference
@@ -375,7 +388,7 @@ func (p *Parser) parseVal(b []byte) (reference, []byte, error) {
if err == nil { if err == nil {
ref = p.builder.Push(Node{ ref = p.builder.Push(Node{
Kind: String, Kind: String,
Raw: p.Range(raw), Raw: p.rangeOfToken(raw, b),
Data: v, Data: v,
}) })
} }
@@ -393,7 +406,7 @@ func (p *Parser) parseVal(b []byte) (reference, []byte, error) {
if err == nil { if err == nil {
ref = p.builder.Push(Node{ ref = p.builder.Push(Node{
Kind: String, Kind: String,
Raw: p.Range(raw), Raw: p.rangeOfToken(raw, b),
Data: v, Data: v,
}) })
} }
@@ -455,7 +468,7 @@ func (p *Parser) parseInlineTable(b []byte) (reference, []byte, error) {
// inline-table-keyvals = keyval [ inline-table-sep inline-table-keyvals ] // inline-table-keyvals = keyval [ inline-table-sep inline-table-keyvals ]
parent := p.builder.Push(Node{ parent := p.builder.Push(Node{
Kind: InlineTable, Kind: InlineTable,
Raw: p.Range(b[:1]), Raw: p.rangeOfToken(b[:1], b[1:]),
}) })
first := true first := true
@@ -508,6 +521,7 @@ func (p *Parser) parseInlineTable(b []byte) (reference, []byte, error) {
return parent, rest, err return parent, rest, err
} }
//nolint:funlen,cyclop
func (p *Parser) parseValArray(b []byte) (reference, []byte, error) { func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
// array = array-open [ array-values ] ws-comment-newline array-close // array = array-open [ array-values ] ws-comment-newline array-close
// array-open = %x5B ; [ // array-open = %x5B ; [
@@ -671,6 +685,7 @@ func (p *Parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte,
return token, token[i : len(token)-3], rest, err return token, token[i : len(token)-3], rest, err
} }
//nolint:funlen,gocognit,cyclop
func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, error) { func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, error) {
// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body // ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
// ml-basic-string-delim // ml-basic-string-delim
@@ -716,6 +731,7 @@ func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, er
for i < len(token)-3 { for i < len(token)-3 {
c := token[i] c := token[i]
//nolint:nestif
if c == '\\' { if c == '\\' {
// When the last non-whitespace character on a line is an unescaped \, // When the last non-whitespace character on a line is an unescaped \,
// it will be trimmed along with all whitespace (including newlines) up // it will be trimmed along with all whitespace (including newlines) up
@@ -817,7 +833,7 @@ func (p *Parser) parseKey(b []byte) (reference, []byte, error) {
ref := p.builder.Push(Node{ ref := p.builder.Push(Node{
Kind: Key, Kind: Key,
Raw: p.Range(raw), Raw: p.rangeOfToken(raw, b),
Data: key, Data: key,
}) })
@@ -833,7 +849,7 @@ func (p *Parser) parseKey(b []byte) (reference, []byte, error) {
p.builder.PushAndChain(Node{ p.builder.PushAndChain(Node{
Kind: Key, Kind: Key,
Raw: p.Range(raw), Raw: p.rangeOfToken(raw, b),
Data: key, Data: key,
}) })
} else { } else {
@@ -865,6 +881,7 @@ func (p *Parser) parseSimpleKey(b []byte) (raw, key, rest []byte, err error) {
} }
} }
//nolint:funlen,cyclop
func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) { func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
// basic-string = quotation-mark *basic-char quotation-mark // basic-string = quotation-mark *basic-char quotation-mark
// quotation-mark = %x22 ; " // quotation-mark = %x22 ; "
@@ -998,6 +1015,7 @@ func (p *Parser) parseWhitespace(b []byte) []byte {
return rest return rest
} }
//nolint:cyclop
func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error) { func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error) {
switch b[0] { switch b[0] {
case 'i': case 'i':
@@ -1008,7 +1026,7 @@ func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error)
return p.builder.Push(Node{ return p.builder.Push(Node{
Kind: Float, Kind: Float,
Data: b[:3], Data: b[:3],
Raw: p.Range(b[:3]), Raw: p.rangeOfToken(b[:3], b[3:]),
}), b[3:], nil }), b[3:], nil
case 'n': case 'n':
if !scanFollowsNan(b) { if !scanFollowsNan(b) {
@@ -1018,7 +1036,7 @@ func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error)
return p.builder.Push(Node{ return p.builder.Push(Node{
Kind: Float, Kind: Float,
Data: b[:3], Data: b[:3],
Raw: p.Range(b[:3]), Raw: p.rangeOfToken(b[:3], b[3:]),
}), b[3:], nil }), b[3:], nil
case '+', '-': case '+', '-':
return p.scanIntOrFloat(b) return p.scanIntOrFloat(b)
@@ -1113,6 +1131,7 @@ byteLoop:
}), b[i:], nil }), b[i:], nil
} }
//nolint:funlen,gocognit,cyclop
func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) { func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) {
i := 0 i := 0
@@ -1142,7 +1161,7 @@ func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) {
return p.builder.Push(Node{ return p.builder.Push(Node{
Kind: Integer, Kind: Integer,
Data: b[:i], Data: b[:i],
Raw: p.Range(b[:i]), Raw: p.rangeOfToken(b[:i], b[i:]),
}), b[i:], nil }), b[i:], nil
} }
@@ -1166,7 +1185,7 @@ func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) {
return p.builder.Push(Node{ return p.builder.Push(Node{
Kind: Float, Kind: Float,
Data: b[:i+3], Data: b[:i+3],
Raw: p.Range(b[:i+3]), Raw: p.rangeOfToken(b[:i+3], b[i+3:]),
}), b[i+3:], nil }), b[i+3:], nil
} }
@@ -1178,7 +1197,7 @@ func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) {
return p.builder.Push(Node{ return p.builder.Push(Node{
Kind: Float, Kind: Float,
Data: b[:i+3], Data: b[:i+3],
Raw: p.Range(b[:i+3]), Raw: p.rangeOfToken(b[:i+3], b[i+3:]),
}), b[i+3:], nil }), b[i+3:], nil
} }
@@ -1201,7 +1220,7 @@ func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) {
return p.builder.Push(Node{ return p.builder.Push(Node{
Kind: kind, Kind: kind,
Data: b[:i], Data: b[:i],
Raw: p.Range(b[:i]), Raw: p.rangeOfToken(b[:i], b[i:]),
}), b[i:], nil }), b[i:], nil
} }
+69
View File
@@ -196,6 +196,7 @@ func compareIterator(t *testing.T, expected []astNode, actual Iterator) {
} }
} }
//nolint:funlen
func TestParser_AST(t *testing.T) { func TestParser_AST(t *testing.T) {
examples := []struct { examples := []struct {
desc string desc string
@@ -604,6 +605,74 @@ key5 = [ # Next to start of inline array.
// 36:1->36:21 (804->824) | Comment [# After array table.] // 36:1->36:21 (804->824) | Comment [# After array table.]
} }
// TestIterator_IsLast iterates over the children of a three-element array
// through the public Parser API and checks that IsLast reports true on the
// third element only.
func TestIterator_IsLast(t *testing.T) {
	doc := `array = [1, 2, 3]`

	p := Parser{}
	p.Reset([]byte(doc))
	// Fail with the parser error instead of continuing on an expression
	// that was never produced.
	if !p.NextExpression() {
		t.Fatalf("expected an expression, got error: %v", p.Error())
	}

	e := p.Expression()
	arr := e.Value() // The array node
	it := arr.Children()

	count := 0
	// 1-based positions at which IsLast returned true. Recording the
	// positions (not just how many) pins which element is reported last.
	var lastAt []int
	for it.Next() {
		count++
		if it.IsLast() {
			lastAt = append(lastAt, count)
		}
	}

	assert.Equal(t, 3, count)
	assert.Equal(t, []int{3}, lastAt)
}
// TestNodeChaining parses a dotted key and checks that the key iterator
// yields each part in order, which relies on sibling nodes being chained
// correctly.
func TestNodeChaining(t *testing.T) {
	doc := `a.b.c = 1`

	p := Parser{}
	p.Reset([]byte(doc))
	// Fail with the parser error instead of continuing on an expression
	// that was never produced.
	if !p.NextExpression() {
		t.Fatalf("expected an expression, got error: %v", p.Error())
	}

	e := p.Expression()
	// KeyValue has children: value, then key parts (a, b, c)
	keyIt := e.Key()

	// Collect all key parts by following the iterator
	var keys []string
	for keyIt.Next() {
		keys = append(keys, string(keyIt.Node().Data))
	}

	assert.Equal(t, []string{"a", "b", "c"}, keys)
}
// TestMultipleExpressions walks every top-level expression of a
// three-assignment document and collects the first key part of each one,
// then checks that parsing finished without error.
func TestMultipleExpressions(t *testing.T) {
	doc := `
key1 = "value1"
key2 = "value2"
key3 = "value3"
`

	parser := Parser{}
	parser.Reset([]byte(doc))

	var got []string
	for parser.NextExpression() {
		expr := parser.Expression()
		it := expr.Key()
		it.Next()
		first := it.Node()
		got = append(got, string(first.Data))
	}

	assert.NoError(t, parser.Error())
	assert.Equal(t, []string{"key1", "key2", "key3"}, got)
}
func ExampleParser() { func ExampleParser() {
doc := ` doc := `
hello = "world" hello = "world"