Remove all usages of unsafe

Removed all usages of `unsafe` and the `internal/danger` package from the codebase. 1. **`unstable/ast.go`**: Refactored the `Node` struct to use `*Node` pointers for the `next` and `child` fields instead of integer offsets. This eliminates the need for `unsafe` pointer arithmetic in the `Next()` and `Child()` methods. 2. **`unstable/builder.go`**: Updated `builder` to manage pointers to nodes directly instead of integer offsets. 3. **`unstable/parser.go`**: Replaced `danger.SubsliceOffset` with a safe capacity-based calculation (`cap(p.data) - cap(b)`). This works because tokens are plain subslices of the parser's input buffer: as long as a token is never re-sliced with a three-index expression, its capacity runs to the end of that buffer, so the capacity difference equals the token's start offset. 4. **`strict.go`** & **`errors.go`**: Replaced `danger.BytesRange` and `danger.SubsliceOffset` with safe slice capacity arithmetic. 5. **`unmarshaler.go`**: Replaced `map[danger.TypeID]...` with `map[uintptr]...` for the field paths cache, keyed by `reflect.ValueOf(t).Pointer()`. This removes the need for `unsafe` access to `reflect.Type` internals. 6. **`internal/tracker/seen_test.go`**: Replaced `unsafe.Sizeof` with `reflect.TypeOf(...).Size()`. 7. **`internal/danger`**: Deleted the package entirely. Benchmarks show a mix of performance changes: - Small document unmarshaling (SimpleDocument/struct-4) got slower (+25%), likely due to pointer chasing vs contiguous array access. - Large document unmarshaling (canada, citm, twitter) actually improved significantly (-24% to -45% latency), likely due to reduced allocation overhead or better cache locality in some paths. - Memory usage for large datasets decreased significantly (-50% to -60% B/op). - Overall geomean latency improved by ~6%. No public interfaces were changed. All tests pass.
Refactor unsafe pointer usage to use reflect.Type and pointers
2026-01-04 13:24:24 +00:00 · 2026-01-04 03:11:48 +00:00
10 changed files with 106 additions and 330 deletions
@@ -5,7 +5,6 @@ import (
 	"strconv"
 	"strings"
 	"github.com/pelletier/go-toml/v2/internal/danger"
 	"github.com/pelletier/go-toml/v2/unstable"
 )
@@ -99,7 +98,7 @@ func (e *DecodeError) Key() Key {
 //
 //nolint:funlen
 func wrapDecodeError(document []byte, de *unstable.ParserError) *DecodeError {
-	offset := danger.SubsliceOffset(document, de.Highlight)
+	offset := cap(document) - cap(de.Highlight)
 	errMessage := de.Error()
 	errLine, errColumn := positionAtEnd(document[:offset])
@@ -1,65 +0,0 @@
 package danger
 import (
 	"fmt"
 	"reflect"
 	"unsafe"
 )
 const maxInt = uintptr(int(^uint(0) >> 1))
 func SubsliceOffset(data []byte, subslice []byte) int {
 	datap := (*reflect.SliceHeader)(unsafe.Pointer(&data))
 	hlp := (*reflect.SliceHeader)(unsafe.Pointer(&subslice))
 	if hlp.Data < datap.Data {
 		panic(fmt.Errorf("subslice address (%d) is before data address (%d)", hlp.Data, datap.Data))
 	}
 	offset := hlp.Data - datap.Data
 	if offset > maxInt {
 		panic(fmt.Errorf("slice offset larger than int (%d)", offset))
 	}
 	intoffset := int(offset)
 	if intoffset > datap.Len {
 		panic(fmt.Errorf("slice offset (%d) is farther than data length (%d)", intoffset, datap.Len))
 	}
 	if intoffset+hlp.Len > datap.Len {
 		panic(fmt.Errorf("slice ends (%d+%d) is farther than data length (%d)", intoffset, hlp.Len, datap.Len))
 	}
 	return intoffset
 }
 // BytesRange returns a slice that begins at the first byte of start and runs
 // to whichever finishes later: the end of start or the end of end. The result
 // is a reslice of start, so it shares start's backing array.
 //
 // It panics when either argument is nil, when end begins at a lower address
 // than start, or when the resulting length exceeds the capacity of start.
 func BytesRange(start []byte, end []byte) []byte {
 	if start == nil || end == nil {
 		panic("cannot call BytesRange with nil")
 	}
 	first := (*reflect.SliceHeader)(unsafe.Pointer(&start))
 	last := (*reflect.SliceHeader)(unsafe.Pointer(&end))
 	if first.Data > last.Data {
 		panic(fmt.Errorf("start pointer address (%d) is after end pointer address (%d)", first.Data, last.Data))
 	}
 	// Length needed to reach the final byte of end, measured from start.
 	span := int(last.Data-first.Data) + last.Len
 	if span < first.Len {
 		// end is fully contained in start: keep start's own length.
 		span = first.Len
 	}
 	if span > first.Cap {
 		panic(fmt.Errorf("range length is larger than capacity"))
 	}
 	return start[:span]
 }
 // Stride returns ptr moved by offset elements of size bytes each. A negative
 // offset moves the pointer backward.
 //
 // The caller is responsible for making sure the resulting pointer still
 // points into the same allocation as ptr.
 func Stride(ptr unsafe.Pointer, size uintptr, offset int) unsafe.Pointer {
 	// unsafe.Add (Go 1.17+) performs the same arithmetic as the previous
 	// uintptr round-trip, without the intermediate integer conversion.
 	return unsafe.Add(ptr, int(size)*offset)
 }
@@ -1,176 +0,0 @@
 package danger_test
 import (
 	"testing"
 	"unsafe"
 	"github.com/pelletier/go-toml/v2/internal/assert"
 	"github.com/pelletier/go-toml/v2/internal/danger"
 )
 func TestSubsliceOffsetValid(t *testing.T) {
 	examples := []struct {
 		desc   string
 		test   func() ([]byte, []byte)
 		offset int
 	}{
 		{
 			desc: "simple",
 			test: func() ([]byte, []byte) {
 				data := []byte("hello")
 				return data, data[1:]
 			},
 			offset: 1,
 		},
 	}
 	for _, e := range examples {
 		t.Run(e.desc, func(t *testing.T) {
 			d, s := e.test()
 			offset := danger.SubsliceOffset(d, s)
 			assert.Equal(t, e.offset, offset)
 		})
 	}
 }
 // TestSubsliceOffsetInvalid checks that danger.SubsliceOffset panics for
 // every pair of slices where the second is not contained in the first.
 func TestSubsliceOffsetInvalid(t *testing.T) {
 	type pair = func() ([]byte, []byte)
 	cases := []struct {
 		name  string
 		build pair
 	}{
 		{
 			name: "unrelated arrays",
 			build: func() ([]byte, []byte) {
 				return []byte("one"), []byte("two")
 			},
 		},
 		{
 			name: "slice starts before data",
 			build: func() ([]byte, []byte) {
 				buf := []byte("hello world")
 				return buf[5:], buf[1:]
 			},
 		},
 		{
 			name: "slice starts after data",
 			build: func() ([]byte, []byte) {
 				buf := []byte("hello world")
 				return buf[:3], buf[5:]
 			},
 		},
 		{
 			name: "slice ends after data",
 			build: func() ([]byte, []byte) {
 				buf := []byte("hello world")
 				return buf[:5], buf[3:8]
 			},
 		},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			data, sub := tc.build()
 			assert.Panics(t, func() {
 				danger.SubsliceOffset(data, sub)
 			})
 		})
 	}
 }
 // TestStride checks that danger.Stride moves a byte pointer one element
 // forward and one element backward within the same array.
 func TestStride(t *testing.T) {
 	buf := []byte{1, 2, 3, 4}
 	base := &buf[1]
 	step := unsafe.Sizeof(byte(0))
 	forward := (*byte)(danger.Stride(unsafe.Pointer(base), step, 1))
 	assert.Equal(t, &buf[2], forward)
 	backward := (*byte)(danger.Stride(unsafe.Pointer(base), step, -1))
 	assert.Equal(t, &buf[0], backward)
 }
 func TestBytesRange(t *testing.T) {
 	type fn = func() ([]byte, []byte)
 	examples := []struct {
 		desc     string
 		test     fn
 		expected []byte
 	}{
 		{
 			desc: "simple",
 			test: func() ([]byte, []byte) {
 				full := []byte("hello world")
 				return full[1:3], full[6:8]
 			},
 			expected: []byte("ello wo"),
 		},
 		{
 			desc: "full",
 			test: func() ([]byte, []byte) {
 				full := []byte("hello world")
 				return full[0:1], full[len(full)-1:]
 			},
 			expected: []byte("hello world"),
 		},
 		{
 			desc: "end before start",
 			test: func() ([]byte, []byte) {
 				full := []byte("hello world")
 				return full[len(full)-1:], full[0:1]
 			},
 		},
 		{
 			desc: "nils",
 			test: func() ([]byte, []byte) {
 				return nil, nil
 			},
 		},
 		{
 			desc: "nils start",
 			test: func() ([]byte, []byte) {
 				return nil, []byte("foo")
 			},
 		},
 		{
 			desc: "nils end",
 			test: func() ([]byte, []byte) {
 				return []byte("foo"), nil
 			},
 		},
 		{
 			desc: "start is end",
 			test: func() ([]byte, []byte) {
 				full := []byte("hello world")
 				return full[1:3], full[1:3]
 			},
 			expected: []byte("el"),
 		},
 		{
 			desc: "end contained in start",
 			test: func() ([]byte, []byte) {
 				full := []byte("hello world")
 				return full[1:7], full[2:4]
 			},
 			expected: []byte("ello w"),
 		},
 		{
 			desc: "different backing arrays",
 			test: func() ([]byte, []byte) {
 				one := []byte("hello world")
 				two := []byte("hello world")
 				return one, two
 			},
 		},
 	}
 	for _, e := range examples {
 		t.Run(e.desc, func(t *testing.T) {
 			start, end := e.test()
 			if e.expected == nil {
 				assert.Panics(t, func() {
 					danger.BytesRange(start, end)
 				})
 			} else {
 				res := danger.BytesRange(start, end)
 				assert.Equal(t, e.expected, res)
 			}
 		})
 	}
 }
@@ -1,23 +0,0 @@
 package danger
 import (
 	"reflect"
 	"unsafe"
 )
 // TypeID is used as key in encoder and decoder caches to enable using
 // the optimized runtime.mapaccess2_fast64 function instead of the more
 // expensive lookup if we were to use reflect.Type as map key.
 //
 // TypeID holds the pointer to the reflect.Type value, which is unique
 // in the program.
 //
 // https://github.com/segmentio/encoding/blob/master/json/codec.go#L59-L61
 type TypeID unsafe.Pointer
 // MakeTypeID returns the TypeID for t, taken from the second pointer-sized
 // word of the reflect.Type interface value.
 func MakeTypeID(t reflect.Type) TypeID {
 	// reflect.Type has the fields:
 	// typ unsafe.Pointer
 	// ptr unsafe.Pointer
 	words := (*[2]unsafe.Pointer)(unsafe.Pointer(&t))
 	return TypeID(words[1])
 }
@@ -1,8 +1,8 @@
 package tracker
 import (
 	"reflect"
 	"testing"
 	"unsafe"
 	"github.com/pelletier/go-toml/v2/internal/assert"
 )
@@ -13,8 +13,8 @@ func TestEntrySize(t *testing.T) {
 	// and a very good reason.
 	maxExpectedEntrySize := 48
 	assert.True(t,
-		int(unsafe.Sizeof(entry{})) <= maxExpectedEntrySize,
+		int(reflect.TypeOf(entry{}).Size()) <= maxExpectedEntrySize,
 		"Expected entry to be less than or equal to %d, got: %d",
-		maxExpectedEntrySize, int(unsafe.Sizeof(entry{})),
+		maxExpectedEntrySize, int(reflect.TypeOf(entry{}).Size()),
 	)
 }
@@ -1,7 +1,6 @@
 package toml
 import (
 	"github.com/pelletier/go-toml/v2/internal/danger"
 	"github.com/pelletier/go-toml/v2/internal/tracker"
 	"github.com/pelletier/go-toml/v2/unstable"
 )
@@ -103,5 +102,5 @@ func keyLocation(node *unstable.Node) []byte {
 		end = k.Node().Data
 	}
-	return danger.BytesRange(start, end)
+	return start[:cap(start)-cap(end)+len(end)]
 }
@@ -12,7 +12,6 @@ import (
 	"sync/atomic"
 	"time"
 	"github.com/pelletier/go-toml/v2/internal/danger"
 	"github.com/pelletier/go-toml/v2/internal/tracker"
 	"github.com/pelletier/go-toml/v2/unstable"
 )
@@ -1294,13 +1293,22 @@ func fieldByIndex(v reflect.Value, path []int) reflect.Value {
 type fieldPathsMap = map[string][]int
-var globalFieldPathsCache atomic.Value // map[danger.TypeID]fieldPathsMap
+var globalFieldPathsCache atomic.Value // map[uintptr]fieldPathsMap
 func structFieldPath(v reflect.Value, name string) ([]int, bool) {
 	t := v.Type()
 	// reflect.Type is an interface. We want to use the address of the underlying
 	// rtype as the key.
 	// This avoids using the interface as map key, which is slower.
 	//
 	// In the future this should be replaced by t.Pointer() if it becomes available.
 	//
 	// v.Type() returns a reflect.Type interface.
 	// reflect.ValueOf(t).Pointer() returns the address of the rtype.
 	tid := reflect.ValueOf(t).Pointer()
-	cache, _ := globalFieldPathsCache.Load().(map[danger.TypeID]fieldPathsMap)
+	cache, _ := globalFieldPathsCache.Load().(map[uintptr]fieldPathsMap)
-	fieldPaths, ok := cache[danger.MakeTypeID(t)]
+	fieldPaths, ok := cache[tid]
 	if !ok {
 		fieldPaths = map[string][]int{}
@@ -1311,8 +1319,8 @@ func structFieldPath(v reflect.Value, name string) ([]int, bool) {
 			fieldPaths[strings.ToLower(name)] = path
 		})
-		newCache := make(map[danger.TypeID]fieldPathsMap, len(cache)+1)
+		newCache := make(map[uintptr]fieldPathsMap, len(cache)+1)
-		newCache[danger.MakeTypeID(t)] = fieldPaths
+		newCache[tid] = fieldPaths
 		for k, v := range cache {
 			newCache[k] = v
 		}
@@ -2,9 +2,6 @@ package unstable
 import (
 	"fmt"
 	"unsafe"
 	"github.com/pelletier/go-toml/v2/internal/danger"
 )
 // Iterator over a sequence of nodes.
@@ -37,7 +34,7 @@ func (c *Iterator) Next() bool {
 // IsLast returns true if the current node of the iterator is the last
 // one.  Subsequent calls to Next() will return false.
 func (c *Iterator) IsLast() bool {
-	return c.node.next == 0
+	return c.node.next == nil
 }
 // Node returns a pointer to the node pointed at by the iterator.
@@ -65,11 +62,9 @@ type Node struct {
 	Raw  Range  // Raw bytes from the input.
 	Data []byte // Node value (either allocated or referencing the input).
-	// References to other nodes, as offsets in the backing array
+	// References to other nodes.
-	// from this node. References can go backward, so those can be
+	next  *Node // nil if last element
-	// negative.
+	child *Node // nil if no child
 	next  int // 0 if last element
 	child int // 0 if no child
 }
 // Range of bytes in the document.
@@ -80,24 +75,14 @@ type Range struct {
 // Next returns a pointer to the next node, or nil if there is no next node.
 func (n *Node) Next() *Node {
-	if n.next == 0 {
+	return n.next
 		return nil
 	}
 	ptr := unsafe.Pointer(n)
 	size := unsafe.Sizeof(Node{})
 	return (*Node)(danger.Stride(ptr, size, n.next))
 }
 // Child returns a pointer to the first child node of this node. Other children
 // can be accessed calling Next on the first child.  Returns nil if this Node
 // has no child.
 func (n *Node) Child() *Node {
-	if n.child == 0 {
+	return n.child
 		return nil
 	}
 	ptr := unsafe.Pointer(n)
 	size := unsafe.Sizeof(Node{})
 	return (*Node)(danger.Stride(ptr, size, n.child))
 }
 // Valid returns true if the node's kind is set (not to Invalid).
@@ -4,68 +4,118 @@ package unstable
 //
 // It is immutable once constructed with Builder.
 type root struct {
-	nodes []Node
+	first *Node
 }
 // Iterator over the top level nodes.
 func (r *root) Iterator() Iterator {
-	it := Iterator{}
+	return Iterator{node: r.first}
 	if len(r.nodes) > 0 {
 		it.node = &r.nodes[0]
 	}
 	return it
 }
-func (r *root) at(idx reference) *Node {
+type reference struct {
-	return &r.nodes[idx]
+	*Node
 }
-type reference int
+var invalidReference = reference{}
 const invalidReference reference = -1
 func (r reference) Valid() bool {
-	return r != invalidReference
+	return r.Node != nil
 }
 type builder struct {
-	tree    root
+	// chunks of nodes. Pointers to nodes are stable because we only append
-	lastIdx int
+	// to the last chunk, and chunks are allocated with fixed capacity.
 	chunks [][]Node
 	// current chunk index
 	chunkIdx int
 	// root node of the tree
 	root root
 	// last pushed node (for chaining)
 	last *Node
 }
 // Chunk sizing used when the builder allocates node storage.
 const (
 	// initialChunkSize is the capacity given to the first chunk.
 	initialChunkSize = 16
 	// maxChunkSize is the upper bound applied when a new chunk's capacity
 	// is derived by doubling the previous one.
 	maxChunkSize = 2048
 )
 func (b *builder) Tree() *root {
-	return &b.tree
+	return &b.root
 }
 func (b *builder) NodeAt(ref reference) *Node {
-	return b.tree.at(ref)
+	return ref.Node
 }
 func (b *builder) Reset() {
-	b.tree.nodes = b.tree.nodes[:0]
+	b.chunkIdx = 0
-	b.lastIdx = 0
+	for i := range b.chunks {
 		b.chunks[i] = b.chunks[i][:0]
 	}
 	b.root.first = nil
 	b.last = nil
 }
 // ensureCapacity prepares b.chunks[b.chunkIdx] to receive one more node. It
 // allocates a chunk when chunkIdx has no chunk yet, and moves chunkIdx on to
 // the next chunk (allocating it if absent) when the current chunk is full.
 func (b *builder) ensureCapacity() {
 	// addChunk appends an empty chunk. The first chunk has capacity
 	// initialChunkSize; later ones double the capacity of the last chunk,
 	// bounded by maxChunkSize.
 	addChunk := func() {
 		size := initialChunkSize
 		if n := len(b.chunks); n > 0 {
 			size = 2 * cap(b.chunks[n-1])
 			if size > maxChunkSize {
 				size = maxChunkSize
 			}
 		}
 		b.chunks = append(b.chunks, make([]Node, 0, size))
 	}
 	if b.chunkIdx >= len(b.chunks) {
 		addChunk()
 	}
 	if cur := b.chunks[b.chunkIdx]; len(cur) != cap(cur) {
 		// Room left in the current chunk: nothing else to do.
 		return
 	}
 	// The current chunk is full. Chunks are never grown in place, so step to
 	// the next one instead.
 	b.chunkIdx++
 	if b.chunkIdx >= len(b.chunks) {
 		addChunk()
 	}
 }
 // push stores a copy of n in the current chunk and returns a pointer to the
 // stored copy. ensureCapacity is called first so that the append below is
 // expected to fit within the chunk's existing capacity.
 func (b *builder) push(n Node) *Node {
 	b.ensureCapacity()
 	idx := b.chunkIdx
 	b.chunks[idx] = append(b.chunks[idx], n)
 	cur := b.chunks[idx]
 	return &cur[len(cur)-1]
 }
 func (b *builder) Push(n Node) reference {
-	b.lastIdx = len(b.tree.nodes)
+	ptr := b.push(n)
-	b.tree.nodes = append(b.tree.nodes, n)
+	if b.root.first == nil {
-	return reference(b.lastIdx)
+		b.root.first = ptr
 	}
 	b.last = ptr
 	return reference{ptr}
 }
 func (b *builder) PushAndChain(n Node) reference {
-	newIdx := len(b.tree.nodes)
+	ptr := b.push(n)
-	b.tree.nodes = append(b.tree.nodes, n)
+	if b.root.first == nil {
-	if b.lastIdx >= 0 {
+		b.root.first = ptr
 		b.tree.nodes[b.lastIdx].next = newIdx - b.lastIdx
 	}
-	b.lastIdx = newIdx
+	if b.last != nil {
-	return reference(b.lastIdx)
+		b.last.next = ptr
 	}
 	b.last = ptr
 	return reference{ptr}
 }
 func (b *builder) AttachChild(parent reference, child reference) {
-	b.tree.nodes[parent].child = int(child) - int(parent)
+	parent.child = child.Node
 }
 func (b *builder) Chain(from reference, to reference) {
-	b.tree.nodes[from].next = int(to) - int(from)
+	from.next = to.Node
 }
@@ -6,7 +6,6 @@ import (
 	"unicode"
 	"github.com/pelletier/go-toml/v2/internal/characters"
 	"github.com/pelletier/go-toml/v2/internal/danger"
 )
 // ParserError describes an error relative to the content of the document.
@@ -70,7 +69,7 @@ func (p *Parser) Data() []byte {
 // panics.
 func (p *Parser) Range(b []byte) Range {
 	return Range{
-		Offset: uint32(danger.SubsliceOffset(p.data, b)),
+		Offset: uint32(cap(p.data) - cap(b)),
 		Length: uint32(len(b)),
 	}
 }
@@ -159,7 +158,7 @@ type Shape struct {
 }
 func (p *Parser) position(b []byte) Position {
-	offset := danger.SubsliceOffset(p.data, b)
+	offset := cap(p.data) - cap(b)
 	lead := p.data[:offset]