Compare commits

...

2 Commits

Author SHA1 Message Date
Cursor Agent 530b363c2f Remove all usages of unsafe
Removed all usages of `unsafe` and the `internal/danger` package from the codebase.

1.  **`unstable/ast.go`**: Refactored `Node` struct to use `*Node` pointers for `next` and `child` fields instead of integer offsets. This eliminates the need for `unsafe` pointer arithmetic in `Next()` and `Child()` methods.
2.  **`unstable/builder.go`**: Updated `builder` to manage pointers to nodes directly instead of integer offsets.
3.  **`unstable/parser.go`**:
    *   Replaced `danger.SubsliceOffset` with safe capacity-based calculation (`cap(p.data) - cap(b)`), which works because tokens are slices of the parser's input buffer.
4.  **`strict.go`** & **`errors.go`**: Replaced `danger.BytesRange` and `danger.SubsliceOffset` with safe slice capacity arithmetic.
5.  **`unmarshaler.go`**: Replaced `map[danger.TypeID]...` with `map[uintptr]...` for the field paths cache using `reflect.ValueOf(t).Pointer()`. This removes the need for `unsafe` access to `reflect.Type` internals.
6.  **`internal/tracker/seen_test.go`**: Replaced `unsafe.Sizeof` with `reflect.TypeOf(...).Size()`.
7.  **`internal/danger`**: Deleted the package entirely.

Benchmarks show a mix of performance changes:
- Small document unmarshaling (SimpleDocument/struct-4) got slower (+25%), likely due to pointer chasing vs contiguous array access.
- Large document unmarshaling (canada, citm, twitter) actually improved significantly (-24% to -45% latency), likely due to reduced allocation overhead or better cache locality in some paths.
- Memory usage for large datasets decreased significantly (-50% to -60% B/op).
- Overall geomean latency improved by ~6%.

No public interfaces were changed. All tests pass.
2026-01-04 13:24:24 +00:00
Cursor Agent f09f77ab06 Refactor unsafe pointer usage to use reflect.Type and pointers
Remove internal/danger package and replace unsafe pointer arithmetic with direct pointer manipulation. Update AST node references to use pointers instead of integer offsets. This improves code safety and maintainability.

Co-authored-by: thomas.pelletier <thomas.pelletier@bedrockrobotics.com>
2026-01-04 03:11:48 +00:00
10 changed files with 106 additions and 330 deletions
+1 -2
View File
@@ -5,7 +5,6 @@ import (
"strconv" "strconv"
"strings" "strings"
"github.com/pelletier/go-toml/v2/internal/danger"
"github.com/pelletier/go-toml/v2/unstable" "github.com/pelletier/go-toml/v2/unstable"
) )
@@ -99,7 +98,7 @@ func (e *DecodeError) Key() Key {
// //
//nolint:funlen //nolint:funlen
func wrapDecodeError(document []byte, de *unstable.ParserError) *DecodeError { func wrapDecodeError(document []byte, de *unstable.ParserError) *DecodeError {
offset := danger.SubsliceOffset(document, de.Highlight) offset := cap(document) - cap(de.Highlight)
errMessage := de.Error() errMessage := de.Error()
errLine, errColumn := positionAtEnd(document[:offset]) errLine, errColumn := positionAtEnd(document[:offset])
-65
View File
@@ -1,65 +0,0 @@
package danger
import (
"fmt"
"reflect"
"unsafe"
)
// maxInt is the largest value of type int, expressed as a uintptr so it can
// be compared against address differences.
const maxInt = uintptr(int(^uint(0) >> 1))

// SubsliceOffset returns the number of bytes between the first element of
// data and the first element of subslice, computed from the Data addresses
// of the two slice headers.
//
// It panics when subslice starts before data, when the distance does not fit
// in an int, when the distance exceeds len(data), or when subslice would end
// past len(data).
func SubsliceOffset(data []byte, subslice []byte) int {
	outer := (*reflect.SliceHeader)(unsafe.Pointer(&data))
	inner := (*reflect.SliceHeader)(unsafe.Pointer(&subslice))

	if inner.Data < outer.Data {
		panic(fmt.Errorf("subslice address (%d) is before data address (%d)", inner.Data, outer.Data))
	}

	// Safe to subtract: inner.Data >= outer.Data was established above.
	distance := inner.Data - outer.Data
	if distance > maxInt {
		panic(fmt.Errorf("slice offset larger than int (%d)", distance))
	}

	pos := int(distance)
	switch {
	case pos > outer.Len:
		panic(fmt.Errorf("slice offset (%d) is farther than data length (%d)", pos, outer.Len))
	case pos+inner.Len > outer.Len:
		panic(fmt.Errorf("slice ends (%d+%d) is farther than data length (%d)", pos, inner.Len, outer.Len))
	}

	return pos
}
// BytesRange returns a slice that begins at the first byte of start and
// extends to whichever is further: the end of start or the end of end. The
// result is a reslice of start, so it shares start's backing storage.
//
// It panics if either argument is nil, if end begins at a lower address than
// start, or if the resulting length exceeds the capacity of start.
func BytesRange(start []byte, end []byte) []byte {
	if start == nil || end == nil {
		panic("cannot call BytesRange with nil")
	}

	first := (*reflect.SliceHeader)(unsafe.Pointer(&start))
	last := (*reflect.SliceHeader)(unsafe.Pointer(&end))

	if first.Data > last.Data {
		panic(fmt.Errorf("start pointer address (%d) is after end pointer address (%d)", first.Data, last.Data))
	}

	// Distance from the first byte of start to one past the last byte of end.
	span := int(last.Data-first.Data) + last.Len
	if span < first.Len {
		// end lies entirely inside start: keep start's own length.
		span = first.Len
	}

	if span > first.Cap {
		panic(fmt.Errorf("range length is larger than capacity"))
	}

	return start[:span]
}
// Stride returns ptr advanced by offset elements of size bytes each. A
// negative offset moves the pointer backward.
//
// The caller is responsible for making sure the resulting address still
// points into the same allocation as ptr.
func Stride(ptr unsafe.Pointer, size uintptr, offset int) unsafe.Pointer {
	// unsafe.Add (Go 1.17+) replaces the manual uintptr round-trip and is the
	// form of pointer arithmetic documented by the unsafe package.
	return unsafe.Add(ptr, int(size)*offset)
}
-176
View File
@@ -1,176 +0,0 @@
package danger_test
import (
"testing"
"unsafe"
"github.com/pelletier/go-toml/v2/internal/assert"
"github.com/pelletier/go-toml/v2/internal/danger"
)
// TestSubsliceOffsetValid checks that SubsliceOffset reports the expected
// offset for subslices that lie inside their parent slice.
func TestSubsliceOffsetValid(t *testing.T) {
	type testCase struct {
		desc   string
		test   func() ([]byte, []byte)
		offset int
	}

	cases := []testCase{
		{
			desc: "simple",
			test: func() ([]byte, []byte) {
				buf := []byte("hello")
				return buf, buf[1:]
			},
			offset: 1,
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.desc, func(t *testing.T) {
			parent, sub := tc.test()
			got := danger.SubsliceOffset(parent, sub)
			assert.Equal(t, tc.offset, got)
		})
	}
}
// TestSubsliceOffsetInvalid checks that SubsliceOffset panics when the second
// slice is not fully contained in the first.
func TestSubsliceOffsetInvalid(t *testing.T) {
	type testCase struct {
		desc string
		test func() ([]byte, []byte)
	}

	cases := []testCase{
		{
			desc: "unrelated arrays",
			test: func() ([]byte, []byte) {
				return []byte("one"), []byte("two")
			},
		},
		{
			desc: "slice starts before data",
			test: func() ([]byte, []byte) {
				buf := []byte("hello world")
				return buf[5:], buf[1:]
			},
		},
		{
			desc: "slice starts after data",
			test: func() ([]byte, []byte) {
				buf := []byte("hello world")
				return buf[:3], buf[5:]
			},
		},
		{
			desc: "slice ends after data",
			test: func() ([]byte, []byte) {
				buf := []byte("hello world")
				return buf[:5], buf[3:8]
			},
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.desc, func(t *testing.T) {
			parent, sub := tc.test()
			assert.Panics(t, func() {
				danger.SubsliceOffset(parent, sub)
			})
		})
	}
}
// TestStride checks that Stride moves a byte pointer one element forward and
// one element backward inside the same array.
func TestStride(t *testing.T) {
	buf := []byte{1, 2, 3, 4}
	origin := &buf[1]
	elemSize := unsafe.Sizeof(byte(0))

	forward := (*byte)(danger.Stride(unsafe.Pointer(origin), elemSize, 1))
	assert.Equal(t, &buf[2], forward)

	backward := (*byte)(danger.Stride(unsafe.Pointer(origin), elemSize, -1))
	assert.Equal(t, &buf[0], backward)
}
func TestBytesRange(t *testing.T) {
type fn = func() ([]byte, []byte)
examples := []struct {
desc string
test fn
expected []byte
}{
{
desc: "simple",
test: func() ([]byte, []byte) {
full := []byte("hello world")
return full[1:3], full[6:8]
},
expected: []byte("ello wo"),
},
{
desc: "full",
test: func() ([]byte, []byte) {
full := []byte("hello world")
return full[0:1], full[len(full)-1:]
},
expected: []byte("hello world"),
},
{
desc: "end before start",
test: func() ([]byte, []byte) {
full := []byte("hello world")
return full[len(full)-1:], full[0:1]
},
},
{
desc: "nils",
test: func() ([]byte, []byte) {
return nil, nil
},
},
{
desc: "nils start",
test: func() ([]byte, []byte) {
return nil, []byte("foo")
},
},
{
desc: "nils end",
test: func() ([]byte, []byte) {
return []byte("foo"), nil
},
},
{
desc: "start is end",
test: func() ([]byte, []byte) {
full := []byte("hello world")
return full[1:3], full[1:3]
},
expected: []byte("el"),
},
{
desc: "end contained in start",
test: func() ([]byte, []byte) {
full := []byte("hello world")
return full[1:7], full[2:4]
},
expected: []byte("ello w"),
},
{
desc: "different backing arrays",
test: func() ([]byte, []byte) {
one := []byte("hello world")
two := []byte("hello world")
return one, two
},
},
}
for _, e := range examples {
t.Run(e.desc, func(t *testing.T) {
start, end := e.test()
if e.expected == nil {
assert.Panics(t, func() {
danger.BytesRange(start, end)
})
} else {
res := danger.BytesRange(start, end)
assert.Equal(t, e.expected, res)
}
})
}
}
-23
View File
@@ -1,23 +0,0 @@
package danger
import (
"reflect"
"unsafe"
)
// TypeID is used as key in encoder and decoder caches to enable using
// the optimized runtime.mapaccess2_fast64 function instead of the more
// expensive lookup if we were to use reflect.Type as map key.
//
// TypeID holds the pointer to the reflect.Type value, which is unique
// in the program.
//
// https://github.com/segmentio/encoding/blob/master/json/codec.go#L59-L61
type TypeID unsafe.Pointer

// MakeTypeID returns the TypeID for t by reading the second pointer-sized
// word of the reflect.Type interface value.
func MakeTypeID(t reflect.Type) TypeID {
	// A reflect.Type interface value is laid out as two words:
	//   typ unsafe.Pointer
	//   ptr unsafe.Pointer
	// The second word is the one used as the identifier.
	words := (*[2]unsafe.Pointer)(unsafe.Pointer(&t))
	return TypeID(words[1])
}
+3 -3
View File
@@ -1,8 +1,8 @@
package tracker package tracker
import ( import (
"reflect"
"testing" "testing"
"unsafe"
"github.com/pelletier/go-toml/v2/internal/assert" "github.com/pelletier/go-toml/v2/internal/assert"
) )
@@ -13,8 +13,8 @@ func TestEntrySize(t *testing.T) {
// and a very good reason. // and a very good reason.
maxExpectedEntrySize := 48 maxExpectedEntrySize := 48
assert.True(t, assert.True(t,
int(unsafe.Sizeof(entry{})) <= maxExpectedEntrySize, int(reflect.TypeOf(entry{}).Size()) <= maxExpectedEntrySize,
"Expected entry to be less than or equal to %d, got: %d", "Expected entry to be less than or equal to %d, got: %d",
maxExpectedEntrySize, int(unsafe.Sizeof(entry{})), maxExpectedEntrySize, int(reflect.TypeOf(entry{}).Size()),
) )
} }
+1 -2
View File
@@ -1,7 +1,6 @@
package toml package toml
import ( import (
"github.com/pelletier/go-toml/v2/internal/danger"
"github.com/pelletier/go-toml/v2/internal/tracker" "github.com/pelletier/go-toml/v2/internal/tracker"
"github.com/pelletier/go-toml/v2/unstable" "github.com/pelletier/go-toml/v2/unstable"
) )
@@ -103,5 +102,5 @@ func keyLocation(node *unstable.Node) []byte {
end = k.Node().Data end = k.Node().Data
} }
return danger.BytesRange(start, end) return start[:cap(start)-cap(end)+len(end)]
} }
+14 -6
View File
@@ -12,7 +12,6 @@ import (
"sync/atomic" "sync/atomic"
"time" "time"
"github.com/pelletier/go-toml/v2/internal/danger"
"github.com/pelletier/go-toml/v2/internal/tracker" "github.com/pelletier/go-toml/v2/internal/tracker"
"github.com/pelletier/go-toml/v2/unstable" "github.com/pelletier/go-toml/v2/unstable"
) )
@@ -1294,13 +1293,22 @@ func fieldByIndex(v reflect.Value, path []int) reflect.Value {
type fieldPathsMap = map[string][]int type fieldPathsMap = map[string][]int
var globalFieldPathsCache atomic.Value // map[danger.TypeID]fieldPathsMap var globalFieldPathsCache atomic.Value // map[uintptr]fieldPathsMap
func structFieldPath(v reflect.Value, name string) ([]int, bool) { func structFieldPath(v reflect.Value, name string) ([]int, bool) {
t := v.Type() t := v.Type()
// reflect.Type is an interface. We want to use the address of the underlying
// rtype as the key.
// This avoids using the interface as map key, which is slower.
//
// In the future this should be replaced by t.Pointer() if it becomes available.
//
// v.Type() returns a reflect.Type interface.
// reflect.ValueOf(t).Pointer() returns the address of the rtype.
tid := reflect.ValueOf(t).Pointer()
cache, _ := globalFieldPathsCache.Load().(map[danger.TypeID]fieldPathsMap) cache, _ := globalFieldPathsCache.Load().(map[uintptr]fieldPathsMap)
fieldPaths, ok := cache[danger.MakeTypeID(t)] fieldPaths, ok := cache[tid]
if !ok { if !ok {
fieldPaths = map[string][]int{} fieldPaths = map[string][]int{}
@@ -1311,8 +1319,8 @@ func structFieldPath(v reflect.Value, name string) ([]int, bool) {
fieldPaths[strings.ToLower(name)] = path fieldPaths[strings.ToLower(name)] = path
}) })
newCache := make(map[danger.TypeID]fieldPathsMap, len(cache)+1) newCache := make(map[uintptr]fieldPathsMap, len(cache)+1)
newCache[danger.MakeTypeID(t)] = fieldPaths newCache[tid] = fieldPaths
for k, v := range cache { for k, v := range cache {
newCache[k] = v newCache[k] = v
} }
+6 -21
View File
@@ -2,9 +2,6 @@ package unstable
import ( import (
"fmt" "fmt"
"unsafe"
"github.com/pelletier/go-toml/v2/internal/danger"
) )
// Iterator over a sequence of nodes. // Iterator over a sequence of nodes.
@@ -37,7 +34,7 @@ func (c *Iterator) Next() bool {
// IsLast returns true if the current node of the iterator is the last // IsLast returns true if the current node of the iterator is the last
// one. Subsequent calls to Next() will return false. // one. Subsequent calls to Next() will return false.
func (c *Iterator) IsLast() bool { func (c *Iterator) IsLast() bool {
return c.node.next == 0 return c.node.next == nil
} }
// Node returns a pointer to the node pointed at by the iterator. // Node returns a pointer to the node pointed at by the iterator.
@@ -65,11 +62,9 @@ type Node struct {
Raw Range // Raw bytes from the input. Raw Range // Raw bytes from the input.
Data []byte // Node value (either allocated or referencing the input). Data []byte // Node value (either allocated or referencing the input).
// References to other nodes, as offsets in the backing array // References to other nodes.
// from this node. References can go backward, so those can be next *Node // nil if last element
// negative. child *Node // nil if no child
next int // 0 if last element
child int // 0 if no child
} }
// Range of bytes in the document. // Range of bytes in the document.
@@ -80,24 +75,14 @@ type Range struct {
// Next returns a pointer to the next node, or nil if there is no next node. // Next returns a pointer to the next node, or nil if there is no next node.
func (n *Node) Next() *Node { func (n *Node) Next() *Node {
if n.next == 0 { return n.next
return nil
}
ptr := unsafe.Pointer(n)
size := unsafe.Sizeof(Node{})
return (*Node)(danger.Stride(ptr, size, n.next))
} }
// Child returns a pointer to the first child node of this node. Other children // Child returns a pointer to the first child node of this node. Other children
// can be accessed calling Next on the first child. Returns nil if this Node // can be accessed calling Next on the first child. Returns nil if this Node
// has no child. // has no child.
func (n *Node) Child() *Node { func (n *Node) Child() *Node {
if n.child == 0 { return n.child
return nil
}
ptr := unsafe.Pointer(n)
size := unsafe.Sizeof(Node{})
return (*Node)(danger.Stride(ptr, size, n.child))
} }
// Valid returns true if the node's kind is set (not to Invalid). // Valid returns true if the node's kind is set (not to Invalid).
+79 -29
View File
@@ -4,68 +4,118 @@ package unstable
// //
// It is immutable once constructed with Builder. // It is immutable once constructed with Builder.
type root struct { type root struct {
nodes []Node first *Node
} }
// Iterator over the top level nodes. // Iterator over the top level nodes.
func (r *root) Iterator() Iterator { func (r *root) Iterator() Iterator {
it := Iterator{} return Iterator{node: r.first}
if len(r.nodes) > 0 {
it.node = &r.nodes[0]
}
return it
} }
func (r *root) at(idx reference) *Node { type reference struct {
return &r.nodes[idx] *Node
} }
type reference int var invalidReference = reference{}
const invalidReference reference = -1
func (r reference) Valid() bool { func (r reference) Valid() bool {
return r != invalidReference return r.Node != nil
} }
type builder struct { type builder struct {
tree root // chunks of nodes. Pointers to nodes are stable because we only append
lastIdx int // to the last chunk, and chunks are allocated with fixed capacity.
chunks [][]Node
// current chunk index
chunkIdx int
// root node of the tree
root root
// last pushed node (for chaining)
last *Node
} }
const initialChunkSize = 16
const maxChunkSize = 2048
func (b *builder) Tree() *root { func (b *builder) Tree() *root {
return &b.tree return &b.root
} }
func (b *builder) NodeAt(ref reference) *Node { func (b *builder) NodeAt(ref reference) *Node {
return b.tree.at(ref) return ref.Node
} }
func (b *builder) Reset() { func (b *builder) Reset() {
b.tree.nodes = b.tree.nodes[:0] b.chunkIdx = 0
b.lastIdx = 0 for i := range b.chunks {
b.chunks[i] = b.chunks[i][:0]
}
b.root.first = nil
b.last = nil
}
// ensureCapacity makes sure b.chunks[b.chunkIdx] exists and has room for at
// least one more node. When the current chunk is full it advances chunkIdx to
// the next chunk, allocating that chunk if it does not exist yet.
func (b *builder) ensureCapacity() {
	if b.chunkIdx < len(b.chunks) {
		current := b.chunks[b.chunkIdx]
		if len(current) < cap(current) {
			return
		}
		// The current chunk is full. Move on instead of growing it: appending
		// past capacity would reallocate the chunk and move its nodes.
		b.chunkIdx++
	}

	if b.chunkIdx >= len(b.chunks) {
		b.chunks = append(b.chunks, make([]Node, 0, b.nextChunkSize()))
	}
}

// nextChunkSize returns the capacity for the next chunk to allocate:
// initialChunkSize for the first chunk, otherwise twice the capacity of the
// most recently allocated chunk, capped at maxChunkSize.
func (b *builder) nextChunkSize() int {
	if len(b.chunks) == 0 {
		return initialChunkSize
	}

	size := cap(b.chunks[len(b.chunks)-1]) * 2
	if size > maxChunkSize {
		size = maxChunkSize
	}
	return size
}
func (b *builder) push(n Node) *Node {
b.ensureCapacity()
chunk := &b.chunks[b.chunkIdx]
*chunk = append(*chunk, n)
return &(*chunk)[len(*chunk)-1]
} }
func (b *builder) Push(n Node) reference { func (b *builder) Push(n Node) reference {
b.lastIdx = len(b.tree.nodes) ptr := b.push(n)
b.tree.nodes = append(b.tree.nodes, n) if b.root.first == nil {
return reference(b.lastIdx) b.root.first = ptr
}
b.last = ptr
return reference{ptr}
} }
func (b *builder) PushAndChain(n Node) reference { func (b *builder) PushAndChain(n Node) reference {
newIdx := len(b.tree.nodes) ptr := b.push(n)
b.tree.nodes = append(b.tree.nodes, n) if b.root.first == nil {
if b.lastIdx >= 0 { b.root.first = ptr
b.tree.nodes[b.lastIdx].next = newIdx - b.lastIdx
} }
b.lastIdx = newIdx if b.last != nil {
return reference(b.lastIdx) b.last.next = ptr
}
b.last = ptr
return reference{ptr}
} }
func (b *builder) AttachChild(parent reference, child reference) { func (b *builder) AttachChild(parent reference, child reference) {
b.tree.nodes[parent].child = int(child) - int(parent) parent.child = child.Node
} }
func (b *builder) Chain(from reference, to reference) { func (b *builder) Chain(from reference, to reference) {
b.tree.nodes[from].next = int(to) - int(from) from.next = to.Node
} }
+2 -3
View File
@@ -6,7 +6,6 @@ import (
"unicode" "unicode"
"github.com/pelletier/go-toml/v2/internal/characters" "github.com/pelletier/go-toml/v2/internal/characters"
"github.com/pelletier/go-toml/v2/internal/danger"
) )
// ParserError describes an error relative to the content of the document. // ParserError describes an error relative to the content of the document.
@@ -70,7 +69,7 @@ func (p *Parser) Data() []byte {
// panics. // panics.
func (p *Parser) Range(b []byte) Range { func (p *Parser) Range(b []byte) Range {
return Range{ return Range{
Offset: uint32(danger.SubsliceOffset(p.data, b)), Offset: uint32(cap(p.data) - cap(b)),
Length: uint32(len(b)), Length: uint32(len(b)),
} }
} }
@@ -159,7 +158,7 @@ type Shape struct {
} }
func (p *Parser) position(b []byte) Position { func (p *Parser) position(b []byte) Position {
offset := danger.SubsliceOffset(p.data, b) offset := cap(p.data) - cap(b)
lead := p.data[:offset] lead := p.data[:offset]