AST Tweaks (#551)

* Use pointers instead of copying around ast.Node

Node is a 56B struct that is constantly in the hot path. Passing nodes
around by copy had a cost that started to add up. This change passes
them by pointer instead. By using unsafe pointer arithmetic and converting
sibling/child indexes to relative offsets, it removes the need to carry
around a pointer to the root of the tree. This saves 8B per Node. This
space will be used to store an extra []byte slice to provide contextual
error handling on all nodes, including the ones whose data is different
from the raw input (for example: strings with escaped characters), while
staying under the size of a cache line.

* Remove conditional

* Add Raw to track range in data for parsed values

* Simplify reference tracking
This commit is contained in:
Thomas Pelletier
2021-06-03 21:48:51 -04:00
committed by GitHub
parent f3bb20ea79
commit 618f0181ac
13 changed files with 239 additions and 165 deletions
+37 -29
View File
@@ -2,6 +2,9 @@ package ast
import (
"fmt"
"unsafe"
"github.com/pelletier/go-toml/v2/internal/danger"
)
// Iterator starts uninitialized, you need to call Next() first.
@@ -14,7 +17,7 @@ import (
// }
type Iterator struct {
started bool
node Node
node *Node
}
// Next moves the iterator forward and returns true if it points to a node, false
@@ -31,11 +34,11 @@ func (c *Iterator) Next() bool {
// IsLast returns true if the current node of the iterator is the last one.
// Subsequent call to Next() will return false.
func (c *Iterator) IsLast() bool {
return c.node.next <= 0
return c.node.next == 0
}
// Node returns a pointer to the node pointed at by the iterator.
func (c *Iterator) Node() Node {
func (c *Iterator) Node() *Node {
return c.node
}
@@ -50,14 +53,13 @@ type Root struct {
func (r *Root) Iterator() Iterator {
it := Iterator{}
if len(r.nodes) > 0 {
it.node = r.nodes[0]
it.node = &r.nodes[0]
}
return it
}
func (r *Root) at(idx int) Node {
// TODO: unsafe to point to the node directly
return r.nodes[idx]
func (r *Root) at(idx Reference) *Node {
return &r.nodes[idx]
}
// Arrays have one child per element in the array.
@@ -69,42 +71,48 @@ func (r *Root) at(idx int) Node {
// children []Node
type Node struct {
Kind Kind
Data []byte // Raw bytes from the input
Raw Range // Raw bytes from the input.
Data []byte // Node value (could be either allocated or referencing the input).
// next idx (in the root array). 0 if last of the collection.
next int
// child idx (in the root array). 0 if no child.
child int
// pointer to the root array
root *Root
// References to other nodes, as offsets in the backing array from this
// node. References can go backward, so those can be negative.
next int // 0 if last element
child int // 0 if no child
}
// Range describes a span of the input as a starting Offset and a Length.
// NOTE(review): presumably both are expressed in bytes — confirm.
type Range struct {
Offset uint32
Length uint32
}
// Next returns a pointer to the next node, or nil if there is no
// next node.
func (n Node) Next() Node {
if n.next <= 0 {
return noNode
func (n *Node) Next() *Node {
if n.next == 0 {
return nil
}
return n.root.at(n.next)
ptr := unsafe.Pointer(n)
size := unsafe.Sizeof(Node{})
return (*Node)(danger.Stride(ptr, size, n.next))
}
// Child returns a pointer to the first child node of this node. Other
// children can be accessed by calling Next on the first child.
// Returns nil if there is none.
func (n Node) Child() Node {
if n.child <= 0 {
return noNode
func (n *Node) Child() *Node {
if n.child == 0 {
return nil
}
return n.root.at(n.child)
ptr := unsafe.Pointer(n)
size := unsafe.Sizeof(Node{})
return (*Node)(danger.Stride(ptr, size, n.child))
}
// Valid returns true if the node pointer is not nil.
func (n Node) Valid() bool {
return n.Kind != Invalid
func (n *Node) Valid() bool {
return n != nil
}
var noNode = Node{}
// Key returns the child nodes making the Key on a supported node. Panics
// otherwise.
// They are guaranteed to all be of the Kind Key. A simple key would return
@@ -127,13 +135,13 @@ func (n *Node) Key() Iterator {
// Value returns a pointer to the value node of a KeyValue.
// Guaranteed to be non-nil.
// Panics if not called on a KeyValue node, or if the Children are malformed.
func (n Node) Value() Node {
assertKind(KeyValue, n)
func (n *Node) Value() *Node {
assertKind(KeyValue, *n)
return n.Child()
}
// Children returns an iterator over a node's children.
func (n Node) Children() Iterator {
func (n *Node) Children() Iterator {
return Iterator{node: n.Child()}
}