seen tracker: use array for storage (#545)
name old time/op new time/op delta UnmarshalDataset/config-32 81.2ms ± 3% 77.8ms ± 3% -4.25% (p=0.000 n=20+19) UnmarshalDataset/canada-32 104ms ± 5% 105ms ± 4% ~ (p=0.102 n=20+20) UnmarshalDataset/citm_catalog-32 57.5ms ± 5% 59.0ms ± 5% +2.54% (p=0.033 n=20+20) UnmarshalDataset/twitter-32 25.7ms ± 7% 28.1ms ± 5% +9.33% (p=0.000 n=20+20) UnmarshalDataset/code-32 305ms ± 6% 292ms ± 5% -4.29% (p=0.000 n=20+19) UnmarshalDataset/example-32 519µs ± 6% 522µs ± 5% ~ (p=0.659 n=20+20) UnmarshalSimple/struct-32 1.44µs ± 1% 1.17µs ± 6% -18.78% (p=0.000 n=14+20) UnmarshalSimple/map-32 2.30µs ± 4% 1.99µs ± 4% -13.65% (p=0.000 n=20+19) ReferenceFile/struct-32 44.1µs ± 4% 38.1µs ± 5% -13.61% (p=0.000 n=18+20) ReferenceFile/map-32 197µs ± 7% 189µs ± 5% -3.91% (p=0.000 n=20+20) HugoFrontMatter-32 45.9µs ± 6% 39.3µs ± 6% -14.46% (p=0.000 n=19+20) name old speed new speed delta UnmarshalDataset/config-32 12.9MB/s ± 3% 13.5MB/s ± 3% +4.42% (p=0.000 n=20+19) UnmarshalDataset/canada-32 21.1MB/s ± 5% 20.9MB/s ± 4% ~ (p=0.101 n=20+20) UnmarshalDataset/citm_catalog-32 9.72MB/s ± 6% 9.47MB/s ± 5% -2.53% (p=0.031 n=20+20) UnmarshalDataset/twitter-32 17.2MB/s ± 7% 15.8MB/s ± 5% -8.57% (p=0.000 n=20+20) UnmarshalDataset/code-32 8.81MB/s ± 7% 9.20MB/s ± 5% +4.47% (p=0.000 n=20+19) UnmarshalDataset/example-32 15.6MB/s ± 6% 15.5MB/s ± 5% ~ (p=0.644 n=20+20) UnmarshalSimple/struct-32 7.61MB/s ± 1% 9.39MB/s ± 7% +23.33% (p=0.000 n=15+20) UnmarshalSimple/map-32 4.78MB/s ± 4% 5.54MB/s ± 5% +15.85% (p=0.000 n=20+19) ReferenceFile/struct-32 119MB/s ± 4% 138MB/s ± 5% +15.79% (p=0.000 n=18+20) ReferenceFile/map-32 26.6MB/s ± 7% 27.7MB/s ± 5% +4.06% (p=0.000 n=20+20) HugoFrontMatter-32 11.9MB/s ± 6% 13.9MB/s ± 6% +16.91% (p=0.000 n=19+20) name old alloc/op new alloc/op delta UnmarshalDataset/config-32 16.9MB ± 0% 13.9MB ± 0% -17.48% (p=0.000 n=18+18) UnmarshalDataset/canada-32 74.3MB ± 0% 74.3MB ± 0% -0.00% (p=0.000 n=20+20) UnmarshalDataset/citm_catalog-32 37.3MB ± 0% 37.3MB ± 0% +0.11% (p=0.000 n=20+20) UnmarshalDataset/twitter-32 15.6MB ± 0% 15.6MB ± 0% ~ (p=0.211 n=19+20) UnmarshalDataset/code-32 59.5MB ± 0% 52.4MB ± 0% -11.96% (p=0.000 n=19+20) UnmarshalDataset/example-32 238kB ± 0% 239kB ± 0% +0.02% (p=0.000 n=18+20) UnmarshalSimple/struct-32 981B ± 0% 709B ± 0% -27.73% (p=0.000 n=20+20) UnmarshalSimple/map-32 1.45kB ± 0% 1.17kB ± 0% -18.82% (p=0.000 n=20+20) ReferenceFile/struct-32 11.8kB ± 0% 9.7kB ± 0% -17.64% (p=0.000 n=20+20) ReferenceFile/map-32 51.5kB ± 0% 52.2kB ± 0% +1.30% (p=0.000 n=20+17) HugoFrontMatter-32 12.1kB ± 0% 11.1kB ± 0% -7.97% (p=0.000 n=20+19) name old allocs/op new allocs/op delta UnmarshalDataset/config-32 645k ± 0% 557k ± 0% -13.76% (p=0.000 n=20+16) UnmarshalDataset/canada-32 896k ± 0% 896k ± 0% -0.00% (p=0.000 n=20+20) UnmarshalDataset/citm_catalog-32 380k ± 0% 377k ± 0% -0.75% (p=0.000 n=19+20) UnmarshalDataset/twitter-32 158k ± 0% 158k ± 0% -0.01% (p=0.000 n=18+18) UnmarshalDataset/code-32 2.92M ± 0% 2.57M ± 0% -11.87% (p=0.000 n=20+20) UnmarshalDataset/example-32 3.66k ± 0% 3.64k ± 0% -0.63% (p=0.000 n=20+20) UnmarshalSimple/struct-32 13.0 ± 0% 10.0 ± 0% -23.08% (p=0.000 n=20+20) UnmarshalSimple/map-32 22.0 ± 0% 19.0 ± 0% -13.64% (p=0.000 n=20+20) ReferenceFile/struct-32 253 ± 0% 155 ± 0% -38.74% (p=0.000 n=20+20) ReferenceFile/map-32 1.67k ± 0% 1.44k ± 0% -14.23% (p=0.000 n=20+20) HugoFrontMatter-32 357 ± 0% 313 ± 0% -12.32% (p=0.000 n=20+20)
This commit is contained in:
+146
-89
@@ -1,6 +1,7 @@
|
||||
package tracker
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
|
||||
"github.com/pelletier/go-toml/v2/internal/ast"
|
||||
@@ -29,67 +30,92 @@ func (k keyKind) String() string {
|
||||
panic("missing keyKind string mapping")
|
||||
}
|
||||
|
||||
// SeenTracker tracks which keys have been seen with which TOML type to flag duplicates
|
||||
// and mismatches according to the spec.
|
||||
// SeenTracker tracks which keys have been seen with which TOML type to flag
|
||||
// duplicates and mismatches according to the spec.
|
||||
//
|
||||
// Each node in the visited tree is represented by an entry. Each entry has an
|
||||
// identifier, which is provided by a counter. Entries are stored in the array
|
||||
// entries. As new nodes are discovered (referenced for the first time in the
|
||||
// TOML document), entries are created and appended to the array. An entry
|
||||
// points to its parent using its id.
|
||||
//
|
||||
// To find whether a given key (sequence of []byte) has already been visited,
|
||||
// the entries are linearly searched, looking for one with the right name and
|
||||
// parent id.
|
||||
//
|
||||
// Given that all keys appear in the document after their parent, it is
|
||||
// guaranteed that all descendants of a node are stored after the node, this
|
||||
// speeds up the search process.
|
||||
//
|
||||
// When encountering [[array tables]], the descendants of that node are removed
|
||||
// to allow that branch of the tree to be "rediscovered". To maintain the
|
||||
// invariant above, the deletion process needs to keep the order of entries.
|
||||
// This results in more copies in that case.
|
||||
type SeenTracker struct {
|
||||
root *info
|
||||
current *info
|
||||
entries []entry
|
||||
currentIdx int
|
||||
nextID int
|
||||
}
|
||||
|
||||
type info struct {
|
||||
parent *info
|
||||
type entry struct {
|
||||
id int
|
||||
parent int
|
||||
name []byte
|
||||
kind keyKind
|
||||
children map[string]*info
|
||||
explicit bool
|
||||
}
|
||||
|
||||
func (i *info) Clear() {
|
||||
i.children = nil
|
||||
// Remove all descendent of node at position idx.
|
||||
func (s *SeenTracker) clear(idx int) {
|
||||
p := s.entries[idx].id
|
||||
rest := clear(p, s.entries[idx+1:])
|
||||
s.entries = s.entries[:idx+1+len(rest)]
|
||||
}
|
||||
|
||||
func (i *info) Has(k string) (*info, bool) {
|
||||
c, ok := i.children[k]
|
||||
return c, ok
|
||||
}
|
||||
|
||||
func (i *info) SetKind(kind keyKind) {
|
||||
i.kind = kind
|
||||
}
|
||||
|
||||
func (i *info) CreateTable(k string, explicit bool) *info {
|
||||
return i.createChild(k, tableKind, explicit)
|
||||
}
|
||||
|
||||
func (i *info) CreateArrayTable(k string, explicit bool) *info {
|
||||
return i.createChild(k, arrayTableKind, explicit)
|
||||
}
|
||||
|
||||
func (i *info) createChild(k string, kind keyKind, explicit bool) *info {
|
||||
if i.children == nil {
|
||||
i.children = make(map[string]*info, 1)
|
||||
func clear(parentID int, entries []entry) []entry {
|
||||
for i := 0; i < len(entries); {
|
||||
if entries[i].parent == parentID {
|
||||
id := entries[i].id
|
||||
copy(entries[i:], entries[i+1:])
|
||||
entries = entries[:len(entries)-1]
|
||||
rest := clear(id, entries[i:])
|
||||
entries = entries[:i+len(rest)]
|
||||
} else {
|
||||
i++
|
||||
}
|
||||
}
|
||||
return entries
|
||||
}
|
||||
|
||||
x := &info{
|
||||
parent: i,
|
||||
func (s *SeenTracker) create(parentIdx int, name []byte, kind keyKind, explicit bool) int {
|
||||
parentID := s.id(parentIdx)
|
||||
|
||||
idx := len(s.entries)
|
||||
s.entries = append(s.entries, entry{
|
||||
id: s.nextID,
|
||||
parent: parentID,
|
||||
name: name,
|
||||
kind: kind,
|
||||
explicit: explicit,
|
||||
}
|
||||
i.children[k] = x
|
||||
return x
|
||||
})
|
||||
s.nextID++
|
||||
return idx
|
||||
}
|
||||
|
||||
// CheckExpression takes a top-level node and checks that it does not contain keys
|
||||
// that have been seen in previous calls, and validates that types are consistent.
|
||||
func (s *SeenTracker) CheckExpression(node ast.Node) error {
|
||||
if s.root == nil {
|
||||
s.root = &info{
|
||||
kind: tableKind,
|
||||
}
|
||||
s.current = s.root
|
||||
if s.entries == nil {
|
||||
//s.entries = make([]entry, 0, 8)
|
||||
// Skip ID = 0 to remove the confusion between nodes whose parent has
|
||||
// id 0 and root nodes (parent id is 0 because it's the zero value).
|
||||
s.nextID = 1
|
||||
// Start unscoped, so idx is negative.
|
||||
s.currentIdx = -1
|
||||
}
|
||||
switch node.Kind {
|
||||
case ast.KeyValue:
|
||||
return s.checkKeyValue(s.current, node)
|
||||
return s.checkKeyValue(node)
|
||||
case ast.Table:
|
||||
return s.checkTable(node)
|
||||
case ast.ArrayTable:
|
||||
@@ -97,104 +123,135 @@ func (s *SeenTracker) CheckExpression(node ast.Node) error {
|
||||
default:
|
||||
panic(fmt.Errorf("this should not be a top level node type: %s", node.Kind))
|
||||
}
|
||||
|
||||
}
|
||||
func (s *SeenTracker) checkTable(node ast.Node) error {
|
||||
s.current = s.root
|
||||
|
||||
func (s *SeenTracker) checkTable(node ast.Node) error {
|
||||
it := node.Key()
|
||||
// handle the first parts of the key, excluding the last one
|
||||
|
||||
parentIdx := -1
|
||||
|
||||
// This code is duplicated in checkArrayTable. This is because factoring
|
||||
// it in a function requires to copy the iterator, or allocate it to the
|
||||
// heap, which is not cheap.
|
||||
for it.Next() {
|
||||
if !it.Node().Next().Valid() {
|
||||
break
|
||||
}
|
||||
|
||||
k := string(it.Node().Data)
|
||||
child, found := s.current.Has(k)
|
||||
if !found {
|
||||
child = s.current.CreateTable(k, false)
|
||||
k := it.Node().Data
|
||||
|
||||
idx := s.find(parentIdx, k)
|
||||
|
||||
if idx < 0 {
|
||||
idx = s.create(parentIdx, k, tableKind, false)
|
||||
}
|
||||
s.current = child
|
||||
parentIdx = idx
|
||||
}
|
||||
|
||||
// handle the last part of the key
|
||||
k := string(it.Node().Data)
|
||||
k := it.Node().Data
|
||||
idx := s.find(parentIdx, k)
|
||||
|
||||
i, found := s.current.Has(k)
|
||||
if found {
|
||||
if i.kind != tableKind {
|
||||
return fmt.Errorf("toml: key %s should be a table, not a %s", k, i.kind)
|
||||
if idx >= 0 {
|
||||
kind := s.entries[idx].kind
|
||||
if kind != tableKind {
|
||||
return fmt.Errorf("toml: key %s should be a table, not a %s", string(k), kind)
|
||||
}
|
||||
if i.explicit {
|
||||
return fmt.Errorf("toml: table %s already exists", k)
|
||||
if s.entries[idx].explicit {
|
||||
return fmt.Errorf("toml: table %s already exists", string(k))
|
||||
}
|
||||
i.explicit = true
|
||||
s.current = i
|
||||
s.entries[idx].explicit = true
|
||||
} else {
|
||||
s.current = s.current.CreateTable(k, true)
|
||||
idx = s.create(parentIdx, k, tableKind, true)
|
||||
}
|
||||
|
||||
s.currentIdx = idx
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *SeenTracker) checkArrayTable(node ast.Node) error {
|
||||
s.current = s.root
|
||||
|
||||
it := node.Key()
|
||||
|
||||
// handle the first parts of the key, excluding the last one
|
||||
parentIdx := -1
|
||||
|
||||
for it.Next() {
|
||||
if !it.Node().Next().Valid() {
|
||||
break
|
||||
}
|
||||
|
||||
k := string(it.Node().Data)
|
||||
child, found := s.current.Has(k)
|
||||
if !found {
|
||||
child = s.current.CreateTable(k, false)
|
||||
k := it.Node().Data
|
||||
|
||||
idx := s.find(parentIdx, k)
|
||||
|
||||
if idx < 0 {
|
||||
idx = s.create(parentIdx, k, tableKind, false)
|
||||
}
|
||||
s.current = child
|
||||
parentIdx = idx
|
||||
}
|
||||
|
||||
// handle the last part of the key
|
||||
k := string(it.Node().Data)
|
||||
k := it.Node().Data
|
||||
idx := s.find(parentIdx, k)
|
||||
|
||||
info, found := s.current.Has(k)
|
||||
if found {
|
||||
if info.kind != arrayTableKind {
|
||||
return fmt.Errorf("toml: key %s already exists as a %s, but should be an array table", info.kind, k)
|
||||
if idx >= 0 {
|
||||
kind := s.entries[idx].kind
|
||||
if kind != arrayTableKind {
|
||||
return fmt.Errorf("toml: key %s already exists as a %s, but should be an array table", kind, string(k))
|
||||
}
|
||||
info.Clear()
|
||||
s.clear(idx)
|
||||
} else {
|
||||
info = s.current.CreateArrayTable(k, true)
|
||||
idx = s.create(parentIdx, k, arrayTableKind, true)
|
||||
}
|
||||
|
||||
s.current = info
|
||||
s.currentIdx = idx
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *SeenTracker) checkKeyValue(context *info, node ast.Node) error {
|
||||
func (s *SeenTracker) checkKeyValue(node ast.Node) error {
|
||||
it := node.Key()
|
||||
|
||||
// handle the first parts of the key, excluding the last one
|
||||
parentIdx := s.currentIdx
|
||||
|
||||
for it.Next() {
|
||||
k := string(it.Node().Data)
|
||||
child, found := context.Has(k)
|
||||
if found {
|
||||
if child.kind != tableKind {
|
||||
return fmt.Errorf("toml: expected %s to be a table, not a %s", k, child.kind)
|
||||
k := it.Node().Data
|
||||
|
||||
idx := s.find(parentIdx, k)
|
||||
|
||||
if idx >= 0 {
|
||||
if s.entries[idx].kind != tableKind {
|
||||
return fmt.Errorf("toml: expected %s to be a table, not a %s", string(k), s.entries[idx].kind)
|
||||
}
|
||||
} else {
|
||||
child = context.CreateTable(k, false)
|
||||
idx = s.create(parentIdx, k, tableKind, false)
|
||||
}
|
||||
context = child
|
||||
parentIdx = idx
|
||||
}
|
||||
|
||||
kind := valueKind
|
||||
|
||||
if node.Value().Kind == ast.InlineTable {
|
||||
context.SetKind(tableKind)
|
||||
} else {
|
||||
context.SetKind(valueKind)
|
||||
kind = tableKind
|
||||
}
|
||||
s.entries[parentIdx].kind = kind
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *SeenTracker) id(idx int) int {
|
||||
if idx >= 0 {
|
||||
return s.entries[idx].id
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (s *SeenTracker) find(parentIdx int, k []byte) int {
|
||||
parentID := s.id(parentIdx)
|
||||
|
||||
for i := parentIdx + 1; i < len(s.entries); i++ {
|
||||
if s.entries[i].parent == parentID && bytes.Equal(s.entries[i].name, k) {
|
||||
return i
|
||||
}
|
||||
}
|
||||
|
||||
return -1
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user