From 390927a0cd2a2bb84d8aa15d9360798e4fcdc2d4 Mon Sep 17 00:00:00 2001 From: Thomas Pelletier Date: Thu, 25 Mar 2021 22:37:16 -0400 Subject: [PATCH] Reuse AST storage between top-level expressions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` Comparing: old: v2-wip/1da2fc7 (2021-03-25 20:38:05 -0400 -0400) run: v2-wip/3f23ab9 (2021-03-25 22:35:06 -0400 -0400) ----------------------------------------------------------- name old time/op new time/op delta UnmarshalSimple/v2-8 700ns ± 3% 705ns ± 2% ~ (p=0.690 n=5+5) UnmarshalSimple/v1-8 3.85µs ± 1% 4.02µs ± 4% +4.19% (p=0.032 n=5+5) UnmarshalSimple/bs-8 2.34µs ± 2% 2.38µs ± 3% ~ (p=0.310 n=5+5) ReferenceFile/v2-8 32.2µs ±13% 23.9µs ± 1% -25.79% (p=0.008 n=5+5) ReferenceFile/v1-8 270µs ± 2% 264µs ± 2% ~ (p=0.095 n=5+5) ReferenceFile/bs-8 291µs ± 0% 294µs ± 0% +0.88% (p=0.008 n=5+5) name old alloc/op new alloc/op delta ReferenceFile/v2-8 37.1kB ± 0% 6.7kB ± 0% -81.91% (p=0.008 n=5+5) ReferenceFile/v1-8 131kB ± 0% 131kB ± 0% ~ (p=0.444 n=5+5) ReferenceFile/bs-8 80.8kB ± 0% 80.8kB ± 0% ~ (p=0.571 n=5+5) name old allocs/op new allocs/op delta ReferenceFile/v2-8 152 ± 0% 148 ± 0% -2.63% (p=0.008 n=5+5) ReferenceFile/v1-8 2.65k ± 0% 2.65k ± 0% ~ (all equal) ReferenceFile/bs-8 1.73k ± 0% 1.73k ± 0% ~ (all equal) ~/s/g/p/g/benchmark$ go test -bench=. goos: linux goarch: amd64 pkg: github.com/pelletier/go-toml/v2/benchmark cpu: Intel(R) Core(TM) i7-7700 CPU @ 3.60GHz BenchmarkUnmarshalSimple/v2-8 1692444 710.7 ns/op BenchmarkUnmarshalSimple/v1-8 307609 3862 ns/op BenchmarkUnmarshalSimple/bs-8 520429 2285 ns/op BenchmarkReferenceFile/v2-8 50395 24006 ns/op 6704 B/op 148 allocs/op BenchmarkReferenceFile/v1-8 4144 264655 ns/op 130567 B/op 2649 allocs/op BenchmarkReferenceFile/bs-8 3969 293635 ns/op 80784 B/op 1729 allocs/op PASS ok github.com/pelletier/go-toml/v2/benchmark 8.143s ``` --- internal/ast/builder.go | 44 +++--- parser.go | 65 ++++++-- parser_test.go | 215 +++++++++++++-------------- unmarshaler.go | 17 +-- unmarshaler_test.go | 320 +--------------------------------------- 5 files changed, 188 insertions(+), 473 deletions(-) diff --git a/internal/ast/builder.go b/internal/ast/builder.go index a6868cd..796b7f1 100644 --- a/internal/ast/builder.go +++ b/internal/ast/builder.go @@ -1,10 +1,5 @@ package ast -type Builder struct { - nodes []Node - lastIdx int -} - type Reference struct { idx int set bool @@ -14,22 +9,28 @@ func (r Reference) Valid() bool { return r.set } -func (b *Builder) Finish() *Root { - r := &Root{ - nodes: b.nodes, - } - b.nodes = nil +type Builder struct { + tree Root + lastIdx int +} - for i := range r.nodes { - r.nodes[i].root = r - } +func (b *Builder) Tree() *Root { + return &b.tree +} - return r +func (b *Builder) NodeAt(ref Reference) Node { + return b.tree.at(ref.idx) +} + +func (b *Builder) Reset() { + b.tree.nodes = b.tree.nodes[:0] + b.lastIdx = 0 } func (b *Builder) Push(n Node) Reference { - b.lastIdx = len(b.nodes) - b.nodes = append(b.nodes, n) + n.root = &b.tree + b.lastIdx = len(b.tree.nodes) + b.tree.nodes = append(b.tree.nodes, n) return Reference{ idx: b.lastIdx, set: true, @@ -37,10 +38,11 @@ func (b *Builder) Push(n Node) Reference { } func (b *Builder) PushAndChain(n Node) Reference { - newIdx := len(b.nodes) - b.nodes = append(b.nodes, n) + n.root = &b.tree + newIdx := len(b.tree.nodes) + b.tree.nodes = append(b.tree.nodes, n) if b.lastIdx >= 0 { - b.nodes[b.lastIdx].next = newIdx + b.tree.nodes[b.lastIdx].next = newIdx } b.lastIdx = newIdx return Reference{ @@ -50,9 +52,9 @@ func (b *Builder) PushAndChain(n Node) Reference { } func (b *Builder) AttachChild(parent Reference, child Reference) { - b.nodes[parent.idx].child = child.idx + b.tree.nodes[parent.idx].child = child.idx } func (b *Builder) Chain(from Reference, to Reference) { - b.nodes[from.idx].next = to.idx + b.tree.nodes[from.idx].next = to.idx } diff --git a/parser.go b/parser.go index f2ca2df..25726bd 100644 --- a/parser.go +++ b/parser.go @@ -11,30 +11,63 @@ import ( type parser struct { builder ast.Builder + ref ast.Reference + data []byte + left []byte + err error + first bool } -func (p *parser) parse(b []byte) error { - last, b, err := p.parseExpression(b) - if err != nil { - return err +func (p *parser) Reset(b []byte) { + p.builder.Reset() + p.ref = ast.Reference{} + p.data = b + p.left = b + p.err = nil + p.first = true +} + +func (p *parser) NextExpression() bool { + if len(p.left) == 0 || p.err != nil { + return false } - for len(b) > 0 { - b, err = p.parseNewline(b) - if err != nil { - return err + + p.builder.Reset() + p.ref = ast.Reference{} + + for { + if len(p.left) == 0 || p.err != nil { + return false } - var next ast.Reference - next, b, err = p.parseExpression(b) - if err != nil { - return err + if !p.first { + p.left, p.err = p.parseNewline(p.left) } - if next.Valid() { - p.builder.Chain(last, next) - last = next + + if len(p.left) == 0 || p.err != nil { + return false } + + p.ref, p.left, p.err = p.parseExpression(p.left) + + if p.err != nil { + return false + } + + if p.ref.Valid() { + return true + } + + p.first = false } - return nil +} + +func (p *parser) Expression() ast.Node { + return p.builder.NodeAt(p.ref) +} + +func (p *parser) Error() error { + return p.err } func (p *parser) parseNewline(b []byte) ([]byte, error) { diff --git a/parser_test.go b/parser_test.go index 1f55047..0bd9be7 100644 --- a/parser_test.go +++ b/parser_test.go @@ -119,23 +119,22 @@ func TestParser_AST_Numbers(t *testing.T) { for _, e := range examples { t.Run(e.desc, func(t *testing.T) { p := parser{} - err := p.parse([]byte(`A = ` + e.input)) + p.Reset([]byte(`A = ` + e.input)) + p.NextExpression() + err := p.Error() if e.err { require.Error(t, err) } else { require.NoError(t, err) - expected := astRoot{ - astNode{ - Kind: ast.KeyValue, - Children: []astNode{ - {Kind: e.kind, Data: []byte(e.input)}, - {Kind: ast.Key, Data: []byte(`A`)}, - }, + expected := astNode{ + Kind: ast.KeyValue, + Children: []astNode{ + {Kind: e.kind, Data: []byte(e.input)}, + {Kind: ast.Key, Data: []byte(`A`)}, }, } - - compareAST(t, expected, p.builder.Finish()) + compareNode(t, expected, p.Expression()) } }) } @@ -153,6 +152,13 @@ func compareAST(t *testing.T, expected astRoot, actual *ast.Root) { compareIterator(t, expected, it) } +func compareNode(t *testing.T, e astNode, n ast.Node) { + require.Equal(t, e.Kind, n.Kind) + require.Equal(t, e.Data, n.Data) + + compareIterator(t, e.Children, n.Children()) +} + func compareIterator(t *testing.T, expected []astNode, actual ast.Iterator) { idx := 0 @@ -164,10 +170,7 @@ func compareIterator(t *testing.T, expected []astNode, actual ast.Iterator) { } e := expected[idx] - require.Equal(t, e.Kind, n.Kind) - require.Equal(t, e.Data, n.Data) - - compareIterator(t, e.Children, n.Children()) + compareNode(t, e, n) idx++ } @@ -199,7 +202,7 @@ func (r astRoot) toOrig() *ast.Root { } } - return builder.Finish() + return builder.Tree() } func childrenToOrig(b *ast.Builder, nodes []astNode) ast.Reference { @@ -229,24 +232,22 @@ func TestParser_AST(t *testing.T) { examples := []struct { desc string input string - ast astRoot + ast astNode err bool }{ { desc: "simple string assignment", input: `A = "hello"`, - ast: astRoot{ - astNode{ - Kind: ast.KeyValue, - Children: []astNode{ - { - Kind: ast.String, - Data: []byte(`hello`), - }, - { - Kind: ast.Key, - Data: []byte(`A`), - }, + ast: astNode{ + Kind: ast.KeyValue, + Children: []astNode{ + { + Kind: ast.String, + Data: []byte(`hello`), + }, + { + Kind: ast.Key, + Data: []byte(`A`), }, }, }, @@ -254,18 +255,16 @@ func TestParser_AST(t *testing.T) { { desc: "simple bool assignment", input: `A = true`, - ast: astRoot{ - astNode{ - Kind: ast.KeyValue, - Children: []astNode{ - { - Kind: ast.Bool, - Data: []byte(`true`), - }, - { - Kind: ast.Key, - Data: []byte(`A`), - }, + ast: astNode{ + Kind: ast.KeyValue, + Children: []astNode{ + { + Kind: ast.Bool, + Data: []byte(`true`), + }, + { + Kind: ast.Key, + Data: []byte(`A`), }, }, }, @@ -273,36 +272,34 @@ func TestParser_AST(t *testing.T) { { desc: "array of strings", input: `A = ["hello", ["world", "again"]]`, - ast: astRoot{ - astNode{ - Kind: ast.KeyValue, - Children: []astNode{ - { - Kind: ast.Array, - Children: []astNode{ - { - Kind: ast.String, - Data: []byte(`hello`), - }, - { - Kind: ast.Array, - Children: []astNode{ - { - Kind: ast.String, - Data: []byte(`world`), - }, - { - Kind: ast.String, - Data: []byte(`again`), - }, + ast: astNode{ + Kind: ast.KeyValue, + Children: []astNode{ + { + Kind: ast.Array, + Children: []astNode{ + { + Kind: ast.String, + Data: []byte(`hello`), + }, + { + Kind: ast.Array, + Children: []astNode{ + { + Kind: ast.String, + Data: []byte(`world`), + }, + { + Kind: ast.String, + Data: []byte(`again`), }, }, }, }, - { - Kind: ast.Key, - Data: []byte(`A`), - }, + }, + { + Kind: ast.Key, + Data: []byte(`A`), }, }, }, @@ -310,27 +307,25 @@ func TestParser_AST(t *testing.T) { { desc: "array of arrays of strings", input: `A = ["hello", "world"]`, - ast: astRoot{ - astNode{ - Kind: ast.KeyValue, - Children: []astNode{ - { - Kind: ast.Array, - Children: []astNode{ - { - Kind: ast.String, - Data: []byte(`hello`), - }, - { - Kind: ast.String, - Data: []byte(`world`), - }, + ast: astNode{ + Kind: ast.KeyValue, + Children: []astNode{ + { + Kind: ast.Array, + Children: []astNode{ + { + Kind: ast.String, + Data: []byte(`hello`), + }, + { + Kind: ast.String, + Data: []byte(`world`), }, }, - { - Kind: ast.Key, - Data: []byte(`A`), - }, + }, + { + Kind: ast.Key, + Data: []byte(`A`), }, }, }, @@ -338,33 +333,31 @@ func TestParser_AST(t *testing.T) { { desc: "inline table", input: `name = { first = "Tom", last = "Preston-Werner" }`, - ast: astRoot{ - astNode{ - Kind: ast.KeyValue, - Children: []astNode{ - { - Kind: ast.InlineTable, - Children: []astNode{ - { - Kind: ast.KeyValue, - Children: []astNode{ - {Kind: ast.String, Data: []byte(`Tom`)}, - {Kind: ast.Key, Data: []byte(`first`)}, - }, + ast: astNode{ + Kind: ast.KeyValue, + Children: []astNode{ + { + Kind: ast.InlineTable, + Children: []astNode{ + { + Kind: ast.KeyValue, + Children: []astNode{ + {Kind: ast.String, Data: []byte(`Tom`)}, + {Kind: ast.Key, Data: []byte(`first`)}, }, - { - Kind: ast.KeyValue, - Children: []astNode{ - {Kind: ast.String, Data: []byte(`Preston-Werner`)}, - {Kind: ast.Key, Data: []byte(`last`)}, - }, + }, + { + Kind: ast.KeyValue, + Children: []astNode{ + {Kind: ast.String, Data: []byte(`Preston-Werner`)}, + {Kind: ast.Key, Data: []byte(`last`)}, }, }, }, - { - Kind: ast.Key, - Data: []byte(`name`), - }, + }, + { + Kind: ast.Key, + Data: []byte(`name`), }, }, }, @@ -374,12 +367,14 @@ func TestParser_AST(t *testing.T) { for _, e := range examples { t.Run(e.desc, func(t *testing.T) { p := parser{} - err := p.parse([]byte(e.input)) + p.Reset([]byte(e.input)) + p.NextExpression() + err := p.Error() if e.err { require.Error(t, err) } else { require.NoError(t, err) - compareAST(t, e.ast, p.builder.Finish()) + compareNode(t, e.ast, p.Expression()) } }) } diff --git a/unmarshaler.go b/unmarshaler.go index 14c66b4..31d12e6 100644 --- a/unmarshaler.go +++ b/unmarshaler.go @@ -11,13 +11,9 @@ import ( func Unmarshal(data []byte, v interface{}) error { p := parser{} - err := p.parse(data) - if err != nil { - return err - } - + p.Reset(data) d := decoder{} - return d.fromAst(p.builder.Finish(), v) + return d.FromParser(&p, v) } type decoder struct { @@ -41,7 +37,7 @@ func (d *decoder) arrayIndex(append bool, v reflect.Value) int { return idx } -func (d *decoder) fromAst(tree *ast.Root, v interface{}) error { +func (d *decoder) FromParser(p *parser, v interface{}) error { r := reflect.ValueOf(v) if r.Kind() != reflect.Ptr { return fmt.Errorf("need to target a pointer, not %s", r.Kind()) @@ -55,9 +51,8 @@ func (d *decoder) fromAst(tree *ast.Root, v interface{}) error { var root target = valueTarget(r.Elem()) current := root - it := tree.Iterator() - for it.Next() { - node := it.Node() + for p.NextExpression() { + node := p.Expression() var found bool switch node.Kind { case ast.KeyValue: @@ -83,7 +78,7 @@ func (d *decoder) fromAst(tree *ast.Root, v interface{}) error { } } - return nil + return p.Error() } // scopeWithKey performs target scoping when unmarshaling an ast.KeyValue node. diff --git a/unmarshaler_test.go b/unmarshaler_test.go index 8558e25..9acb3be 100644 --- a/unmarshaler_test.go +++ b/unmarshaler_test.go @@ -1,13 +1,12 @@ -package toml +package toml_test import ( "math" "testing" + "github.com/pelletier/go-toml/v2" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - - "github.com/pelletier/go-toml/v2/internal/ast" ) func TestUnmarshal_Integers(t *testing.T) { @@ -61,7 +60,7 @@ func TestUnmarshal_Integers(t *testing.T) { for _, e := range examples { t.Run(e.desc, func(t *testing.T) { doc := doc{} - err := Unmarshal([]byte(`A = `+e.input), &doc) + err := toml.Unmarshal([]byte(`A = `+e.input), &doc) require.NoError(t, err) assert.Equal(t, e.expected, doc.A) }) @@ -157,7 +156,7 @@ func TestUnmarshal_Floats(t *testing.T) { for _, e := range examples { t.Run(e.desc, func(t *testing.T) { doc := doc{} - err := Unmarshal([]byte(`A = `+e.input), &doc) + err := toml.Unmarshal([]byte(`A = `+e.input), &doc) require.NoError(t, err) if e.testFn != nil { e.testFn(t, doc.A) @@ -648,7 +647,7 @@ B = "data"`, if test.err && test.expected != nil { panic("invalid test: cannot expect both an error and a value") } - err := Unmarshal([]byte(e.input), test.target) + err := toml.Unmarshal([]byte(e.input), test.target) if test.err { require.Error(t, err) } else { @@ -658,312 +657,3 @@ B = "data"`, }) } } - -func TestFromAst_KV(t *testing.T) { - root := astRoot{ - astNode{ - Kind: ast.KeyValue, - Children: []astNode{ - { - Kind: ast.String, - Data: []byte(`hello`), - }, - { - Kind: ast.Key, - Data: []byte(`Foo`), - }, - }, - }, - } - - type Doc struct { - Foo string - } - - x := Doc{} - d := decoder{} - err := d.fromAst(root.toOrig(), &x) - require.NoError(t, err) - assert.Equal(t, Doc{Foo: "hello"}, x) -} - -func TestFromAst_Table(t *testing.T) { - t.Run("one level table on struct", func(t *testing.T) { - root := astRoot{ - astNode{ - Kind: ast.Table, - Children: []astNode{ - {Kind: ast.Key, Data: []byte(`Level1`)}, - }, - }, - astNode{ - Kind: ast.KeyValue, - Children: []astNode{ - { - Kind: ast.String, - Data: []byte(`hello`), - }, - { - Kind: ast.Key, - Data: []byte(`A`), - }, - }, - }, - astNode{ - Kind: ast.KeyValue, - Children: []astNode{ - { - Kind: ast.String, - Data: []byte(`world`), - }, - { - Kind: ast.Key, - Data: []byte(`B`), - }, - }, - }, - } - - type Level1 struct { - A string - B string - } - - type Doc struct { - Level1 Level1 - } - - x := Doc{} - d := decoder{} - err := d.fromAst(root.toOrig(), &x) - require.NoError(t, err) - assert.Equal(t, Doc{ - Level1: Level1{ - A: "hello", - B: "world", - }, - }, x) - }) - t.Run("one level table on struct", func(t *testing.T) { - root := astRoot{ - astNode{ - Kind: ast.Table, - Children: []astNode{ - {Kind: ast.Key, Data: []byte(`A`)}, - {Kind: ast.Key, Data: []byte(`B`)}, - }, - }, - astNode{ - Kind: ast.KeyValue, - Children: []astNode{ - { - Kind: ast.String, - Data: []byte(`value`), - }, - { - Kind: ast.Key, - Data: []byte(`C`), - }, - }, - }, - } - - type B struct { - C string - } - - type A struct { - B B - } - - type Doc struct { - A A - } - - x := Doc{} - d := decoder{} - err := d.fromAst(root.toOrig(), &x) - require.NoError(t, err) - assert.Equal(t, Doc{ - A: A{B: B{C: "value"}}, - }, x) - }) -} - -func TestFromAst_InlineTable(t *testing.T) { - t.Run("one level of strings", func(t *testing.T) { - root := astRoot{ - astNode{ - Kind: ast.KeyValue, - Children: []astNode{ - { - Kind: ast.InlineTable, - Children: []astNode{ - { - Kind: ast.KeyValue, - Children: []astNode{ - {Kind: ast.String, Data: []byte(`Tom`)}, - {Kind: ast.Key, Data: []byte(`First`)}, - }, - }, - { - Kind: ast.KeyValue, - Children: []astNode{ - {Kind: ast.String, Data: []byte(`Preston-Werner`)}, - {Kind: ast.Key, Data: []byte(`Last`)}, - }, - }, - }, - }, - { - Kind: ast.Key, - Data: []byte(`Name`), - }, - }, - }, - } - - type Name struct { - First string - Last string - } - - type Doc struct { - Name Name - } - - x := Doc{} - d := decoder{} - err := d.fromAst(root.toOrig(), &x) - require.NoError(t, err) - assert.Equal(t, Doc{ - Name: Name{ - First: "Tom", - Last: "Preston-Werner", - }, - }, x) - - }) -} - -func TestFromAst_Slice(t *testing.T) { - t.Run("slice of string", func(t *testing.T) { - root := astRoot{ - astNode{ - Kind: ast.KeyValue, - Children: []astNode{ - { - Kind: ast.Array, - Children: []astNode{ - { - Kind: ast.String, - Data: []byte(`hello`), - }, - { - Kind: ast.String, - Data: []byte(`world`), - }, - }, - }, - { - Kind: ast.Key, - Data: []byte(`Foo`), - }, - }, - }, - } - - type Doc struct { - Foo []string - } - - x := Doc{} - d := decoder{} - err := d.fromAst(root.toOrig(), &x) - require.NoError(t, err) - assert.Equal(t, Doc{Foo: []string{"hello", "world"}}, x) - }) - - t.Run("slice of interfaces for strings", func(t *testing.T) { - root := astRoot{ - astNode{ - Kind: ast.KeyValue, - Children: []astNode{ - { - Kind: ast.Array, - Children: []astNode{ - { - Kind: ast.String, - Data: []byte(`hello`), - }, - { - Kind: ast.String, - Data: []byte(`world`), - }, - }, - }, - { - Kind: ast.Key, - Data: []byte(`Foo`), - }, - }, - }, - } - - type Doc struct { - Foo []interface{} - } - - x := Doc{} - d := decoder{} - err := d.fromAst(root.toOrig(), &x) - require.NoError(t, err) - assert.Equal(t, Doc{Foo: []interface{}{"hello", "world"}}, x) - }) - - t.Run("slice of interfaces with slices", func(t *testing.T) { - root := astRoot{ - astNode{ - Kind: ast.KeyValue, - Children: []astNode{ - { - Kind: ast.Array, - Children: []astNode{ - { - Kind: ast.String, - Data: []byte(`hello`), - }, - { - Kind: ast.Array, - Children: []astNode{ - { - Kind: ast.String, - Data: []byte(`inner1`), - }, - { - Kind: ast.String, - Data: []byte(`inner2`), - }, - }, - }, - }, - }, - { - Kind: ast.Key, - Data: []byte(`Foo`), - }, - }, - }, - } - - type Doc struct { - Foo []interface{} - } - - x := Doc{} - d := decoder{} - err := d.fromAst(root.toOrig(), &x) - require.NoError(t, err) - assert.Equal(t, Doc{Foo: []interface{}{"hello", []interface{}{"inner1", "inner2"}}}, x) - }) -}