Reduce marshal and unmarshal overhead (#1044)
* Reduce marshal and unmarshal overhead

Targeted optimizations to reduce performance overhead introduced by recent feature additions and the removal of unsafe.

Unmarshal:
- parseKeyval: access the node directly in the builder's slice to set Raw, bypassing NodeAt, which triggers a GC write barrier for the nodes-pointer on every key-value expression.
- Iterator.Next: cache the *nodes slice dereference in a local variable to avoid repeated pointer-to-slice indirection in the hot loop.

Marshal:
- Guard shouldOmitZero calls with an inlineable options.omitzero check. shouldOmitZero has inlining cost 1145 (budget 80), so avoiding the function call when omitzero is not set removes per-field overhead.
- Inline the isNil check in encodeMap. isNil has inlining cost 93 (budget 80), so expanding it at the single hot call site avoids per-map-entry function call overhead.

Update README benchmarks.

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -235,17 +235,17 @@ the AST level. See https://pkg.go.dev/github.com/pelletier/go-toml/v2/unstable.
|
|||||||
Execution time speedup compared to other Go TOML libraries:
|
Execution time speedup compared to other Go TOML libraries:
|
||||||
|
|
||||||
<table>
|
<table>
|
||||||
<thead>
|
<thead>
|
||||||
<tr><th>Benchmark</th><th>go-toml v1</th><th>BurntSushi/toml</th></tr>
|
<tr><th>Benchmark</th><th>go-toml v1</th><th>BurntSushi/toml</th></tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
<tr><td>Marshal/HugoFrontMatter-2</td><td>2.1x</td><td>2.0x</td></tr>
|
<tr><td>Marshal/HugoFrontMatter-2</td><td>2.1x</td><td>2.0x</td></tr>
|
||||||
<tr><td>Marshal/ReferenceFile/map-2</td><td>1.9x</td><td>2.0x</td></tr>
|
<tr><td>Marshal/ReferenceFile/map-2</td><td>2.0x</td><td>2.0x</td></tr>
|
||||||
<tr><td>Marshal/ReferenceFile/struct-2</td><td>2.3x</td><td>2.5x</td></tr>
|
<tr><td>Marshal/ReferenceFile/struct-2</td><td>2.3x</td><td>2.5x</td></tr>
|
||||||
<tr><td>Unmarshal/HugoFrontMatter-2</td><td>3.4x</td><td>2.8x</td></tr>
|
<tr><td>Unmarshal/HugoFrontMatter-2</td><td>3.3x</td><td>2.8x</td></tr>
|
||||||
<tr><td>Unmarshal/ReferenceFile/map-2</td><td>3.0x</td><td>3.0x</td></tr>
|
<tr><td>Unmarshal/ReferenceFile/map-2</td><td>2.9x</td><td>3.0x</td></tr>
|
||||||
<tr><td>Unmarshal/ReferenceFile/struct-2</td><td>4.9x</td><td>5.1x</td></tr>
|
<tr><td>Unmarshal/ReferenceFile/struct-2</td><td>4.8x</td><td>5.0x</td></tr>
|
||||||
</tbody>
|
</tbody>
|
||||||
</table>
|
</table>
|
||||||
<details><summary>See more</summary>
|
<details><summary>See more</summary>
|
||||||
<p>The table above has the results of the most common use-cases. The table below
|
<p>The table above has the results of the most common use-cases. The table below
|
||||||
@@ -253,22 +253,22 @@ contains the results of all benchmarks, including unrealistic ones. It is
|
|||||||
provided for completeness.</p>
|
provided for completeness.</p>
|
||||||
|
|
||||||
<table>
|
<table>
|
||||||
<thead>
|
<thead>
|
||||||
<tr><th>Benchmark</th><th>go-toml v1</th><th>BurntSushi/toml</th></tr>
|
<tr><th>Benchmark</th><th>go-toml v1</th><th>BurntSushi/toml</th></tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
<tr><td>Marshal/SimpleDocument/map-2</td><td>2.0x</td><td>2.9x</td></tr>
|
<tr><td>Marshal/SimpleDocument/map-2</td><td>2.0x</td><td>2.9x</td></tr>
|
||||||
<tr><td>Marshal/SimpleDocument/struct-2</td><td>2.5x</td><td>3.5x</td></tr>
|
<tr><td>Marshal/SimpleDocument/struct-2</td><td>2.5x</td><td>3.6x</td></tr>
|
||||||
<tr><td>Unmarshal/SimpleDocument/map-2</td><td>4.3x</td><td>3.5x</td></tr>
|
<tr><td>Unmarshal/SimpleDocument/map-2</td><td>4.2x</td><td>3.4x</td></tr>
|
||||||
<tr><td>Unmarshal/SimpleDocument/struct-2</td><td>5.9x</td><td>4.5x</td></tr>
|
<tr><td>Unmarshal/SimpleDocument/struct-2</td><td>5.9x</td><td>4.4x</td></tr>
|
||||||
<tr><td>UnmarshalDataset/example-2</td><td>3.2x</td><td>2.9x</td></tr>
|
<tr><td>UnmarshalDataset/example-2</td><td>3.2x</td><td>2.9x</td></tr>
|
||||||
<tr><td>UnmarshalDataset/code-2</td><td>2.4x</td><td>2.9x</td></tr>
|
<tr><td>UnmarshalDataset/code-2</td><td>2.4x</td><td>2.8x</td></tr>
|
||||||
<tr><td>UnmarshalDataset/twitter-2</td><td>2.7x</td><td>2.5x</td></tr>
|
<tr><td>UnmarshalDataset/twitter-2</td><td>2.7x</td><td>2.5x</td></tr>
|
||||||
<tr><td>UnmarshalDataset/citm_catalog-2</td><td>2.1x</td><td>2.1x</td></tr>
|
<tr><td>UnmarshalDataset/citm_catalog-2</td><td>2.3x</td><td>2.3x</td></tr>
|
||||||
<tr><td>UnmarshalDataset/canada-2</td><td>1.9x</td><td>1.5x</td></tr>
|
<tr><td>UnmarshalDataset/canada-2</td><td>1.9x</td><td>1.5x</td></tr>
|
||||||
<tr><td>UnmarshalDataset/config-2</td><td>5.4x</td><td>3.1x</td></tr>
|
<tr><td>UnmarshalDataset/config-2</td><td>5.4x</td><td>3.0x</td></tr>
|
||||||
<tr><td>geomean</td><td>2.9x</td><td>2.8x</td></tr>
|
<tr><td>geomean</td><td>2.9x</td><td>2.8x</td></tr>
|
||||||
</tbody>
|
</tbody>
|
||||||
</table>
|
</table>
|
||||||
<p>This table can be generated with <code>./ci.sh benchmark -a -html</code>.</p>
|
<p>This table can be generated with <code>./ci.sh benchmark -a -html</code>.</p>
|
||||||
</details>
|
</details>
|
||||||
|
|||||||
+12
-9
@@ -704,15 +704,18 @@ func (enc *Encoder) encodeMap(b []byte, ctx encoderCtx, v reflect.Value) ([]byte
|
|||||||
for iter.Next() {
|
for iter.Next() {
|
||||||
v := iter.Value()
|
v := iter.Value()
|
||||||
|
|
||||||
if isNil(v) {
|
// Handle nil values: convert nil pointers to zero value,
|
||||||
// For nil pointers, convert to zero value of the element type.
|
// skip nil interfaces and nil maps.
|
||||||
// This allows round-trip marshaling of maps with nil pointer values.
|
switch v.Kind() {
|
||||||
// For nil interfaces and nil maps, skip since we can't derive a type.
|
case reflect.Ptr:
|
||||||
if v.Kind() == reflect.Ptr {
|
if v.IsNil() {
|
||||||
v = reflect.Zero(v.Type().Elem())
|
v = reflect.Zero(v.Type().Elem())
|
||||||
} else {
|
}
|
||||||
|
case reflect.Interface, reflect.Map:
|
||||||
|
if v.IsNil() {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
default:
|
||||||
}
|
}
|
||||||
|
|
||||||
k, err := enc.keyToString(iter.Key())
|
k, err := enc.keyToString(iter.Key())
|
||||||
@@ -936,7 +939,7 @@ func (enc *Encoder) encodeTable(b []byte, ctx encoderCtx, t table) ([]byte, erro
|
|||||||
if shouldOmitEmpty(kv.Options, kv.Value) {
|
if shouldOmitEmpty(kv.Options, kv.Value) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if shouldOmitZero(kv.Options, kv.Value) {
|
if kv.Options.omitzero && shouldOmitZero(kv.Options, kv.Value) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
hasNonEmptyKV = true
|
hasNonEmptyKV = true
|
||||||
@@ -958,7 +961,7 @@ func (enc *Encoder) encodeTable(b []byte, ctx encoderCtx, t table) ([]byte, erro
|
|||||||
if shouldOmitEmpty(table.Options, table.Value) {
|
if shouldOmitEmpty(table.Options, table.Value) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if shouldOmitZero(table.Options, table.Value) {
|
if table.Options.omitzero && shouldOmitZero(table.Options, table.Value) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if first {
|
if first {
|
||||||
@@ -995,7 +998,7 @@ func (enc *Encoder) encodeTableInline(b []byte, ctx encoderCtx, t table) ([]byte
|
|||||||
if shouldOmitEmpty(kv.Options, kv.Value) {
|
if shouldOmitEmpty(kv.Options, kv.Value) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if shouldOmitZero(kv.Options, kv.Value) {
|
if kv.Options.omitzero && shouldOmitZero(kv.Options, kv.Value) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
+7
-3
@@ -28,12 +28,16 @@ func (c *Iterator) Next() bool {
|
|||||||
if c.nodes == nil {
|
if c.nodes == nil {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
nodes := *c.nodes
|
||||||
if !c.started {
|
if !c.started {
|
||||||
c.started = true
|
c.started = true
|
||||||
} else if c.idx >= 0 {
|
} else {
|
||||||
c.idx = (*c.nodes)[c.idx].next
|
idx := c.idx
|
||||||
|
if idx >= 0 && int(idx) < len(nodes) {
|
||||||
|
c.idx = nodes[idx].next
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return c.idx >= 0 && int(c.idx) < len(*c.nodes)
|
return c.idx >= 0 && int(c.idx) < len(nodes)
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsLast returns true if the current node of the iterator is the last
|
// IsLast returns true if the current node of the iterator is the last
|
||||||
|
|||||||
+4
-3
@@ -363,9 +363,10 @@ func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
|
|||||||
p.builder.Chain(valRef, key)
|
p.builder.Chain(valRef, key)
|
||||||
p.builder.AttachChild(ref, valRef)
|
p.builder.AttachChild(ref, valRef)
|
||||||
|
|
||||||
// Set Raw to span the entire key-value expression
|
// Set Raw to span the entire key-value expression.
|
||||||
node := p.builder.NodeAt(ref)
|
// Access the node directly in the slice to avoid the write barrier
|
||||||
node.Raw = p.rangeOfToken(startB[:len(startB)-len(b)], b)
|
// that NodeAt's nodes-pointer setup would trigger.
|
||||||
|
p.builder.tree.nodes[ref].Raw = p.rangeOfToken(startB[:len(startB)-len(b)], b)
|
||||||
|
|
||||||
return ref, b, err
|
return ref, b, err
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user