Reduce marshal and unmarshal overhead (#1044)
* Reduce marshal and unmarshal overhead

Targeted optimizations to reduce performance overhead introduced by recent feature additions and the removal of unsafe usage.

Unmarshal:
- parseKeyval: access the node directly in the builder's slice to set Raw, bypassing NodeAt, which triggers a GC write barrier for the nodes-pointer on every key-value expression.
- Iterator.Next: cache the *nodes slice dereference in a local variable to avoid repeated pointer-to-slice indirection in the hot loop.

Marshal:
- Guard shouldOmitZero calls with an inlineable options.omitzero check. shouldOmitZero has inlining cost 1145 (budget 80), so avoiding the function call when omitzero is not set removes per-field overhead.
- Inline the isNil check in encodeMap. isNil has inlining cost 93 (budget 80), so expanding it at the single hot call site avoids per-map-entry function call overhead.

Update README benchmarks.

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -235,17 +235,17 @@ the AST level. See https://pkg.go.dev/github.com/pelletier/go-toml/v2/unstable.
|
||||
Execution time speedup compared to other Go TOML libraries:
|
||||
|
||||
<table>
|
||||
<thead>
|
||||
<tr><th>Benchmark</th><th>go-toml v1</th><th>BurntSushi/toml</th></tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr><td>Marshal/HugoFrontMatter-2</td><td>2.1x</td><td>2.0x</td></tr>
|
||||
<tr><td>Marshal/ReferenceFile/map-2</td><td>1.9x</td><td>2.0x</td></tr>
|
||||
<tr><td>Marshal/ReferenceFile/struct-2</td><td>2.3x</td><td>2.5x</td></tr>
|
||||
<tr><td>Unmarshal/HugoFrontMatter-2</td><td>3.4x</td><td>2.8x</td></tr>
|
||||
<tr><td>Unmarshal/ReferenceFile/map-2</td><td>3.0x</td><td>3.0x</td></tr>
|
||||
<tr><td>Unmarshal/ReferenceFile/struct-2</td><td>4.9x</td><td>5.1x</td></tr>
|
||||
</tbody>
|
||||
<thead>
|
||||
<tr><th>Benchmark</th><th>go-toml v1</th><th>BurntSushi/toml</th></tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr><td>Marshal/HugoFrontMatter-2</td><td>2.1x</td><td>2.0x</td></tr>
|
||||
<tr><td>Marshal/ReferenceFile/map-2</td><td>2.0x</td><td>2.0x</td></tr>
|
||||
<tr><td>Marshal/ReferenceFile/struct-2</td><td>2.3x</td><td>2.5x</td></tr>
|
||||
<tr><td>Unmarshal/HugoFrontMatter-2</td><td>3.3x</td><td>2.8x</td></tr>
|
||||
<tr><td>Unmarshal/ReferenceFile/map-2</td><td>2.9x</td><td>3.0x</td></tr>
|
||||
<tr><td>Unmarshal/ReferenceFile/struct-2</td><td>4.8x</td><td>5.0x</td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<details><summary>See more</summary>
|
||||
<p>The table above has the results of the most common use-cases. The table below
|
||||
@@ -253,22 +253,22 @@ contains the results of all benchmarks, including unrealistic ones. It is
|
||||
provided for completeness.</p>
|
||||
|
||||
<table>
|
||||
<thead>
|
||||
<tr><th>Benchmark</th><th>go-toml v1</th><th>BurntSushi/toml</th></tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr><td>Marshal/SimpleDocument/map-2</td><td>2.0x</td><td>2.9x</td></tr>
|
||||
<tr><td>Marshal/SimpleDocument/struct-2</td><td>2.5x</td><td>3.5x</td></tr>
|
||||
<tr><td>Unmarshal/SimpleDocument/map-2</td><td>4.3x</td><td>3.5x</td></tr>
|
||||
<tr><td>Unmarshal/SimpleDocument/struct-2</td><td>5.9x</td><td>4.5x</td></tr>
|
||||
<tr><td>UnmarshalDataset/example-2</td><td>3.2x</td><td>2.9x</td></tr>
|
||||
<tr><td>UnmarshalDataset/code-2</td><td>2.4x</td><td>2.9x</td></tr>
|
||||
<tr><td>UnmarshalDataset/twitter-2</td><td>2.7x</td><td>2.5x</td></tr>
|
||||
<tr><td>UnmarshalDataset/citm_catalog-2</td><td>2.1x</td><td>2.1x</td></tr>
|
||||
<tr><td>UnmarshalDataset/canada-2</td><td>1.9x</td><td>1.5x</td></tr>
|
||||
<tr><td>UnmarshalDataset/config-2</td><td>5.4x</td><td>3.1x</td></tr>
|
||||
<tr><td>geomean</td><td>2.9x</td><td>2.8x</td></tr>
|
||||
</tbody>
|
||||
<thead>
|
||||
<tr><th>Benchmark</th><th>go-toml v1</th><th>BurntSushi/toml</th></tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr><td>Marshal/SimpleDocument/map-2</td><td>2.0x</td><td>2.9x</td></tr>
|
||||
<tr><td>Marshal/SimpleDocument/struct-2</td><td>2.5x</td><td>3.6x</td></tr>
|
||||
<tr><td>Unmarshal/SimpleDocument/map-2</td><td>4.2x</td><td>3.4x</td></tr>
|
||||
<tr><td>Unmarshal/SimpleDocument/struct-2</td><td>5.9x</td><td>4.4x</td></tr>
|
||||
<tr><td>UnmarshalDataset/example-2</td><td>3.2x</td><td>2.9x</td></tr>
|
||||
<tr><td>UnmarshalDataset/code-2</td><td>2.4x</td><td>2.8x</td></tr>
|
||||
<tr><td>UnmarshalDataset/twitter-2</td><td>2.7x</td><td>2.5x</td></tr>
|
||||
<tr><td>UnmarshalDataset/citm_catalog-2</td><td>2.3x</td><td>2.3x</td></tr>
|
||||
<tr><td>UnmarshalDataset/canada-2</td><td>1.9x</td><td>1.5x</td></tr>
|
||||
<tr><td>UnmarshalDataset/config-2</td><td>5.4x</td><td>3.0x</td></tr>
|
||||
<tr><td>geomean</td><td>2.9x</td><td>2.8x</td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p>This table can be generated with <code>./ci.sh benchmark -a -html</code>.</p>
|
||||
</details>
|
||||
|
||||
+12
-9
@@ -704,15 +704,18 @@ func (enc *Encoder) encodeMap(b []byte, ctx encoderCtx, v reflect.Value) ([]byte
|
||||
for iter.Next() {
|
||||
v := iter.Value()
|
||||
|
||||
if isNil(v) {
|
||||
// For nil pointers, convert to zero value of the element type.
|
||||
// This allows round-trip marshaling of maps with nil pointer values.
|
||||
// For nil interfaces and nil maps, skip since we can't derive a type.
|
||||
if v.Kind() == reflect.Ptr {
|
||||
// Handle nil values: convert nil pointers to zero value,
|
||||
// skip nil interfaces and nil maps.
|
||||
switch v.Kind() {
|
||||
case reflect.Ptr:
|
||||
if v.IsNil() {
|
||||
v = reflect.Zero(v.Type().Elem())
|
||||
} else {
|
||||
}
|
||||
case reflect.Interface, reflect.Map:
|
||||
if v.IsNil() {
|
||||
continue
|
||||
}
|
||||
default:
|
||||
}
|
||||
|
||||
k, err := enc.keyToString(iter.Key())
|
||||
@@ -936,7 +939,7 @@ func (enc *Encoder) encodeTable(b []byte, ctx encoderCtx, t table) ([]byte, erro
|
||||
if shouldOmitEmpty(kv.Options, kv.Value) {
|
||||
continue
|
||||
}
|
||||
if shouldOmitZero(kv.Options, kv.Value) {
|
||||
if kv.Options.omitzero && shouldOmitZero(kv.Options, kv.Value) {
|
||||
continue
|
||||
}
|
||||
hasNonEmptyKV = true
|
||||
@@ -958,7 +961,7 @@ func (enc *Encoder) encodeTable(b []byte, ctx encoderCtx, t table) ([]byte, erro
|
||||
if shouldOmitEmpty(table.Options, table.Value) {
|
||||
continue
|
||||
}
|
||||
if shouldOmitZero(table.Options, table.Value) {
|
||||
if table.Options.omitzero && shouldOmitZero(table.Options, table.Value) {
|
||||
continue
|
||||
}
|
||||
if first {
|
||||
@@ -995,7 +998,7 @@ func (enc *Encoder) encodeTableInline(b []byte, ctx encoderCtx, t table) ([]byte
|
||||
if shouldOmitEmpty(kv.Options, kv.Value) {
|
||||
continue
|
||||
}
|
||||
if shouldOmitZero(kv.Options, kv.Value) {
|
||||
if kv.Options.omitzero && shouldOmitZero(kv.Options, kv.Value) {
|
||||
continue
|
||||
}
|
||||
|
||||
|
||||
+7
-3
@@ -28,12 +28,16 @@ func (c *Iterator) Next() bool {
|
||||
if c.nodes == nil {
|
||||
return false
|
||||
}
|
||||
nodes := *c.nodes
|
||||
if !c.started {
|
||||
c.started = true
|
||||
} else if c.idx >= 0 {
|
||||
c.idx = (*c.nodes)[c.idx].next
|
||||
} else {
|
||||
idx := c.idx
|
||||
if idx >= 0 && int(idx) < len(nodes) {
|
||||
c.idx = nodes[idx].next
|
||||
}
|
||||
}
|
||||
return c.idx >= 0 && int(c.idx) < len(*c.nodes)
|
||||
return c.idx >= 0 && int(c.idx) < len(nodes)
|
||||
}
|
||||
|
||||
// IsLast returns true if the current node of the iterator is the last
|
||||
|
||||
+4
-3
@@ -363,9 +363,10 @@ func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
|
||||
p.builder.Chain(valRef, key)
|
||||
p.builder.AttachChild(ref, valRef)
|
||||
|
||||
// Set Raw to span the entire key-value expression
|
||||
node := p.builder.NodeAt(ref)
|
||||
node.Raw = p.rangeOfToken(startB[:len(startB)-len(b)], b)
|
||||
// Set Raw to span the entire key-value expression.
|
||||
// Access the node directly in the slice to avoid the write barrier
|
||||
// that NodeAt's nodes-pointer setup would trigger.
|
||||
p.builder.tree.nodes[ref].Raw = p.rangeOfToken(startB[:len(startB)-len(b)], b)
|
||||
|
||||
return ref, b, err
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user