fix parser error highlight offsets for non-suffix slices

Co-authored-by: Thomas Pelletier <thomas@pelletier.dev>
Reduce marshal and unmarshal overhead (#1044)
2026-04-12 12:26:40 +00:00 · 2026-03-24 11:08:39 +00:00 · 2026-03-23 22:00:18 -04:00
11 changed files with 94 additions and 202 deletions
@@ -1,25 +0,0 @@
 name: capabilities
 on:
  push:
    branches:
      - v2
  pull_request:
    branches:
      - v2
 jobs:
  check:
    name: check capabilities
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
      - name: Setup go
        uses: actions/setup-go@v6
        with:
          go-version: "1.26"
      - name: Install capslock
        run: go install github.com/google/capslock/cmd/capslock@latest
      - name: Check for new capabilities
        run: ./caps.sh check
@@ -53,14 +53,6 @@ go-toml is a TOML library for Go. The goal is to provide an easy-to-use and effi
 - Commit messages must explain **why** the change is needed
 - Keep messages clear and informative even if details are in the PR description
 ### Capabilities
 go-toml tracks system-level capabilities using [capslock](https://github.com/google/capslock). The baseline is in `capability_baseline.txt` and CI enforces that it does not grow.
 - **Do not introduce new capabilities.** PRs that increase the capability set (e.g., adding network access, subprocess execution, syscalls) are unlikely to be accepted.
 - If a change causes the capabilities check to fail, do not update the baseline to make it pass. Instead, rethink the approach to avoid requiring new capabilities.
 - To check locally: `./caps.sh check` (requires `capslock` installed via `go install github.com/google/capslock/cmd/capslock@latest`)
 ## Pull Request Checklist
 Before submitting:
@@ -69,5 +61,4 @@ Before submitting:
 2. No backward-incompatible changes (unless discussed)
 3. Relevant documentation added/updated
 4. No performance regression (verify with benchmarks)
-5. Capabilities are not increasing (`./caps.sh check`)
+5. Title is clear and understandable for changelog
-6. Title is clear and understandable for changelog
@@ -180,25 +180,6 @@ description. Pull requests that lower performance will receive more scrutiny.
 [benchstat]: https://pkg.go.dev/golang.org/x/perf/cmd/benchstat
 ### Capabilities
 We use [capslock](https://github.com/google/capslock) to track what
 system-level capabilities (file access, network, syscalls, etc.) each package
 requires. The current baseline is in `capability_baseline.txt`. CI will fail if
 a change introduces a new capability.
 **Pull requests that increase the set of capabilities are unlikely to be
 accepted.** go-toml is a parsing library and should not need network access,
 subprocess execution, or other capabilities beyond what it already uses.
 If you believe a new capability is genuinely needed, discuss it in an issue
 first. To update the baseline after approval:
 ```bash
 go install github.com/google/capslock/cmd/capslock@latest
 ./caps.sh generate
 ```
 ### Style
 Try to look around and follow the same format and structure as the rest of the
@@ -239,12 +239,12 @@ Execution time speedup compared to other Go TOML libraries:
        <tr><th>Benchmark</th><th>go-toml v1</th><th>BurntSushi/toml</th></tr>
    </thead>
    <tbody>
-		<tr><td>Marshal/HugoFrontMatter-2</td><td>1.9x</td><td>2.2x</td></tr>
+        <tr><td>Marshal/HugoFrontMatter-2</td><td>2.1x</td><td>2.0x</td></tr>
-		<tr><td>Marshal/ReferenceFile/map-2</td><td>1.7x</td><td>2.1x</td></tr>
+        <tr><td>Marshal/ReferenceFile/map-2</td><td>2.0x</td><td>2.0x</td></tr>
-		<tr><td>Marshal/ReferenceFile/struct-2</td><td>2.2x</td><td>3.0x</td></tr>
+        <tr><td>Marshal/ReferenceFile/struct-2</td><td>2.3x</td><td>2.5x</td></tr>
-		<tr><td>Unmarshal/HugoFrontMatter-2</td><td>2.9x</td><td>2.7x</td></tr>
+        <tr><td>Unmarshal/HugoFrontMatter-2</td><td>3.3x</td><td>2.8x</td></tr>
-		<tr><td>Unmarshal/ReferenceFile/map-2</td><td>2.6x</td><td>2.7x</td></tr>
+        <tr><td>Unmarshal/ReferenceFile/map-2</td><td>2.9x</td><td>3.0x</td></tr>
-		<tr><td>Unmarshal/ReferenceFile/struct-2</td><td>4.6x</td><td>5.1x</td></tr>
+        <tr><td>Unmarshal/ReferenceFile/struct-2</td><td>4.8x</td><td>5.0x</td></tr>
     </tbody>
 </table>
 <details><summary>See more</summary>
@@ -257,17 +257,17 @@ provided for completeness.</p>
        <tr><th>Benchmark</th><th>go-toml v1</th><th>BurntSushi/toml</th></tr>
    </thead>
    <tbody>
-		<tr><td>Marshal/SimpleDocument/map-2</td><td>1.8x</td><td>2.7x</td></tr>
+        <tr><td>Marshal/SimpleDocument/map-2</td><td>2.0x</td><td>2.9x</td></tr>
-		<tr><td>Marshal/SimpleDocument/struct-2</td><td>2.7x</td><td>3.8x</td></tr>
+        <tr><td>Marshal/SimpleDocument/struct-2</td><td>2.5x</td><td>3.6x</td></tr>
-		<tr><td>Unmarshal/SimpleDocument/map-2</td><td>3.8x</td><td>3.0x</td></tr>
+        <tr><td>Unmarshal/SimpleDocument/map-2</td><td>4.2x</td><td>3.4x</td></tr>
-		<tr><td>Unmarshal/SimpleDocument/struct-2</td><td>5.6x</td><td>4.1x</td></tr>
+        <tr><td>Unmarshal/SimpleDocument/struct-2</td><td>5.9x</td><td>4.4x</td></tr>
-		<tr><td>UnmarshalDataset/example-2</td><td>3.0x</td><td>3.2x</td></tr>
+        <tr><td>UnmarshalDataset/example-2</td><td>3.2x</td><td>2.9x</td></tr>
-		<tr><td>UnmarshalDataset/code-2</td><td>2.3x</td><td>2.9x</td></tr>
+        <tr><td>UnmarshalDataset/code-2</td><td>2.4x</td><td>2.8x</td></tr>
-		<tr><td>UnmarshalDataset/twitter-2</td><td>2.6x</td><td>2.7x</td></tr>
+        <tr><td>UnmarshalDataset/twitter-2</td><td>2.7x</td><td>2.5x</td></tr>
-		<tr><td>UnmarshalDataset/citm_catalog-2</td><td>2.2x</td><td>2.3x</td></tr>
+        <tr><td>UnmarshalDataset/citm_catalog-2</td><td>2.3x</td><td>2.3x</td></tr>
-		<tr><td>UnmarshalDataset/canada-2</td><td>1.8x</td><td>1.5x</td></tr>
+        <tr><td>UnmarshalDataset/canada-2</td><td>1.9x</td><td>1.5x</td></tr>
-		<tr><td>UnmarshalDataset/config-2</td><td>4.1x</td><td>2.9x</td></tr>
+        <tr><td>UnmarshalDataset/config-2</td><td>5.4x</td><td>3.0x</td></tr>
-		<tr><td>geomean</td><td>2.7x</td><td>2.8x</td></tr>
+        <tr><td>geomean</td><td>2.9x</td><td>2.8x</td></tr>
     </tbody>
 </table>
 <p>This table can be generated with <code>./ci.sh benchmark -a -html</code>.</p>
@@ -1 +0,0 @@
 github.com/pelletier/go-toml/v2: CAPABILITY_REFLECT, CAPABILITY_UNANALYZED, CAPABILITY_UNSAFE_POINTER
@@ -1,101 +0,0 @@
 #!/usr/bin/env bash
 #
 # Generates or checks the capability baseline for go-toml.
 #
 # Usage:
 #   ./caps.sh generate   # regenerate capability_baseline.txt
 #   ./caps.sh check      # check that capabilities haven't grown
 #
 # Requires: go, capslock (go install github.com/google/capslock/cmd/capslock@latest)
 set -euo pipefail
 BASELINE="capability_baseline.txt"
 CAPSLOCK="${CAPSLOCK:-capslock}"
 # Capabilities that must never appear in any package.
 FORBIDDEN_CAPS=(
    CAPABILITY_NETWORK
    CAPABILITY_CGO
    CAPABILITY_EXEC
 )
 # capslock_to_baseline runs $CAPSLOCK on the current package and reformats its
 # output with jq into one "<key>: <value>, <value>, ..." line per entry,
 # entries sorted by key and each entry's values sorted.
 capslock_to_baseline() {
    local jq_filter='to_entries | sort_by(.key) | .[] | .key + ": " + (.value | sort | join(", "))'
    "$CAPSLOCK" -packages=. -output=package -granularity=package | jq -r "$jq_filter"
 }
 # generate overwrites $BASELINE with the current capability listing and
 # reports the file it wrote.
 generate() {
    capslock_to_baseline >"$BASELINE"
    printf 'Wrote %s\n' "$BASELINE"
 }
 # check compares the current capability listing against $BASELINE.
 #
 # It exits with status 1 when the baseline file is missing, when a capability
 # from FORBIDDEN_CAPS is present, when a capability name appears that the
 # baseline does not contain, or when a package lists a capability it does not
 # have in the baseline. Otherwise it prints an OK message and returns.
 check() {
    if [ ! -f "$BASELINE" ]; then
        echo "ERROR: $BASELINE not found. Run '$0 generate' first."
        exit 1
    fi
    # current is deliberately NOT declared local: the EXIT trap can run after
    # this function has returned, and under `set -u` an out-of-scope local
    # would be an unbound variable inside the trap.
    current=$(mktemp)
    trap 'rm -f "$current"' EXIT
    capslock_to_baseline > "$current"
    local failed=0
    local cap pkg caps baseline_caps current_caps new_caps baseline_pkg_caps
    # Check for forbidden capabilities in current output.
    # -w -F matches the name as a whole literal word, so one capability can
    # never be satisfied by a longer name that merely contains it.
    for cap in "${FORBIDDEN_CAPS[@]}"; do
        if grep -qwF -- "$cap" "$current"; then
            echo "FORBIDDEN capability found: $cap"
            grep -wF -- "$cap" "$current"
            failed=1
        fi
    done
    # Extract all unique capability names from baseline and current.
    # grep exits 1 when nothing matches; `|| true` keeps `set -e -o pipefail`
    # from aborting the script silently on a listing without capabilities.
    baseline_caps=$({ grep -oE 'CAPABILITY_[A-Z_]+' "$BASELINE" || true; } | sort -u)
    current_caps=$({ grep -oE 'CAPABILITY_[A-Z_]+' "$current" || true; } | sort -u)
    # Check for new capability names not in the baseline.
    new_caps=$(comm -13 <(echo "$baseline_caps") <(echo "$current_caps"))
    if [ -n "$new_caps" ]; then
        echo "NEW capabilities detected (not in baseline):"
        echo "$new_caps"
        failed=1
    fi
    # Check for new per-package capabilities (a package gained a capability it didn't have before).
    while IFS=': ' read -r pkg caps; do
        # Look the package up by literal line prefix: package paths contain
        # dots, which a regular expression would treat as wildcards.
        baseline_pkg_caps=$(awk -v prefix="${pkg}: " 'index($0, prefix) == 1 { print substr($0, length(prefix) + 1) }' "$BASELINE")
        if [ -z "$baseline_pkg_caps" ]; then
            echo "NEW package with capabilities: $pkg: $caps"
            failed=1
            continue
        fi
        # Check each capability in current for this package. Replacing the
        # commas with spaces lets word splitting yield one name per iteration.
        for cap in ${caps//,/ }; do
            if ! grep -qwF -- "$cap" <<<"$baseline_pkg_caps"; then
                echo "NEW capability for $pkg: $cap"
                failed=1
            fi
        done
    done < "$current"
    if [ "$failed" -eq 1 ]; then
        echo ""
        echo "FAILED: capabilities have grown."
        echo "If this is intentional, run '$0 generate' and commit the updated $BASELINE."
        exit 1
    fi
    echo "OK: no new capabilities detected."
 }
 # Dispatch on the first argument; anything other than "generate" or "check"
 # (including no argument) prints the usage line and exits with status 1.
 action="${1:-}"
 if [ "$action" = "generate" ]; then
    generate
 elif [ "$action" = "check" ]; then
    check
 else
    echo "Usage: $0 {generate|check}"
    exit 1
 fi
@@ -147,7 +147,7 @@ bench() {
    pushd "$dir"
    if [ "${replace}" != "" ]; then
-        find ./benchmark/ -iname '*.go' -exec sed -i -E "s|github.com/pelletier/go-toml/v2|${replace}|g" {} \;
+        find ./benchmark/ -iname '*.go' -exec sed -i -E "s|github.com/pelletier/go-toml/v2\"|${replace}\"|g" {} \;
        go get "${replace}"
    fi
@@ -195,6 +195,11 @@ for line in reversed(lines[2:]):
        "%.1fx" % (float(line[3])/v2),  # v1
        "%.1fx" % (float(line[7])/v2),  # bs
    ])
 if not results:
    print("No benchmark results to display.", file=sys.stderr)
    sys.exit(1)
 # move geomean to the end
 results.append(results[0])
 del results[0]
@@ -286,6 +286,27 @@ func TestDecodeError_Position(t *testing.T) {
 	}
 }
 // TestDecodeError_InvalidKeyStartAfterComment unmarshals a document whose
 // second line starts with '=' right after a comment line, and checks the
 // reported position (row 2, column 1), the error message, and the rendered
 // context of the resulting DecodeError.
 func TestDecodeError_InvalidKeyStartAfterComment(t *testing.T) {
 	input := []byte("# comment\n= \"value\"")
 	var dst map[string]string
 	err := Unmarshal(input, &dst)
 	assert.Error(t, err)
 	var decodeErr *DecodeError
 	if ok := errors.As(err, &decodeErr); !ok {
 		t.Fatal("error not in expected format")
 	}
 	line, column := decodeErr.Position()
 	assert.Equal(t, 2, line)
 	assert.Equal(t, 1, column)
 	assert.Equal(t, "toml: invalid character at start of key: =", decodeErr.Error())
 	assert.Equal(t, `1| # comment
 2| = "value"
 | ~ invalid character at start of key: =`, decodeErr.String())
 }
 func TestStrictErrorUnwrap(t *testing.T) {
 	fo := bytes.NewBufferString(`
 Missing = 1
@@ -704,15 +704,18 @@ func (enc *Encoder) encodeMap(b []byte, ctx encoderCtx, v reflect.Value) ([]byte
 	for iter.Next() {
 		v := iter.Value()
-		if isNil(v) {
+		// Handle nil values: convert nil pointers to zero value,
-			// For nil pointers, convert to zero value of the element type.
+		// skip nil interfaces and nil maps.
-			// This allows round-trip marshaling of maps with nil pointer values.
+		switch v.Kind() {
-			// For nil interfaces and nil maps, skip since we can't derive a type.
+		case reflect.Ptr:
-			if v.Kind() == reflect.Ptr {
+			if v.IsNil() {
 				v = reflect.Zero(v.Type().Elem())
-			} else {
+			}
 		case reflect.Interface, reflect.Map:
 			if v.IsNil() {
 				continue
 			}
 		default:
 		}
 		k, err := enc.keyToString(iter.Key())
@@ -936,7 +939,7 @@ func (enc *Encoder) encodeTable(b []byte, ctx encoderCtx, t table) ([]byte, erro
 		if shouldOmitEmpty(kv.Options, kv.Value) {
 			continue
 		}
-		if shouldOmitZero(kv.Options, kv.Value) {
+		if kv.Options.omitzero && shouldOmitZero(kv.Options, kv.Value) {
 			continue
 		}
 		hasNonEmptyKV = true
@@ -958,7 +961,7 @@ func (enc *Encoder) encodeTable(b []byte, ctx encoderCtx, t table) ([]byte, erro
 		if shouldOmitEmpty(table.Options, table.Value) {
 			continue
 		}
-		if shouldOmitZero(table.Options, table.Value) {
+		if table.Options.omitzero && shouldOmitZero(table.Options, table.Value) {
 			continue
 		}
 		if first {
@@ -995,7 +998,7 @@ func (enc *Encoder) encodeTableInline(b []byte, ctx encoderCtx, t table) ([]byte
 		if shouldOmitEmpty(kv.Options, kv.Value) {
 			continue
 		}
-		if shouldOmitZero(kv.Options, kv.Value) {
+		if kv.Options.omitzero && shouldOmitZero(kv.Options, kv.Value) {
 			continue
 		}
@@ -28,12 +28,16 @@ func (c *Iterator) Next() bool {
 	if c.nodes == nil {
 		return false
 	}
 	nodes := *c.nodes
 	if !c.started {
 		c.started = true
-	} else if c.idx >= 0 {
+	} else {
-		c.idx = (*c.nodes)[c.idx].next
+		idx := c.idx
 		if idx >= 0 && int(idx) < len(nodes) {
 			c.idx = nodes[idx].next
 		}
-	return c.idx >= 0 && int(c.idx) < len(*c.nodes)
+	}
 	return c.idx >= 0 && int(c.idx) < len(nodes)
 }
 // IsLast returns true if the current node of the iterator is the last
@@ -3,6 +3,7 @@ package unstable
 import (
 	"bytes"
 	"fmt"
 	"reflect"
 	"unicode"
 	"github.com/pelletier/go-toml/v2/internal/characters"
@@ -83,10 +84,22 @@ func (p *Parser) rangeOfToken(token, rest []byte) Range {
 }
 // subsliceOffset returns the byte offset of subslice b within p.data.
-// b must be a suffix (tail) of p.data.
+// b must share the same backing array as p.data.
 func (p *Parser) subsliceOffset(b []byte) int {
-	// b is a suffix of p.data, so its offset is len(p.data) - len(b)
+	if len(b) == 0 {
-	return len(p.data) - len(b)
+		// Most callers pass suffix slices, so preserve EOF behavior.
 		return len(p.data)
 	}
 	dataPtr := reflect.ValueOf(p.data).Pointer()
 	subPtr := reflect.ValueOf(b).Pointer()
 	offset := int(subPtr - dataPtr)
 	if offset < 0 || offset+len(b) > len(p.data) {
 		panic("subslice is not within parser input")
 	}
 	return offset
 }
 // Raw returns the slice corresponding to the bytes in the given range.
@@ -363,9 +376,10 @@ func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
 	p.builder.Chain(valRef, key)
 	p.builder.AttachChild(ref, valRef)
-	// Set Raw to span the entire key-value expression
+	// Set Raw to span the entire key-value expression.
-	node := p.builder.NodeAt(ref)
+	// Access the node directly in the slice to avoid the write barrier
-	node.Raw = p.rangeOfToken(startB[:len(startB)-len(b)], b)
+	// that NodeAt's nodes-pointer setup would trigger.
 	p.builder.tree.nodes[ref].Raw = p.rangeOfToken(startB[:len(startB)-len(b)], b)
 	return ref, b, err
 }
		`@@ -1 +0,0 @@`
			`github.com/pelletier/go-toml/v2: CAPABILITY_REFLECT, CAPABILITY_UNANALYZED, CAPABILITY_UNSAFE_POINTER`