fix(unstable): correct Parser.Range for non-suffix highlights

Parser.Range used len(data)-len(highlight), which is only correct when the highlight is a suffix of the data. Single-byte highlights like b[0:1] are subslices of the remaining buffer rather than suffixes, so the computed offset wrongly pointed at the end of the document (issue #1047). Compute the offset from the slice pointers instead, as wrapDecodeError does. Add regression tests for unstable.Parser and toml.Unmarshal covering error positions and human-readable output. Co-authored-by: Thomas Pelletier <thomas@pelletier.dev>
Reduce marshal and unmarshal overhead (#1044)
2026-04-12 12:26:03 +00:00 · 2026-03-24 11:08:39 +00:00 · 2026-03-23 22:00:18 -04:00
12 changed files with 143 additions and 206 deletions
@@ -1,25 +0,0 @@
-name: capabilities
-on:
-  push:
-    branches:
-      - v2
-  pull_request:
-    branches:
-      - v2
-
-jobs:
-  check:
-    name: check capabilities
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v6
-        with:
-          fetch-depth: 0
-      - name: Setup go
-        uses: actions/setup-go@v6
-        with:
-          go-version: "1.26"
-      - name: Install capslock
-        run: go install github.com/google/capslock/cmd/capslock@latest
-      - name: Check for new capabilities
-        run: ./caps.sh check
@@ -53,14 +53,6 @@ go-toml is a TOML library for Go. The goal is to provide an easy-to-use and effi
 - Commit messages must explain **why** the change is needed
 - Keep messages clear and informative even if details are in the PR description

-### Capabilities
-
-go-toml tracks system-level capabilities using [capslock](https://github.com/google/capslock). The baseline is in `capability_baseline.txt` and CI enforces that it does not grow.
-
- **Do not introduce new capabilities.** PRs that increase the capability set (e.g., adding network access, subprocess execution, syscalls) are unlikely to be accepted.
- If a change causes the capabilities check to fail, do not update the baseline to make it pass. Instead, rethink the approach to avoid requiring new capabilities.
- To check locally: `./caps.sh check` (requires `capslock` installed via `go install github.com/google/capslock/cmd/capslock@latest`)
-
 ## Pull Request Checklist

 Before submitting:
@@ -69,5 +61,4 @@ Before submitting:
 2. No backward-incompatible changes (unless discussed)
 3. Relevant documentation added/updated
 4. No performance regression (verify with benchmarks)
-5. Capabilities are not increasing (`./caps.sh check`)
-6. Title is clear and understandable for changelog
+5. Title is clear and understandable for changelog
@@ -180,25 +180,6 @@ description. Pull requests that lower performance will receive more scrutiny.

 [benchstat]: https://pkg.go.dev/golang.org/x/perf/cmd/benchstat

-### Capabilities
-
-We use [capslock](https://github.com/google/capslock) to track what
-system-level capabilities (file access, network, syscalls, etc.) each package
-requires. The current baseline is in `capability_baseline.txt`. CI will fail if
-a change introduces a new capability.
-
-**Pull requests that increase the set of capabilities are unlikely to be
-accepted.** go-toml is a parsing library and should not need network access,
-subprocess execution, or other capabilities beyond what it already uses.
-
-If you believe a new capability is genuinely needed, discuss it in an issue
-first. To update the baseline after approval:
-
-```bash
-go install github.com/google/capslock/cmd/capslock@latest
-./caps.sh generate
-```
-
 ### Style

 Try to look around and follow the same format and structure as the rest of the
@@ -235,17 +235,17 @@ the AST level. See https://pkg.go.dev/github.com/pelletier/go-toml/v2/unstable.
 Execution time speedup compared to other Go TOML libraries:

 <table>
-	<thead>
-		<tr><th>Benchmark</th><th>go-toml v1</th><th>BurntSushi/toml</th></tr>
-	</thead>
-	<tbody>
-		<tr><td>Marshal/HugoFrontMatter-2</td><td>1.9x</td><td>2.2x</td></tr>
-		<tr><td>Marshal/ReferenceFile/map-2</td><td>1.7x</td><td>2.1x</td></tr>
-		<tr><td>Marshal/ReferenceFile/struct-2</td><td>2.2x</td><td>3.0x</td></tr>
-		<tr><td>Unmarshal/HugoFrontMatter-2</td><td>2.9x</td><td>2.7x</td></tr>
-		<tr><td>Unmarshal/ReferenceFile/map-2</td><td>2.6x</td><td>2.7x</td></tr>
-		<tr><td>Unmarshal/ReferenceFile/struct-2</td><td>4.6x</td><td>5.1x</td></tr>
-	 </tbody>
+    <thead>
+        <tr><th>Benchmark</th><th>go-toml v1</th><th>BurntSushi/toml</th></tr>
+    </thead>
+    <tbody>
+        <tr><td>Marshal/HugoFrontMatter-2</td><td>2.1x</td><td>2.0x</td></tr>
+        <tr><td>Marshal/ReferenceFile/map-2</td><td>2.0x</td><td>2.0x</td></tr>
+        <tr><td>Marshal/ReferenceFile/struct-2</td><td>2.3x</td><td>2.5x</td></tr>
+        <tr><td>Unmarshal/HugoFrontMatter-2</td><td>3.3x</td><td>2.8x</td></tr>
+        <tr><td>Unmarshal/ReferenceFile/map-2</td><td>2.9x</td><td>3.0x</td></tr>
+        <tr><td>Unmarshal/ReferenceFile/struct-2</td><td>4.8x</td><td>5.0x</td></tr>
+     </tbody>
 </table>
 <details><summary>See more</summary>
 <p>The table above has the results of the most common use-cases. The table below
@@ -253,22 +253,22 @@ contains the results of all benchmarks, including unrealistic ones. It is
 provided for completeness.</p>

 <table>
-	<thead>
-		<tr><th>Benchmark</th><th>go-toml v1</th><th>BurntSushi/toml</th></tr>
-	</thead>
-	<tbody>
-		<tr><td>Marshal/SimpleDocument/map-2</td><td>1.8x</td><td>2.7x</td></tr>
-		<tr><td>Marshal/SimpleDocument/struct-2</td><td>2.7x</td><td>3.8x</td></tr>
-		<tr><td>Unmarshal/SimpleDocument/map-2</td><td>3.8x</td><td>3.0x</td></tr>
-		<tr><td>Unmarshal/SimpleDocument/struct-2</td><td>5.6x</td><td>4.1x</td></tr>
-		<tr><td>UnmarshalDataset/example-2</td><td>3.0x</td><td>3.2x</td></tr>
-		<tr><td>UnmarshalDataset/code-2</td><td>2.3x</td><td>2.9x</td></tr>
-		<tr><td>UnmarshalDataset/twitter-2</td><td>2.6x</td><td>2.7x</td></tr>
-		<tr><td>UnmarshalDataset/citm_catalog-2</td><td>2.2x</td><td>2.3x</td></tr>
-		<tr><td>UnmarshalDataset/canada-2</td><td>1.8x</td><td>1.5x</td></tr>
-		<tr><td>UnmarshalDataset/config-2</td><td>4.1x</td><td>2.9x</td></tr>
-		<tr><td>geomean</td><td>2.7x</td><td>2.8x</td></tr>
-	 </tbody>
+    <thead>
+        <tr><th>Benchmark</th><th>go-toml v1</th><th>BurntSushi/toml</th></tr>
+    </thead>
+    <tbody>
+        <tr><td>Marshal/SimpleDocument/map-2</td><td>2.0x</td><td>2.9x</td></tr>
+        <tr><td>Marshal/SimpleDocument/struct-2</td><td>2.5x</td><td>3.6x</td></tr>
+        <tr><td>Unmarshal/SimpleDocument/map-2</td><td>4.2x</td><td>3.4x</td></tr>
+        <tr><td>Unmarshal/SimpleDocument/struct-2</td><td>5.9x</td><td>4.4x</td></tr>
+        <tr><td>UnmarshalDataset/example-2</td><td>3.2x</td><td>2.9x</td></tr>
+        <tr><td>UnmarshalDataset/code-2</td><td>2.4x</td><td>2.8x</td></tr>
+        <tr><td>UnmarshalDataset/twitter-2</td><td>2.7x</td><td>2.5x</td></tr>
+        <tr><td>UnmarshalDataset/citm_catalog-2</td><td>2.3x</td><td>2.3x</td></tr>
+        <tr><td>UnmarshalDataset/canada-2</td><td>1.9x</td><td>1.5x</td></tr>
+        <tr><td>UnmarshalDataset/config-2</td><td>5.4x</td><td>3.0x</td></tr>
+        <tr><td>geomean</td><td>2.9x</td><td>2.8x</td></tr>
+     </tbody>
 </table>
 <p>This table can be generated with <code>./ci.sh benchmark -a -html</code>.</p>
 </details>
@@ -1 +0,0 @@
-github.com/pelletier/go-toml/v2: CAPABILITY_REFLECT, CAPABILITY_UNANALYZED, CAPABILITY_UNSAFE_POINTER
@@ -1,101 +0,0 @@
-#!/usr/bin/env bash
-#
-# Generates or checks the capability baseline for go-toml.
-#
-# Usage:
-#   ./caps.sh generate   # regenerate capability_baseline.txt
-#   ./caps.sh check      # check that capabilities haven't grown
-#
-# Requires: go, capslock (go install github.com/google/capslock/cmd/capslock@latest)
-
-set -euo pipefail
-
-BASELINE="capability_baseline.txt"
-CAPSLOCK="${CAPSLOCK:-capslock}"
-
-# Capabilities that must never appear in any package.
-FORBIDDEN_CAPS=(
-    CAPABILITY_NETWORK
-    CAPABILITY_CGO
-    CAPABILITY_EXEC
-)
-
-capslock_to_baseline() {
-    "$CAPSLOCK" -packages=. -output=package -granularity=package \
-        | jq -r 'to_entries | sort_by(.key) | .[] | .key + ": " + (.value | sort | join(", "))'
-}
-
-generate() {
-    capslock_to_baseline > "$BASELINE"
-    echo "Wrote $BASELINE"
-}
-
-check() {
-    if [ ! -f "$BASELINE" ]; then
-        echo "ERROR: $BASELINE not found. Run '$0 generate' first."
-        exit 1
-    fi
-
-    current=$(mktemp)
-    trap 'rm -f "$current"' EXIT
-
-    capslock_to_baseline > "$current"
-
-    failed=0
-
-    # Check for forbidden capabilities in current output.
-    for cap in "${FORBIDDEN_CAPS[@]}"; do
-        if grep -q "$cap" "$current"; then
-            echo "FORBIDDEN capability found: $cap"
-            grep "$cap" "$current"
-            failed=1
-        fi
-    done
-
-    # Extract all unique capability names from baseline and current.
-    baseline_caps=$(grep -oE 'CAPABILITY_[A-Z_]+' "$BASELINE" | sort -u)
-    current_caps=$(grep -oE 'CAPABILITY_[A-Z_]+' "$current" | sort -u)
-
-    # Check for new capability names not in the baseline.
-    new_caps=$(comm -13 <(echo "$baseline_caps") <(echo "$current_caps"))
-    if [ -n "$new_caps" ]; then
-        echo "NEW capabilities detected (not in baseline):"
-        echo "$new_caps"
-        failed=1
-    fi
-
-    # Check for new per-package capabilities (a package gained a capability it didn't have before).
-    while IFS=': ' read -r pkg caps; do
-        baseline_pkg_caps=$(grep "^${pkg}:" "$BASELINE" 2>/dev/null | sed 's/^[^:]*: //' || true)
-        if [ -z "$baseline_pkg_caps" ]; then
-            echo "NEW package with capabilities: $pkg: $caps"
-            failed=1
-            continue
-        fi
-        # Check each capability in current for this package
-        for cap in $(echo "$caps" | tr ', ' '\n' | grep -v '^$'); do
-            if ! echo "$baseline_pkg_caps" | grep -q "$cap"; then
-                echo "NEW capability for $pkg: $cap"
-                failed=1
-            fi
-        done
-    done < "$current"
-
-    if [ "$failed" -eq 1 ]; then
-        echo ""
-        echo "FAILED: capabilities have grown."
-        echo "If this is intentional, run '$0 generate' and commit the updated $BASELINE."
-        exit 1
-    fi
-
-    echo "OK: no new capabilities detected."
-}
-
-case "${1:-}" in
-    generate) generate ;;
-    check)    check ;;
-    *)
-        echo "Usage: $0 {generate|check}"
-        exit 1
-        ;;
-esac
@@ -147,7 +147,7 @@ bench() {
    pushd "$dir"

    if [ "${replace}" != "" ]; then
-        find ./benchmark/ -iname '*.go' -exec sed -i -E "s|github.com/pelletier/go-toml/v2|${replace}|g" {} \;
+        find ./benchmark/ -iname '*.go' -exec sed -i -E "s|github.com/pelletier/go-toml/v2\"|${replace}\"|g" {} \;
        go get "${replace}"
    fi

@@ -195,6 +195,11 @@ for line in reversed(lines[2:]):
        "%.1fx" % (float(line[3])/v2),  # v1
        "%.1fx" % (float(line[7])/v2),  # bs
    ])
+
+if not results:
+    print("No benchmark results to display.", file=sys.stderr)
+    sys.exit(1)
+
 # move geomean to the end
 results.append(results[0])
 del results[0]
@@ -202,6 +202,38 @@ func TestDecodeError_Accessors(t *testing.T) {
 	assert.Equal(t, "bar", e.String())
 }

+func TestDecodeError_InvalidKeyStartAfterComment(t *testing.T) {
+	// Regression test for https://github.com/pelletier/go-toml/issues/1047: the
+	// "=" that starts an invalid keyval must be reported at line 2, column 1, and
+	// the human-readable context must point at that byte (not the document end).
+	doc := "# comment\n= \"value\"" // line 1 is a comment; line 2 begins with a bare '='
+
+	var v map[string]any
+	err := Unmarshal([]byte(doc), &v)
+	if err == nil {
+		t.Fatal("expected an error")
+	}
+
+	var derr *DecodeError
+	if !errors.As(err, &derr) { // the error must be (or wrap) a *DecodeError to expose position data
+		t.Fatalf("expected *DecodeError, got %T", err)
+	}
+
+	row, col := derr.Position() // expected to be row 2, column 1: the location of '='
+	if row != 2 || col != 1 {
+		t.Errorf("Position(): got row %d col %d, want row 2 col 1", row, col)
+	}
+
+	human := derr.String()
+	if !strings.Contains(human, `2| = "value"`) { // the offending line must be echoed with its line number
+		t.Errorf("human output should show the error line; got:\n%s", human)
+	}
+	// The caret line is padded to the width of the line-number column, so only the "| ~" part is matched here.
+	if !strings.Contains(human, "| ~ invalid character at start of key") {
+		t.Errorf("human output should underline '=' and include the parser message; got:\n%s", human)
+	}
+}
+
 func TestDecodeError_DuplicateContent(t *testing.T) {
 	// This test verifies that when the same content appears multiple times
 	// in the document, the error correctly points to the actual location
@@ -704,15 +704,18 @@ func (enc *Encoder) encodeMap(b []byte, ctx encoderCtx, v reflect.Value) ([]byte
 	for iter.Next() {
 		v := iter.Value()

-		if isNil(v) {
-			// For nil pointers, convert to zero value of the element type.
-			// This allows round-trip marshaling of maps with nil pointer values.
-			// For nil interfaces and nil maps, skip since we can't derive a type.
-			if v.Kind() == reflect.Ptr {
+		// Handle nil values: convert nil pointers to zero value,
+		// skip nil interfaces and nil maps.
+		switch v.Kind() {
+		case reflect.Ptr:
+			if v.IsNil() {
 				v = reflect.Zero(v.Type().Elem())
-			} else {
+			}
+		case reflect.Interface, reflect.Map:
+			if v.IsNil() {
 				continue
 			}
+		default:
 		}

 		k, err := enc.keyToString(iter.Key())
@@ -936,7 +939,7 @@ func (enc *Encoder) encodeTable(b []byte, ctx encoderCtx, t table) ([]byte, erro
 		if shouldOmitEmpty(kv.Options, kv.Value) {
 			continue
 		}
-		if shouldOmitZero(kv.Options, kv.Value) {
+		if kv.Options.omitzero && shouldOmitZero(kv.Options, kv.Value) {
 			continue
 		}
 		hasNonEmptyKV = true
@@ -958,7 +961,7 @@ func (enc *Encoder) encodeTable(b []byte, ctx encoderCtx, t table) ([]byte, erro
 		if shouldOmitEmpty(table.Options, table.Value) {
 			continue
 		}
-		if shouldOmitZero(table.Options, table.Value) {
+		if table.Options.omitzero && shouldOmitZero(table.Options, table.Value) {
 			continue
 		}
 		if first {
@@ -995,7 +998,7 @@ func (enc *Encoder) encodeTableInline(b []byte, ctx encoderCtx, t table) ([]byte
 		if shouldOmitEmpty(kv.Options, kv.Value) {
 			continue
 		}
-		if shouldOmitZero(kv.Options, kv.Value) {
+		if kv.Options.omitzero && shouldOmitZero(kv.Options, kv.Value) {
 			continue
 		}

@@ -28,12 +28,16 @@ func (c *Iterator) Next() bool {
 	if c.nodes == nil {
 		return false
 	}
+	nodes := *c.nodes
 	if !c.started {
 		c.started = true
-	} else if c.idx >= 0 {
-		c.idx = (*c.nodes)[c.idx].next
+	} else {
+		idx := c.idx
+		if idx >= 0 && int(idx) < len(nodes) {
+			c.idx = nodes[idx].next
+		}
 	}
-	return c.idx >= 0 && int(c.idx) < len(*c.nodes)
+	return c.idx >= 0 && int(c.idx) < len(nodes)
 }

 // IsLast returns true if the current node of the iterator is the last
@@ -3,6 +3,7 @@ package unstable
 import (
 	"bytes"
 	"fmt"
+	"reflect"
 	"unicode"

 	"github.com/pelletier/go-toml/v2/internal/characters"
@@ -69,8 +70,8 @@ func (p *Parser) Data() []byte {
 // panics.
 func (p *Parser) Range(b []byte) Range {
 	return Range{
-		Offset: uint32(p.subsliceOffset(b)), //nolint:gosec // TOML documents are small
-		Length: uint32(len(b)),              //nolint:gosec // TOML documents are small
+		Offset: uint32(subsliceOffset(p.data, b)), //nolint:gosec // TOML documents are small
+		Length: uint32(len(b)),                    //nolint:gosec // TOML documents are small
 	}
 }

@@ -82,11 +83,21 @@ func (p *Parser) rangeOfToken(token, rest []byte) Range {
 	return Range{Offset: uint32(offset), Length: uint32(len(token))} //nolint:gosec // TOML documents are small
 }

-// subsliceOffset returns the byte offset of subslice b within p.data.
-// b must be a suffix (tail) of p.data.
-func (p *Parser) subsliceOffset(b []byte) int {
-	// b is a suffix of p.data, so its offset is len(p.data) - len(b)
-	return len(p.data) - len(b)
+// subsliceOffset returns the byte offset of subslice b within data, computed as the difference between the two slices' data pointers.
+// b must share the same backing array as data (any subslice of data); an empty b yields 0, and an out-of-range start panics.
+func subsliceOffset(data, b []byte) int {
+	if len(b) == 0 { // empty slice: report offset 0 without comparing pointers
+		return 0
+	}
+
+	dataPtr := reflect.ValueOf(data).Pointer()
+	bPtr := reflect.ValueOf(b).Pointer()
+
+	offset := int(bPtr - dataPtr)
+	if offset < 0 || offset > len(data) { // NOTE(review): validates only where b starts; b's length is not compared against len(data)
+		panic("subslice is not within data")
+	}
+	return offset
+}

 // Raw returns the slice corresponding to the bytes in the given range.
@@ -363,9 +374,10 @@ func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
 	p.builder.Chain(valRef, key)
 	p.builder.AttachChild(ref, valRef)

-	// Set Raw to span the entire key-value expression
-	node := p.builder.NodeAt(ref)
-	node.Raw = p.rangeOfToken(startB[:len(startB)-len(b)], b)
+	// Set Raw to span the entire key-value expression.
+	// Access the node directly in the slice to avoid the write barrier
+	// that NodeAt's nodes-pointer setup would trigger.
+	p.builder.tree.nodes[ref].Raw = p.rangeOfToken(startB[:len(startB)-len(b)], b)

 	return ref, b, err
 }
@@ -0,0 +1,36 @@
+package unstable
+
+import (
+	"errors"
+	"testing"
+)
+
+// TestParser_Range_HighlightAfterComment is a regression test for https://github.com/pelletier/go-toml/issues/1047:
+// Parser.Range must report the highlight's real offset within the input, not len(data)-len(slice).
+func TestParser_Range_HighlightAfterComment(t *testing.T) {
+	input := []byte("# comment\n= \"value\"") // line 2 starts with a bare '=', which the test expects to be rejected
+
+	var p Parser
+	p.Reset(input)
+	for p.NextExpression() { // consume expressions until NextExpression returns false
+	}
+	err := p.Error()
+	if err == nil {
+		t.Fatal("expected an error")
+	}
+
+	var perr *ParserError
+	if !errors.As(err, &perr) { // the highlight slice is carried by *ParserError
+		t.Fatalf("expected *ParserError, got %T", err)
+	}
+
+	r := p.Range(perr.Highlight)
+	shape := p.Shape(r)
+
+	if r.Offset != 10 { // "# comment\n" occupies bytes 0-9, so '=' sits at byte 10
+		t.Errorf("Range offset: got %d, want 10", r.Offset)
+	}
+	if shape.Start.Line != 2 || shape.Start.Column != 1 { // same byte expressed as line 2, column 1
+		t.Errorf("position: got %d:%d, want 2:1", shape.Start.Line, shape.Start.Column)
+	}
+}
				`@@ -1 +0,0 @@`
				`github.com/pelletier/go-toml/v2: CAPABILITY_REFLECT, CAPABILITY_UNANALYZED, CAPABILITY_UNSAFE_POINTER`