Preserve original formatting in Unmarshaler by using raw byte ranges

Instead of reconstructing key-value lines from parsed components,
handleKeyValuesUnmarshaler now passes along the original raw bytes from
the document. This preserves:
- Whitespace around '=' (e.g., "key   =   value")
- String quoting style (basic vs literal)
- Number formats (hex, octal, binary)
- Inline table formatting

Changes:
- Add Raw range tracking to KeyValue expressions in parseKeyval
- Update handleKeyValuesUnmarshaler to use expr.Raw directly
- Remove keyNeedsQuoting helper (no longer needed)
- Add TestIssue873_FormattingPreservation test
- Update expected output in ExampleParser_comments
This commit is contained in:
Claude
2026-01-17 12:41:38 +00:00
parent 6c995ec13e
commit 8df3b65280
4 changed files with 74 additions and 55 deletions
+6 -49
View File
@@ -690,8 +690,8 @@ func (d *decoder) handleKeyValues(v reflect.Value) (reflect.Value, error) {
// and passes them to the Unmarshaler as raw TOML bytes. // and passes them to the Unmarshaler as raw TOML bytes.
func (d *decoder) handleKeyValuesUnmarshaler(u unstable.Unmarshaler) (reflect.Value, error) { func (d *decoder) handleKeyValuesUnmarshaler(u unstable.Unmarshaler) (reflect.Value, error) {
// Collect raw bytes from all key-value expressions for this table. // Collect raw bytes from all key-value expressions for this table.
// We build a valid TOML document by reconstructing each key-value line // We use the Raw field on each KeyValue expression to preserve the
// from the key names and the value's raw bytes. // original formatting (whitespace, quoting style, etc.) from the document.
var buf []byte var buf []byte
for d.nextExpr() { for d.nextExpr() {
@@ -706,38 +706,10 @@ func (d *decoder) handleKeyValuesUnmarshaler(u unstable.Unmarshaler) (reflect.Va
return reflect.Value{}, err return reflect.Value{}, err
} }
// Reconstruct the key-value line from the key(s) and value // Use the raw bytes from the original document to preserve formatting
keyIt := expr.Key() if expr.Raw.Length > 0 {
first := true raw := d.p.Raw(expr.Raw)
for keyIt.Next() { buf = append(buf, raw...)
if !first {
buf = append(buf, '.')
}
keyNode := keyIt.Node()
// Check if key needs quoting
if keyNeedsQuoting(keyNode.Data) {
buf = append(buf, '"')
buf = append(buf, keyNode.Data...)
buf = append(buf, '"')
} else {
buf = append(buf, keyNode.Data...)
}
first = false
}
buf = append(buf, " = "...)
// Get the raw value bytes
value := expr.Value()
if value != nil {
if value.Raw.Length > 0 {
// Use raw bytes from the original document
raw := d.p.Raw(value.Raw)
buf = append(buf, raw...)
} else {
// Some value types (like Bool) don't have Raw set,
// use Data which contains the value representation
buf = append(buf, value.Data...)
}
} }
buf = append(buf, '\n') buf = append(buf, '\n')
} }
@@ -749,21 +721,6 @@ func (d *decoder) handleKeyValuesUnmarshaler(u unstable.Unmarshaler) (reflect.Va
return reflect.Value{}, nil return reflect.Value{}, nil
} }
// keyNeedsQuoting reports whether key cannot be written as a bare TOML key
// and therefore has to be emitted in quoted form.
//
// An empty key always needs quoting. Otherwise quoting is required as soon as
// any byte falls outside the bare-key alphabet: A-Z, a-z, 0-9, '_' and '-'.
func keyNeedsQuoting(key []byte) bool {
	if len(key) == 0 {
		return true
	}
	for _, c := range key {
		switch {
		case 'A' <= c && c <= 'Z':
		case 'a' <= c && c <= 'z':
		case '0' <= c && c <= '9':
		case c == '_', c == '-':
		default:
			// First byte that is not allowed in a bare key settles it.
			return true
		}
	}
	return false
}
type ( type (
handlerFn func(key unstable.Iterator, v reflect.Value) (reflect.Value, error) handlerFn func(key unstable.Iterator, v reflect.Value) (reflect.Value, error)
valueMakerFn func() reflect.Value valueMakerFn func() reflect.Value
+55
View File
@@ -4975,3 +4975,58 @@ key = "value"
assert.Equal(t, []string{"key"}, (*cfg.Section).Keys) assert.Equal(t, []string{"key"}, (*cfg.Section).Keys)
assert.Equal(t, "value", (*cfg.Section).Values["key"]) assert.Equal(t, "value", (*cfg.Section).Values["key"])
} }
// formattingCapture is a test helper that records, as a string, the bytes
// handed to its UnmarshalTOML method so a test can inspect how the TOML was
// formatted.
type formattingCapture struct {
	// RawBytes holds the most recent input given to UnmarshalTOML.
	RawBytes string
}

// UnmarshalTOML stores a copy of data in RawBytes without interpreting it.
// It never fails and always returns nil.
func (fc *formattingCapture) UnmarshalTOML(data []byte) error {
	captured := string(data)
	fc.RawBytes = captured
	return nil
}
// TestIssue873_FormattingPreservation decodes a table into a type providing
// UnmarshalTOML and checks that the bytes it receives keep the document's
// original formatting.
func TestIssue873_FormattingPreservation(t *testing.T) {
	type Config struct {
		Section *formattingCapture `toml:"section"`
	}

	// Formatting styles that must survive unchanged:
	// - Extra spaces around '='
	// - Literal strings (single quotes)
	// - Hex numbers
	// - Inline tables
	//
	// key1 deliberately pads the '=' with several spaces; with a single space
	// on each side the spacing check below could not tell preserved input
	// apart from a normalized "key = value" rewrite.
	doc := `[section]
key1   =   "value with spaces"
key2 = 'literal string'
hex_val = 0xDEADBEEF
inline = { a = 1, b = 2 }
`

	var cfg Config
	err := toml.NewDecoder(bytes.NewReader([]byte(doc))).
		EnableUnmarshalerInterface().
		Decode(&cfg)
	assert.NoError(t, err)
	// Stop here instead of dereferencing a nil pointer further down.
	if cfg.Section == nil {
		t.Fatal("expected section to be populated")
	}

	// The raw bytes should preserve original formatting.
	raw := cfg.Section.RawBytes

	checks := []struct {
		want string // substring that must appear verbatim in raw
		msg  string // failure message format; receives raw as its argument
	}{
		// Extra spaces around '=' are preserved.
		{"key1   =   \"value with spaces\"", "Expected spacing to be preserved, got: %s"},
		// Literal string style is preserved.
		{"key2 = 'literal string'", "Expected literal string to be preserved, got: %s"},
		// Hex format is preserved.
		{"hex_val = 0xDEADBEEF", "Expected hex format to be preserved, got: %s"},
		// Inline table is preserved.
		{"inline = { a = 1, b = 2 }", "Expected inline table to be preserved, got: %s"},
	}
	for _, c := range checks {
		assert.True(t, strings.Contains(raw, c.want), c.msg, raw)
	}
}
+7
View File
@@ -328,6 +328,9 @@ func (p *Parser) parseStdTable(b []byte) (reference, []byte, error) {
func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) { func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
// keyval = key keyval-sep val // keyval = key keyval-sep val
// Track the start position for Raw range
startB := b
ref := p.builder.Push(Node{ ref := p.builder.Push(Node{
Kind: KeyValue, Kind: KeyValue,
}) })
@@ -360,6 +363,10 @@ func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
p.builder.Chain(valRef, key) p.builder.Chain(valRef, key)
p.builder.AttachChild(ref, valRef) p.builder.AttachChild(ref, valRef)
// Set Raw to span the entire key-value expression
node := p.builder.NodeAt(ref)
node.Raw = p.rangeOfToken(startB[:len(startB)-len(b)], b)
return ref, b, err return ref, b, err
} }
+6 -6
View File
@@ -539,7 +539,7 @@ key5 = [ # Next to start of inline array.
// --- // ---
// 6:1->6:22 (105->126) | Comment [# Above simple value.] // 6:1->6:22 (105->126) | Comment [# Above simple value.]
// --- // ---
// 1:1->1:1 (0->0) | KeyValue [] // 7:1->7:14 (127->140) | KeyValue []
// 7:7->7:14 (133->140) | String [value] // 7:7->7:14 (133->140) | String [value]
// 7:1->7:4 (127->130) | Key [key] // 7:1->7:4 (127->130) | Key [key]
// 7:15->7:38 (141->164) | Comment [# Next to simple value.] // 7:15->7:38 (141->164) | Comment [# Next to simple value.]
@@ -552,12 +552,12 @@ key5 = [ # Next to start of inline array.
// --- // ---
// 14:1->14:22 (252->273) | Comment [# Above inline table.] // 14:1->14:22 (252->273) | Comment [# Above inline table.]
// --- // ---
// 1:1->1:1 (0->0) | KeyValue [] // 15:1->15:50 (274->323) | KeyValue []
// 15:8->15:9 (281->282) | InlineTable [] // 15:8->15:9 (281->282) | InlineTable []
// 1:1->1:1 (0->0) | KeyValue [] // 15:10->15:23 (283->296) | KeyValue []
// 15:18->15:23 (291->296) | String [Tom] // 15:18->15:23 (291->296) | String [Tom]
// 15:10->15:15 (283->288) | Key [first] // 15:10->15:15 (283->288) | Key [first]
// 1:1->1:1 (0->0) | KeyValue [] // 15:25->15:48 (298->321) | KeyValue []
// 15:32->15:48 (305->321) | String [Preston-Werner] // 15:32->15:48 (305->321) | String [Preston-Werner]
// 15:25->15:29 (298->302) | Key [last] // 15:25->15:29 (298->302) | Key [last]
// 15:1->15:5 (274->278) | Key [name] // 15:1->15:5 (274->278) | Key [name]
@@ -567,7 +567,7 @@ key5 = [ # Next to start of inline array.
// --- // ---
// 18:1->18:15 (371->385) | Comment [# Above array.] // 18:1->18:15 (371->385) | Comment [# Above array.]
// --- // ---
// 1:1->1:1 (0->0) | KeyValue [] // 19:1->19:20 (386->405) | KeyValue []
// 1:1->1:1 (0->0) | Array [] // 1:1->1:1 (0->0) | Array []
// 19:11->19:12 (396->397) | Integer [1] // 19:11->19:12 (396->397) | Integer [1]
// 19:14->19:15 (399->400) | Integer [2] // 19:14->19:15 (399->400) | Integer [2]
@@ -579,7 +579,7 @@ key5 = [ # Next to start of inline array.
// --- // ---
// 22:1->22:26 (448->473) | Comment [# Above multi-line array.] // 22:1->22:26 (448->473) | Comment [# Above multi-line array.]
// --- // ---
// 1:1->1:1 (0->0) | KeyValue [] // 23:1->31:2 (474->694) | KeyValue []
// 1:1->1:1 (0->0) | Array [] // 1:1->1:1 (0->0) | Array []
// 23:10->23:42 (483->515) | Comment [# Next to start of inline array.] // 23:10->23:42 (483->515) | Comment [# Next to start of inline array.]
// 24:3->24:38 (518->553) | Comment [# Second line before array content.] // 24:3->24:38 (518->553) | Comment [# Second line before array content.]