From 8df3b65280ea0414e8d24e30fbf502aa79497f5f Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 17 Jan 2026 12:41:38 +0000 Subject: [PATCH] Preserve original formatting in Unmarshaler by using raw byte ranges Instead of reconstructing key-value lines from parsed components, now uses the original raw bytes from the document. This preserves: - Whitespace around '=' (e.g., "key = value") - String quoting style (basic vs literal) - Number formats (hex, octal, binary) - Inline table formatting Changes: - Add Raw range tracking to KeyValue expressions in parseKeyval - Update handleKeyValuesUnmarshaler to use expr.Raw directly - Remove keyNeedsQuoting helper (no longer needed) - Add TestIssue873_FormattingPreservation test - Update expected output in ExampleParser_comments --- unmarshaler.go | 55 +++++------------------------------------ unmarshaler_test.go | 55 +++++++++++++++++++++++++++++++++++++++++ unstable/parser.go | 7 ++++++ unstable/parser_test.go | 12 ++++----- 4 files changed, 74 insertions(+), 55 deletions(-) diff --git a/unmarshaler.go b/unmarshaler.go index 5dfa3b5..e7db812 100644 --- a/unmarshaler.go +++ b/unmarshaler.go @@ -690,8 +690,8 @@ func (d *decoder) handleKeyValues(v reflect.Value) (reflect.Value, error) { // and passes them to the Unmarshaler as raw TOML bytes. func (d *decoder) handleKeyValuesUnmarshaler(u unstable.Unmarshaler) (reflect.Value, error) { // Collect raw bytes from all key-value expressions for this table. - // We build a valid TOML document by reconstructing each key-value line - // from the key names and the value's raw bytes. + // We use the Raw field on each KeyValue expression to preserve the + // original formatting (whitespace, quoting style, etc.) from the document. var buf []byte for d.nextExpr() { @@ -706,38 +706,10 @@ func (d *decoder) handleKeyValuesUnmarshaler(u unstable.Unmarshaler) (reflect.Va return reflect.Value{}, err } - // Reconstruct the key-value line from the key(s) and value - keyIt := expr.Key() - first := true - for keyIt.Next() { - if !first { - buf = append(buf, '.') - } - keyNode := keyIt.Node() - // Check if key needs quoting - if keyNeedsQuoting(keyNode.Data) { - buf = append(buf, '"') - buf = append(buf, keyNode.Data...) - buf = append(buf, '"') - } else { - buf = append(buf, keyNode.Data...) - } - first = false - } - buf = append(buf, " = "...) - - // Get the raw value bytes - value := expr.Value() - if value != nil { - if value.Raw.Length > 0 { - // Use raw bytes from the original document - raw := d.p.Raw(value.Raw) - buf = append(buf, raw...) - } else { - // Some value types (like Bool) don't have Raw set, - // use Data which contains the value representation - buf = append(buf, value.Data...) - } + // Use the raw bytes from the original document to preserve formatting + if expr.Raw.Length > 0 { + raw := d.p.Raw(expr.Raw) + buf = append(buf, raw...) } buf = append(buf, '\n') } @@ -749,21 +721,6 @@ func (d *decoder) handleKeyValuesUnmarshaler(u unstable.Unmarshaler) (reflect.Va return reflect.Value{}, nil } -// keyNeedsQuoting returns true if the key needs to be quoted in TOML. -func keyNeedsQuoting(key []byte) bool { - if len(key) == 0 { - return true - } - for _, b := range key { - // Bare keys can only contain A-Za-z0-9_- - if (b < 'A' || b > 'Z') && (b < 'a' || b > 'z') && - (b < '0' || b > '9') && b != '_' && b != '-' { - return true - } - } - return false -} - type ( handlerFn func(key unstable.Iterator, v reflect.Value) (reflect.Value, error) valueMakerFn func() reflect.Value diff --git a/unmarshaler_test.go b/unmarshaler_test.go index 665f919..4430b66 100644 --- a/unmarshaler_test.go +++ b/unmarshaler_test.go @@ -4975,3 +4975,58 @@ key = "value" assert.Equal(t, []string{"key"}, (*cfg.Section).Keys) assert.Equal(t, "value", (*cfg.Section).Values["key"]) } + +// formattingCapture captures the raw TOML bytes to verify formatting preservation +type formattingCapture struct { + RawBytes string +} + +func (f *formattingCapture) UnmarshalTOML(data []byte) error { + f.RawBytes = string(data) + return nil +} + +func TestIssue873_FormattingPreservation(t *testing.T) { + type Config struct { + Section *formattingCapture `toml:"section"` + } + + // Test that various formatting styles are preserved: + // - Extra spaces around '=' + // - Literal strings (single quotes) + // - Hex numbers + // - Inline tables + doc := `[section] +key1 = "value with spaces" +key2 = 'literal string' +hex_val = 0xDEADBEEF +inline = { a = 1, b = 2 } +` + + var cfg Config + err := toml.NewDecoder(bytes.NewReader([]byte(doc))). + EnableUnmarshalerInterface(). + Decode(&cfg) + + assert.NoError(t, err) + assert.True(t, cfg.Section != nil) + + // The raw bytes should preserve original formatting + raw := cfg.Section.RawBytes + + // Check that extra spaces around '=' are preserved + assert.True(t, strings.Contains(raw, "key1 = \"value with spaces\""), + "Expected spacing to be preserved, got: %s", raw) + + // Check that literal string style is preserved + assert.True(t, strings.Contains(raw, "key2 = 'literal string'"), + "Expected literal string to be preserved, got: %s", raw) + + // Check that hex format is preserved + assert.True(t, strings.Contains(raw, "hex_val = 0xDEADBEEF"), + "Expected hex format to be preserved, got: %s", raw) + + // Check that inline table is preserved + assert.True(t, strings.Contains(raw, "inline = { a = 1, b = 2 }"), + "Expected inline table to be preserved, got: %s", raw) +} diff --git a/unstable/parser.go b/unstable/parser.go index d48e07f..e2c973b 100644 --- a/unstable/parser.go +++ b/unstable/parser.go @@ -328,6 +328,9 @@ func (p *Parser) parseStdTable(b []byte) (reference, []byte, error) { func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) { // keyval = key keyval-sep val + // Track the start position for Raw range + startB := b + ref := p.builder.Push(Node{ Kind: KeyValue, }) @@ -360,6 +363,10 @@ func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) { p.builder.Chain(valRef, key) p.builder.AttachChild(ref, valRef) + // Set Raw to span the entire key-value expression + node := p.builder.NodeAt(ref) + node.Raw = p.rangeOfToken(startB[:len(startB)-len(b)], b) + return ref, b, err } diff --git a/unstable/parser_test.go b/unstable/parser_test.go index 2f5f9ec..9726915 100644 --- a/unstable/parser_test.go +++ b/unstable/parser_test.go @@ -539,7 +539,7 @@ key5 = [ # Next to start of inline array. // --- // 6:1->6:22 (105->126) | Comment [# Above simple value.] // --- - // 1:1->1:1 (0->0) | KeyValue [] + // 7:1->7:14 (127->140) | KeyValue [] // 7:7->7:14 (133->140) | String [value] // 7:1->7:4 (127->130) | Key [key] // 7:15->7:38 (141->164) | Comment [# Next to simple value.] @@ -552,12 +552,12 @@ key5 = [ # Next to start of inline array. // --- // 14:1->14:22 (252->273) | Comment [# Above inline table.] // --- - // 1:1->1:1 (0->0) | KeyValue [] + // 15:1->15:50 (274->323) | KeyValue [] // 15:8->15:9 (281->282) | InlineTable [] - // 1:1->1:1 (0->0) | KeyValue [] + // 15:10->15:23 (283->296) | KeyValue [] // 15:18->15:23 (291->296) | String [Tom] // 15:10->15:15 (283->288) | Key [first] - // 1:1->1:1 (0->0) | KeyValue [] + // 15:25->15:48 (298->321) | KeyValue [] // 15:32->15:48 (305->321) | String [Preston-Werner] // 15:25->15:29 (298->302) | Key [last] // 15:1->15:5 (274->278) | Key [name] @@ -567,7 +567,7 @@ key5 = [ # Next to start of inline array. // --- // 18:1->18:15 (371->385) | Comment [# Above array.] // --- - // 1:1->1:1 (0->0) | KeyValue [] + // 19:1->19:20 (386->405) | KeyValue [] // 1:1->1:1 (0->0) | Array [] // 19:11->19:12 (396->397) | Integer [1] // 19:14->19:15 (399->400) | Integer [2] @@ -579,7 +579,7 @@ key5 = [ # Next to start of inline array. // --- // 22:1->22:26 (448->473) | Comment [# Above multi-line array.] // --- - // 1:1->1:1 (0->0) | KeyValue [] + // 23:1->31:2 (474->694) | KeyValue [] // 1:1->1:1 (0->0) | Array [] // 23:10->23:42 (483->515) | Comment [# Next to start of inline array.] // 24:3->24:38 (518->553) | Comment [# Second line before array content.]