Preserve original formatting in Unmarshaler by using raw byte ranges
Instead of reconstructing key-value lines from parsed components, the decoder now uses the original raw bytes from the document.

This preserves:
- Whitespace around '=' (e.g., "key = value")
- String quoting style (basic vs literal)
- Number formats (hex, octal, binary)
- Inline table formatting

Changes:
- Add Raw range tracking to KeyValue expressions in parseKeyval
- Update handleKeyValuesUnmarshaler to use expr.Raw directly
- Remove keyNeedsQuoting helper (no longer needed)
- Add TestIssue873_FormattingPreservation test
- Update expected output in ExampleParser_comments
This commit is contained in:
+6
-49
@@ -690,8 +690,8 @@ func (d *decoder) handleKeyValues(v reflect.Value) (reflect.Value, error) {
|
||||
// and passes them to the Unmarshaler as raw TOML bytes.
|
||||
func (d *decoder) handleKeyValuesUnmarshaler(u unstable.Unmarshaler) (reflect.Value, error) {
|
||||
// Collect raw bytes from all key-value expressions for this table.
|
||||
// We build a valid TOML document by reconstructing each key-value line
|
||||
// from the key names and the value's raw bytes.
|
||||
// We use the Raw field on each KeyValue expression to preserve the
|
||||
// original formatting (whitespace, quoting style, etc.) from the document.
|
||||
var buf []byte
|
||||
|
||||
for d.nextExpr() {
|
||||
@@ -706,38 +706,10 @@ func (d *decoder) handleKeyValuesUnmarshaler(u unstable.Unmarshaler) (reflect.Va
|
||||
return reflect.Value{}, err
|
||||
}
|
||||
|
||||
// Reconstruct the key-value line from the key(s) and value
|
||||
keyIt := expr.Key()
|
||||
first := true
|
||||
for keyIt.Next() {
|
||||
if !first {
|
||||
buf = append(buf, '.')
|
||||
}
|
||||
keyNode := keyIt.Node()
|
||||
// Check if key needs quoting
|
||||
if keyNeedsQuoting(keyNode.Data) {
|
||||
buf = append(buf, '"')
|
||||
buf = append(buf, keyNode.Data...)
|
||||
buf = append(buf, '"')
|
||||
} else {
|
||||
buf = append(buf, keyNode.Data...)
|
||||
}
|
||||
first = false
|
||||
}
|
||||
buf = append(buf, " = "...)
|
||||
|
||||
// Get the raw value bytes
|
||||
value := expr.Value()
|
||||
if value != nil {
|
||||
if value.Raw.Length > 0 {
|
||||
// Use raw bytes from the original document
|
||||
raw := d.p.Raw(value.Raw)
|
||||
buf = append(buf, raw...)
|
||||
} else {
|
||||
// Some value types (like Bool) don't have Raw set,
|
||||
// use Data which contains the value representation
|
||||
buf = append(buf, value.Data...)
|
||||
}
|
||||
// Use the raw bytes from the original document to preserve formatting
|
||||
if expr.Raw.Length > 0 {
|
||||
raw := d.p.Raw(expr.Raw)
|
||||
buf = append(buf, raw...)
|
||||
}
|
||||
buf = append(buf, '\n')
|
||||
}
|
||||
@@ -749,21 +721,6 @@ func (d *decoder) handleKeyValuesUnmarshaler(u unstable.Unmarshaler) (reflect.Va
|
||||
return reflect.Value{}, nil
|
||||
}
|
||||
|
||||
// keyNeedsQuoting reports whether key must be written as a quoted key
// rather than a bare key when emitted as TOML.
//
// It returns true when key is empty or when any byte of key falls outside
// the bare-key alphabet A-Z, a-z, 0-9, '_' and '-'. The check is done per
// byte, so every byte >= 0x80 (i.e. any non-ASCII content) also yields true.
// It returns false only when every byte is in the bare-key alphabet.
|
||||
func keyNeedsQuoting(key []byte) bool {
|
||||
// An empty key has no bare representation, so it is always quoted.
if len(key) == 0 {
|
||||
return true
|
||||
}
|
||||
for _, b := range key {
|
||||
// Bare keys can only contain A-Za-z0-9_-
|
||||
// The condition below is true when b is in none of the allowed ranges.
if (b < 'A' || b > 'Z') && (b < 'a' || b > 'z') &&
|
||||
(b < '0' || b > '9') && b != '_' && b != '-' {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
type (
|
||||
handlerFn func(key unstable.Iterator, v reflect.Value) (reflect.Value, error)
|
||||
valueMakerFn func() reflect.Value
|
||||
|
||||
@@ -4975,3 +4975,58 @@ key = "value"
|
||||
assert.Equal(t, []string{"key"}, (*cfg.Section).Keys)
|
||||
assert.Equal(t, "value", (*cfg.Section).Values["key"])
|
||||
}
|
||||
|
||||
// formattingCapture is a test helper that keeps the bytes handed to its
// UnmarshalTOML method, so a test can inspect exactly what the decoder
// passed in and verify that the original formatting was preserved.
|
||||
type formattingCapture struct {
|
||||
// RawBytes holds the data received by UnmarshalTOML, converted to a string.
RawBytes string
|
||||
}
|
||||
|
||||
// UnmarshalTOML stores data verbatim (as a string) in f.RawBytes and always
// returns nil; it performs no parsing or validation of data.
// NOTE(review): presumably this is what makes *formattingCapture satisfy the
// decoder's Unmarshaler interface — the interface is declared outside this
// view, confirm there.
func (f *formattingCapture) UnmarshalTOML(data []byte) error {
|
||||
f.RawBytes = string(data)
|
||||
return nil
|
||||
}
|
||||
|
||||
// TestIssue873_FormattingPreservation decodes a document containing a
// [section] table into a struct whose Section field is a *formattingCapture,
// with the Unmarshaler interface enabled on the decoder, and then checks
// that the bytes captured by the helper contain each key-value line exactly
// as it was written in the document.
func TestIssue873_FormattingPreservation(t *testing.T) {
|
||||
type Config struct {
|
||||
Section *formattingCapture `toml:"section"`
|
||||
}
|
||||
|
||||
// Test that various formatting styles are preserved:
|
||||
// - Extra spaces around '='
|
||||
// - Literal strings (single quotes)
|
||||
// - Hex numbers
|
||||
// - Inline tables
|
||||
// NOTE(review): as rendered here, every line of the document below uses a
// single space on each side of '='. If that matches the real file, the
// "extra spaces around '='" case is not actually exercised — confirm
// against the repository (whitespace may have been collapsed in this view).
doc := `[section]
|
||||
key1 = "value with spaces"
|
||||
key2 = 'literal string'
|
||||
hex_val = 0xDEADBEEF
|
||||
inline = { a = 1, b = 2 }
|
||||
`
|
||||
|
||||
var cfg Config
|
||||
err := toml.NewDecoder(bytes.NewReader([]byte(doc))).
|
||||
EnableUnmarshalerInterface().
|
||||
Decode(&cfg)
|
||||
|
||||
assert.NoError(t, err)
|
||||
// Section must have been allocated by the decoder before RawBytes is read.
assert.True(t, cfg.Section != nil)
|
||||
|
||||
// The raw bytes should preserve original formatting
|
||||
raw := cfg.Section.RawBytes
|
||||
|
||||
// Check that extra spaces around '=' are preserved
|
||||
assert.True(t, strings.Contains(raw, "key1 = \"value with spaces\""),
|
||||
"Expected spacing to be preserved, got: %s", raw)
|
||||
|
||||
// Check that literal string style is preserved
|
||||
assert.True(t, strings.Contains(raw, "key2 = 'literal string'"),
|
||||
"Expected literal string to be preserved, got: %s", raw)
|
||||
|
||||
// Check that hex format is preserved
|
||||
assert.True(t, strings.Contains(raw, "hex_val = 0xDEADBEEF"),
|
||||
"Expected hex format to be preserved, got: %s", raw)
|
||||
|
||||
// Check that inline table is preserved
|
||||
assert.True(t, strings.Contains(raw, "inline = { a = 1, b = 2 }"),
|
||||
"Expected inline table to be preserved, got: %s", raw)
|
||||
|
||||
}
|
||||
|
||||
@@ -328,6 +328,9 @@ func (p *Parser) parseStdTable(b []byte) (reference, []byte, error) {
|
||||
|
||||
func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
|
||||
// keyval = key keyval-sep val
|
||||
// Track the start position for Raw range
|
||||
startB := b
|
||||
|
||||
ref := p.builder.Push(Node{
|
||||
Kind: KeyValue,
|
||||
})
|
||||
@@ -360,6 +363,10 @@ func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
|
||||
p.builder.Chain(valRef, key)
|
||||
p.builder.AttachChild(ref, valRef)
|
||||
|
||||
// Set Raw to span the entire key-value expression
|
||||
node := p.builder.NodeAt(ref)
|
||||
node.Raw = p.rangeOfToken(startB[:len(startB)-len(b)], b)
|
||||
|
||||
return ref, b, err
|
||||
}
|
||||
|
||||
|
||||
@@ -539,7 +539,7 @@ key5 = [ # Next to start of inline array.
|
||||
// ---
|
||||
// 6:1->6:22 (105->126) | Comment [# Above simple value.]
|
||||
// ---
|
||||
// 1:1->1:1 (0->0) | KeyValue []
|
||||
// 7:1->7:14 (127->140) | KeyValue []
|
||||
// 7:7->7:14 (133->140) | String [value]
|
||||
// 7:1->7:4 (127->130) | Key [key]
|
||||
// 7:15->7:38 (141->164) | Comment [# Next to simple value.]
|
||||
@@ -552,12 +552,12 @@ key5 = [ # Next to start of inline array.
|
||||
// ---
|
||||
// 14:1->14:22 (252->273) | Comment [# Above inline table.]
|
||||
// ---
|
||||
// 1:1->1:1 (0->0) | KeyValue []
|
||||
// 15:1->15:50 (274->323) | KeyValue []
|
||||
// 15:8->15:9 (281->282) | InlineTable []
|
||||
// 1:1->1:1 (0->0) | KeyValue []
|
||||
// 15:10->15:23 (283->296) | KeyValue []
|
||||
// 15:18->15:23 (291->296) | String [Tom]
|
||||
// 15:10->15:15 (283->288) | Key [first]
|
||||
// 1:1->1:1 (0->0) | KeyValue []
|
||||
// 15:25->15:48 (298->321) | KeyValue []
|
||||
// 15:32->15:48 (305->321) | String [Preston-Werner]
|
||||
// 15:25->15:29 (298->302) | Key [last]
|
||||
// 15:1->15:5 (274->278) | Key [name]
|
||||
@@ -567,7 +567,7 @@ key5 = [ # Next to start of inline array.
|
||||
// ---
|
||||
// 18:1->18:15 (371->385) | Comment [# Above array.]
|
||||
// ---
|
||||
// 1:1->1:1 (0->0) | KeyValue []
|
||||
// 19:1->19:20 (386->405) | KeyValue []
|
||||
// 1:1->1:1 (0->0) | Array []
|
||||
// 19:11->19:12 (396->397) | Integer [1]
|
||||
// 19:14->19:15 (399->400) | Integer [2]
|
||||
@@ -579,7 +579,7 @@ key5 = [ # Next to start of inline array.
|
||||
// ---
|
||||
// 22:1->22:26 (448->473) | Comment [# Above multi-line array.]
|
||||
// ---
|
||||
// 1:1->1:1 (0->0) | KeyValue []
|
||||
// 23:1->31:2 (474->694) | KeyValue []
|
||||
// 1:1->1:1 (0->0) | Array []
|
||||
// 23:10->23:42 (483->515) | Comment [# Next to start of inline array.]
|
||||
// 24:3->24:38 (518->553) | Comment [# Second line before array content.]
|
||||
|
||||
Reference in New Issue
Block a user