Preserve original formatting in Unmarshaler by using raw byte ranges
Instead of reconstructing key-value lines from parsed components, handleKeyValuesUnmarshaler now uses the original raw bytes from the document.

This preserves:
- Whitespace around '=' (e.g., "key = value")
- String quoting style (basic vs literal)
- Number formats (hex, octal, binary)
- Inline table formatting

Changes:
- Add Raw range tracking to KeyValue expressions in parseKeyval
- Update handleKeyValuesUnmarshaler to use expr.Raw directly
- Remove keyNeedsQuoting helper (no longer needed)
- Add TestIssue873_FormattingPreservation test
- Update expected output in ExampleParser_comments
This commit is contained in:
+5
-48
@@ -690,8 +690,8 @@ func (d *decoder) handleKeyValues(v reflect.Value) (reflect.Value, error) {
|
|||||||
// and passes them to the Unmarshaler as raw TOML bytes.
|
// and passes them to the Unmarshaler as raw TOML bytes.
|
||||||
func (d *decoder) handleKeyValuesUnmarshaler(u unstable.Unmarshaler) (reflect.Value, error) {
|
func (d *decoder) handleKeyValuesUnmarshaler(u unstable.Unmarshaler) (reflect.Value, error) {
|
||||||
// Collect raw bytes from all key-value expressions for this table.
|
// Collect raw bytes from all key-value expressions for this table.
|
||||||
// We build a valid TOML document by reconstructing each key-value line
|
// We use the Raw field on each KeyValue expression to preserve the
|
||||||
// from the key names and the value's raw bytes.
|
// original formatting (whitespace, quoting style, etc.) from the document.
|
||||||
var buf []byte
|
var buf []byte
|
||||||
|
|
||||||
for d.nextExpr() {
|
for d.nextExpr() {
|
||||||
@@ -706,38 +706,10 @@ func (d *decoder) handleKeyValuesUnmarshaler(u unstable.Unmarshaler) (reflect.Va
|
|||||||
return reflect.Value{}, err
|
return reflect.Value{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reconstruct the key-value line from the key(s) and value
|
// Use the raw bytes from the original document to preserve formatting
|
||||||
keyIt := expr.Key()
|
if expr.Raw.Length > 0 {
|
||||||
first := true
|
raw := d.p.Raw(expr.Raw)
|
||||||
for keyIt.Next() {
|
|
||||||
if !first {
|
|
||||||
buf = append(buf, '.')
|
|
||||||
}
|
|
||||||
keyNode := keyIt.Node()
|
|
||||||
// Check if key needs quoting
|
|
||||||
if keyNeedsQuoting(keyNode.Data) {
|
|
||||||
buf = append(buf, '"')
|
|
||||||
buf = append(buf, keyNode.Data...)
|
|
||||||
buf = append(buf, '"')
|
|
||||||
} else {
|
|
||||||
buf = append(buf, keyNode.Data...)
|
|
||||||
}
|
|
||||||
first = false
|
|
||||||
}
|
|
||||||
buf = append(buf, " = "...)
|
|
||||||
|
|
||||||
// Get the raw value bytes
|
|
||||||
value := expr.Value()
|
|
||||||
if value != nil {
|
|
||||||
if value.Raw.Length > 0 {
|
|
||||||
// Use raw bytes from the original document
|
|
||||||
raw := d.p.Raw(value.Raw)
|
|
||||||
buf = append(buf, raw...)
|
buf = append(buf, raw...)
|
||||||
} else {
|
|
||||||
// Some value types (like Bool) don't have Raw set,
|
|
||||||
// use Data which contains the value representation
|
|
||||||
buf = append(buf, value.Data...)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
buf = append(buf, '\n')
|
buf = append(buf, '\n')
|
||||||
}
|
}
|
||||||
@@ -749,21 +721,6 @@ func (d *decoder) handleKeyValuesUnmarshaler(u unstable.Unmarshaler) (reflect.Va
|
|||||||
return reflect.Value{}, nil
|
return reflect.Value{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// keyNeedsQuoting returns true if the key needs to be quoted in TOML.
|
|
||||||
func keyNeedsQuoting(key []byte) bool {
|
|
||||||
if len(key) == 0 {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
for _, b := range key {
|
|
||||||
// Bare keys can only contain A-Za-z0-9_-
|
|
||||||
if (b < 'A' || b > 'Z') && (b < 'a' || b > 'z') &&
|
|
||||||
(b < '0' || b > '9') && b != '_' && b != '-' {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
type (
|
type (
|
||||||
handlerFn func(key unstable.Iterator, v reflect.Value) (reflect.Value, error)
|
handlerFn func(key unstable.Iterator, v reflect.Value) (reflect.Value, error)
|
||||||
valueMakerFn func() reflect.Value
|
valueMakerFn func() reflect.Value
|
||||||
|
|||||||
@@ -4975,3 +4975,58 @@ key = "value"
|
|||||||
assert.Equal(t, []string{"key"}, (*cfg.Section).Keys)
|
assert.Equal(t, []string{"key"}, (*cfg.Section).Keys)
|
||||||
assert.Equal(t, "value", (*cfg.Section).Values["key"])
|
assert.Equal(t, "value", (*cfg.Section).Values["key"])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// formattingCapture captures the raw TOML bytes to verify formatting preservation
|
||||||
|
type formattingCapture struct {
|
||||||
|
RawBytes string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *formattingCapture) UnmarshalTOML(data []byte) error {
|
||||||
|
f.RawBytes = string(data)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIssue873_FormattingPreservation(t *testing.T) {
|
||||||
|
type Config struct {
|
||||||
|
Section *formattingCapture `toml:"section"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test that various formatting styles are preserved:
|
||||||
|
// - Extra spaces around '='
|
||||||
|
// - Literal strings (single quotes)
|
||||||
|
// - Hex numbers
|
||||||
|
// - Inline tables
|
||||||
|
doc := `[section]
|
||||||
|
key1 = "value with spaces"
|
||||||
|
key2 = 'literal string'
|
||||||
|
hex_val = 0xDEADBEEF
|
||||||
|
inline = { a = 1, b = 2 }
|
||||||
|
`
|
||||||
|
|
||||||
|
var cfg Config
|
||||||
|
err := toml.NewDecoder(bytes.NewReader([]byte(doc))).
|
||||||
|
EnableUnmarshalerInterface().
|
||||||
|
Decode(&cfg)
|
||||||
|
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.True(t, cfg.Section != nil)
|
||||||
|
|
||||||
|
// The raw bytes should preserve original formatting
|
||||||
|
raw := cfg.Section.RawBytes
|
||||||
|
|
||||||
|
// Check that extra spaces around '=' are preserved
|
||||||
|
assert.True(t, strings.Contains(raw, "key1 = \"value with spaces\""),
|
||||||
|
"Expected spacing to be preserved, got: %s", raw)
|
||||||
|
|
||||||
|
// Check that literal string style is preserved
|
||||||
|
assert.True(t, strings.Contains(raw, "key2 = 'literal string'"),
|
||||||
|
"Expected literal string to be preserved, got: %s", raw)
|
||||||
|
|
||||||
|
// Check that hex format is preserved
|
||||||
|
assert.True(t, strings.Contains(raw, "hex_val = 0xDEADBEEF"),
|
||||||
|
"Expected hex format to be preserved, got: %s", raw)
|
||||||
|
|
||||||
|
// Check that inline table is preserved
|
||||||
|
assert.True(t, strings.Contains(raw, "inline = { a = 1, b = 2 }"),
|
||||||
|
"Expected inline table to be preserved, got: %s", raw)
|
||||||
|
}
|
||||||
|
|||||||
@@ -328,6 +328,9 @@ func (p *Parser) parseStdTable(b []byte) (reference, []byte, error) {
|
|||||||
|
|
||||||
func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
|
func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
|
||||||
// keyval = key keyval-sep val
|
// keyval = key keyval-sep val
|
||||||
|
// Track the start position for Raw range
|
||||||
|
startB := b
|
||||||
|
|
||||||
ref := p.builder.Push(Node{
|
ref := p.builder.Push(Node{
|
||||||
Kind: KeyValue,
|
Kind: KeyValue,
|
||||||
})
|
})
|
||||||
@@ -360,6 +363,10 @@ func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
|
|||||||
p.builder.Chain(valRef, key)
|
p.builder.Chain(valRef, key)
|
||||||
p.builder.AttachChild(ref, valRef)
|
p.builder.AttachChild(ref, valRef)
|
||||||
|
|
||||||
|
// Set Raw to span the entire key-value expression
|
||||||
|
node := p.builder.NodeAt(ref)
|
||||||
|
node.Raw = p.rangeOfToken(startB[:len(startB)-len(b)], b)
|
||||||
|
|
||||||
return ref, b, err
|
return ref, b, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -539,7 +539,7 @@ key5 = [ # Next to start of inline array.
|
|||||||
// ---
|
// ---
|
||||||
// 6:1->6:22 (105->126) | Comment [# Above simple value.]
|
// 6:1->6:22 (105->126) | Comment [# Above simple value.]
|
||||||
// ---
|
// ---
|
||||||
// 1:1->1:1 (0->0) | KeyValue []
|
// 7:1->7:14 (127->140) | KeyValue []
|
||||||
// 7:7->7:14 (133->140) | String [value]
|
// 7:7->7:14 (133->140) | String [value]
|
||||||
// 7:1->7:4 (127->130) | Key [key]
|
// 7:1->7:4 (127->130) | Key [key]
|
||||||
// 7:15->7:38 (141->164) | Comment [# Next to simple value.]
|
// 7:15->7:38 (141->164) | Comment [# Next to simple value.]
|
||||||
@@ -552,12 +552,12 @@ key5 = [ # Next to start of inline array.
|
|||||||
// ---
|
// ---
|
||||||
// 14:1->14:22 (252->273) | Comment [# Above inline table.]
|
// 14:1->14:22 (252->273) | Comment [# Above inline table.]
|
||||||
// ---
|
// ---
|
||||||
// 1:1->1:1 (0->0) | KeyValue []
|
// 15:1->15:50 (274->323) | KeyValue []
|
||||||
// 15:8->15:9 (281->282) | InlineTable []
|
// 15:8->15:9 (281->282) | InlineTable []
|
||||||
// 1:1->1:1 (0->0) | KeyValue []
|
// 15:10->15:23 (283->296) | KeyValue []
|
||||||
// 15:18->15:23 (291->296) | String [Tom]
|
// 15:18->15:23 (291->296) | String [Tom]
|
||||||
// 15:10->15:15 (283->288) | Key [first]
|
// 15:10->15:15 (283->288) | Key [first]
|
||||||
// 1:1->1:1 (0->0) | KeyValue []
|
// 15:25->15:48 (298->321) | KeyValue []
|
||||||
// 15:32->15:48 (305->321) | String [Preston-Werner]
|
// 15:32->15:48 (305->321) | String [Preston-Werner]
|
||||||
// 15:25->15:29 (298->302) | Key [last]
|
// 15:25->15:29 (298->302) | Key [last]
|
||||||
// 15:1->15:5 (274->278) | Key [name]
|
// 15:1->15:5 (274->278) | Key [name]
|
||||||
@@ -567,7 +567,7 @@ key5 = [ # Next to start of inline array.
|
|||||||
// ---
|
// ---
|
||||||
// 18:1->18:15 (371->385) | Comment [# Above array.]
|
// 18:1->18:15 (371->385) | Comment [# Above array.]
|
||||||
// ---
|
// ---
|
||||||
// 1:1->1:1 (0->0) | KeyValue []
|
// 19:1->19:20 (386->405) | KeyValue []
|
||||||
// 1:1->1:1 (0->0) | Array []
|
// 1:1->1:1 (0->0) | Array []
|
||||||
// 19:11->19:12 (396->397) | Integer [1]
|
// 19:11->19:12 (396->397) | Integer [1]
|
||||||
// 19:14->19:15 (399->400) | Integer [2]
|
// 19:14->19:15 (399->400) | Integer [2]
|
||||||
@@ -579,7 +579,7 @@ key5 = [ # Next to start of inline array.
|
|||||||
// ---
|
// ---
|
||||||
// 22:1->22:26 (448->473) | Comment [# Above multi-line array.]
|
// 22:1->22:26 (448->473) | Comment [# Above multi-line array.]
|
||||||
// ---
|
// ---
|
||||||
// 1:1->1:1 (0->0) | KeyValue []
|
// 23:1->31:2 (474->694) | KeyValue []
|
||||||
// 1:1->1:1 (0->0) | Array []
|
// 1:1->1:1 (0->0) | Array []
|
||||||
// 23:10->23:42 (483->515) | Comment [# Next to start of inline array.]
|
// 23:10->23:42 (483->515) | Comment [# Next to start of inline array.]
|
||||||
// 24:3->24:38 (518->553) | Comment [# Second line before array content.]
|
// 24:3->24:38 (518->553) | Comment [# Second line before array content.]
|
||||||
|
|||||||
Reference in New Issue
Block a user