Comments support in unstable/Parser (#860)
This commit is contained in:
+110
-19
@@ -49,8 +49,6 @@ func NewParserError(highlight []byte, format string, args ...interface{}) error
|
|||||||
// For performance reasons, go-toml doesn't make a copy of the input bytes to
|
// For performance reasons, go-toml doesn't make a copy of the input bytes to
|
||||||
// the parser. Make sure to copy all the bytes you need to outlive the slice
|
// the parser. Make sure to copy all the bytes you need to outlive the slice
|
||||||
// given to the parser.
|
// given to the parser.
|
||||||
//
|
|
||||||
// The parser doesn't provide nodes for comments yet, nor for whitespace.
|
|
||||||
type Parser struct {
|
type Parser struct {
|
||||||
data []byte
|
data []byte
|
||||||
builder builder
|
builder builder
|
||||||
@@ -58,6 +56,8 @@ type Parser struct {
|
|||||||
left []byte
|
left []byte
|
||||||
err error
|
err error
|
||||||
first bool
|
first bool
|
||||||
|
|
||||||
|
KeepComments bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// Data returns the slice provided to the last call to Reset.
|
// Data returns the slice provided to the last call to Reset.
|
||||||
@@ -142,6 +142,44 @@ func (p *Parser) Error() error {
|
|||||||
return p.err
|
return p.err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Position describes a position in the input.
type Position struct {
	// Offset is the number of bytes from the beginning of the input.
	Offset int
	// Line is the line number, starting at 1.
	Line int
	// Column is the column number, starting at 1.
	Column int
}
|
||||||
|
|
||||||
|
// Shape describes the position of a range in the input.
|
||||||
|
type Shape struct {
|
||||||
|
Start Position
|
||||||
|
End Position
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *Parser) position(b []byte) Position {
|
||||||
|
offset := danger.SubsliceOffset(p.data, b)
|
||||||
|
|
||||||
|
lead := p.data[:offset]
|
||||||
|
|
||||||
|
return Position{
|
||||||
|
Offset: offset,
|
||||||
|
Line: bytes.Count(lead, []byte{'\n'}) + 1,
|
||||||
|
Column: len(lead) - bytes.LastIndex(lead, []byte{'\n'}),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shape returns the shape of the given range in the input. Will
|
||||||
|
// panic if the range is not a subslice of the input.
|
||||||
|
func (p *Parser) Shape(r Range) Shape {
|
||||||
|
raw := p.Raw(r)
|
||||||
|
return Shape{
|
||||||
|
Start: p.position(raw),
|
||||||
|
End: p.position(raw[r.Length:]),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (p *Parser) parseNewline(b []byte) ([]byte, error) {
|
func (p *Parser) parseNewline(b []byte) ([]byte, error) {
|
||||||
if b[0] == '\n' {
|
if b[0] == '\n' {
|
||||||
return b[1:], nil
|
return b[1:], nil
|
||||||
@@ -155,6 +193,19 @@ func (p *Parser) parseNewline(b []byte) ([]byte, error) {
|
|||||||
return nil, NewParserError(b[0:1], "expected newline but got %#U", b[0])
|
return nil, NewParserError(b[0:1], "expected newline but got %#U", b[0])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (p *Parser) parseComment(b []byte) (reference, []byte, error) {
|
||||||
|
ref := invalidReference
|
||||||
|
data, rest, err := scanComment(b)
|
||||||
|
if p.KeepComments && err == nil {
|
||||||
|
ref = p.builder.Push(Node{
|
||||||
|
Kind: Comment,
|
||||||
|
Raw: p.Range(data),
|
||||||
|
Data: data,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return ref, rest, err
|
||||||
|
}
|
||||||
|
|
||||||
func (p *Parser) parseExpression(b []byte) (reference, []byte, error) {
|
func (p *Parser) parseExpression(b []byte) (reference, []byte, error) {
|
||||||
// expression = ws [ comment ]
|
// expression = ws [ comment ]
|
||||||
// expression =/ ws keyval ws [ comment ]
|
// expression =/ ws keyval ws [ comment ]
|
||||||
@@ -168,7 +219,7 @@ func (p *Parser) parseExpression(b []byte) (reference, []byte, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if b[0] == '#' {
|
if b[0] == '#' {
|
||||||
_, rest, err := scanComment(b)
|
ref, rest, err := p.parseComment(b)
|
||||||
return ref, rest, err
|
return ref, rest, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -190,7 +241,10 @@ func (p *Parser) parseExpression(b []byte) (reference, []byte, error) {
|
|||||||
b = p.parseWhitespace(b)
|
b = p.parseWhitespace(b)
|
||||||
|
|
||||||
if len(b) > 0 && b[0] == '#' {
|
if len(b) > 0 && b[0] == '#' {
|
||||||
_, rest, err := scanComment(b)
|
cref, rest, err := p.parseComment(b)
|
||||||
|
if cref != invalidReference {
|
||||||
|
p.builder.Chain(ref, cref)
|
||||||
|
}
|
||||||
return ref, rest, err
|
return ref, rest, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -471,17 +525,33 @@ func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
|
|||||||
Kind: Array,
|
Kind: Array,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// First indicates whether the parser is looking for the first element
|
||||||
|
// (non-comment) of the array.
|
||||||
first := true
|
first := true
|
||||||
|
|
||||||
var lastChild reference
|
lastChild := invalidReference
|
||||||
|
|
||||||
|
addChild := func(valueRef reference) {
|
||||||
|
if lastChild == invalidReference {
|
||||||
|
p.builder.AttachChild(parent, valueRef)
|
||||||
|
} else {
|
||||||
|
p.builder.Chain(lastChild, valueRef)
|
||||||
|
}
|
||||||
|
lastChild = valueRef
|
||||||
|
}
|
||||||
|
|
||||||
var err error
|
var err error
|
||||||
for len(b) > 0 {
|
for len(b) > 0 {
|
||||||
b, err = p.parseOptionalWhitespaceCommentNewline(b)
|
cref := invalidReference
|
||||||
|
cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return parent, nil, err
|
return parent, nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if cref != invalidReference {
|
||||||
|
addChild(cref)
|
||||||
|
}
|
||||||
|
|
||||||
if len(b) == 0 {
|
if len(b) == 0 {
|
||||||
return parent, nil, NewParserError(arrayStart[:1], "array is incomplete")
|
return parent, nil, NewParserError(arrayStart[:1], "array is incomplete")
|
||||||
}
|
}
|
||||||
@@ -496,10 +566,13 @@ func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
|
|||||||
}
|
}
|
||||||
b = b[1:]
|
b = b[1:]
|
||||||
|
|
||||||
b, err = p.parseOptionalWhitespaceCommentNewline(b)
|
cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return parent, nil, err
|
return parent, nil, err
|
||||||
}
|
}
|
||||||
|
if cref != invalidReference {
|
||||||
|
addChild(cref)
|
||||||
|
}
|
||||||
} else if !first {
|
} else if !first {
|
||||||
return parent, nil, NewParserError(b[0:1], "array elements must be separated by commas")
|
return parent, nil, NewParserError(b[0:1], "array elements must be separated by commas")
|
||||||
}
|
}
|
||||||
@@ -515,17 +588,16 @@ func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
|
|||||||
return parent, nil, err
|
return parent, nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if first {
|
addChild(valueRef)
|
||||||
p.builder.AttachChild(parent, valueRef)
|
|
||||||
} else {
|
|
||||||
p.builder.Chain(lastChild, valueRef)
|
|
||||||
}
|
|
||||||
lastChild = valueRef
|
|
||||||
|
|
||||||
b, err = p.parseOptionalWhitespaceCommentNewline(b)
|
cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return parent, nil, err
|
return parent, nil, err
|
||||||
}
|
}
|
||||||
|
if cref != invalidReference {
|
||||||
|
addChild(cref)
|
||||||
|
}
|
||||||
|
|
||||||
first = false
|
first = false
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -534,15 +606,34 @@ func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
|
|||||||
return parent, rest, err
|
return parent, rest, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) ([]byte, error) {
|
func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) (reference, []byte, error) {
|
||||||
|
rootCommentRef := invalidReference
|
||||||
|
latestCommentRef := invalidReference
|
||||||
|
|
||||||
|
addComment := func(ref reference) {
|
||||||
|
if rootCommentRef == invalidReference {
|
||||||
|
rootCommentRef = ref
|
||||||
|
} else if latestCommentRef == invalidReference {
|
||||||
|
p.builder.AttachChild(rootCommentRef, ref)
|
||||||
|
latestCommentRef = ref
|
||||||
|
} else {
|
||||||
|
p.builder.Chain(latestCommentRef, ref)
|
||||||
|
latestCommentRef = ref
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for len(b) > 0 {
|
for len(b) > 0 {
|
||||||
var err error
|
var err error
|
||||||
b = p.parseWhitespace(b)
|
b = p.parseWhitespace(b)
|
||||||
|
|
||||||
if len(b) > 0 && b[0] == '#' {
|
if len(b) > 0 && b[0] == '#' {
|
||||||
_, b, err = scanComment(b)
|
var ref reference
|
||||||
|
ref, b, err = p.parseComment(b)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return invalidReference, nil, err
|
||||||
|
}
|
||||||
|
if ref != invalidReference {
|
||||||
|
addComment(ref)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -553,14 +644,14 @@ func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) ([]byte, error)
|
|||||||
if b[0] == '\n' || b[0] == '\r' {
|
if b[0] == '\n' || b[0] == '\r' {
|
||||||
b, err = p.parseNewline(b)
|
b, err = p.parseNewline(b)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return invalidReference, nil, err
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return b, nil
|
return rootCommentRef, b, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *Parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) {
|
func (p *Parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) {
|
||||||
|
|||||||
@@ -448,6 +448,163 @@ func TestParser_AST_DateTimes(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This example demonstrates how to parse a TOML document while preserving
|
||||||
|
// comments. Comments are stored in the AST as Comment nodes. This example
|
||||||
|
// displays the structure of the full AST generated by the parser using the
|
||||||
|
// following structure:
|
||||||
|
//
|
||||||
|
// 1. Each root-level expression is separated by three dashes.
|
||||||
|
// 2. Bytes associated to a node are displayed in square brackets.
|
||||||
|
// 3. Siblings have the same indentation.
|
||||||
|
// 4. Children of a node are indented one level.
|
||||||
|
func ExampleParser_comments() {
|
||||||
|
doc := `# Top of the document comment.
|
||||||
|
# Optional, any amount of lines.
|
||||||
|
|
||||||
|
# Above table.
|
||||||
|
[table] # Next to table.
|
||||||
|
# Above simple value.
|
||||||
|
key = "value" # Next to simple value.
|
||||||
|
# Below simple value.
|
||||||
|
|
||||||
|
# Some comment alone.
|
||||||
|
|
||||||
|
# Multiple comments, on multiple lines.
|
||||||
|
|
||||||
|
# Above inline table.
|
||||||
|
name = { first = "Tom", last = "Preston-Werner" } # Next to inline table.
|
||||||
|
# Below inline table.
|
||||||
|
|
||||||
|
# Above array.
|
||||||
|
array = [ 1, 2, 3 ] # Next to one-line array.
|
||||||
|
# Below array.
|
||||||
|
|
||||||
|
# Above multi-line array.
|
||||||
|
key5 = [ # Next to start of inline array.
|
||||||
|
# Second line before array content.
|
||||||
|
1, # Next to first element.
|
||||||
|
# After first element.
|
||||||
|
# Before second element.
|
||||||
|
2,
|
||||||
|
3, # Next to last element
|
||||||
|
# After last element.
|
||||||
|
] # Next to end of array.
|
||||||
|
# Below multi-line array.
|
||||||
|
|
||||||
|
# Before array table.
|
||||||
|
[[products]] # Next to array table.
|
||||||
|
# After array table.
|
||||||
|
`
|
||||||
|
|
||||||
|
var printGeneric func(*Parser, int, *Node)
|
||||||
|
printGeneric = func(p *Parser, indent int, e *Node) {
|
||||||
|
if e == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
s := p.Shape(e.Raw)
|
||||||
|
x := fmt.Sprintf("%d:%d->%d:%d (%d->%d)", s.Start.Line, s.Start.Column, s.End.Line, s.End.Column, s.Start.Offset, s.End.Offset)
|
||||||
|
fmt.Printf("%-25s | %s%s [%s]\n", x, strings.Repeat(" ", indent), e.Kind, e.Data)
|
||||||
|
printGeneric(p, indent+1, e.Child())
|
||||||
|
printGeneric(p, indent, e.Next())
|
||||||
|
}
|
||||||
|
|
||||||
|
printTree := func(p *Parser) {
|
||||||
|
for p.NextExpression() {
|
||||||
|
e := p.Expression()
|
||||||
|
fmt.Println("---")
|
||||||
|
printGeneric(p, 0, e)
|
||||||
|
}
|
||||||
|
if err := p.Error(); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
p := &Parser{
|
||||||
|
KeepComments: true,
|
||||||
|
}
|
||||||
|
p.Reset([]byte(doc))
|
||||||
|
printTree(p)
|
||||||
|
|
||||||
|
// Output:
|
||||||
|
// ---
|
||||||
|
// 1:1->1:31 (0->30) | Comment [# Top of the document comment.]
|
||||||
|
// ---
|
||||||
|
// 2:1->2:33 (31->63) | Comment [# Optional, any amount of lines.]
|
||||||
|
// ---
|
||||||
|
// 4:1->4:15 (65->79) | Comment [# Above table.]
|
||||||
|
// ---
|
||||||
|
// 1:1->1:1 (0->0) | Table []
|
||||||
|
// 5:2->5:7 (81->86) | Key [table]
|
||||||
|
// 5:9->5:25 (88->104) | Comment [# Next to table.]
|
||||||
|
// ---
|
||||||
|
// 6:1->6:22 (105->126) | Comment [# Above simple value.]
|
||||||
|
// ---
|
||||||
|
// 1:1->1:1 (0->0) | KeyValue []
|
||||||
|
// 7:7->7:14 (133->140) | String [value]
|
||||||
|
// 7:1->7:4 (127->130) | Key [key]
|
||||||
|
// 7:15->7:38 (141->164) | Comment [# Next to simple value.]
|
||||||
|
// ---
|
||||||
|
// 8:1->8:22 (165->186) | Comment [# Below simple value.]
|
||||||
|
// ---
|
||||||
|
// 10:1->10:22 (188->209) | Comment [# Some comment alone.]
|
||||||
|
// ---
|
||||||
|
// 12:1->12:40 (211->250) | Comment [# Multiple comments, on multiple lines.]
|
||||||
|
// ---
|
||||||
|
// 14:1->14:22 (252->273) | Comment [# Above inline table.]
|
||||||
|
// ---
|
||||||
|
// 1:1->1:1 (0->0) | KeyValue []
|
||||||
|
// 15:8->15:9 (281->282) | InlineTable []
|
||||||
|
// 1:1->1:1 (0->0) | KeyValue []
|
||||||
|
// 15:18->15:23 (291->296) | String [Tom]
|
||||||
|
// 15:10->15:15 (283->288) | Key [first]
|
||||||
|
// 1:1->1:1 (0->0) | KeyValue []
|
||||||
|
// 15:32->15:48 (305->321) | String [Preston-Werner]
|
||||||
|
// 15:25->15:29 (298->302) | Key [last]
|
||||||
|
// 15:1->15:5 (274->278) | Key [name]
|
||||||
|
// 15:51->15:74 (324->347) | Comment [# Next to inline table.]
|
||||||
|
// ---
|
||||||
|
// 16:1->16:22 (348->369) | Comment [# Below inline table.]
|
||||||
|
// ---
|
||||||
|
// 18:1->18:15 (371->385) | Comment [# Above array.]
|
||||||
|
// ---
|
||||||
|
// 1:1->1:1 (0->0) | KeyValue []
|
||||||
|
// 1:1->1:1 (0->0) | Array []
|
||||||
|
// 1:1->1:1 (0->0) | Integer [1]
|
||||||
|
// 1:1->1:1 (0->0) | Integer [2]
|
||||||
|
// 1:1->1:1 (0->0) | Integer [3]
|
||||||
|
// 19:1->19:6 (386->391) | Key [array]
|
||||||
|
// 19:21->19:46 (406->431) | Comment [# Next to one-line array.]
|
||||||
|
// ---
|
||||||
|
// 20:1->20:15 (432->446) | Comment [# Below array.]
|
||||||
|
// ---
|
||||||
|
// 22:1->22:26 (448->473) | Comment [# Above multi-line array.]
|
||||||
|
// ---
|
||||||
|
// 1:1->1:1 (0->0) | KeyValue []
|
||||||
|
// 1:1->1:1 (0->0) | Array []
|
||||||
|
// 23:10->23:42 (483->515) | Comment [# Next to start of inline array.]
|
||||||
|
// 24:3->24:38 (518->553) | Comment [# Second line before array content.]
|
||||||
|
// 1:1->1:1 (0->0) | Integer [1]
|
||||||
|
// 25:6->25:30 (559->583) | Comment [# Next to first element.]
|
||||||
|
// 26:3->26:25 (586->608) | Comment [# After first element.]
|
||||||
|
// 27:3->27:27 (611->635) | Comment [# Before second element.]
|
||||||
|
// 1:1->1:1 (0->0) | Integer [2]
|
||||||
|
// 1:1->1:1 (0->0) | Integer [3]
|
||||||
|
// 29:6->29:28 (646->668) | Comment [# Next to last element]
|
||||||
|
// 30:3->30:24 (671->692) | Comment [# After last element.]
|
||||||
|
// 23:1->23:5 (474->478) | Key [key5]
|
||||||
|
// 31:3->31:26 (695->718) | Comment [# Next to end of array.]
|
||||||
|
// ---
|
||||||
|
// 32:1->32:26 (719->744) | Comment [# Below multi-line array.]
|
||||||
|
// ---
|
||||||
|
// 34:1->34:22 (746->767) | Comment [# Before array table.]
|
||||||
|
// ---
|
||||||
|
// 1:1->1:1 (0->0) | ArrayTable []
|
||||||
|
// 35:3->35:11 (770->778) | Key [products]
|
||||||
|
// 35:14->35:36 (781->803) | Comment [# Next to array table.]
|
||||||
|
// ---
|
||||||
|
// 36:1->36:21 (804->824) | Comment [# After array table.]
|
||||||
|
}
|
||||||
|
|
||||||
func ExampleParser() {
|
func ExampleParser() {
|
||||||
doc := `
|
doc := `
|
||||||
hello = "world"
|
hello = "world"
|
||||||
|
|||||||
@@ -151,7 +151,6 @@ func scanWhitespace(b []byte) ([]byte, []byte) {
|
|||||||
return b, b[len(b):]
|
return b, b[len(b):]
|
||||||
}
|
}
|
||||||
|
|
||||||
//nolint:unparam
|
|
||||||
func scanComment(b []byte) ([]byte, []byte, error) {
|
func scanComment(b []byte) ([]byte, []byte, error) {
|
||||||
// comment-start-symbol = %x23 ; #
|
// comment-start-symbol = %x23 ; #
|
||||||
// non-ascii = %x80-D7FF / %xE000-10FFFF
|
// non-ascii = %x80-D7FF / %xE000-10FFFF
|
||||||
|
|||||||
Reference in New Issue
Block a user