wip parsing

This commit is contained in:
Thomas Pelletier
2021-02-05 14:47:24 -05:00
parent 0ee0fe7f7c
commit ca12c0670d
2 changed files with 270 additions and 3 deletions
+3 -3
View File
@@ -151,16 +151,16 @@ func scanWindowsNewline(b []byte) ([]byte, []byte, error) {
return b[:2], b[2:], nil
}
func scanWhitespace(b []byte) ([]byte, []byte, error) {
func scanWhitespace(b []byte) ([]byte, []byte) {
for i := 1; i < len(b); i++ {
switch b[i] {
case ' ', '\t':
continue
default:
return b[:i], b[i:], nil
return b[:i], b[i:]
}
}
return b, nil, nil
return b, nil
}
func scanComment(b []byte) ([]byte, []byte, error) {
+267
View File
@@ -1 +1,268 @@
package toml
import (
"encoding/hex"
"fmt"
"strings"
)
func parse(b []byte) error {
b, err := parseExpression(b)
if err != nil {
return err
}
for len(b) > 0 {
b, err = parseNewline(b)
if err != nil {
return err
}
b, err = parseExpression(b)
if err != nil {
return err
}
}
return nil
}
func parseNewline(b []byte) ([]byte, error) {
if b[0] == '\n' {
return b[1:], nil
}
if b[0] == '\r' {
_, rest, err := scanWindowsNewline(b)
return rest, err
}
return nil, fmt.Errorf("expected newline but got %#U", b[0])
}
func parseExpression(b []byte) ([]byte, error) {
//expression = ws [ comment ]
//expression =/ ws keyval ws [ comment ]
//expression =/ ws table ws [ comment ]
b = parseWhitespace(b)
if len(b) == 0 {
return b, nil
}
if b[0] == '#' {
_, rest, err := scanComment(b)
return rest, err
}
if b[0] == '[' {
// TODO: parse 'table'
} else {
rest, err := parseKeyval(b)
return rest, err
}
b = parseWhitespace(b)
if len(b) > 0 && b[0] == '#' {
_, rest, err := scanComment(b)
return rest, err
}
return b, nil
}
func parseKeyval(b []byte) ([]byte, error) {
//keyval = key keyval-sep val
b, err := parseKey(b)
if err != nil {
return nil, err
}
//keyval-sep = ws %x3D ws ; =
b = parseWhitespace(b)
b, err = expect('=', b)
if err != nil {
return nil, err
}
b = parseWhitespace(b)
return parseVal(b)
}
func parseVal(b []byte) ([]byte, error) {
// val = string / boolean / array / inline-table / date-time / float / integer
}
func parseKey(b []byte) ([]byte, error) {
//key = simple-key / dotted-key
//simple-key = quoted-key / unquoted-key
//
//unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
//quoted-key = basic-string / literal-string
//dotted-key = simple-key 1*( dot-sep simple-key )
//
//dot-sep = ws %x2E ws ; . Period
b, err := parseSimpleKey(b)
if err != nil {
return nil, err
}
for {
if len(b) > 0 && (b[0] == '.' || isWhitespace(b[0])) {
b = parseWhitespace(b)
b, err = expect('.', b)
if err != nil {
return nil, err
}
b = parseWhitespace(b)
b, err = parseSimpleKey(b)
if err != nil {
return nil, err
}
} else {
break
}
}
return b, nil
}
func isWhitespace(b byte) bool {
return b == ' ' || b == '\t'
}
func parseSimpleKey(b []byte) ([]byte, error) {
//simple-key = quoted-key / unquoted-key
//unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
//quoted-key = basic-string / literal-string
if len(b) == 0 {
return nil, unexpectedCharacter{b: b}
}
if b[0] == '\'' {
_, rest, err := scanLiteralString(b)
return rest, err
}
if b[0] == '"' {
_, rest, err := parseBasicString(b)
return rest, err
}
if isUnquotedKeyChar(b[0]) {
_, rest, err := scanUnquotedKey(b)
return rest, err
}
return nil, unexpectedCharacter{b: b}
}
func parseBasicString(b []byte) (string, []byte, error) {
//basic-string = quotation-mark *basic-char quotation-mark
//quotation-mark = %x22 ; "
//basic-char = basic-unescaped / escaped
//basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
//escaped = escape escape-seq-char
//escape-seq-char = %x22 ; " quotation mark U+0022
//escape-seq-char =/ %x5C ; \ reverse solidus U+005C
//escape-seq-char =/ %x62 ; b backspace U+0008
//escape-seq-char =/ %x66 ; f form feed U+000C
//escape-seq-char =/ %x6E ; n line feed U+000A
//escape-seq-char =/ %x72 ; r carriage return U+000D
//escape-seq-char =/ %x74 ; t tab U+0009
//escape-seq-char =/ %x75 4HEXDIG ; uXXXX U+XXXX
//escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX U+XXXXXXXX
token, rest, err := scanBasicString(b)
if err != nil {
return "", nil, err
}
var builder strings.Builder
// The scanner ensures that the token starts and ends with quotes and that
// escapes are balanced.
for i := 1; i < len(token)-1; i++ {
c := token[i]
if c == '\\' {
i++
c = token[i]
switch c {
case '"', '\\':
builder.WriteByte(c)
case 'b':
builder.WriteByte('\b')
case 'f':
builder.WriteByte('\f')
case 'n':
builder.WriteByte('\n')
case 'r':
builder.WriteByte('\r')
case 't':
builder.WriteByte('\t')
case 'u':
x, err := hexToString(token[i+1:len(token)-1], 4)
if err != nil {
return "", nil, err
}
builder.WriteString(x)
i += 4
case 'U':
x, err := hexToString(token[i+1:len(token)-1], 8)
if err != nil {
return "", nil, err
}
builder.WriteString(x)
i += 8
default:
return "", nil, fmt.Errorf("invalid escaped character: %#U", c)
}
} else {
builder.WriteByte(c)
}
}
return builder.String(), rest, nil
}
func hexToString(b []byte, length int) (string, error) {
if len(b) < length {
return "", fmt.Errorf("unicode point needs %d hex characters", length)
}
// TODO: slow
b, err := hex.DecodeString(string(b[:length]))
if err != nil {
return "", err
}
return string(b), nil
}
func parseWhitespace(b []byte) []byte {
//ws = *wschar
//wschar = %x20 ; Space
//wschar =/ %x09 ; Horizontal tab
_, rest := scanWhitespace(b)
return rest
}
func expect(x byte, b []byte) ([]byte, error) {
if len(b) == 0 || b[0] != x {
return nil, unexpectedCharacter{r: x, b: b}
}
return b[1:], nil
}
type unexpectedCharacter struct {
r byte
b []byte
}
func (u unexpectedCharacter) Error() string {
if len(u.b) == 0 {
return fmt.Sprintf("expected %#U, not EOF", u.r)
}
return fmt.Sprintf("expected %#U, not %#U", u.r, u.b[0])
}