diff --git a/scanner.go b/scanner.go
index 12a8d57..4bd0291 100644
--- a/scanner.go
+++ b/scanner.go
@@ -151,16 +151,16 @@ func scanWindowsNewline(b []byte) ([]byte, []byte, error) {
 	return b[:2], b[2:], nil
 }
 
-func scanWhitespace(b []byte) ([]byte, []byte, error) {
+func scanWhitespace(b []byte) ([]byte, []byte) {
 	for i := 1; i < len(b); i++ {
 		switch b[i] {
 		case ' ', '\t':
 			continue
 		default:
-			return b[:i], b[i:], nil
+			return b[:i], b[i:]
 		}
 	}
-	return b, nil, nil
+	return b, nil
 }
 
 func scanComment(b []byte) ([]byte, []byte, error) {
diff --git a/toml.go b/toml.go
index f9fa173..32c03ad 100644
--- a/toml.go
+++ b/toml.go
@@ -1 +1,323 @@
 package toml
+
+import (
+	"encoding/hex"
+	"fmt"
+	"strings"
+)
+
+// parse checks that b is a sequence of TOML expressions separated by
+// newlines. It returns the first error encountered, or nil once all of b
+// has been consumed.
+func parse(b []byte) error {
+	b, err := parseExpression(b)
+	if err != nil {
+		return err
+	}
+	for len(b) > 0 {
+		b, err = parseNewline(b)
+		if err != nil {
+			return err
+		}
+
+		b, err = parseExpression(b)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// parseNewline consumes one "\n" or "\r\n" at the start of b and returns
+// the bytes that follow it.
+func parseNewline(b []byte) ([]byte, error) {
+	if len(b) == 0 {
+		return nil, fmt.Errorf("expected newline but got EOF")
+	}
+	switch b[0] {
+	case '\n':
+		return b[1:], nil
+	case '\r':
+		_, rest, err := scanWindowsNewline(b)
+		return rest, err
+	}
+	return nil, fmt.Errorf("expected newline but got %#U", b[0])
+}
+
+// parseExpression consumes one expression: optional whitespace, an optional
+// key/value pair, optional whitespace and an optional comment. It returns
+// the bytes that follow the expression.
+func parseExpression(b []byte) ([]byte, error) {
+	//expression = ws [ comment ]
+	//expression =/ ws keyval ws [ comment ]
+	//expression =/ ws table ws [ comment ]
+
+	b = parseWhitespace(b)
+
+	if len(b) == 0 {
+		return b, nil
+	}
+
+	if b[0] == '#' {
+		_, rest, err := scanComment(b)
+		return rest, err
+	}
+
+	if b[0] == '[' {
+		// TODO: parse 'table'
+		return nil, fmt.Errorf("tables are not supported yet")
+	}
+
+	b, err := parseKeyval(b)
+	if err != nil {
+		return nil, err
+	}
+
+	// A key/value pair may be followed by whitespace and a comment on the
+	// same line, so keep going instead of returning right after the pair.
+	b = parseWhitespace(b)
+
+	if len(b) > 0 && b[0] == '#' {
+		_, rest, err := scanComment(b)
+		return rest, err
+	}
+
+	return b, nil
+}
+
+// parseKeyval consumes a key, the '=' separator with its surrounding
+// whitespace, and a value. It returns the bytes that follow the value.
+func parseKeyval(b []byte) ([]byte, error) {
+	//keyval = key keyval-sep val
+
+	b, err := parseKey(b)
+	if err != nil {
+		return nil, err
+	}
+
+	//keyval-sep = ws %x3D ws ; =
+
+	b = parseWhitespace(b)
+	b, err = expect('=', b)
+	if err != nil {
+		return nil, err
+	}
+	b = parseWhitespace(b)
+
+	return parseVal(b)
+}
+
+// parseVal consumes a value at the start of b.
+//
+// TODO: values are not implemented yet, so every input is rejected.
+func parseVal(b []byte) ([]byte, error) {
+	// val = string / boolean / array / inline-table / date-time / float / integer
+
+	return nil, fmt.Errorf("values are not supported yet")
+}
+
+// parseKey consumes a simple key or a dotted key and returns the bytes that
+// follow it. Whitespace after the last key segment is left in place.
+func parseKey(b []byte) ([]byte, error) {
+	//key = simple-key / dotted-key
+	//simple-key = quoted-key / unquoted-key
+	//
+	//unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
+	//quoted-key = basic-string / literal-string
+	//dotted-key = simple-key 1*( dot-sep simple-key )
+	//
+	//dot-sep = ws %x2E ws ; . Period
+
+	b, err := parseSimpleKey(b)
+	if err != nil {
+		return nil, err
+	}
+
+	for {
+		// Whitespace after a simple key only belongs to the key when a dot
+		// follows it; otherwise it is part of keyval-sep and is left for
+		// the caller.
+		rest := parseWhitespace(b)
+		if len(rest) == 0 || rest[0] != '.' {
+			return b, nil
+		}
+
+		b = parseWhitespace(rest[1:])
+		b, err = parseSimpleKey(b)
+		if err != nil {
+			return nil, err
+		}
+	}
+}
+
+// isWhitespace reports whether b is a space or a horizontal tab.
+func isWhitespace(b byte) bool {
+	return b == ' ' || b == '\t'
+}
+
+// parseSimpleKey consumes one literal-string, basic-string or unquoted key
+// and returns the bytes that follow it.
+func parseSimpleKey(b []byte) ([]byte, error) {
+	//simple-key = quoted-key / unquoted-key
+	//unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
+	//quoted-key = basic-string / literal-string
+
+	if len(b) == 0 {
+		return nil, unexpectedCharacter{b: b}
+	}
+
+	if b[0] == '\'' {
+		_, rest, err := scanLiteralString(b)
+		return rest, err
+	}
+	if b[0] == '"' {
+		_, rest, err := parseBasicString(b)
+		return rest, err
+	}
+
+	if isUnquotedKeyChar(b[0]) {
+		_, rest, err := scanUnquotedKey(b)
+		return rest, err
+	}
+
+	return nil, unexpectedCharacter{b: b}
+}
+
+// parseBasicString scans a basic string at the start of b and returns its
+// value with escape sequences resolved, together with the bytes that follow
+// the closing quote.
+func parseBasicString(b []byte) (string, []byte, error) {
+	//basic-string = quotation-mark *basic-char quotation-mark
+	//quotation-mark = %x22 ; "
+	//basic-char = basic-unescaped / escaped
+	//basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
+	//escaped = escape escape-seq-char
+	//escape-seq-char = %x22 ; " quotation mark U+0022
+	//escape-seq-char =/ %x5C ; \ reverse solidus U+005C
+	//escape-seq-char =/ %x62 ; b backspace U+0008
+	//escape-seq-char =/ %x66 ; f form feed U+000C
+	//escape-seq-char =/ %x6E ; n line feed U+000A
+	//escape-seq-char =/ %x72 ; r carriage return U+000D
+	//escape-seq-char =/ %x74 ; t tab U+0009
+	//escape-seq-char =/ %x75 4HEXDIG ; uXXXX U+XXXX
+	//escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX U+XXXXXXXX
+
+	token, rest, err := scanBasicString(b)
+	if err != nil {
+		return "", nil, err
+	}
+	var builder strings.Builder
+
+	// The scanner ensures that the token starts and ends with quotes and that
+	// escapes are balanced.
+	for i := 1; i < len(token)-1; i++ {
+		c := token[i]
+		if c == '\\' {
+			i++
+			c = token[i]
+			switch c {
+			case '"', '\\':
+				builder.WriteByte(c)
+			case 'b':
+				builder.WriteByte('\b')
+			case 'f':
+				builder.WriteByte('\f')
+			case 'n':
+				builder.WriteByte('\n')
+			case 'r':
+				builder.WriteByte('\r')
+			case 't':
+				builder.WriteByte('\t')
+			case 'u':
+				x, err := hexToString(token[i+1:len(token)-1], 4)
+				if err != nil {
+					return "", nil, err
+				}
+				builder.WriteString(x)
+				i += 4
+			case 'U':
+				x, err := hexToString(token[i+1:len(token)-1], 8)
+				if err != nil {
+					return "", nil, err
+				}
+				builder.WriteString(x)
+				i += 8
+			default:
+				return "", nil, fmt.Errorf("invalid escaped character: %#U", c)
+			}
+		} else {
+			builder.WriteByte(c)
+		}
+	}
+
+	return builder.String(), rest, nil
+}
+
+// hexToString decodes the first length hexadecimal digits of b as a Unicode
+// code point and returns that code point encoded as UTF-8.
+func hexToString(b []byte, length int) (string, error) {
+	if len(b) < length {
+		return "", fmt.Errorf("unicode point needs %d hex characters", length)
+	}
+	// TODO: slow
+	raw, err := hex.DecodeString(string(b[:length]))
+	if err != nil {
+		return "", err
+	}
+
+	// The decoded bytes are the big-endian value of the code point, not its
+	// UTF-8 encoding, so rebuild the number before converting it to a string.
+	var cp uint32
+	for _, c := range raw {
+		cp = cp<<8 | uint32(c)
+	}
+	if cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF) {
+		return "", fmt.Errorf("invalid unicode scalar value: U+%04X", cp)
+	}
+	return string(rune(cp)), nil
+}
+
+// parseWhitespace skips the spaces and tabs at the start of b and returns
+// the bytes that follow them.
+func parseWhitespace(b []byte) []byte {
+	//ws = *wschar
+	//wschar = %x20 ; Space
+	//wschar =/ %x09 ; Horizontal tab
+
+	// scanWhitespace never looks at the first byte, so only call it when that
+	// byte is known to be whitespace.
+	if len(b) == 0 || !isWhitespace(b[0]) {
+		return b
+	}
+	_, rest := scanWhitespace(b)
+	return rest
+}
+
+// expect consumes the byte x at the start of b and returns the bytes that
+// follow it, or an unexpectedCharacter error when b starts with anything else.
+func expect(x byte, b []byte) ([]byte, error) {
+	if len(b) == 0 || b[0] != x {
+		return nil, unexpectedCharacter{r: x, b: b}
+	}
+	return b[1:], nil
+}
+
+// unexpectedCharacter reports that b does not start with the wanted byte r.
+// A zero r means that no single byte was expected.
+type unexpectedCharacter struct {
+	r byte
+	b []byte
+}
+
+// Error implements the error interface.
+func (u unexpectedCharacter) Error() string {
+	switch {
+	case u.r == 0 && len(u.b) == 0:
+		return "unexpected EOF"
+	case u.r == 0:
+		return fmt.Sprintf("unexpected character %#U", u.b[0])
+	case len(u.b) == 0:
+		return fmt.Sprintf("expected %#U, not EOF", u.r)
+	}
+	return fmt.Sprintf("expected %#U, not %#U", u.r, u.b[0])
+}