Files
go-toml/toml.go
T
2021-02-02 08:28:30 -05:00

851 lines
14 KiB
Go

package toml
import (
"fmt"
"unicode/utf8"
)
// position holds a line/column pair.
// NOTE(review): presumably a location within the lexed input; nothing in
// the visible part of the file uses it yet, so confirm the base (0 or 1).
type position struct {
	line, column int
}
// eof is a rune value indicating end-of-file. Being negative, it cannot be
// confused with any byte or decoded rune taken from the input.
const eof = -1
// lookahead caches a decoded rune together with its encoded width, so that
// peeking at a rune and then consuming it decodes the input only once.
type lookahead struct {
	r    rune // cached rune (possibly eof); zero means nothing is cached
	size int  // number of bytes r occupies in the input
}

// empty reports whether the cache currently holds no rune.
func (la lookahead) empty() bool {
	return la.r == 0
}
// lexer scans the input held in data and reports each token it recognizes
// by calling the matching method on parser.
type lexer struct {
// parser receives one callback per token the lexer recognizes.
parser parser
// data is the input being lexed.
data []byte
// start and end delimit the pending token as data[start:end]; end is also
// the offset of the next byte to be read.
start int
end int
// lookahead caches the rune most recently decoded by peekRune.
lookahead lookahead
}
// at returns the byte located i positions past the current read offset,
// widened to a rune, or eof when that position lies beyond the input.
func (l *lexer) at(i int) rune {
	pos := l.end + i
	if pos < len(l.data) {
		return rune(l.data[pos])
	}
	return eof
}
// follows reports whether the unread input begins with the bytes of s.
// Nothing is consumed. The comparison is byte-wise, so a prefix that runs
// past the end of the input never matches.
func (l *lexer) follows(s string) bool {
	for offset, want := range []byte(s) {
		if l.at(offset) != rune(want) {
			return false
		}
	}
	return true
}
// peek returns the next unread byte as a rune without consuming it, or eof
// when the input is exhausted.
func (l *lexer) peek() rune {
	if l.end >= len(l.data) {
		return eof
	}
	return rune(l.data[l.end])
}
// next consumes and returns the next byte as a rune. At end of input it
// returns eof and leaves the read offset untouched.
func (l *lexer) next() rune {
	b := l.peek()
	if b == eof {
		return eof
	}
	l.end++
	return b
}
// expect consumes the next byte and checks that it equals expected.
// On mismatch it returns an *UnexpectedCharacter describing both values;
// the byte stays consumed (unless the input was already exhausted).
func (l *lexer) expect(expected rune) error {
	if got := l.next(); got != expected {
		return &UnexpectedCharacter{r: got, expected: expected}
	}
	return nil
}
// peekRune returns the UTF-8 rune at the current read offset without
// consuming it. The decoded rune and its width are stored in l.lookahead
// so that a following nextRune does not decode again. At end of input the
// result is eof.
func (l *lexer) peekRune() rune {
	if !l.lookahead.empty() {
		return l.lookahead.r
	}
	r, size := utf8.DecodeRune(l.data[l.end:])
	if r == utf8.RuneError && size == 0 {
		// DecodeRune reports an empty slice as (RuneError, 0).
		r = eof
	}
	l.lookahead = lookahead{r: r, size: size}
	return r
}
// nextRune consumes and returns the UTF-8 rune at the current read offset,
// advancing by its encoded width and clearing the lookahead cache. At end
// of input it returns eof and changes nothing.
func (l *lexer) nextRune() rune {
	r := l.peekRune()
	if r == eof {
		return r
	}
	l.end += l.lookahead.size
	l.lookahead = lookahead{}
	return r
}
// ignore discards the pending token by moving its start up to the read
// offset. It panics when there is no pending input to discard.
func (l *lexer) ignore() {
	if l.start == l.end {
		panic("cannot ignore empty token")
	}
	l.start = l.end
}
// accept returns the bytes of the pending token and starts a new, empty
// token at the read offset. The returned slice aliases l.data. It panics
// when the pending token is empty.
func (l *lexer) accept() []byte {
	if l.start == l.end {
		panic("cannot accept empty token")
	}
	from := l.start
	l.start = l.end
	return l.data[from:l.end]
}
// expectRune consumes the next UTF-8 rune and checks that it equals
// expected, returning an *UnexpectedCharacter describing both values on
// mismatch.
func (l *lexer) expectRune(expected rune) error {
	if got := l.nextRune(); got != expected {
		return &UnexpectedCharacter{r: got, expected: expected}
	}
	return nil
}
// empty reports whether the pending token contains no bytes, i.e. nothing
// has been consumed since the last accept or ignore.
func (l *lexer) empty() bool {
	return l.end-l.start == 0
}
// InvalidCharacter is the error returned when the lexer meets a character
// that is not allowed at the current position.
type InvalidCharacter struct {
	r rune // the offending character; eof when the input ended early
}

// Error implements the error interface. The character is rendered with
// fmt's %#U verb.
func (e *InvalidCharacter) Error() string {
	return "unexpected character '" + fmt.Sprintf("%#U", e.r) + "'"
}
// UnexpectedCharacter is the error returned when a specific character was
// required but a different one (or the end of the input) was found.
type UnexpectedCharacter struct {
	r        rune // the character actually read
	expected rune // the character that was required
}

// Error implements the error interface. Both characters are rendered with
// fmt's %#U verb.
func (e *UnexpectedCharacter) Error() string {
	want := fmt.Sprintf("%#U", e.expected)
	got := fmt.Sprintf("%#U", e.r)
	return "expected character '" + want + "' but got '" + got + "'"
}
// run lexes the whole input as a sequence of expressions separated by
// newlines (LF or CRLF). It returns nil once the end of the input follows
// an expression, the first error reported by lexExpression, or an
// *InvalidCharacter when an expression is followed by anything other than
// a newline.
func (l *lexer) run() error {
	for {
		if err := l.lexExpression(); err != nil {
			return err
		}

		// new lines between expressions
		c := l.next()
		if c == eof {
			return nil
		}
		if c == '\r' {
			// A carriage return is only valid as the first half of CRLF.
			c = l.next()
		}
		if c != '\n' {
			return &InvalidCharacter{r: c}
		}
		l.ignore()
	}
}
// lexRequiredNewline consumes and discards exactly one newline (LF or
// CRLF). Anything else, including the end of the input, yields an
// *InvalidCharacter carrying the character that broke the newline.
func (l *lexer) lexRequiredNewline() error {
	c := l.next()
	if c == '\r' {
		// Only valid when immediately followed by a line feed.
		c = l.next()
	}
	if c != '\n' {
		return &InvalidCharacter{r: c}
	}
	l.ignore()
	return nil
}
// lexExpression lexes one line-level expression, not including the
// newline that terminates it:
//
//	expression =  ws [ comment ]
//	expression =/ ws keyval ws [ comment ]
//	expression =/ ws table ws [ comment ]
//
// A line that starts with none of the recognized constructs consumes only
// its leading whitespace and reports no error; the caller decides what to
// do with the character that follows.
func (l *lexer) lexExpression() error {
	if err := l.lexWhitespace(); err != nil {
		return err
	}

	switch c := l.peek(); {
	case c == '#':
		// Only whitespace and a comment on this line: nothing else to do.
		return l.lexComment()
	case c == '[':
		// Either a standard table or an array table.
		if err := l.lexTable(); err != nil {
			return err
		}
	case c == '\'' || c == '"' || isUnquotedKeyRune(c):
		if err := l.lexKeyval(); err != nil {
			return err
		}
	}

	// Trailing whitespace and optional comment.
	if err := l.lexWhitespace(); err != nil {
		return err
	}
	if l.peek() != '#' {
		return nil
	}
	return l.lexComment()
}
// lexKeyval lexes a key/value pair:
//
//	keyval     = key keyval-sep val
//	keyval-sep = ws %x3D ws ; =
//
// The '=' sign is handed to the parser through Equal. The first error
// met while lexing the key, the separator or the value is returned.
func (l *lexer) lexKeyval() error {
	if err := l.lexKey(); err != nil {
		return err
	}
	if err := l.lexWhitespace(); err != nil {
		return err
	}
	if err := l.expect('='); err != nil {
		return err
	}
	l.parser.Equal(l.accept())
	if err := l.lexWhitespace(); err != nil {
		return err
	}
	return l.lexVal()
}
// lexVal lexes a value, choosing the sub-lexer from the next byte:
//
//	val    = string / boolean / array / inline-table / date-time / float / integer
//	string = ml-basic-string / basic-string / ml-literal-string / literal-string
//
// Only booleans, strings, arrays and inline tables are dispatched here;
// any other leading byte produces an *InvalidCharacter.
func (l *lexer) lexVal() error {
	switch c := l.peek(); c {
	case '{':
		return l.lexInlineTable()
	case '[':
		return l.lexArray()
	case '"', '\'':
		return l.lexString()
	case 'f', 't':
		return l.lexBool()
	default:
		// TODO: date-time, float and integer values.
		return &InvalidCharacter{r: c}
	}
}
// lexInlineTable lexes an inline table such as { a = true, b = 'x' }.
//
//	inline-table         = inline-table-open [ inline-table-keyvals ] inline-table-close
//	inline-table-open    = %x7B ws ; {
//	inline-table-close   = ws %x7D ; }
//	inline-table-sep     = ws %x2C ws ; , Comma
//	inline-table-keyvals = keyval [ inline-table-sep inline-table-keyvals ]
//
// The caller must already have seen the opening '{'; the method panics if
// it is missing. The parser is told about the opening brace, every
// separator and the closing brace; the pairs themselves are lexed by
// lexKeyval. The first lexing error is returned unchanged.
func (l *lexer) lexInlineTable() error {
	if l.expect('{') != nil {
		panic("inline tables should start with {")
	}
	l.ignore()
	l.parser.InlineTableBegin()

	if err := l.lexWhitespace(); err != nil {
		return err
	}

	// Each iteration lexes one pair; every pair but the first must be
	// preceded by a comma.
	for first := true; l.peek() != '}'; first = false {
		if !first {
			if err := l.expect(','); err != nil {
				return err
			}
			l.parser.InlineTableSeparator()
			l.ignore()
			if err := l.lexWhitespace(); err != nil {
				return err
			}
		}
		if err := l.lexKeyval(); err != nil {
			return err
		}
		if err := l.lexWhitespace(); err != nil {
			return err
		}
	}

	l.next() // the closing '}'
	l.ignore()
	l.parser.InlineTableEnd()
	return nil
}
// lexArray lexes an array value.
//
//	array = array-open [ array-values ] ws-comment-newline array-close
//
// The caller must already have seen the opening '['; the method panics if
// it is missing. Whitespace, comments and newlines may appear around every
// element and separator. The parser is told about the opening bracket,
// every separator and the closing bracket; elements are lexed by lexVal.
// The first lexing error is returned unchanged.
func (l *lexer) lexArray() error {
	if l.expect('[') != nil {
		panic("arrays should start with [")
	}
	l.ignore()
	l.parser.ArrayBegin()

	if err := l.lexWhitespaceCommentNewline(); err != nil {
		return err
	}

	// Each iteration lexes one element; every element but the first must
	// be preceded by a comma.
	for first := true; l.peek() != ']'; first = false {
		if !first {
			if err := l.expect(','); err != nil {
				return err
			}
			l.parser.ArraySeparator()
			l.ignore()
			if err := l.lexWhitespaceCommentNewline(); err != nil {
				return err
			}
		}
		if err := l.lexVal(); err != nil {
			return err
		}
		if err := l.lexWhitespaceCommentNewline(); err != nil {
			return err
		}
	}

	l.next() // the closing ']'
	l.ignore()
	l.parser.ArrayEnd()
	return nil
}
// lexWhitespaceCommentNewline consumes any run of whitespace, comments and
// newlines:
//
//	ws-comment-newline = *( wschar / ([ comment ] newline) )
//
// It stops, without error, at the first position that does not start a
// newline once optional whitespace and an optional comment have been read.
func (l *lexer) lexWhitespaceCommentNewline() error {
	for {
		if isWhitespace(l.peek()) {
			if err := l.lexWhitespace(); err != nil {
				return err
			}
		}
		if l.peek() == '#' {
			if err := l.lexComment(); err != nil {
				return err
			}
		}

		switch l.peek() {
		case '\n', '\r':
			if err := l.lexRequiredNewline(); err != nil {
				return err
			}
		default:
			return nil
		}
	}
}
// lexString lexes a string value, dispatching on the opening quote to the
// literal ('...') or basic ("...") string lexer.
//
// Multi-line strings (''' and """) are not implemented yet. Because they
// can appear in perfectly ordinary input, they are reported as an error
// rather than a panic, so a document using them cannot crash the caller.
//
// The caller must already have seen a quote character; the method panics
// otherwise, since that indicates a bug in the lexer itself.
func (l *lexer) lexString() error {
	switch l.peek() {
	case '\'':
		if l.follows("'''") {
			// TODO ml-literal-string
			return fmt.Errorf("multi-line literal strings are not supported yet")
		}
		return l.lexLiteralString()
	case '"':
		if l.follows("\"\"\"") {
			// TODO ml-basic-string
			return fmt.Errorf("multi-line basic strings are not supported yet")
		}
		return l.lexBasicString()
	default:
		panic("string should start with ' or \"")
	}
}
// lexBool lexes the literal true or false and hands it to the parser
// through Boolean. A leading byte other than 't' or 'f' yields an
// *InvalidCharacter; a wrong byte later in the word yields the
// *UnexpectedCharacter produced by expect.
func (l *lexer) lexBool() error {
	var rest string
	switch c := l.peek(); c {
	case 't':
		rest = "rue"
	case 'f':
		rest = "alse"
	default:
		return &InvalidCharacter{r: c}
	}

	l.next() // the leading 't' or 'f'
	for _, want := range rest {
		if err := l.expect(want); err != nil {
			return err
		}
	}

	l.parser.Boolean(l.accept())
	return nil
}
// lexKey lexes a simple or dotted key, including the whitespace allowed
// around each dot and after the final part:
//
//	key        = simple-key / dotted-key
//	dotted-key = simple-key 1*( dot-sep simple-key )
//	dot-sep    = ws %x2E ws
//
// Every dot is handed to the parser through Dot.
func (l *lexer) lexKey() error {
	for {
		if err := l.lexSimpleKey(); err != nil {
			return err
		}
		if err := l.lexWhitespace(); err != nil {
			return err
		}

		if l.peek() != '.' {
			// No further part: finish with the trailing whitespace pass.
			return l.lexWhitespace()
		}

		l.next()
		l.parser.Dot(l.accept())
		if err := l.lexWhitespace(); err != nil {
			return err
		}
	}
}
// isUnquotedKeyRune reports whether r may appear in an unquoted key:
// an ASCII letter, an ASCII digit, '-' or '_'.
func isUnquotedKeyRune(r rune) bool {
	switch {
	case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9':
		return true
	case r == '-', r == '_':
		return true
	}
	return false
}
// lexSimpleKey lexes one key part, picking the sub-lexer from the next
// byte:
//
//	simple-key     = quoted-key / unquoted-key
//	quoted-key     = basic-string / literal-string
//	unquoted-key   = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
//	basic-string   = quotation-mark *basic-char quotation-mark
//	literal-string = apostrophe *literal-char apostrophe
func (l *lexer) lexSimpleKey() error {
	c := l.peek()
	if c == '\'' {
		return l.lexLiteralString()
	}
	if c == '"' {
		return l.lexBasicString()
	}
	return l.lexUnquotedKey()
}
// lexUnquotedKey lexes a bare key and hands it to the parser through
// UnquotedKey:
//
//	unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
//
// At least one key character is required; otherwise the offending
// character is reported in an *InvalidCharacter.
func (l *lexer) lexUnquotedKey() error {
	if first := l.next(); !isUnquotedKeyRune(first) {
		return &InvalidCharacter{r: first}
	}
	for isUnquotedKeyRune(l.peek()) {
		l.next()
	}
	l.parser.UnquotedKey(l.accept())
	return nil
}
// lexComment lexes a comment, from its '#' up to but not including the
// end of the line, and hands it (with the '#') to the parser through
// Comment.
//
// The comment ends at the end of the input, at a line feed, or at a
// carriage return that begins a CRLF newline. The last case keeps the
// '\r' of a Windows line ending out of the comment token; the newline is
// left for the caller to consume.
//
// It returns an *UnexpectedCharacter if the next byte is not '#'.
func (l *lexer) lexComment() error {
	if err := l.expect('#'); err != nil {
		return err
	}
	for {
		r := l.peek()
		if r == eof || r == '\n' || (r == '\r' && l.at(1) == '\n') {
			l.parser.Comment(l.accept())
			return nil
		}
		l.next()
	}
}
// isWhitespace reports whether r is a space (U+0020) or a horizontal tab
// (U+0009). Newline characters are not whitespace in this sense.
func isWhitespace(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	}
	return false
}
// InvalidUnicodeError is an error value carrying a rune that is not
// acceptable Unicode.
// NOTE(review): nothing in the visible part of the file constructs it;
// confirm where it is meant to be returned.
type InvalidUnicodeError struct {
	r rune // the offending rune
}

// Error implements the error interface. The rune is rendered with fmt's
// %#U verb.
func (e *InvalidUnicodeError) Error() string {
	return "invalid unicode: " + fmt.Sprintf("%#U", e.r)
}
// lexWhitespace consumes a possibly empty run of spaces and tabs. If
// anything is pending afterwards it is handed to the parser through
// Whitespace. The returned error is always nil.
func (l *lexer) lexWhitespace() error {
	for isWhitespace(l.peek()) {
		l.next()
	}
	if l.empty() {
		return nil
	}
	l.parser.Whitespace(l.accept())
	return nil
}
// isNonAsciiChar reports whether r is a non-ASCII code point: any value in
// U+0080..U+10FFFF except the surrogate range U+D800..U+DFFF.
//
//	non-ascii = %x80-D7FF / %xE000-10FFFF
func isNonAsciiChar(r rune) bool {
	switch {
	case r < 0x80, r > 0x10FFFF:
		return false
	case r >= 0xD800 && r <= 0xDFFF:
		return false
	}
	return true
}
// isLiteralChar reports whether r may appear inside a literal string:
//
//	literal-char = %x09 / %x20-26 / %x28-7E / non-ascii
//
// That is: tab, any printable ASCII character except the apostrophe, or
// any non-ASCII character.
func isLiteralChar(r rune) bool {
	switch {
	case r == '\t':
		return true
	case r == '\'':
		return false
	case r >= ' ' && r <= '~':
		return true
	}
	return isNonAsciiChar(r)
}
// lexLiteralString lexes a single-line literal string and hands its
// content, without the surrounding apostrophes, to the parser through
// LiteralString.
//
//	literal-string = apostrophe *literal-char apostrophe
//	literal-char   = %x09 / %x20-26 / %x28-7E / non-ascii
//	non-ascii      = %x80-D7FF / %xE000-10FFFF
//
// Errors:
//   - *UnexpectedCharacter if the next byte is not an apostrophe;
//   - *InvalidUnicodeError if the content contains an invalid UTF-8
//     sequence;
//   - *InvalidCharacter if the content contains a character that is not a
//     literal-char, or the input ends before the closing apostrophe.
func (l *lexer) lexLiteralString() error {
	if err := l.expect('\''); err != nil {
		return err
	}
	l.ignore()
	for {
		r := l.peekRune()
		if r == '\'' {
			// Hand the content over directly rather than through accept(),
			// which panics on an empty token: '' is a valid, empty string.
			content := l.data[l.start:l.end]
			l.start = l.end
			l.parser.LiteralString(content)
			l.nextRune()
			l.ignore()
			return nil
		}
		// DecodeRune reports an invalid encoding as (RuneError, 1). A
		// width of 1 distinguishes it from a genuine U+FFFD in the input,
		// which is three bytes wide and perfectly valid.
		if r == utf8.RuneError && l.lookahead.size == 1 {
			return &InvalidUnicodeError{r: r}
		}
		if !isLiteralChar(r) {
			return &InvalidCharacter{r: r}
		}
		l.nextRune()
	}
}
// isBasicStringChar reports whether r may appear unescaped inside a basic
// string:
//
//	basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
//	wschar          = %x20 / %x09 ; space / tab
//
// That is: space, tab, any printable ASCII character except the quotation
// mark and the backslash, or any non-ASCII character.
func isBasicStringChar(r rune) bool {
	return r == ' ' || r == '\t' || // wschar covers tab as well as space
		r == 0x21 ||
		(r >= 0x23 && r <= 0x5B) ||
		(r >= 0x5D && r <= 0x7E) ||
		isNonAsciiChar(r)
}
// isEscapeChar reports whether r is one of the single-character escape
// codes that may follow a backslash in a basic string: " \ b f n r t.
// The 'u' and 'U' escapes take hex digits and are not covered here.
func isEscapeChar(r rune) bool {
	switch r {
	case '"', '\\', 'b', 'f', 'n', 'r', 't':
		return true
	}
	return false
}
// isHex reports whether r is an ASCII hexadecimal digit: 0-9, A-F or a-f.
//
// The grammar spells HEXDIG with upper-case letters only, but ABNF string
// literals are case-insensitive, so the lower-case forms are accepted too.
func isHex(r rune) bool {
	return (r >= '0' && r <= '9') ||
		(r >= 'A' && r <= 'F') ||
		(r >= 'a' && r <= 'f')
}
// lexBasicString lexes a single-line basic string and hands its raw
// content, without the surrounding quotation marks and with escape
// sequences left undecoded, to the parser through BasicString.
//
//	basic-string    = quotation-mark *basic-char quotation-mark
//	basic-char      = basic-unescaped / escaped
//	basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
//	escaped         = escape escape-seq-char
//	escape          = %x5C                   ; \
//	escape-seq-char =  %x22                  ; "    quotation mark  U+0022
//	escape-seq-char =/ %x5C                  ; \    reverse solidus U+005C
//	escape-seq-char =/ %x62                  ; b    backspace       U+0008
//	escape-seq-char =/ %x66                  ; f    form feed       U+000C
//	escape-seq-char =/ %x6E                  ; n    line feed       U+000A
//	escape-seq-char =/ %x72                  ; r    carriage return U+000D
//	escape-seq-char =/ %x74                  ; t    tab             U+0009
//	escape-seq-char =/ %x75 4HEXDIG          ; uXXXX                U+XXXX
//	escape-seq-char =/ %x55 8HEXDIG          ; UXXXXXXXX            U+XXXXXXXX
//	HEXDIG          = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
//
// Errors:
//   - *UnexpectedCharacter if the next byte is not a quotation mark;
//   - *InvalidUnicodeError if the content contains an invalid UTF-8
//     sequence;
//   - *InvalidCharacter for a malformed escape sequence, for a character
//     that may not appear in a basic string (such as a newline), or when
//     the input ends before the closing quotation mark.
func (l *lexer) lexBasicString() error {
	if err := l.expect('"'); err != nil {
		return err
	}
	l.ignore()

	for {
		r := l.peekRune()

		if r == '"' {
			// Hand the content over directly rather than through accept(),
			// which panics on an empty token: "" is a valid, empty string.
			content := l.data[l.start:l.end]
			l.start = l.end
			l.parser.BasicString(content)
			l.nextRune()
			l.ignore()
			return nil
		}

		if r == '\\' {
			l.nextRune()
			esc := l.peekRune()
			if isEscapeChar(esc) {
				l.nextRune()
				continue
			}

			// \uXXXX and \UXXXXXXXX take a fixed number of hex digits.
			var digits int
			switch esc {
			case 'u':
				digits = 4
			case 'U':
				digits = 8
			default:
				return &InvalidCharacter{r: esc}
			}
			l.nextRune()
			for i := 0; i < digits; i++ {
				if h := l.nextRune(); !isHex(h) {
					return &InvalidCharacter{r: h}
				}
			}
			continue
		}

		// DecodeRune reports an invalid encoding as (RuneError, 1). A
		// width of 1 distinguishes it from a genuine U+FFFD in the input,
		// which is three bytes wide and perfectly valid.
		if r == utf8.RuneError && l.lookahead.size == 1 {
			return &InvalidUnicodeError{r: r}
		}

		// Anything else that is not a basic character (end of input,
		// newline, control character) must be rejected here. Without this
		// return the loop would see the same rune forever.
		if !isBasicStringChar(r) {
			return &InvalidCharacter{r: r}
		}
		l.nextRune()
	}
}
// lexTable lexes a table header, which is either a standard table or an
// array table; two consecutive opening brackets select the latter.
//
//	table = std-table / array-table
//
//	;; Standard Table
//	std-table       = std-table-open key std-table-close
//	std-table-open  = %x5B ws ; [ Left square bracket
//	std-table-close = ws %x5D ; ] Right square bracket
//
//	;; Array Table
//	array-table       = array-table-open key array-table-close
//	array-table-open  = %x5B.5B ws ; [[ Double left square bracket
//	array-table-close = ws %x5D.5D ; ]] Double right square bracket
func (l *lexer) lexTable() error {
	if !l.follows("[[") {
		return l.lexStandardTable()
	}
	return l.lexArrayTable()
}
// lexArrayTable lexes an array-table header such as [[ a.b ]].
//
//	array-table       = array-table-open key array-table-close
//	array-table-open  = %x5B.5B ws ; [[ Double left square bracket
//	array-table-close = ws %x5D.5D ; ]] Double right square bracket
//
// The parser is told about the opening and the closing bracket pair; the
// key is lexed by lexKey. A missing bracket is reported through the
// *UnexpectedCharacter returned by expect.
func (l *lexer) lexArrayTable() error {
	for i := 0; i < 2; i++ {
		if err := l.expect('['); err != nil {
			return err
		}
	}
	l.ignore()
	l.parser.ArrayTableBegin()

	if err := l.lexWhitespace(); err != nil {
		return err
	}
	if err := l.lexKey(); err != nil {
		return err
	}
	if err := l.lexWhitespace(); err != nil {
		return err
	}

	for i := 0; i < 2; i++ {
		if err := l.expect(']'); err != nil {
			return err
		}
	}
	l.ignore()
	l.parser.ArrayTableEnd()
	return nil
}
// lexStandardTable lexes a standard table header such as [ a.b ].
//
//	std-table       = std-table-open key std-table-close
//	std-table-open  = %x5B ws ; [ Left square bracket
//	std-table-close = ws %x5D ; ] Right square bracket
//
// The caller must already have seen the opening '['; the method panics if
// it is missing. The parser is told about the opening and the closing
// bracket; the key is lexed by lexKey. A missing closing bracket is
// reported through the *UnexpectedCharacter returned by expect.
func (l *lexer) lexStandardTable() error {
	if l.expect('[') != nil {
		panic("std-table should start with [")
	}
	l.ignore()
	l.parser.StandardTableBegin()

	if err := l.lexWhitespace(); err != nil {
		return err
	}
	if err := l.lexKey(); err != nil {
		return err
	}
	if err := l.lexWhitespace(); err != nil {
		return err
	}

	if err := l.expect(']'); err != nil {
		return err
	}
	l.ignore()
	l.parser.StandardTableEnd()
	return nil
}