diff --git a/document.go b/document.go deleted file mode 100644 index 6292a0a..0000000 --- a/document.go +++ /dev/null @@ -1,151 +0,0 @@ -package toml - -import "fmt" - -type tokenKind int - -const ( - whitespace tokenKind = iota - arrayTableBegin - arrayTableEnd - standardTableBegin - standardTableEnd - inlineTableSeparator - inlineTableBegin - inlineTableEnd - arraySeparator - arrayBegin - arrayEnd - equal - boolean - dot - basicString - literalString - unquotedKey - comment -) - -type token struct { - data []byte - kind tokenKind -} - -type Document struct { - tokens []token -} - -func (d *Document) appendToken(kind tokenKind, data []byte) { - d.tokens = append(d.tokens, token{data: data, kind: kind}) -} - -type docParser struct { - document Document -} - -func (d *docParser) ArrayTableBegin() { - fmt.Println("ARRAY-TABLE[[") - d.document.appendToken(arrayTableBegin, nil) -} - -func (d *docParser) ArrayTableEnd() { - fmt.Println("ARRAY-TABLE]]") - d.document.appendToken(arrayTableEnd, nil) -} - -func (d *docParser) StandardTableBegin() { - fmt.Println("STD-TABLE[") - d.document.appendToken(standardTableBegin, nil) -} - -func (d *docParser) StandardTableEnd() { - fmt.Println("STD-TABLE]") - d.document.appendToken(standardTableEnd, nil) -} - -func (d *docParser) InlineTableSeparator() { - fmt.Println(", InlineTable SEPARATOR") - d.document.appendToken(inlineTableSeparator, nil) -} - -func (d *docParser) InlineTableBegin() { - fmt.Println("{ InlineTable BEGIN") - d.document.appendToken(inlineTableBegin, nil) -} - -func (d *docParser) InlineTableEnd() { - fmt.Println("} InlineTable END") - d.document.appendToken(inlineTableEnd, nil) -} - -func (d *docParser) ArraySeparator() { - fmt.Println(", ARRAY SEPARATOR") - d.document.appendToken(arraySeparator, nil) -} - -func (d *docParser) ArrayBegin() { - fmt.Println("[ ARRAY BEGIN") - d.document.appendToken(arrayBegin, nil) -} - -func (d *docParser) ArrayEnd() { - fmt.Println("] ARRAY END") - 
d.document.appendToken(arrayEnd, nil) -} - -func (d *docParser) Equal(b []byte) { - s := string(b) - fmt.Printf("EQUAL: '%s'\n", s) - d.document.appendToken(equal, b) -} - -func (d *docParser) Boolean(b []byte) { - s := string(b) - fmt.Printf("Boolean: '%s'\n", s) - d.document.appendToken(boolean, b) -} - -func (d *docParser) Dot(b []byte) { - s := string(b) - fmt.Printf("DOT: '%s'\n", s) - d.document.appendToken(dot, b) -} - -func (d *docParser) BasicString(b []byte) { - s := string(b) - fmt.Printf("BasicString: '%s'\n", s) - d.document.appendToken(basicString, b) -} - -func (d *docParser) LiteralString(b []byte) { - s := string(b) - fmt.Printf("LiteralString: '%s'\n", s) - d.document.appendToken(literalString, b) -} - -func (d *docParser) UnquotedKey(b []byte) { - s := string(b) - fmt.Printf("UnquotedKey: '%s'\n", s) - d.document.appendToken(unquotedKey, b) -} - -func (d *docParser) Comment(b []byte) { - s := string(b) - fmt.Printf("Comment: '%s'\n", s) - d.document.appendToken(comment, b) -} - -func (d *docParser) Whitespace(b []byte) { - s := string(b) - fmt.Printf("Whitespace: '%s'\n", s) - d.document.appendToken(whitespace, b) -} - -func Parse(b []byte) (Document, error) { - p := docParser{} - l := lexer{parser: &p, data: b} - err := l.run() - if err != nil { - return Document{}, err - } - return p.document, nil -} diff --git a/encoding.go b/encoding.go deleted file mode 100644 index 510a7a1..0000000 --- a/encoding.go +++ /dev/null @@ -1,82 +0,0 @@ -package toml - -type unmarshaler struct { -} - -func (u unmarshaler) Whitespace(b []byte) {} -func (u unmarshaler) Comment(b []byte) {} - -func (u unmarshaler) UnquotedKey(b []byte) { - panic("implement me") -} - -func (u unmarshaler) LiteralString(b []byte) { - panic("implement me") -} - -func (u unmarshaler) BasicString(b []byte) { - panic("implement me") -} - -func (u unmarshaler) Dot(b []byte) { - panic("implement me") -} - -func (u unmarshaler) Boolean(b []byte) { - panic("implement me") -} - -func (u 
unmarshaler) Equal(b []byte) { - panic("implement me") -} - -func (u unmarshaler) ArrayBegin() { - panic("implement me") -} - -func (u unmarshaler) ArrayEnd() { - panic("implement me") -} - -func (u unmarshaler) ArraySeparator() { - panic("implement me") -} - -func (u unmarshaler) InlineTableBegin() { - panic("implement me") -} - -func (u unmarshaler) InlineTableEnd() { - panic("implement me") -} - -func (u unmarshaler) InlineTableSeparator() { - panic("implement me") -} - -func (u unmarshaler) StandardTableBegin() { - panic("implement me") -} - -func (u unmarshaler) StandardTableEnd() { - panic("implement me") -} - -func (u unmarshaler) ArrayTableBegin() { - panic("implement me") -} - -func (u unmarshaler) ArrayTableEnd() { - panic("implement me") -} - -func Unmarshal(data []byte, v interface{}) error { - p := unmarshaler{} - l := lexer{parser: &p, data: data} - return l.run() -} - -func Marshal(v interface{}) ([]byte, error) { - // TODO - return nil, nil -} diff --git a/encoding_test.go b/encoding_test.go deleted file mode 100644 index 27aae5a..0000000 --- a/encoding_test.go +++ /dev/null @@ -1 +0,0 @@ -package toml_test diff --git a/go.sum b/go.sum index d021fdd..acb88a4 100644 --- a/go.sum +++ b/go.sum @@ -1,12 +1,11 @@ github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod 
h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/lexer.go b/lexer.go deleted file mode 100644 index f9fa173..0000000 --- a/lexer.go +++ /dev/null @@ -1 +0,0 @@ -package toml diff --git a/parser.go b/parser.go deleted file mode 100644 index 3f1fed7..0000000 --- a/parser.go +++ /dev/null @@ -1,22 +0,0 @@ -package toml - -type parser interface { - Whitespace(b []byte) - Comment(b []byte) - UnquotedKey(b []byte) - LiteralString(b []byte) - BasicString(b []byte) - Dot(b []byte) - Boolean(b []byte) - Equal(b []byte) - ArrayBegin() - ArrayEnd() - ArraySeparator() - InlineTableBegin() - InlineTableEnd() - InlineTableSeparator() - StandardTableBegin() - StandardTableEnd() - ArrayTableBegin() - ArrayTableEnd() -} diff --git a/scanner.go b/scanner.go new file mode 100644 index 0000000..12a8d57 --- /dev/null +++ b/scanner.go @@ -0,0 +1,236 @@ +package toml + +import "fmt" + +func scanFollows(pattern []byte) func(b []byte) bool { + return func(b []byte) bool { + if len(b) < len(pattern) { + return false + } + for i, c := range pattern { + if b[i] != c { + return false + } + } + return true + } +} + +var scanFollowsMultilineBasicStringDelimiter = scanFollows([]byte{'"', '"', '"'}) +var scanFollowsMultilineLiteralStringDelimiter = scanFollows([]byte{'\'', '\'', '\''}) +var scanFollowsTrue = scanFollows([]byte{'t', 'r', 'u', 'e'}) +var scanFollowsFalse = scanFollows([]byte{'f', 'a', 'l', 's', 'e'}) +var scanFollowsArrayTableBegin = scanFollows([]byte{arrayOrTableBegin, arrayOrTableBegin}) +var scanFollowsArrayTableEnd = scanFollows([]byte{arrayOrTableEnd, 
arrayOrTableEnd}) + +const ( + dot = '.' + equal = '=' + comma = ',' + inlineTableBegin = '{' + inlineTableEnd = '}' + comment = '#' + arrayOrTableBegin = '[' + arrayOrTableEnd = ']' +) + +// scan returns a []byte containing the next lexical token, bytes left, and an error. +// +// eof is signaled by an empty token and nil error. +func scan(b []byte) ([]byte, []byte, error) { + if len(b) == 0 { + return b, b, nil + } + + switch b[0] { + case dot, equal, inlineTableBegin, inlineTableEnd, comma: + return b[:1], b[1:], nil + case '"': + if scanFollowsMultilineBasicStringDelimiter(b) { + return scanMultilineBasicString(b) + } + return scanBasicString(b) + case '\'': + if scanFollowsMultilineLiteralStringDelimiter(b) { + return scanMultilineLiteralString(b) + } + return scanLiteralString(b) + case comment: + return scanComment(b) + case ' ', '\t': + return scanWhitespace(b) + case '\r': + return scanWindowsNewline(b) + case '\n': + return b[:1], b[1:], nil + case 't': + if scanFollowsTrue(b) { + return b[:4], b[4:], nil + } + case 'f': + if scanFollowsFalse(b) { + return b[:5], b[5:], nil + } + case arrayOrTableBegin: + if scanFollowsArrayTableBegin(b) { + return b[:2], b[2:], nil + } + return b[:1], b[1:], nil + case arrayOrTableEnd: + if scanFollowsArrayTableEnd(b) { + return b[:2], b[2:], nil + } + return b[:1], b[1:], nil + } + + if isUnquotedKeyChar(b[0]) { + return scanUnquotedKey(b) + } + + // TODO: numbers, date-time + panic("unhandled scan") +} + +func scanUnquotedKey(b []byte) ([]byte, []byte, error) { + //unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ + for i := 1; i < len(b); i++ { + if !isUnquotedKeyChar(b[i]) { + return b[:i], b[i:], nil + } + } + return b, nil, nil +} + +func isUnquotedKeyChar(r byte) bool { + return (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' || r == '_' +} + +func scanLiteralString(b []byte) ([]byte, []byte, error) { + //literal-string = apostrophe *literal-char 
apostrophe + //apostrophe = %x27 ; ' apostrophe + //literal-char = %x09 / %x20-26 / %x28-7E / non-ascii + for i := 1; i < len(b); i++ { + switch b[i] { + case '\'': + return b[:i+1], b[i+1:], nil + case '\n': + return nil, nil, fmt.Errorf("literal strings cannot have new lines") + } + } + return nil, nil, fmt.Errorf("unterminated literal string") +} + +func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) { + //ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body + //ml-literal-string-delim + //ml-literal-string-delim = 3apostrophe + //ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ] + // + //mll-content = mll-char / newline + //mll-char = %x09 / %x20-26 / %x28-7E / non-ascii + //mll-quotes = 1*2apostrophe + for i := 3; i < len(b); i++ { + switch b[i] { + case '\'': + if scanFollowsMultilineLiteralStringDelimiter(b[i:]) { + return b[:i+3], b[i+3:], nil + } + } + } + + return nil, nil, fmt.Errorf(`multiline literal string not terminated by '''`) +} + +func scanWindowsNewline(b []byte) ([]byte, []byte, error) { + if len(b) < 2 { + return nil, nil, fmt.Errorf(`windows new line missing \n`) + } + if b[1] != '\n' { + return nil, nil, fmt.Errorf(`windows new line should be \r\n`) + } + return b[:2], b[2:], nil +} + +func scanWhitespace(b []byte) ([]byte, []byte, error) { + for i := 1; i < len(b); i++ { + switch b[i] { + case ' ', '\t': + continue + default: + return b[:i], b[i:], nil + } + } + return b, nil, nil +} + +func scanComment(b []byte) ([]byte, []byte, error) { + //;; Comment + // + //comment-start-symbol = %x23 ; # + //non-ascii = %x80-D7FF / %xE000-10FFFF + //non-eol = %x09 / %x20-7F / non-ascii + // + //comment = comment-start-symbol *non-eol + + for i := 1; i < len(b); i++ { + switch b[i] { + case '\n': + return b[:i+1], b[i+1:], nil + } + } + return b, nil, nil +} + +// TODO perform validation on the string? 
+func scanBasicString(b []byte) ([]byte, []byte, error) { + //basic-string = quotation-mark *basic-char quotation-mark + //quotation-mark = %x22 ; " + //basic-char = basic-unescaped / escaped + //basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii + //escaped = escape escape-seq-char + for i := 1; i < len(b); i++ { + switch b[i] { + case '"': + return b[:i+1], b[i+1:], nil + case '\n': + return nil, nil, fmt.Errorf("basic strings cannot have new lines") + case '\\': + if len(b) < i+2 { + return nil, nil, fmt.Errorf("need a character after \\") + } + i++ // skip the next character + } + } + + return nil, nil, fmt.Errorf(`basic string not terminated by "`) +} + +// TODO perform validation on the string? +func scanMultilineBasicString(b []byte) ([]byte, []byte, error) { + //ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body + //ml-basic-string-delim + //ml-basic-string-delim = 3quotation-mark + //ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ] + // + //mlb-content = mlb-char / newline / mlb-escaped-nl + //mlb-char = mlb-unescaped / escaped + //mlb-quotes = 1*2quotation-mark + //mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii + //mlb-escaped-nl = escape ws newline *( wschar / newline ) + + for i := 3; i < len(b); i++ { + switch b[i] { + case '"': + if scanFollowsMultilineBasicStringDelimiter(b[i:]) { + return b[:i+3], b[i+3:], nil + } + case '\\': + if len(b) < i+2 { + return nil, nil, fmt.Errorf("need a character after \\") + } + i++ // skip the next character + } + } + + return nil, nil, fmt.Errorf(`multiline basic string not terminated by """`) +} diff --git a/toml.go b/toml.go index e66c9ee..f9fa173 100644 --- a/toml.go +++ b/toml.go @@ -1,850 +1 @@ package toml - -import ( - "fmt" - "unicode/utf8" -) - -type position struct { - line int - column int -} - -// eof is a rune value indicating end-of-file. 
-const eof = -1 - -type lookahead struct { - r rune - size int -} - -func (l lookahead) empty() bool { - return l.r == 0 -} - -type lexer struct { - parser parser - - data []byte - start int - end int - - lookahead lookahead -} - -func (l *lexer) at(i int) rune { - if l.end+i >= len(l.data) { - return eof - } - return rune(l.data[l.end+i]) -} - -func (l *lexer) follows(s string) bool { - for i := 0; i < len(s); i++ { - if rune(s[i]) != l.at(i) { - return false - } - } - return true -} - -func (l *lexer) peek() rune { - return l.at(0) -} - -func (l *lexer) next() rune { - x := l.peek() - if x != eof { - l.end++ - } - return x -} - -func (l *lexer) expect(expected rune) error { - r := l.next() - if r != expected { - return &UnexpectedCharacter{ - r: r, - expected: expected, - } - } - return nil -} - -func (l *lexer) peekRune() rune { - if l.lookahead.empty() { - l.lookahead.r, l.lookahead.size = utf8.DecodeRune(l.data[l.end:]) - if l.lookahead.r == utf8.RuneError && l.lookahead.size == 0 { - l.lookahead.r = eof - } - } - return l.lookahead.r -} - -func (l *lexer) nextRune() rune { - r := l.peekRune() - if r != eof { - l.end += l.lookahead.size - l.lookahead.r = 0 - l.lookahead.size = 0 - } - return r -} - -func (l *lexer) ignore() { - if l.empty() { - panic("cannot ignore empty token") - } - l.start = l.end -} - -func (l *lexer) accept() []byte { - if l.empty() { - panic("cannot accept empty token") - } - x := l.data[l.start:l.end] - l.start = l.end - return x -} - -func (l *lexer) expectRune(expected rune) error { - r := l.nextRune() - if r != expected { - return &UnexpectedCharacter{ - r: r, - expected: expected, - } - } - return nil -} - -func (l *lexer) empty() bool { - return l.start == l.end -} - -type InvalidCharacter struct { - r rune -} - -func (e *InvalidCharacter) Error() string { - return fmt.Sprintf("unexpected character '%#U'", e.r) -} - -type UnexpectedCharacter struct { - r rune - expected rune -} - -func (e *UnexpectedCharacter) Error() string { - 
return fmt.Sprintf("expected character '%#U' but got '%#U'", e.expected, e.r) -} - -func (l *lexer) run() error { - for { - err := l.lexExpression() - if err != nil { - return err - } - - // new lines between expressions - r := l.next() - switch r { - case eof: - return nil - case '\n': - l.ignore() - continue - case '\r': - r = l.next() - if r == '\n' { - l.ignore() - continue - } - } - return &InvalidCharacter{r: r} - } -} - -func (l *lexer) lexRequiredNewline() error { - r := l.next() - switch r { - case '\n': - l.ignore() - return nil - case '\r': - r = l.next() - if r == '\n' { - l.ignore() - return nil - } - } - return &InvalidCharacter{r: r} -} - -func (l *lexer) lexExpression() error { - //expression = ws [ comment ] - //expression =/ ws keyval ws [ comment ] - //expression =/ ws table ws [ comment ] - - err := l.lexWhitespace() - if err != nil { - return err - } - - r := l.peek() - - // Line with just whitespace and a comment. We can exit early. - if r == '#' { - return l.lexComment() - } - - // or line with something? - if r == '[' { - // parse table. 
could be either a standard table or an array table - err := l.lexTable() - if err != nil { - return err - } - } else if isUnquotedKeyRune(r) || r == '\'' || r == '"' { - err := l.lexKeyval() - if err != nil { - return err - } - } - - // parse trailing whitespace and comment - - err = l.lexWhitespace() - if err != nil { - return err - } - - r = l.peek() - if r == '#' { - return l.lexComment() - } - - return nil -} - -func (l *lexer) lexKeyval() error { - // key keyval-sep val - //keyval-sep = ws %x3D ws ; = - - err := l.lexKey() - if err != nil { - return err - } - - err = l.lexWhitespace() - if err != nil { - return err - } - - err = l.expect('=') - if err != nil { - return err - } - l.parser.Equal(l.accept()) - - err = l.lexWhitespace() - if err != nil { - return err - } - - return l.lexVal() -} - -func (l *lexer) lexVal() error { - //val = string / boolean / array / inline-table / date-time / float / integer - // string = ml-basic-string / basic-string / ml-literal-string / literal-string - - r := l.peek() - - switch r { - case 't', 'f': - return l.lexBool() - case '\'', '"': - return l.lexString() - case '[': - return l.lexArray() - case '{': - return l.lexInlineTable() - // TODO - default: - return &InvalidCharacter{r: r} - } -} - -func (l *lexer) lexInlineTable() error { - //inline-table = inline-table-open [ inline-table-keyvals ] inline-table-close - // - //inline-table-open = %x7B ws ; { - // inline-table-close = ws %x7D ; } - //inline-table-sep = ws %x2C ws ; , Comma - // - //inline-table-keyvals = keyval [ inline-table-sep inline-table-keyvals ] - - err := l.expect('{') - if err != nil { - panic("inline tables should start with {") - } - l.ignore() - l.parser.InlineTableBegin() - - err = l.lexWhitespace() - if err != nil { - return err - } - - r := l.peek() - if r == '}' { - l.next() - l.ignore() - l.parser.InlineTableEnd() - return nil - } - - err = l.lexKeyval() - if err != nil { - return err - } - - for { - err = l.lexWhitespace() - if err != nil { - 
return err - } - - r := l.peek() - if r == '}' { - l.next() - l.ignore() - l.parser.InlineTableEnd() - return nil - } - - err := l.expect(',') - if err != nil { - return err - } - l.parser.InlineTableSeparator() - l.ignore() - - err = l.lexWhitespace() - if err != nil { - return err - } - - err = l.lexKeyval() - if err != nil { - return err - } - } -} - -func (l *lexer) lexArray() error { - //array = array-open [ array-values ] ws-comment-newline array-close - - err := l.expect('[') - if err != nil { - panic("arrays should start with [") - } - l.ignore() - - l.parser.ArrayBegin() - - err = l.lexWhitespaceCommentNewline() - if err != nil { - return err - } - - r := l.peek() - - if r == ']' { - l.next() - l.ignore() - l.parser.ArrayEnd() - return nil - } - - err = l.lexVal() - if err != nil { - return err - } - - for { - err = l.lexWhitespaceCommentNewline() - if err != nil { - return err - } - - r := l.peek() - - if r == ']' { - l.next() - l.ignore() - l.parser.ArrayEnd() - return nil - } - - err := l.expect(',') - if err != nil { - return err - } - l.parser.ArraySeparator() - l.ignore() - - err = l.lexWhitespaceCommentNewline() - if err != nil { - return err - } - - err = l.lexVal() - if err != nil { - return err - } - } -} - -func (l *lexer) lexWhitespaceCommentNewline() error { - // ws-comment-newline = *( wschar / ([ comment ] newline) ) - - for { - if isWhitespace(l.peek()) { - err := l.lexWhitespace() - if err != nil { - return err - } - } - if l.peek() == '#' { - err := l.lexComment() - if err != nil { - return err - } - } - r := l.peek() - if r != '\n' && r != '\r' { - return nil - } - err := l.lexRequiredNewline() - if err != nil { - return err - } - } -} - -func (l *lexer) lexString() error { - r := l.peek() - - if r == '\'' { - if l.follows("'''") { - // TODO ml-literal-string - panic("TODO") - } else { - return l.lexLiteralString() - } - } else if r == '"' { - if l.follows("\"\"\"") { - // TODO ml-basic-string - panic("TODO") - } else { - return 
l.lexBasicString() - } - } else { - panic("string should start with ' or \"") - } -} - -func (l *lexer) lexBool() error { - r := l.peek() - - if r == 't' { - l.next() - err := l.expect('r') - if err != nil { - return err - } - err = l.expect('u') - if err != nil { - return err - } - err = l.expect('e') - if err != nil { - return err - } - } else if r == 'f' { - l.next() - err := l.expect('a') - if err != nil { - return err - } - err = l.expect('l') - if err != nil { - return err - } - err = l.expect('s') - if err != nil { - return err - } - err = l.expect('e') - if err != nil { - return err - } - } else { - return &InvalidCharacter{r: r} - } - - l.parser.Boolean(l.accept()) - return nil -} - -func (l *lexer) lexKey() error { - // simple-key / dotted-key - // dotted-key = simple-key 1*( dot-sep simple-key ) - // dot-sep = ws %x2E ws - - for { - err := l.lexSimpleKey() - if err != nil { - return err - } - - err = l.lexWhitespace() - if err != nil { - return err - } - - r := l.peek() - if r != '.' 
{ - break - } - - l.next() - l.parser.Dot(l.accept()) - - err = l.lexWhitespace() - if err != nil { - return err - } - } - - err := l.lexWhitespace() - if err != nil { - return err - } - - return nil -} - -func isUnquotedKeyRune(r rune) bool { - return (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' || r == '_' -} - -func (l *lexer) lexSimpleKey() error { - // simple-key = quoted-key / unquoted-key - // quoted-key = basic-string / literal-string - // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ - // basic-string = quotation-mark *basic-char quotation-mark - // literal-string = apostrophe *literal-char apostrophe - - r := l.peek() - - switch r { - case '\'': - return l.lexLiteralString() - case '"': - return l.lexBasicString() - default: - return l.lexUnquotedKey() - } -} - -func (l *lexer) lexUnquotedKey() error { - // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ - - r := l.next() - - if !isUnquotedKeyRune(r) { - return &InvalidCharacter{r: r} - } - - for { - r := l.peek() - if !isUnquotedKeyRune(r) { - break - } - l.next() - } - l.parser.UnquotedKey(l.accept()) - return nil -} - -func (l *lexer) lexComment() error { - if err := l.expect('#'); err != nil { - return err - } - - for { - r := l.peek() - if r == eof || r == '\n' { - l.parser.Comment(l.accept()) - return nil - } - l.next() - } -} - -func isWhitespace(r rune) bool { - return r == 0x20 || r == 0x09 -} - -type InvalidUnicodeError struct { - r rune -} - -func (e *InvalidUnicodeError) Error() string { - return fmt.Sprintf("invalid unicode: %#U", e.r) -} - -func (l *lexer) lexWhitespace() error { - for { - r := l.peek() - if isWhitespace(r) { - l.next() - } else { - if !l.empty() { - l.parser.Whitespace(l.accept()) - } - return nil - } - } -} - -func isNonAsciiChar(r rune) bool { - return (r >= 0x80 && r <= 0xD7FF) || (r >= 0xE000 && r <= 0x10FFFF) -} - -func isLiteralChar(r rune) bool { - return r == 0x09 || (r 
>= 0x20 && r <= 0x26) || (r >= 0x28 && r <= 0x7E) || isNonAsciiChar(r) -} - -func (l *lexer) lexLiteralString() error { - // literal-string = apostrophe *literal-char apostrophe - // literal-char = %x09 / %x20-26 / %x28-7E / non-ascii - // non-ascii = %x80-D7FF / %xE000-10FFFF - - err := l.expect('\'') - if err != nil { - return err - } - l.ignore() - - for { - r := l.peekRune() - if r == '\'' { - l.parser.LiteralString(l.accept()) - l.nextRune() - l.ignore() - return nil - } - if !isLiteralChar(r) { - return &InvalidCharacter{r: r} - } - l.nextRune() - } -} - -func isBasicStringChar(r rune) bool { - return r == ' ' || r == 0x21 || r >= 0x23 && r <= 0x5B || r >= 0x5D && r <= 0x7E || isNonAsciiChar(r) -} - -func isEscapeChar(r rune) bool { - return r == '"' || r == '\\' || r == 'b' || r == 'f' || r == 'n' || r == 'r' || r == 't' -} - -func isHex(r rune) bool { - return (r >= '0' && r <= '9') || (r >= 'A' && r <= 'F') -} - -func (l *lexer) lexBasicString() error { - // basic-string = quotation-mark *basic-char quotation-mark - // basic-char = basic-unescaped / escaped - // basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii - // escaped = escape escape-seq-char - //escape = %x5C ; \ - //escape-seq-char = %x22 ; " quotation mark U+0022 - //escape-seq-char =/ %x5C ; \ reverse solidus U+005C - //escape-seq-char =/ %x62 ; b backspace U+0008 - //escape-seq-char =/ %x66 ; f form feed U+000C - //escape-seq-char =/ %x6E ; n line feed U+000A - //escape-seq-char =/ %x72 ; r carriage return U+000D - //escape-seq-char =/ %x74 ; t tab U+0009 - //escape-seq-char =/ %x75 4HEXDIG ; uXXXX U+XXXX - //escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX U+XXXXXXXX - // HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" - - err := l.expect('"') - if err != nil { - return err - } - l.ignore() - - for { - r := l.peekRune() - - if r == '"' { - l.parser.BasicString(l.accept()) - l.nextRune() - l.ignore() - return nil - } - - if r == '\\' { - l.nextRune() - r := l.peekRune() - if 
isEscapeChar(r) { - l.nextRune() - continue - } - - if r == 'u' { - l.nextRune() - for i := 0; i < 4; i++ { - r := l.nextRune() - if !isHex(r) { - return &InvalidCharacter{r: r} - } - } - continue - } - - if r == 'U' { - l.nextRune() - for i := 0; i < 8; i++ { - r := l.nextRune() - if !isHex(r) { - return &InvalidCharacter{r: r} - } - } - continue - } - - return &InvalidCharacter{r: r} - } - - if isBasicStringChar(r) { - l.nextRune() - continue - } - } -} - -func (l *lexer) lexTable() error { - //;; Table - // - //table = std-table / array-table - // - //;; Standard Table - // - //std-table = std-table-open key std-table-close - // - //std-table-open = %x5B ws ; [ Left square bracket - //std-table-close = ws %x5D ; ] Right square bracket - // - //;; Array Table - // - //array-table = array-table-open key array-table-close - // - //array-table-open = %x5B.5B ws ; [[ Double left square bracket - //array-table-close = ws %x5D.5D ; ]] Double right square bracket - - if l.follows("[[") { - return l.lexArrayTable() - } - - return l.lexStandardTable() -} - -func (l *lexer) lexArrayTable() error { - //;; Array Table - // - //array-table = array-table-open key array-table-close - // - //array-table-open = %x5B.5B ws ; [[ Double left square bracket - //array-table-close = ws %x5D.5D ; ]] Double right square bracket - err := l.expect('[') - if err != nil { - return err - } - err = l.expect('[') - if err != nil { - return err - } - l.ignore() - l.parser.ArrayTableBegin() - - err = l.lexWhitespace() - if err != nil { - return err - } - - err = l.lexKey() - if err != nil { - return err - } - - err = l.lexWhitespace() - if err != nil { - return err - } - err = l.expect(']') - if err != nil { - return err - } - err = l.expect(']') - if err != nil { - return err - } - l.ignore() - l.parser.ArrayTableEnd() - return nil -} - -func (l *lexer) lexStandardTable() error { - //;; Standard Table - // - //std-table = std-table-open key std-table-close - // - //std-table-open = %x5B ws ; [ 
Left square bracket - //std-table-close = ws %x5D ; ] Right square bracket - - err := l.expect('[') - if err != nil { - panic("std-table should start with [") - } - l.ignore() - l.parser.StandardTableBegin() - - err = l.lexWhitespace() - if err != nil { - return err - } - - err = l.lexKey() - if err != nil { - return err - } - - err = l.lexWhitespace() - if err != nil { - return err - } - err = l.expect(']') - if err != nil { - return err - } - l.ignore() - l.parser.StandardTableEnd() - return nil -} diff --git a/toml_test.go b/toml_test.go index 66dc41a..446b60a 100644 --- a/toml_test.go +++ b/toml_test.go @@ -4,7 +4,7 @@ import ( "fmt" "testing" - "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) var inputs = []string{ @@ -43,9 +43,14 @@ func TestParse(t *testing.T) { for i, data := range inputs { t.Run(fmt.Sprintf("example %d", i), func(t *testing.T) { fmt.Printf("input:\n\t`%s`\n", data) - doc, err := Parse([]byte(data)) - assert.NoError(t, err) - fmt.Println(doc) + b := []byte(data) + var token []byte + var err error + for len(b) > 0 { + token, b, err = scan(b) + require.NoError(t, err) + fmt.Printf("token => '%s'\n", string(token)) + } }) } } @@ -72,17 +77,18 @@ func (n noopParser) Dot(b []byte) {} func (n noopParser) Boolean(b []byte) {} func (n noopParser) Equal(b []byte) {} -func BenchmarkParseAll(b *testing.B) { - b.ReportAllocs() - - for i := 0; i < b.N; i++ { - for _, data := range inputs { - p := noopParser{} - l := lexer{parser: &p, data: []byte(data)} - err := l.run() - if err != nil { - b.Fatalf("error: %s", err) - } - } - } -} +// +//func BenchmarkParseAll(b *testing.B) { +// b.ReportAllocs() +// +// for i := 0; i < b.N; i++ { +// for _, data := range inputs { +// p := noopParser{} +// l := lexer{parser: &p, data: []byte(data)} +// err := l.run() +// if err != nil { +// b.Fatalf("error: %s", err) +// } +// } +// } +//}