6e26017b00
The only real change in this commit is that MaxInt is made private. Everything else should be gofmt'ing, docs and cleanup of lint.
611 lines
11 KiB
Go
611 lines
11 KiB
Go
// TOML lexer.
|
|
//
|
|
// Written using the principles developed by Rob Pike in
|
|
// http://www.youtube.com/watch?v=HxaD_trXwRE
|
|
|
|
package toml
|
|
|
|
import (
|
|
"fmt"
|
|
"github.com/pelletier/go-buffruneio"
|
|
"io"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
)
|
|
|
|
// dateRegexp matches a datetime literal at the start of the upcoming input.
// It is compiled in init and consulted by lexRvalue.
var dateRegexp *regexp.Regexp
|
|
|
|
// tomlLexStateFn is one state of the lexer's state machine. Calling it
// performs that state's work and returns the next state to run; a nil
// return stops the machine (see run).
type tomlLexStateFn func() tomlLexStateFn
|
|
|
|
// tomlLexer holds the lexer state: the input being scanned, the runes of the
// token currently being built, and the channel on which tokens are delivered.
type tomlLexer struct {
	input         *buffruneio.Reader // Textual source
	buffer        []rune             // Runes composing the current token
	tokens        chan token         // Emitted tokens; closed by run when lexing stops
	depth         int                // Incremented on '[' and decremented on ']' in lexRvalue
	line          int                // Line reported in the next emitted token's Position
	col           int                // Column reported in the next emitted token's Position
	endbufferLine int                // Line of the read cursor, advanced by read on '\n'
	endbufferCol  int                // Column of the read cursor, advanced by read
}
|
|
|
|
// Basic read operations on input
|
|
|
|
func (l *tomlLexer) read() rune {
|
|
r, err := l.input.ReadRune()
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
if r == '\n' {
|
|
l.endbufferLine++
|
|
l.endbufferCol = 1
|
|
} else {
|
|
l.endbufferCol++
|
|
}
|
|
return r
|
|
}
|
|
|
|
func (l *tomlLexer) next() rune {
|
|
r := l.read()
|
|
|
|
if r != eof {
|
|
l.buffer = append(l.buffer, r)
|
|
}
|
|
return r
|
|
}
|
|
|
|
func (l *tomlLexer) ignore() {
|
|
l.buffer = make([]rune, 0)
|
|
l.line = l.endbufferLine
|
|
l.col = l.endbufferCol
|
|
}
|
|
|
|
// skip consumes one rune and immediately discards it, so it never becomes
// part of a token.
func (l *tomlLexer) skip() {
	l.next()
	l.ignore()
}
|
|
|
|
func (l *tomlLexer) fastForward(n int) {
|
|
for i := 0; i < n; i++ {
|
|
l.next()
|
|
}
|
|
}
|
|
|
|
func (l *tomlLexer) emitWithValue(t tokenType, value string) {
|
|
l.tokens <- token{
|
|
Position: Position{l.line, l.col},
|
|
typ: t,
|
|
val: value,
|
|
}
|
|
l.ignore()
|
|
}
|
|
|
|
func (l *tomlLexer) emit(t tokenType) {
|
|
l.emitWithValue(t, string(l.buffer))
|
|
}
|
|
|
|
func (l *tomlLexer) peek() rune {
|
|
r, err := l.input.ReadRune()
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
l.input.UnreadRune()
|
|
return r
|
|
}
|
|
|
|
func (l *tomlLexer) follow(next string) bool {
|
|
for _, expectedRune := range next {
|
|
r, err := l.input.ReadRune()
|
|
defer l.input.UnreadRune()
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
if expectedRune != r {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
// Error management
|
|
|
|
func (l *tomlLexer) errorf(format string, args ...interface{}) tomlLexStateFn {
|
|
l.tokens <- token{
|
|
Position: Position{l.line, l.col},
|
|
typ: tokenError,
|
|
val: fmt.Sprintf(format, args...),
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// State functions
|
|
|
|
func (l *tomlLexer) lexVoid() tomlLexStateFn {
|
|
for {
|
|
next := l.peek()
|
|
switch next {
|
|
case '[':
|
|
return l.lexKeyGroup
|
|
case '#':
|
|
return l.lexComment
|
|
case '=':
|
|
return l.lexEqual
|
|
case '\r':
|
|
fallthrough
|
|
case '\n':
|
|
l.skip()
|
|
continue
|
|
}
|
|
|
|
if isSpace(next) {
|
|
l.skip()
|
|
}
|
|
|
|
if l.depth > 0 {
|
|
return l.lexRvalue
|
|
}
|
|
|
|
if isKeyStartChar(next) {
|
|
return l.lexKey
|
|
}
|
|
|
|
if next == eof {
|
|
l.next()
|
|
break
|
|
}
|
|
}
|
|
|
|
l.emit(tokenEOF)
|
|
return nil
|
|
}
|
|
|
|
func (l *tomlLexer) lexRvalue() tomlLexStateFn {
|
|
for {
|
|
next := l.peek()
|
|
switch next {
|
|
case '.':
|
|
return l.errorf("cannot start float with a dot")
|
|
case '=':
|
|
return l.lexEqual
|
|
case '[':
|
|
l.depth++
|
|
return l.lexLeftBracket
|
|
case ']':
|
|
l.depth--
|
|
return l.lexRightBracket
|
|
case '{':
|
|
return l.lexLeftCurlyBrace
|
|
case '}':
|
|
return l.lexRightCurlyBrace
|
|
case '#':
|
|
return l.lexComment
|
|
case '"':
|
|
return l.lexString
|
|
case '\'':
|
|
return l.lexLiteralString
|
|
case ',':
|
|
return l.lexComma
|
|
case '\r':
|
|
fallthrough
|
|
case '\n':
|
|
l.skip()
|
|
if l.depth == 0 {
|
|
return l.lexVoid
|
|
}
|
|
return l.lexRvalue
|
|
case '_':
|
|
return l.errorf("cannot start number with underscore")
|
|
}
|
|
|
|
if l.follow("true") {
|
|
return l.lexTrue
|
|
}
|
|
|
|
if l.follow("false") {
|
|
return l.lexFalse
|
|
}
|
|
|
|
if isSpace(next) {
|
|
l.skip()
|
|
continue
|
|
}
|
|
|
|
if next == eof {
|
|
l.next()
|
|
break
|
|
}
|
|
|
|
possibleDate := string(l.input.Peek(35))
|
|
dateMatch := dateRegexp.FindString(possibleDate)
|
|
if dateMatch != "" {
|
|
l.fastForward(len(dateMatch))
|
|
return l.lexDate
|
|
}
|
|
|
|
if next == '+' || next == '-' || isDigit(next) {
|
|
return l.lexNumber
|
|
}
|
|
|
|
if isAlphanumeric(next) {
|
|
return l.lexKey
|
|
}
|
|
|
|
}
|
|
|
|
l.emit(tokenEOF)
|
|
return nil
|
|
}
|
|
|
|
// lexLeftCurlyBrace consumes one rune (lexRvalue dispatches here on '{'),
// emits it as tokenLeftCurlyBrace and continues with lexRvalue.
func (l *tomlLexer) lexLeftCurlyBrace() tomlLexStateFn {
	l.next()
	l.emit(tokenLeftCurlyBrace)
	return l.lexRvalue
}
|
|
|
|
// lexRightCurlyBrace consumes one rune (lexRvalue dispatches here on '}'),
// emits it as tokenRightCurlyBrace and continues with lexRvalue.
func (l *tomlLexer) lexRightCurlyBrace() tomlLexStateFn {
	l.next()
	l.emit(tokenRightCurlyBrace)
	return l.lexRvalue
}
|
|
|
|
// lexDate emits the buffered text as a tokenDate and continues with
// lexRvalue. It consumes nothing itself: lexRvalue has already
// fast-forwarded over the matched datetime before returning this state.
func (l *tomlLexer) lexDate() tomlLexStateFn {
	l.emit(tokenDate)
	return l.lexRvalue
}
|
|
|
|
func (l *tomlLexer) lexTrue() tomlLexStateFn {
|
|
l.fastForward(4)
|
|
l.emit(tokenTrue)
|
|
return l.lexRvalue
|
|
}
|
|
|
|
func (l *tomlLexer) lexFalse() tomlLexStateFn {
|
|
l.fastForward(5)
|
|
l.emit(tokenFalse)
|
|
return l.lexRvalue
|
|
}
|
|
|
|
// lexEqual consumes one rune (callers dispatch here on '='), emits it as
// tokenEqual and continues with lexRvalue to read the assigned value.
func (l *tomlLexer) lexEqual() tomlLexStateFn {
	l.next()
	l.emit(tokenEqual)
	return l.lexRvalue
}
|
|
|
|
// lexComma consumes one rune (lexRvalue dispatches here on ','), emits it as
// tokenComma and continues with lexRvalue.
func (l *tomlLexer) lexComma() tomlLexStateFn {
	l.next()
	l.emit(tokenComma)
	return l.lexRvalue
}
|
|
|
|
func (l *tomlLexer) lexKey() tomlLexStateFn {
|
|
inQuotes := false
|
|
for r := l.peek(); isKeyChar(r) || r == '\n' || r == '\r'; r = l.peek() {
|
|
if r == '"' {
|
|
inQuotes = !inQuotes
|
|
} else if r == '\n' {
|
|
return l.errorf("keys cannot contain new lines")
|
|
} else if isSpace(r) && !inQuotes {
|
|
break
|
|
} else if !isValidBareChar(r) && !inQuotes {
|
|
return l.errorf("keys cannot contain %c character", r)
|
|
}
|
|
l.next()
|
|
}
|
|
l.emit(tokenKey)
|
|
return l.lexVoid
|
|
}
|
|
|
|
func (l *tomlLexer) lexComment() tomlLexStateFn {
|
|
for next := l.peek(); next != '\n' && next != eof; next = l.peek() {
|
|
if next == '\r' && l.follow("\r\n") {
|
|
break
|
|
}
|
|
l.next()
|
|
}
|
|
l.ignore()
|
|
return l.lexVoid
|
|
}
|
|
|
|
// lexLeftBracket consumes one rune (lexRvalue dispatches here on '[' after
// incrementing depth), emits it as tokenLeftBracket and continues with
// lexRvalue.
func (l *tomlLexer) lexLeftBracket() tomlLexStateFn {
	l.next()
	l.emit(tokenLeftBracket)
	return l.lexRvalue
}
|
|
|
|
func (l *tomlLexer) lexLiteralString() tomlLexStateFn {
|
|
l.skip()
|
|
growingString := ""
|
|
|
|
// handle special case for triple-quote
|
|
terminator := "'"
|
|
if l.follow("''") {
|
|
l.skip()
|
|
l.skip()
|
|
terminator = "'''"
|
|
|
|
// special case: discard leading newline
|
|
if l.follow("\r\n") {
|
|
l.skip()
|
|
l.skip()
|
|
} else if l.peek() == '\n' {
|
|
l.skip()
|
|
}
|
|
}
|
|
|
|
// find end of string
|
|
for {
|
|
if l.follow(terminator) {
|
|
l.emitWithValue(tokenString, growingString)
|
|
l.fastForward(len(terminator))
|
|
l.ignore()
|
|
return l.lexRvalue
|
|
}
|
|
|
|
next := l.peek()
|
|
if next == eof {
|
|
break
|
|
}
|
|
growingString += string(l.next())
|
|
}
|
|
|
|
return l.errorf("unclosed string")
|
|
}
|
|
|
|
func (l *tomlLexer) lexString() tomlLexStateFn {
|
|
l.skip()
|
|
growingString := ""
|
|
|
|
// handle special case for triple-quote
|
|
terminator := "\""
|
|
if l.follow("\"\"") {
|
|
l.skip()
|
|
l.skip()
|
|
terminator = "\"\"\""
|
|
|
|
// special case: discard leading newline
|
|
if l.follow("\r\n") {
|
|
l.skip()
|
|
l.skip()
|
|
} else if l.peek() == '\n' {
|
|
l.skip()
|
|
}
|
|
}
|
|
|
|
for {
|
|
if l.follow(terminator) {
|
|
l.emitWithValue(tokenString, growingString)
|
|
l.fastForward(len(terminator))
|
|
l.ignore()
|
|
return l.lexRvalue
|
|
}
|
|
|
|
if l.follow("\\") {
|
|
l.next()
|
|
switch l.peek() {
|
|
case '\r':
|
|
fallthrough
|
|
case '\n':
|
|
fallthrough
|
|
case '\t':
|
|
fallthrough
|
|
case ' ':
|
|
// skip all whitespace chars following backslash
|
|
for strings.ContainsRune("\r\n\t ", l.peek()) {
|
|
l.next()
|
|
}
|
|
case '"':
|
|
growingString += "\""
|
|
l.next()
|
|
case 'n':
|
|
growingString += "\n"
|
|
l.next()
|
|
case 'b':
|
|
growingString += "\b"
|
|
l.next()
|
|
case 'f':
|
|
growingString += "\f"
|
|
l.next()
|
|
case '/':
|
|
growingString += "/"
|
|
l.next()
|
|
case 't':
|
|
growingString += "\t"
|
|
l.next()
|
|
case 'r':
|
|
growingString += "\r"
|
|
l.next()
|
|
case '\\':
|
|
growingString += "\\"
|
|
l.next()
|
|
case 'u':
|
|
l.next()
|
|
code := ""
|
|
for i := 0; i < 4; i++ {
|
|
c := l.peek()
|
|
if !isHexDigit(c) {
|
|
return l.errorf("unfinished unicode escape")
|
|
}
|
|
l.next()
|
|
code = code + string(c)
|
|
}
|
|
intcode, err := strconv.ParseInt(code, 16, 32)
|
|
if err != nil {
|
|
return l.errorf("invalid unicode escape: \\u" + code)
|
|
}
|
|
growingString += string(rune(intcode))
|
|
case 'U':
|
|
l.next()
|
|
code := ""
|
|
for i := 0; i < 8; i++ {
|
|
c := l.peek()
|
|
if !isHexDigit(c) {
|
|
return l.errorf("unfinished unicode escape")
|
|
}
|
|
l.next()
|
|
code = code + string(c)
|
|
}
|
|
intcode, err := strconv.ParseInt(code, 16, 64)
|
|
if err != nil {
|
|
return l.errorf("invalid unicode escape: \\U" + code)
|
|
}
|
|
growingString += string(rune(intcode))
|
|
default:
|
|
return l.errorf("invalid escape sequence: \\" + string(l.peek()))
|
|
}
|
|
} else {
|
|
r := l.peek()
|
|
if 0x00 <= r && r <= 0x1F {
|
|
return l.errorf("unescaped control character %U", r)
|
|
}
|
|
l.next()
|
|
growingString += string(r)
|
|
}
|
|
|
|
if l.peek() == eof {
|
|
break
|
|
}
|
|
}
|
|
|
|
return l.errorf("unclosed string")
|
|
}
|
|
|
|
func (l *tomlLexer) lexKeyGroup() tomlLexStateFn {
|
|
l.next()
|
|
|
|
if l.peek() == '[' {
|
|
// token '[[' signifies an array of anonymous key groups
|
|
l.next()
|
|
l.emit(tokenDoubleLeftBracket)
|
|
return l.lexInsideKeyGroupArray
|
|
}
|
|
// vanilla key group
|
|
l.emit(tokenLeftBracket)
|
|
return l.lexInsideKeyGroup
|
|
}
|
|
|
|
func (l *tomlLexer) lexInsideKeyGroupArray() tomlLexStateFn {
|
|
for r := l.peek(); r != eof; r = l.peek() {
|
|
switch r {
|
|
case ']':
|
|
if len(l.buffer) > 0 {
|
|
l.emit(tokenKeyGroupArray)
|
|
}
|
|
l.next()
|
|
if l.peek() != ']' {
|
|
break
|
|
}
|
|
l.next()
|
|
l.emit(tokenDoubleRightBracket)
|
|
return l.lexVoid
|
|
case '[':
|
|
return l.errorf("group name cannot contain ']'")
|
|
default:
|
|
l.next()
|
|
}
|
|
}
|
|
return l.errorf("unclosed key group array")
|
|
}
|
|
|
|
func (l *tomlLexer) lexInsideKeyGroup() tomlLexStateFn {
|
|
for r := l.peek(); r != eof; r = l.peek() {
|
|
switch r {
|
|
case ']':
|
|
if len(l.buffer) > 0 {
|
|
l.emit(tokenKeyGroup)
|
|
}
|
|
l.next()
|
|
l.emit(tokenRightBracket)
|
|
return l.lexVoid
|
|
case '[':
|
|
return l.errorf("group name cannot contain ']'")
|
|
default:
|
|
l.next()
|
|
}
|
|
}
|
|
return l.errorf("unclosed key group")
|
|
}
|
|
|
|
// lexRightBracket consumes one rune (lexRvalue dispatches here on ']' after
// decrementing depth), emits it as tokenRightBracket and continues with
// lexRvalue.
func (l *tomlLexer) lexRightBracket() tomlLexStateFn {
	l.next()
	l.emit(tokenRightBracket)
	return l.lexRvalue
}
|
|
|
|
func (l *tomlLexer) lexNumber() tomlLexStateFn {
|
|
r := l.peek()
|
|
if r == '+' || r == '-' {
|
|
l.next()
|
|
}
|
|
pointSeen := false
|
|
expSeen := false
|
|
digitSeen := false
|
|
for {
|
|
next := l.peek()
|
|
if next == '.' {
|
|
if pointSeen {
|
|
return l.errorf("cannot have two dots in one float")
|
|
}
|
|
l.next()
|
|
if !isDigit(l.peek()) {
|
|
return l.errorf("float cannot end with a dot")
|
|
}
|
|
pointSeen = true
|
|
} else if next == 'e' || next == 'E' {
|
|
expSeen = true
|
|
l.next()
|
|
r := l.peek()
|
|
if r == '+' || r == '-' {
|
|
l.next()
|
|
}
|
|
} else if isDigit(next) {
|
|
digitSeen = true
|
|
l.next()
|
|
} else if next == '_' {
|
|
l.next()
|
|
} else {
|
|
break
|
|
}
|
|
if pointSeen && !digitSeen {
|
|
return l.errorf("cannot start float with a dot")
|
|
}
|
|
}
|
|
|
|
if !digitSeen {
|
|
return l.errorf("no digit in that number")
|
|
}
|
|
if pointSeen || expSeen {
|
|
l.emit(tokenFloat)
|
|
} else {
|
|
l.emit(tokenInteger)
|
|
}
|
|
return l.lexRvalue
|
|
}
|
|
|
|
func (l *tomlLexer) run() {
|
|
for state := l.lexVoid; state != nil; {
|
|
state = state()
|
|
}
|
|
close(l.tokens)
|
|
}
|
|
|
|
// init compiles dateRegexp. The pattern is anchored at the start of the
// text and matches a date, a 'T', a time with optional fractional seconds,
// and a mandatory offset ('Z' or +/-hh:mm).
func init() {
	dateRegexp = regexp.MustCompile("^\\d{1,4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(\\.\\d{1,9})?(Z|[+-]\\d{2}:\\d{2})")
}
|
|
|
|
// Entry point
|
|
func lexToml(input io.Reader) chan token {
|
|
bufferedInput := buffruneio.NewReader(input)
|
|
l := &tomlLexer{
|
|
input: bufferedInput,
|
|
tokens: make(chan token),
|
|
line: 1,
|
|
col: 1,
|
|
endbufferLine: 1,
|
|
endbufferCol: 1,
|
|
}
|
|
go l.run()
|
|
return l.tokens
|
|
}
|