Files
go-toml/query/lexer.go
T
Thomas Pelletier 23f644976a Move query to its own subpackage (#152)
Move all the query system to its own package. The reason is to
avoid it to rely on unexported methods and structures, and move
it out of the main package since this is really not a core
feature. It is still tied to the toml.TomlTree and toml.Position
structures for now.

* Move query mechanism to its own subpackage
* Rename QueryResult to Result to avoid stutter
* Add query.CompileAndExecute

Fixes #116
2017-05-07 17:14:13 -07:00

358 lines
6.3 KiB
Go

// TOML JSONPath lexer.
//
// Written using the principles developed by Rob Pike in
// http://www.youtube.com/watch?v=HxaD_trXwRE
package query
import (
"fmt"
"strconv"
"strings"
"unicode/utf8"
"github.com/pelletier/go-toml"
)
// Lexer state function
type queryLexStateFn func() queryLexStateFn
// Lexer definition
type queryLexer struct {
input string
start int
pos int
width int
tokens chan token
depth int
line int
col int
stringTerm string
}
func (l *queryLexer) run() {
for state := l.lexVoid; state != nil; {
state = state()
}
close(l.tokens)
}
func (l *queryLexer) nextStart() {
// iterate by runes (utf8 characters)
// search for newlines and advance line/col counts
for i := l.start; i < l.pos; {
r, width := utf8.DecodeRuneInString(l.input[i:])
if r == '\n' {
l.line++
l.col = 1
} else {
l.col++
}
i += width
}
// advance start position to next token
l.start = l.pos
}
func (l *queryLexer) emit(t tokenType) {
l.tokens <- token{
Position: toml.Position{Line:l.line, Col:l.col},
typ: t,
val: l.input[l.start:l.pos],
}
l.nextStart()
}
func (l *queryLexer) emitWithValue(t tokenType, value string) {
l.tokens <- token{
Position: toml.Position{Line:l.line, Col:l.col},
typ: t,
val: value,
}
l.nextStart()
}
func (l *queryLexer) next() rune {
if l.pos >= len(l.input) {
l.width = 0
return eof
}
var r rune
r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
l.pos += l.width
return r
}
func (l *queryLexer) ignore() {
l.nextStart()
}
func (l *queryLexer) backup() {
l.pos -= l.width
}
func (l *queryLexer) errorf(format string, args ...interface{}) queryLexStateFn {
l.tokens <- token{
Position: toml.Position{Line:l.line, Col:l.col},
typ: tokenError,
val: fmt.Sprintf(format, args...),
}
return nil
}
func (l *queryLexer) peek() rune {
r := l.next()
l.backup()
return r
}
func (l *queryLexer) accept(valid string) bool {
if strings.ContainsRune(valid, l.next()) {
return true
}
l.backup()
return false
}
func (l *queryLexer) follow(next string) bool {
return strings.HasPrefix(l.input[l.pos:], next)
}
func (l *queryLexer) lexVoid() queryLexStateFn {
for {
next := l.peek()
switch next {
case '$':
l.pos++
l.emit(tokenDollar)
continue
case '.':
if l.follow("..") {
l.pos += 2
l.emit(tokenDotDot)
} else {
l.pos++
l.emit(tokenDot)
}
continue
case '[':
l.pos++
l.emit(tokenLeftBracket)
continue
case ']':
l.pos++
l.emit(tokenRightBracket)
continue
case ',':
l.pos++
l.emit(tokenComma)
continue
case '*':
l.pos++
l.emit(tokenStar)
continue
case '(':
l.pos++
l.emit(tokenLeftParen)
continue
case ')':
l.pos++
l.emit(tokenRightParen)
continue
case '?':
l.pos++
l.emit(tokenQuestion)
continue
case ':':
l.pos++
l.emit(tokenColon)
continue
case '\'':
l.ignore()
l.stringTerm = string(next)
return l.lexString
case '"':
l.ignore()
l.stringTerm = string(next)
return l.lexString
}
if isSpace(next) {
l.next()
l.ignore()
continue
}
if isAlphanumeric(next) {
return l.lexKey
}
if next == '+' || next == '-' || isDigit(next) {
return l.lexNumber
}
if l.next() == eof {
break
}
return l.errorf("unexpected char: '%v'", next)
}
l.emit(tokenEOF)
return nil
}
func (l *queryLexer) lexKey() queryLexStateFn {
for {
next := l.peek()
if !isAlphanumeric(next) {
l.emit(tokenKey)
return l.lexVoid
}
if l.next() == eof {
break
}
}
l.emit(tokenEOF)
return nil
}
func (l *queryLexer) lexString() queryLexStateFn {
l.pos++
l.ignore()
growingString := ""
for {
if l.follow(l.stringTerm) {
l.emitWithValue(tokenString, growingString)
l.pos++
l.ignore()
return l.lexVoid
}
if l.follow("\\\"") {
l.pos++
growingString += "\""
} else if l.follow("\\'") {
l.pos++
growingString += "'"
} else if l.follow("\\n") {
l.pos++
growingString += "\n"
} else if l.follow("\\b") {
l.pos++
growingString += "\b"
} else if l.follow("\\f") {
l.pos++
growingString += "\f"
} else if l.follow("\\/") {
l.pos++
growingString += "/"
} else if l.follow("\\t") {
l.pos++
growingString += "\t"
} else if l.follow("\\r") {
l.pos++
growingString += "\r"
} else if l.follow("\\\\") {
l.pos++
growingString += "\\"
} else if l.follow("\\u") {
l.pos += 2
code := ""
for i := 0; i < 4; i++ {
c := l.peek()
l.pos++
if !isHexDigit(c) {
return l.errorf("unfinished unicode escape")
}
code = code + string(c)
}
l.pos--
intcode, err := strconv.ParseInt(code, 16, 32)
if err != nil {
return l.errorf("invalid unicode escape: \\u" + code)
}
growingString += string(rune(intcode))
} else if l.follow("\\U") {
l.pos += 2
code := ""
for i := 0; i < 8; i++ {
c := l.peek()
l.pos++
if !isHexDigit(c) {
return l.errorf("unfinished unicode escape")
}
code = code + string(c)
}
l.pos--
intcode, err := strconv.ParseInt(code, 16, 32)
if err != nil {
return l.errorf("invalid unicode escape: \\u" + code)
}
growingString += string(rune(intcode))
} else if l.follow("\\") {
l.pos++
return l.errorf("invalid escape sequence: \\" + string(l.peek()))
} else {
growingString += string(l.peek())
}
if l.next() == eof {
break
}
}
return l.errorf("unclosed string")
}
func (l *queryLexer) lexNumber() queryLexStateFn {
l.ignore()
if !l.accept("+") {
l.accept("-")
}
pointSeen := false
digitSeen := false
for {
next := l.next()
if next == '.' {
if pointSeen {
return l.errorf("cannot have two dots in one float")
}
if !isDigit(l.peek()) {
return l.errorf("float cannot end with a dot")
}
pointSeen = true
} else if isDigit(next) {
digitSeen = true
} else {
l.backup()
break
}
if pointSeen && !digitSeen {
return l.errorf("cannot start float with a dot")
}
}
if !digitSeen {
return l.errorf("no digit in that number")
}
if pointSeen {
l.emit(tokenFloat)
} else {
l.emit(tokenInteger)
}
return l.lexVoid
}
// Entry point
func lexQuery(input string) chan token {
l := &queryLexer{
input: input,
tokens: make(chan token),
line: 1,
col: 1,
}
go l.run()
return l.tokens
}