Move query to its own subpackage (#152)

Move all the query system to its own package. The reason is to
avoid it to rely on unexported methods and structures, and move
it out of the main package since this is really not a core
feature. It is still tied to the toml.TomlTree and toml.Position
structures for now.

* Move query mechanism to its own subpackage
* Rename QueryResult to Result to avoid stutter
* Add query.CompileAndExecute

Fixes #116
This commit is contained in:
Thomas Pelletier
2017-05-07 17:14:13 -07:00
committed by GitHub
parent 64bc956d5e
commit 23f644976a
17 changed files with 665 additions and 532 deletions
+357
View File
@@ -0,0 +1,357 @@
// TOML JSONPath lexer.
//
// Written using the principles developed by Rob Pike in
// http://www.youtube.com/watch?v=HxaD_trXwRE
package query
import (
"fmt"
"strconv"
"strings"
"unicode/utf8"
"github.com/pelletier/go-toml"
)
// Lexer state function
type queryLexStateFn func() queryLexStateFn
// Lexer definition
type queryLexer struct {
input string
start int
pos int
width int
tokens chan token
depth int
line int
col int
stringTerm string
}
func (l *queryLexer) run() {
for state := l.lexVoid; state != nil; {
state = state()
}
close(l.tokens)
}
func (l *queryLexer) nextStart() {
// iterate by runes (utf8 characters)
// search for newlines and advance line/col counts
for i := l.start; i < l.pos; {
r, width := utf8.DecodeRuneInString(l.input[i:])
if r == '\n' {
l.line++
l.col = 1
} else {
l.col++
}
i += width
}
// advance start position to next token
l.start = l.pos
}
func (l *queryLexer) emit(t tokenType) {
l.tokens <- token{
Position: toml.Position{Line:l.line, Col:l.col},
typ: t,
val: l.input[l.start:l.pos],
}
l.nextStart()
}
func (l *queryLexer) emitWithValue(t tokenType, value string) {
l.tokens <- token{
Position: toml.Position{Line:l.line, Col:l.col},
typ: t,
val: value,
}
l.nextStart()
}
func (l *queryLexer) next() rune {
if l.pos >= len(l.input) {
l.width = 0
return eof
}
var r rune
r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
l.pos += l.width
return r
}
func (l *queryLexer) ignore() {
l.nextStart()
}
func (l *queryLexer) backup() {
l.pos -= l.width
}
func (l *queryLexer) errorf(format string, args ...interface{}) queryLexStateFn {
l.tokens <- token{
Position: toml.Position{Line:l.line, Col:l.col},
typ: tokenError,
val: fmt.Sprintf(format, args...),
}
return nil
}
func (l *queryLexer) peek() rune {
r := l.next()
l.backup()
return r
}
func (l *queryLexer) accept(valid string) bool {
if strings.ContainsRune(valid, l.next()) {
return true
}
l.backup()
return false
}
func (l *queryLexer) follow(next string) bool {
return strings.HasPrefix(l.input[l.pos:], next)
}
func (l *queryLexer) lexVoid() queryLexStateFn {
for {
next := l.peek()
switch next {
case '$':
l.pos++
l.emit(tokenDollar)
continue
case '.':
if l.follow("..") {
l.pos += 2
l.emit(tokenDotDot)
} else {
l.pos++
l.emit(tokenDot)
}
continue
case '[':
l.pos++
l.emit(tokenLeftBracket)
continue
case ']':
l.pos++
l.emit(tokenRightBracket)
continue
case ',':
l.pos++
l.emit(tokenComma)
continue
case '*':
l.pos++
l.emit(tokenStar)
continue
case '(':
l.pos++
l.emit(tokenLeftParen)
continue
case ')':
l.pos++
l.emit(tokenRightParen)
continue
case '?':
l.pos++
l.emit(tokenQuestion)
continue
case ':':
l.pos++
l.emit(tokenColon)
continue
case '\'':
l.ignore()
l.stringTerm = string(next)
return l.lexString
case '"':
l.ignore()
l.stringTerm = string(next)
return l.lexString
}
if isSpace(next) {
l.next()
l.ignore()
continue
}
if isAlphanumeric(next) {
return l.lexKey
}
if next == '+' || next == '-' || isDigit(next) {
return l.lexNumber
}
if l.next() == eof {
break
}
return l.errorf("unexpected char: '%v'", next)
}
l.emit(tokenEOF)
return nil
}
func (l *queryLexer) lexKey() queryLexStateFn {
for {
next := l.peek()
if !isAlphanumeric(next) {
l.emit(tokenKey)
return l.lexVoid
}
if l.next() == eof {
break
}
}
l.emit(tokenEOF)
return nil
}
func (l *queryLexer) lexString() queryLexStateFn {
l.pos++
l.ignore()
growingString := ""
for {
if l.follow(l.stringTerm) {
l.emitWithValue(tokenString, growingString)
l.pos++
l.ignore()
return l.lexVoid
}
if l.follow("\\\"") {
l.pos++
growingString += "\""
} else if l.follow("\\'") {
l.pos++
growingString += "'"
} else if l.follow("\\n") {
l.pos++
growingString += "\n"
} else if l.follow("\\b") {
l.pos++
growingString += "\b"
} else if l.follow("\\f") {
l.pos++
growingString += "\f"
} else if l.follow("\\/") {
l.pos++
growingString += "/"
} else if l.follow("\\t") {
l.pos++
growingString += "\t"
} else if l.follow("\\r") {
l.pos++
growingString += "\r"
} else if l.follow("\\\\") {
l.pos++
growingString += "\\"
} else if l.follow("\\u") {
l.pos += 2
code := ""
for i := 0; i < 4; i++ {
c := l.peek()
l.pos++
if !isHexDigit(c) {
return l.errorf("unfinished unicode escape")
}
code = code + string(c)
}
l.pos--
intcode, err := strconv.ParseInt(code, 16, 32)
if err != nil {
return l.errorf("invalid unicode escape: \\u" + code)
}
growingString += string(rune(intcode))
} else if l.follow("\\U") {
l.pos += 2
code := ""
for i := 0; i < 8; i++ {
c := l.peek()
l.pos++
if !isHexDigit(c) {
return l.errorf("unfinished unicode escape")
}
code = code + string(c)
}
l.pos--
intcode, err := strconv.ParseInt(code, 16, 32)
if err != nil {
return l.errorf("invalid unicode escape: \\u" + code)
}
growingString += string(rune(intcode))
} else if l.follow("\\") {
l.pos++
return l.errorf("invalid escape sequence: \\" + string(l.peek()))
} else {
growingString += string(l.peek())
}
if l.next() == eof {
break
}
}
return l.errorf("unclosed string")
}
func (l *queryLexer) lexNumber() queryLexStateFn {
l.ignore()
if !l.accept("+") {
l.accept("-")
}
pointSeen := false
digitSeen := false
for {
next := l.next()
if next == '.' {
if pointSeen {
return l.errorf("cannot have two dots in one float")
}
if !isDigit(l.peek()) {
return l.errorf("float cannot end with a dot")
}
pointSeen = true
} else if isDigit(next) {
digitSeen = true
} else {
l.backup()
break
}
if pointSeen && !digitSeen {
return l.errorf("cannot start float with a dot")
}
}
if !digitSeen {
return l.errorf("no digit in that number")
}
if pointSeen {
l.emit(tokenFloat)
} else {
l.emit(tokenInteger)
}
return l.lexVoid
}
// Entry point
func lexQuery(input string) chan token {
l := &queryLexer{
input: input,
tokens: make(chan token),
line: 1,
col: 1,
}
go l.run()
return l.tokens
}