Move query to its own subpackage (#152)

Move the whole query system to its own package. The reason is to stop it from relying on unexported methods and structures, and to move it out of the main package, since it is not really a core feature. It is still tied to the toml.TomlTree and toml.Position structures for now. * Move query mechanism to its own subpackage * Rename QueryResult to Result to avoid stutter * Add query.CompileAndExecute Fixes #116
2017-05-07 17:14:13 -07:00
parent 64bc956d5e
commit 23f644976a
17 changed files with 665 additions and 532 deletions
@@ -0,0 +1,357 @@
+// TOML JSONPath lexer.
+//
+// Written using the principles developed by Rob Pike in
+// http://www.youtube.com/watch?v=HxaD_trXwRE
+
+package query
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+	"unicode/utf8"
+	"github.com/pelletier/go-toml"
+)
+
+// queryLexStateFn is one state of the lexer's state machine. Calling it
+// performs a step of lexing and returns the state to run next; the run loop
+// stops as soon as a state returns nil.
+type queryLexStateFn func() queryLexStateFn
+
+// queryLexer holds the scanning state for a single query string. It is
+// created by lexQuery and driven by run.
+type queryLexer struct {
+	// input is the query text being scanned.
+	input      string
+	// start is the byte offset in input where the pending token begins.
+	start      int
+	// pos is the byte offset of the current read position in input.
+	pos        int
+	// width is the byte width of the rune most recently returned by next
+	// (0 when next hit the end of input); backup subtracts it from pos.
+	width      int
+	// tokens is the channel on which lexed tokens are sent; run closes it.
+	tokens     chan token
+	// NOTE(review): depth is not referenced anywhere in the visible lexer
+	// code — confirm whether it is still needed.
+	depth      int
+	// line and col are the position attached to emitted tokens. nextStart
+	// updates them as start advances; lexQuery initialises both to 1.
+	line       int
+	col        int
+	// stringTerm is the quote character that opened the string literal being
+	// lexed; lexString treats it as the terminator.
+	stringTerm string
+}
+
+// run drives the state machine: it begins in lexVoid and keeps invoking the
+// state returned by the previous one until a state yields nil, then closes
+// the token channel so the consumer sees the end of the stream.
+func (l *queryLexer) run() {
+	state := queryLexStateFn(l.lexVoid)
+	for state != nil {
+		state = state()
+	}
+	close(l.tokens)
+}
+
+// nextStart moves start up to pos, updating line and col for every rune that
+// is skipped over: a newline bumps line and resets col to 1, any other rune
+// advances col by one.
+func (l *queryLexer) nextStart() {
+	cursor := l.start
+	for cursor < l.pos {
+		// Decode rune by rune so multi-byte characters count as one column.
+		r, size := utf8.DecodeRuneInString(l.input[cursor:])
+		cursor += size
+		if r != '\n' {
+			l.col++
+			continue
+		}
+		l.line++
+		l.col = 1
+	}
+	// The next token starts where scanning currently stands.
+	l.start = l.pos
+}
+
+// emit sends a token of type t whose value is the input text between start
+// and pos, stamped with the current line/col, and then advances start to pos.
+func (l *queryLexer) emit(t tokenType) {
+	l.emitWithValue(t, l.input[l.start:l.pos])
+}
+
+// emitWithValue sends a token of type t carrying the caller-supplied value
+// instead of the raw input text. The token is stamped with the current
+// line/col, and start is then advanced to pos.
+func (l *queryLexer) emitWithValue(t tokenType, value string) {
+	tok := token{typ: t, val: value}
+	tok.Position = toml.Position{Line: l.line, Col: l.col}
+	l.tokens <- tok
+	l.nextStart()
+}
+
+func (l *queryLexer) next() rune {
+	if l.pos >= len(l.input) {
+		l.width = 0
+		return eof
+	}
+	var r rune
+	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
+	l.pos += l.width
+	return r
+}
+
+// ignore drops the input scanned since start without emitting a token. It
+// delegates to nextStart, so line and col are still updated for the skipped
+// runes.
+func (l *queryLexer) ignore() {
+	l.nextStart()
+}
+
+// backup un-reads the rune most recently returned by next by moving pos back
+// by that rune's recorded width. After next returned eof the width is 0, so
+// backup is then a no-op.
+func (l *queryLexer) backup() {
+	l.pos = l.pos - l.width
+}
+
+// errorf sends a tokenError whose value is the formatted message, stamped
+// with the current line/col, and returns nil so that the run loop stops.
+func (l *queryLexer) errorf(format string, args ...interface{}) queryLexStateFn {
+	message := fmt.Sprintf(format, args...)
+	where := toml.Position{Line: l.line, Col: l.col}
+	l.tokens <- token{Position: where, typ: tokenError, val: message}
+	return nil
+}
+
+// peek returns the rune at pos without consuming it. It is implemented as
+// next followed by backup, so width is overwritten with the peeked rune's
+// width.
+func (l *queryLexer) peek() rune {
+	defer l.backup()
+	return l.next()
+}
+
+// accept consumes the next rune if it is one of the runes in valid and
+// reports whether it did; otherwise the rune is put back.
+func (l *queryLexer) accept(valid string) bool {
+	r := l.next()
+	if !strings.ContainsRune(valid, r) {
+		l.backup()
+		return false
+	}
+	return true
+}
+
+// follow reports whether the unread input, starting at pos, begins with
+// next. Nothing is consumed.
+func (l *queryLexer) follow(next string) bool {
+	remaining := l.input[l.pos:]
+	return strings.HasPrefix(remaining, next)
+}
+
+// lexVoid is the lexer's top-level state, active between tokens. It emits
+// the punctuation tokens directly, skips whitespace, and hands over to
+// lexString, lexKey or lexNumber when it sees the start of one of those
+// constructs. At the end of input it emits tokenEOF and returns nil; on any
+// other character it emits an error token and returns nil.
+func (l *queryLexer) lexVoid() queryLexStateFn {
+	for {
+		next := l.peek()
+		switch next {
+		case '$':
+			l.pos++
+			l.emit(tokenDollar)
+			continue
+		case '.':
+			// ".." is a single token and must be tested before ".".
+			if l.follow("..") {
+				l.pos += 2
+				l.emit(tokenDotDot)
+			} else {
+				l.pos++
+				l.emit(tokenDot)
+			}
+			continue
+		case '[':
+			l.pos++
+			l.emit(tokenLeftBracket)
+			continue
+		case ']':
+			l.pos++
+			l.emit(tokenRightBracket)
+			continue
+		case ',':
+			l.pos++
+			l.emit(tokenComma)
+			continue
+		case '*':
+			l.pos++
+			l.emit(tokenStar)
+			continue
+		case '(':
+			l.pos++
+			l.emit(tokenLeftParen)
+			continue
+		case ')':
+			l.pos++
+			l.emit(tokenRightParen)
+			continue
+		case '?':
+			l.pos++
+			l.emit(tokenQuestion)
+			continue
+		case ':':
+			l.pos++
+			l.emit(tokenColon)
+			continue
+		case '\'', '"':
+			// Remember which quote opened the literal so that lexString
+			// knows which one closes it. pos is left on the opening quote.
+			l.ignore()
+			l.stringTerm = string(next)
+			return l.lexString
+		}
+
+		if isSpace(next) {
+			l.next()
+			l.ignore()
+			continue
+		}
+
+		if isAlphanumeric(next) {
+			return l.lexKey
+		}
+
+		if next == '+' || next == '-' || isDigit(next) {
+			return l.lexNumber
+		}
+
+		if l.next() == eof {
+			break
+		}
+
+		// %q prints the offending character itself, quoted; %v on a rune
+		// would print its numeric code point instead.
+		return l.errorf("unexpected char: %q", next)
+	}
+	l.emit(tokenEOF)
+	return nil
+}
+
+// lexKey scans a run of characters accepted by isAlphanumeric and emits it
+// as a tokenKey, then returns to lexVoid. If next reports eof while the run
+// is still being consumed, tokenEOF is emitted instead and lexing stops.
+func (l *queryLexer) lexKey() queryLexStateFn {
+	for isAlphanumeric(l.peek()) {
+		if l.next() == eof {
+			l.emit(tokenEOF)
+			return nil
+		}
+	}
+	// The first character that is not part of the key ends it.
+	l.emit(tokenKey)
+	return l.lexVoid
+}
+
+// lexString scans a quoted string literal and emits it as a tokenString
+// whose value has the escape sequences decoded. On entry pos is on the
+// opening quote and stringTerm holds the quote character that closes the
+// literal; on success the closing quote is consumed and lexVoid resumes.
+//
+// Recognised escapes are \" \' \n \b \f \/ \t \r \\ as well as \uXXXX and
+// \UXXXXXXXX (hexadecimal digits). Any other backslash sequence, a malformed
+// unicode escape, or reaching the end of input before the closing quote
+// emits an error token and stops the lexer.
+func (l *queryLexer) lexString() queryLexStateFn {
+	// Step over the opening quote.
+	l.pos++
+	l.ignore()
+	growingString := ""
+
+	for {
+		if l.follow(l.stringTerm) {
+			l.emitWithValue(tokenString, growingString)
+			// Step over the closing quote.
+			l.pos++
+			l.ignore()
+			return l.lexVoid
+		}
+
+		if l.follow("\\") {
+			// Move onto the character that selects the escape. The l.next()
+			// at the bottom of the loop consumes the escape's final byte.
+			l.pos++
+			switch esc := l.peek(); esc {
+			case '"':
+				growingString += "\""
+			case '\'':
+				growingString += "'"
+			case 'n':
+				growingString += "\n"
+			case 'b':
+				growingString += "\b"
+			case 'f':
+				growingString += "\f"
+			case '/':
+				growingString += "/"
+			case 't':
+				growingString += "\t"
+			case 'r':
+				growingString += "\r"
+			case '\\':
+				growingString += "\\"
+			case 'u', 'U':
+				// \u takes 4 hexadecimal digits, \U takes 8.
+				digits := 4
+				if esc == 'U' {
+					digits = 8
+				}
+				// Step over the u/U marker.
+				l.pos++
+				code := ""
+				for i := 0; i < digits; i++ {
+					c := l.peek()
+					l.pos++
+					if !isHexDigit(c) {
+						return l.errorf("unfinished unicode escape")
+					}
+					code += string(c)
+				}
+				// Leave the last hex digit for the l.next() below.
+				l.pos--
+				intcode, err := strconv.ParseInt(code, 16, 32)
+				if err != nil {
+					// Report the marker that was actually used (\u or \U).
+					return l.errorf("invalid unicode escape: \\%c%s", esc, code)
+				}
+				growingString += string(rune(intcode))
+			default:
+				// The offending character is passed as an argument rather
+				// than spliced into the format string, so that a '%' in the
+				// input cannot be misread as a formatting verb.
+				return l.errorf("invalid escape sequence: \\%s", string(esc))
+			}
+		} else {
+			growingString += string(l.peek())
+		}
+
+		if l.next() == eof {
+			break
+		}
+	}
+
+	return l.errorf("unclosed string")
+}
+
+// lexNumber scans an optionally signed integer or float and emits
+// tokenInteger or tokenFloat (the latter when a decimal point was seen),
+// then returns to lexVoid. It emits an error token and stops the lexer when
+// the number has two dots, a dot not followed by a digit, a dot before any
+// digit, or no digit at all.
+func (l *queryLexer) lexNumber() queryLexStateFn {
+	l.ignore()
+	// Optional sign: at most one of '+' or '-'.
+	_ = l.accept("+") || l.accept("-")
+
+	sawPoint, sawDigit := false, false
+scan:
+	for {
+		switch r := l.next(); {
+		case r == '.':
+			if sawPoint {
+				return l.errorf("cannot have two dots in one float")
+			}
+			if !isDigit(l.peek()) {
+				return l.errorf("float cannot end with a dot")
+			}
+			sawPoint = true
+		case isDigit(r):
+			sawDigit = true
+		default:
+			// Anything else ends the number; leave it for the next state.
+			l.backup()
+			break scan
+		}
+		if sawPoint && !sawDigit {
+			return l.errorf("cannot start float with a dot")
+		}
+	}
+
+	if !sawDigit {
+		return l.errorf("no digit in that number")
+	}
+	if !sawPoint {
+		l.emit(tokenInteger)
+		return l.lexVoid
+	}
+	l.emit(tokenFloat)
+	return l.lexVoid
+}
+
+// lexQuery is the lexer's entry point. It starts lexing input in a new
+// goroutine and returns the channel on which the tokens are delivered; the
+// channel is closed once lexing finishes. The channel is unbuffered, so the
+// lexing goroutine blocks on each token until the caller receives it.
+func lexQuery(input string) chan token {
+	tokens := make(chan token)
+	lexer := &queryLexer{
+		input:  input,
+		tokens: tokens,
+		// Positions are 1-based.
+		line: 1,
+		col:  1,
+	}
+	go lexer.run()
+	return tokens
+}