Files
go-toml/jpath/parser.go
T
eanderton 12e974f892 Query interface with callback functions
* Added public Query interface
* Added filter function callback support
* Added "script" function callback support

Queries are generated via Compile(), which then may be run via Execute()
as many times as needed.  Much like compiling a regex, this is done to
elide the need to re-parse and build the function tree for each
execution.

The distinction between 'filter' and 'script' is borrowed from their
syntactic equivalents in jsonpath.  Right now, these accept no arguments
in the query, and instead merely pass the current node to the callback.
Filters return a bool and determine if the node is kept or culled out.
'Scripts' return a string or an int64, which is in turn used in a key
or index filter (respectively) on the current node's data.

A few callbacks are provided by default, with the ability to add
additional callbacks before calling Execute() on a compiled query.
2014-09-08 22:08:28 -04:00

290 lines
6.3 KiB
Go

/*
Based on the "jsonpath" spec/concept.
http://goessner.net/articles/JsonPath/
https://code.google.com/p/json-path/
*/
package jpath
import (
"fmt"
"math"
)
// parser carries the state shared by the parser state functions while a
// stream of tokens is turned into a Query.
type parser struct {
	// flow is the channel that tokens are received from.
	flow chan token
	// tokensBuffer holds tokens that were read but put back; getToken and
	// peek serve from its front before receiving from flow.
	tokensBuffer []token
	// path is the Query under construction; matchers are added to it with
	// appendPath.
	path *Query
	// union collects the sub-expressions of the bracket expression that is
	// currently being parsed. parseUnionExpr resets it to nil once the
	// bracket has been closed.
	union []PathFn
}
// parserStateFn is a single parser state: it is called with the parser and
// returns the state to run next. run keeps calling the returned state until
// one of them returns nil.
type parserStateFn func(*parser) parserStateFn
// raiseError aborts parsing by panicking with a string of the form
// "<position>: <message>".
//
// tok supplies the position prefix (tok.Position.String()); msg and args are
// expanded with fmt.Sprintf to build the message. The function never returns
// normally.
func (p *parser) raiseError(tok *token, msg string, args ...interface{}) {
	detail := fmt.Sprintf(msg, args...)
	panic(tok.Position.String() + ": " + detail)
}
// run drives the state machine: it starts in parseStart and keeps invoking
// whichever state function the previous one returned, stopping as soon as a
// state returns nil.
func (p *parser) run() {
	state := parseStart
	for state != nil {
		state = state(p)
	}
}
// backup pushes a copy of tok back onto the token buffer so that it is the
// next token handed out by getToken (and seen by peek).
//
// getToken and peek both read from the front of tokensBuffer, so the token is
// inserted at the front; appending it at the end would hide it behind any
// tokens that are already buffered. With an empty buffer the result is the
// same single-element buffer either way.
func (p *parser) backup(tok *token) {
	p.tokensBuffer = append([]token{*tok}, p.tokensBuffer...)
}
// peek returns the next token without consuming it.
//
// If tokens are already buffered, a pointer to the first buffered token is
// returned. Otherwise one token is received from flow, stored in the buffer
// so that a later getToken still sees it, and a pointer to a copy of it is
// returned. peek returns nil when flow has been closed and nothing is
// buffered.
func (p *parser) peek() *token {
	if len(p.tokensBuffer) > 0 {
		return &p.tokensBuffer[0]
	}

	next, open := <-p.flow
	if !open {
		return nil
	}
	p.tokensBuffer = append(p.tokensBuffer, next)
	return &next
}
// lookahead reports whether the upcoming tokens have exactly the given types,
// in order, without consuming any of them.
//
// Tokens are taken with getToken one at a time and compared against types.
// Inspection stops at the first mismatch or when the token stream ends (in
// which case the result is false). Every token that was taken, including a
// mismatching one, is put back afterwards.
//
// The inspected tokens are restored at the FRONT of tokensBuffer: they were
// removed from the front, and putting them back at the end would reorder the
// stream whenever the buffer still holds tokens that were not inspected.
func (p *parser) lookahead(types ...tokenType) bool {
	matched := true
	consumed := make([]token, 0, len(types))

	for _, want := range types {
		tok := p.getToken()
		if tok == nil {
			matched = false
			break
		}
		consumed = append(consumed, *tok)
		if tok.typ != want {
			matched = false
			break
		}
	}

	// Re-queue what was taken ahead of anything still left in the buffer.
	p.tokensBuffer = append(consumed, p.tokensBuffer...)
	return matched
}
// getToken consumes and returns the next token.
//
// Buffered tokens are handed out first, from the front of tokensBuffer; only
// when the buffer is empty is a token received from flow. The returned
// pointer refers to a copy of the token. getToken returns nil when the buffer
// is empty and flow has been closed.
func (p *parser) getToken() *token {
	if len(p.tokensBuffer) == 0 {
		next, open := <-p.flow
		if !open {
			return nil
		}
		return &next
	}

	head := p.tokensBuffer[0]
	p.tokensBuffer = p.tokensBuffer[1:]
	return &head
}
// parseStart is the initial state. It consumes the first token and requires
// it to be '$'.
//
// An exhausted token stream or an immediate EOF token ends parsing (nil is
// returned). Any other first token that is not '$' is reported through
// raiseError, which panics. On success parsing continues in parseMatchExpr.
func parseStart(p *parser) parserStateFn {
	first := p.getToken()
	if first == nil || first.typ == tokenEOF {
		return nil
	}
	if first.typ == tokenDollar {
		return parseMatchExpr
	}
	p.raiseError(first, "Expected '$' at start of expression")
	return parseMatchExpr
}
// parseMatchExpr parses one path segment: a '.' segment, a '..' segment, or
// the start of a '[' bracket expression.
//
//   - EOF ends parsing (nil is returned).
//   - '[' hands over to parseBracketExpr.
//   - '.' must be followed by a key (appends a key matcher) or '*' (appends a
//     match-any matcher).
//   - '..' appends a recursive matcher and must be followed by a key (appends
//     a key matcher), '[' (hands over to parseBracketExpr) or '*' (nothing
//     more is appended).
//
// Anything else is reported through raiseError, using the position of the
// leading token of the segment.
func parseMatchExpr(p *parser) parserStateFn {
	head := p.getToken()

	switch head.typ {
	case tokenEOF:
		// EOF is acceptable between segments.
		return nil

	case tokenLBracket:
		return parseBracketExpr

	case tokenDot:
		next := p.getToken()
		if next.typ == tokenKey {
			p.path.appendPath(newMatchKeyFn(next.val))
			return parseMatchExpr
		}
		if next.typ == tokenStar {
			p.path.appendPath(&matchAnyFn{})
			return parseMatchExpr
		}

	case tokenDotDot:
		// The recursive matcher is appended before the following token is
		// examined.
		p.path.appendPath(&matchRecursiveFn{})
		next := p.getToken()
		if next.typ == tokenKey {
			p.path.appendPath(newMatchKeyFn(next.val))
			return parseMatchExpr
		}
		if next.typ == tokenLBracket {
			return parseBracketExpr
		}
		if next.typ == tokenStar {
			// The recursive matcher alone already covers '..*'.
			return parseMatchExpr
		}
	}

	p.raiseError(head, "expected match expression")
	return nil
}
// parseBracketExpr decides, without consuming any token, which kind of
// bracket expression follows an already-consumed '['.
//
// The expression is a slice when the next tokens are an integer followed by
// ':' or when the very next token is ':'; in that case parseSliceExpr takes
// over. Everything else is handled by parseUnionExpr.
func parseBracketExpr(p *parser) parserStateFn {
	// peek is only consulted when the integer-colon lookahead fails.
	isSlice := p.lookahead(tokenInteger, tokenColon) || p.peek().typ == tokenColon
	if isSlice {
		return parseSliceExpr
	}
	return parseUnionExpr
}
// parseUnionExpr parses the comma-separated sub-expressions of a bracket
// expression up to the closing ']'.
//
// Sub-expressions are collected in p.union. Integers become index matchers,
// keys and strings become key matchers. A '?' hands over to parseFilterExpr
// and a '(' hands over to parseScriptExpr; both return to this state after
// adding their own entry to p.union, which is why p.union is only
// initialised when it is nil and why a delimiter is expected whenever it
// already has entries.
//
// When ']' is reached, a single collected sub-expression is appended to the
// path directly, several are wrapped in a matchUnionFn, p.union is reset to
// nil and parsing continues in parseMatchExpr. Unexpected tokens are reported
// through raiseError.
func parseUnionExpr(p *parser) parserStateFn {
	if p.union == nil {
		p.union = []PathFn{}
	}

	for {
		// Once at least one sub-expression exists, the next token has to be
		// a ',' (another sub-expression follows) or the closing ']'.
		if len(p.union) > 0 {
			sep := p.getToken()
			if sep.typ == tokenRBracket {
				break
			}
			if sep.typ != tokenComma {
				p.raiseError(sep, "expected ',' or ']', not '%s'", sep.val)
			}
		}

		item := p.getToken()
		switch item.typ {
		case tokenInteger:
			p.union = append(p.union, newMatchIndexFn(item.Int()))
		case tokenKey, tokenString:
			p.union = append(p.union, newMatchKeyFn(item.val))
		case tokenQuestion:
			return parseFilterExpr
		case tokenLParen:
			return parseScriptExpr
		default:
			p.raiseError(item, "expected union sub expression, not '%s', %d", item.val, len(p.union))
		}
	}

	// A union of one is just that one matcher.
	switch len(p.union) {
	case 1:
		p.path.appendPath(p.union[0])
	default:
		p.path.appendPath(&matchUnionFn{p.union})
	}

	p.union = nil // leave no state behind for the next bracket expression
	return parseMatchExpr
}
// parseSliceExpr parses a slice expression of the form
// "[start]:[end][:[step]]" followed by ']' (the opening '[' has already been
// consumed) and appends the resulting slice matcher to the path.
//
// Omitted parts keep their defaults: start 0, end math.MaxInt64 and step 1,
// which together select every element. Parsing continues in parseMatchExpr.
//
// Malformed input is reported through raiseError, which panics. That includes
// a step that is zero or negative: the error message promises a positive
// step, so zero is rejected as well.
func parseSliceExpr(p *parser) parserStateFn {
	// init slice to grab all elements
	start, end, step := 0, math.MaxInt64, 1

	// parse optional start
	tok := p.getToken()
	if tok.typ == tokenInteger {
		start = tok.Int()
		tok = p.getToken()
	}
	if tok.typ != tokenColon {
		p.raiseError(tok, "expected ':'")
	}

	// parse optional end
	tok = p.getToken()
	if tok.typ == tokenInteger {
		end = tok.Int()
		tok = p.getToken()
	}
	if tok.typ == tokenRBracket {
		// "[start:end]" without a step part
		p.path.appendPath(newMatchSliceFn(start, end, step))
		return parseMatchExpr
	}
	if tok.typ != tokenColon {
		p.raiseError(tok, "expected ']' or ':'")
	}

	// parse optional step
	tok = p.getToken()
	if tok.typ == tokenInteger {
		step = tok.Int()
		if step <= 0 {
			p.raiseError(tok, "step must be a positive value")
		}
		tok = p.getToken()
	}
	if tok.typ != tokenRBracket {
		p.raiseError(tok, "expected ']'")
	}

	p.path.appendPath(newMatchSliceFn(start, end, step))
	return parseMatchExpr
}
// parseFilterExpr parses the "(name)" part of a filter sub-expression; the
// introducing '?' has already been consumed by parseUnionExpr.
//
// The name may be a key or a string token. A filter matcher created from the
// name and the position of the closing parenthesis is added to p.union, and
// parsing returns to parseUnionExpr. Unexpected tokens are reported through
// raiseError.
func parseFilterExpr(p *parser) parserStateFn {
	opening := p.getToken()
	if opening.typ != tokenLParen {
		p.raiseError(opening, "expected left-parenthesis for filter expression")
	}

	ident := p.getToken()
	if !(ident.typ == tokenKey || ident.typ == tokenString) {
		p.raiseError(ident, "expected key or string for filter funciton name")
	}

	closing := p.getToken()
	if closing.typ != tokenRParen {
		p.raiseError(closing, "expected right-parenthesis for filter expression")
	}

	p.union = append(p.union, newMatchFilterFn(ident.val, closing.Position))
	return parseUnionExpr
}
// parseScriptExpr parses the "name)" part of a script sub-expression; the
// opening '(' has already been consumed by parseUnionExpr.
//
// The name may be a key or a string token. A script matcher created from the
// name and the position of the closing parenthesis is added to p.union, and
// parsing returns to parseUnionExpr. Unexpected tokens are reported through
// raiseError.
func parseScriptExpr(p *parser) parserStateFn {
	ident := p.getToken()
	if !(ident.typ == tokenKey || ident.typ == tokenString) {
		p.raiseError(ident, "expected key or string for script funciton name")
	}

	closing := p.getToken()
	if closing.typ != tokenRParen {
		p.raiseError(closing, "expected right-parenthesis for script expression")
	}

	p.union = append(p.union, newMatchScriptFn(ident.val, closing.Position))
	return parseUnionExpr
}
// parse consumes the tokens delivered on flow and returns the Query built
// from them.
//
// It creates a parser with an empty token buffer and a fresh Query, runs the
// state machine to completion and returns the parser's Query. Syntax errors
// surface as panics raised by the state functions.
func parse(flow chan token) *Query {
	p := &parser{
		tokensBuffer: []token{},
		path:         newQuery(),
		flow:         flow,
	}
	p.run()
	return p.path
}