Changed match func strategy; added tests

As it turns out, closures are very hard to validate without running them.
Since table-driven tests tend to rely on value types that can be
compared directly, using structs that adhere to a generic callback
interface is more work, but is more easily tested.

* Changed jsonpath match functions to structs with Call() methods
* Added tests to verify the generation of jsonpath QueryPath data
* Added tests to verify jsonpath lexer
* Fixed jsonpath whitespace handling bug
* Fixed numerous flaws in jsonpath parser
This commit is contained in:
eanderton
2014-09-04 23:17:29 -04:00
parent 27416cc1b9
commit b74544d345
6 changed files with 447 additions and 156 deletions
+20 -14
View File
@@ -79,18 +79,26 @@ func (tt tokenType) String() string {
return "Unknown"
}
func (i token) String() string {
switch i.typ {
// Int converts the token's value to an integer. The lexer only emits
// integer tokens for digit runs, so a conversion failure indicates a
// programmer error and triggers a panic rather than returning an error.
func (t token) Int() int {
	n, err := strconv.Atoi(t.val)
	if err != nil {
		panic(err)
	}
	return n
}
func (t token) String() string {
switch t.typ {
case tokenEOF:
return "EOF"
case tokenError:
return i.val
return t.val
}
if len(i.val) > 10 {
return fmt.Sprintf("%.10q...", i.val)
if len(t.val) > 10 {
return fmt.Sprintf("%.10q...", t.val)
}
return fmt.Sprintf("%q", i.val)
return fmt.Sprintf("%q", t.val)
}
func isSpace(r rune) bool {
@@ -283,6 +291,12 @@ func lexVoid(l *lexer) stateFn {
return lexString
}
if isSpace(next) {
l.next()
l.ignore()
continue
}
if isAlphanumeric(next) {
return lexKey
}
@@ -291,14 +305,6 @@ func lexVoid(l *lexer) stateFn {
return lexNumber
}
if isAlphanumeric(next) {
return lexKey
}
if isSpace(next) {
l.ignore()
}
if l.next() == eof {
break
}
+31
View File
@@ -54,7 +54,9 @@ func TestLexString(t *testing.T) {
token{Position{1, 2}, tokenString, "foo"},
token{Position{1, 6}, tokenEOF, ""},
})
}
func TestLexDoubleString(t *testing.T) {
testFlow(t, `"bar"`, []token{
token{Position{1, 2}, tokenString, "bar"},
token{Position{1, 6}, tokenEOF, ""},
@@ -67,3 +69,32 @@ func TestLexKey(t *testing.T) {
token{Position{1, 4}, tokenEOF, ""},
})
}
func TestLexRecurse(t *testing.T) {
testFlow(t, "$..*", []token{
token{Position{1, 1}, tokenDollar, "$"},
token{Position{1, 2}, tokenDotDot, ".."},
token{Position{1, 4}, tokenStar, "*"},
token{Position{1, 5}, tokenEOF, ""},
})
}
func TestLexBracketKey(t *testing.T) {
testFlow(t, "$[foo]", []token{
token{Position{1, 1}, tokenDollar, "$"},
token{Position{1, 2}, tokenLBracket, "["},
token{Position{1, 3}, tokenKey, "foo"},
token{Position{1, 6}, tokenRBracket, "]"},
token{Position{1, 7}, tokenEOF, ""},
})
}
// TestLexSpace verifies that whitespace between bare keys is consumed and
// ignored rather than emitted as a token: the reported positions (1, 5, 9)
// show the spaces were skipped. Regression test for the lexer whitespace
// handling bug fixed in this commit.
func TestLexSpace(t *testing.T) {
testFlow(t, "foo bar baz", []token{
token{Position{1, 1}, tokenKey, "foo"},
token{Position{1, 5}, tokenKey, "bar"},
token{Position{1, 9}, tokenKey, "baz"},
token{Position{1, 12}, tokenEOF, ""},
})
}
+112 -84
View File
@@ -4,111 +4,139 @@ import (
. "github.com/pelletier/go-toml"
)
type QueryPath []PathFn
type PathFn func(context interface{}, next QueryPath)
func (path QueryPath) Fn(context interface{}) {
path[0](context, path[1:])
type PathFn interface{
Call(context interface{}, next QueryPath)
}
type QueryPath []PathFn
func (path QueryPath) Fn(context interface{}) {
path[0].Call(context, path[1:])
}
// treeValue is a shim to ease functor writing: it fetches the value stored
// under a single key of the tree. Callers (e.g. matchKeyFn.Call) treat a
// nil result as "key absent" — presumably GetPath returns nil for missing
// keys; confirm against the TomlTree API.
func treeValue(tree *TomlTree, key string) interface{} {
return tree.GetPath([]string{key})
}
func matchKeyFn(name string) PathFn {
return func(context interface{}, next QueryPath) {
if tree, ok := context.(*TomlTree); ok {
item := treeValue(tree, name)
if item != nil {
next.Fn(item)
}
}
}
// matchKeyFn matches a single named key in a tree context, as produced by
// query expressions such as "$.foo", "$[foo]" or "$['foo']".
type matchKeyFn struct {
Name string
}
func matchIndexFn(idx int) PathFn {
return func(context interface{}, next QueryPath) {
if arr, ok := context.([]interface{}); ok {
if idx < len(arr) && idx >= 0 {
next.Fn(arr[idx])
}
}
}
// Call looks up f.Name in a tree context and forwards the value, when
// present, to the rest of the query path. Non-tree contexts are ignored.
func (f *matchKeyFn) Call(context interface{}, next QueryPath) {
	tree, ok := context.(*TomlTree)
	if !ok {
		return
	}
	if item := treeValue(tree, f.Name); item != nil {
		next.Fn(item)
	}
}
func matchSliceFn(start, end, step int) PathFn {
return func(context interface{}, next QueryPath) {
if arr, ok := context.([]interface{}); ok {
// adjust indexes for negative values, reverse ordering
realStart, realEnd := start, end
if realStart < 0 {
realStart = len(arr) + realStart
}
if realEnd < 0 {
realEnd = len(arr) + realEnd
}
if realEnd < realStart {
realEnd, realStart = realStart, realEnd // swap
}
// loop and gather
for idx := realStart; idx < realEnd; idx += step {
next.Fn(arr[idx])
}
}
}
// match single index
type matchIndexFn struct {
Idx int
}
func matchAnyFn() PathFn {
return func(context interface{}, next QueryPath) {
if tree, ok := context.(*TomlTree); ok {
for _, key := range tree.Keys() {
item := treeValue(tree, key)
next.Fn(item)
}
}
}
// Call forwards the array element at f.Idx to the remaining path.
// Non-array contexts and out-of-range indexes are silently skipped.
func (f *matchIndexFn) Call(context interface{}, next QueryPath) {
	arr, ok := context.([]interface{})
	if !ok {
		return
	}
	if 0 <= f.Idx && f.Idx < len(arr) {
		next.Fn(arr[f.Idx])
	}
}
func matchUnionFn(union QueryPath) PathFn {
return func(context interface{}, next QueryPath) {
for _, fn := range union {
fn(context, next)
}
}
// filter by slicing
type matchSliceFn struct {
Start, End, Step int
}
func matchRecurseFn() PathFn {
return func(context interface{}, next QueryPath) {
if tree, ok := context.(*TomlTree); ok {
var visit func(tree *TomlTree)
visit = func(tree *TomlTree) {
for _, key := range tree.Keys() {
item := treeValue(tree, key)
next.Fn(item)
switch node := item.(type) {
case *TomlTree:
visit(node)
case []*TomlTree:
for _, subtree := range node {
visit(subtree)
}
}
}
}
visit(tree)
}
}
// Call applies the slice filter [Start:End:Step] to an array context and
// forwards each selected element to the remaining path.
//
// Negative Start/End are taken relative to the end of the array, and the
// two bounds are swapped if they arrive reversed. The bounds are then
// clamped to [0, len(arr)]: the parser encodes an open-ended slice such
// as "$[123:]" with End = math.MaxInt64 (see the parser tests), which
// would otherwise index past the end of the array and panic.
func (f *matchSliceFn) Call(context interface{}, next QueryPath) {
	arr, ok := context.([]interface{})
	if !ok {
		return
	}
	// guard: a non-positive step would make the gather loop spin forever
	if f.Step < 1 {
		return
	}
	// adjust indexes for negative values, reverse ordering
	realStart, realEnd := f.Start, f.End
	if realStart < 0 {
		realStart = len(arr) + realStart
	}
	if realEnd < 0 {
		realEnd = len(arr) + realEnd
	}
	if realEnd < realStart {
		realEnd, realStart = realStart, realEnd // swap
	}
	// clamp to the array bounds so open-ended slices cannot panic
	if realStart < 0 {
		realStart = 0
	}
	if realEnd > len(arr) {
		realEnd = len(arr)
	}
	// loop and gather
	for idx := realStart; idx < realEnd; idx += f.Step {
		next.Fn(arr[idx])
	}
}
// match anything
type matchAnyFn struct {
// empty
}
// Call fans out to every immediate child of a tree context (the "*"
// wildcard). Non-tree contexts are ignored.
func (f *matchAnyFn) Call(context interface{}, next QueryPath) {
	tree, ok := context.(*TomlTree)
	if !ok {
		return
	}
	for _, key := range tree.Keys() {
		next.Fn(treeValue(tree, key))
	}
}
// filter through union
type matchUnionFn struct {
Union QueryPath
}
// Call applies each member of the union to the same context; every member
// continues with the same remaining query path.
func (f *matchUnionFn) Call(context interface{}, next QueryPath) {
	for i := range f.Union {
		f.Union[i].Call(context, next)
	}
}
// match every single last node in the tree
type matchRecursiveFn struct {
// empty
}
// Call implements the recursive-descent operator ("$..*"): it walks the
// whole tree depth-first and forwards every value encountered — leaves,
// subtrees, and members of tree arrays — to the remaining path.
// Non-tree contexts are ignored.
func (f *matchRecursiveFn) Call(context interface{}, next QueryPath) {
if tree, ok := context.(*TomlTree); ok {
var visit func(tree *TomlTree)
visit = func(tree *TomlTree) {
for _, key := range tree.Keys() {
item := treeValue(tree, key)
// forward the node itself before descending into it
next.Fn(item)
switch node := item.(type) {
case *TomlTree:
visit(node)
case []*TomlTree:
// arrays of tables: recurse into each element
for _, subtree := range node {
visit(subtree)
}
}
}
}
visit(tree)
}
}
// matchEndFn terminates a query path: every context that reaches it is
// collected into Result. processPath appends one to the end of the path
// and reads Result back after execution.
type matchEndFn struct {
Result []interface{}
}
// Call records the context; next is intentionally unused, as this is
// always the final element of the path.
func (f *matchEndFn) Call(context interface{}, next QueryPath) {
f.Result = append(f.Result, context)
}
func processPath(path QueryPath, context interface{}) []interface{} {
// terminate the path with a collection function
result := []interface{}{}
newPath := append(path, func(context interface{}, next QueryPath) {
result = append(result, context)
})
endFn := &matchEndFn{ []interface{}{} }
newPath := append(path, endFn)
// execute the path
newPath.Fn(context)
return result
return endFn.Result
}
+186
View File
@@ -0,0 +1,186 @@
package jpath
import (
"fmt"
"math"
"testing"
)
// pathString renders a query path as "[type:value, ...]" for use in
// test failure messages.
func pathString(path QueryPath) string {
	out := "["
	for _, fn := range path {
		out += fmt.Sprintf("%T:%v, ", fn, fn)
	}
	out += "]"
	return out
}
// assertPathMatch compares a parsed query path against a reference path
// element by element, reporting a test error and returning false on the
// first mismatch. Matching is by concrete type and value; union nodes are
// compared recursively. A ref element of a type not listed in the switch
// leaves pass == false and is reported as a mismatch.
func assertPathMatch(t *testing.T, path, ref QueryPath) bool {
if len(path) != len(ref) {
t.Errorf("lengths do not match: %v vs %v",
pathString(path), pathString(ref))
return false
} else {
for i, v := range ref {
pass := false
node := path[i]
// compare by value
switch refNode := v.(type) {
case *matchKeyFn:
castNode, ok := node.(*matchKeyFn)
pass = ok && (*refNode == *castNode)
case *matchIndexFn:
castNode, ok := node.(*matchIndexFn)
pass = ok && (*refNode == *castNode)
case *matchSliceFn:
castNode, ok := node.(*matchSliceFn)
pass = ok && (*refNode == *castNode)
case *matchAnyFn:
castNode, ok := node.(*matchAnyFn)
pass = ok && (*refNode == *castNode)
case *matchUnionFn:
castNode, ok := node.(*matchUnionFn)
// special case - compare all contents recursively
pass = ok && assertPathMatch(t, castNode.Union, refNode.Union)
case *matchRecursiveFn:
castNode, ok := node.(*matchRecursiveFn)
pass = ok && (*refNode == *castNode)
}
if !pass {
t.Errorf("paths do not match at index %d: %v vs %v",
i, pathString(path), pathString(ref))
return false
}
}
}
return true
}
// assertPath lexes and parses query, then checks the resulting path
// against the reference path ref.
func assertPath(t *testing.T, query string, ref QueryPath) {
	_, tokens := lex(query)
	assertPathMatch(t, parse(tokens), ref)
}
func TestPathRoot(t *testing.T) {
assertPath(t,
"$",
QueryPath{
// empty
})
}
func TestPathKey(t *testing.T) {
assertPath(t,
"$.foo",
QueryPath{
&matchKeyFn{ "foo" },
})
}
func TestPathBracketKey(t *testing.T) {
assertPath(t,
"$[foo]",
QueryPath{
&matchKeyFn{ "foo" },
})
}
func TestPathBracketStringKey(t *testing.T) {
assertPath(t,
"$['foo']",
QueryPath{
&matchKeyFn{ "foo" },
})
}
func TestPathIndex(t *testing.T) {
assertPath(t,
"$[123]",
QueryPath{
&matchIndexFn{ 123 },
})
}
func TestPathSliceStart(t *testing.T) {
assertPath(t,
"$[123:]",
QueryPath{
&matchSliceFn{ 123, math.MaxInt64, 1 },
})
}
func TestPathSliceStartEnd(t *testing.T) {
assertPath(t,
"$[123:456]",
QueryPath{
&matchSliceFn{ 123, 456, 1 },
})
}
func TestPathSliceStartEndColon(t *testing.T) {
assertPath(t,
"$[123:456:]",
QueryPath{
&matchSliceFn{ 123, 456, 1 },
})
}
func TestPathSliceStartStep(t *testing.T) {
assertPath(t,
"$[123::7]",
QueryPath{
&matchSliceFn{ 123, math.MaxInt64, 7 },
})
}
func TestPathSliceEndStep(t *testing.T) {
assertPath(t,
"$[:456:7]",
QueryPath{
&matchSliceFn{ 0, 456, 7 },
})
}
func TestPathSliceStep(t *testing.T) {
assertPath(t,
"$[::7]",
QueryPath{
&matchSliceFn{ 0, math.MaxInt64, 7 },
})
}
func TestPathSliceAll(t *testing.T) {
assertPath(t,
"$[123:456:7]",
QueryPath{
&matchSliceFn{ 123, 456, 7 },
})
}
func TestPathAny(t *testing.T) {
assertPath(t,
"$.*",
QueryPath{
&matchAnyFn{},
})
}
func TestPathUnion(t *testing.T) {
assertPath(t,
"$[foo, bar, baz]",
QueryPath{
&matchUnionFn{ []PathFn {
&matchKeyFn{ "foo" },
&matchKeyFn{ "bar" },
&matchKeyFn{ "baz" },
}},
})
}
func TestPathRecurse(t *testing.T) {
assertPath(t,
"$..*",
QueryPath{
&matchRecursiveFn{},
})
}
+90 -55
View File
@@ -3,13 +3,13 @@ package jpath
import (
"fmt"
"math"
"strconv"
)
type parser struct {
flow chan token
tokensBuffer []token
path []PathFn
union []PathFn
}
type parserStateFn func(*parser) parserStateFn
@@ -42,6 +42,27 @@ func (p *parser) peek() *token {
return &tok
}
// lookahead reports whether the next tokens in the stream match the given
// types, in order. It stops at the first mismatch or at end of stream.
// Every token it consumed is pushed back so the stream is left unchanged.
func (p *parser) lookahead(types ...tokenType) bool {
	result := true
	buffer := []token{}

	for _, typ := range types {
		tok := p.getToken()
		if tok == nil {
			result = false
			break
		}
		buffer = append(buffer, *tok)
		if tok.typ != typ {
			result = false
			break
		}
	}
	// Restore the consumed tokens to the FRONT of tokensBuffer: getToken
	// pops from index 0, so appending them at the back would reorder the
	// stream whenever tokensBuffer still held other buffered tokens.
	p.tokensBuffer = append(buffer, p.tokensBuffer...)
	return result
}
func (p *parser) getToken() *token {
if len(p.tokensBuffer) != 0 {
tok := p.tokensBuffer[0]
@@ -73,20 +94,40 @@ func parseStart(p *parser) parserStateFn {
return parseMatchExpr
}
// handle '.' prefix, '[]', and '..'
func parseMatchExpr(p *parser) parserStateFn {
tok := p.getToken()
switch tok.typ {
case tokenDot:
p.appendPath(matchKeyFn(tok.val))
return parseMatchExpr
case tokenDotDot:
p.appendPath(matchRecurseFn())
return parseSimpleMatchExpr
p.appendPath(&matchRecursiveFn{})
// nested parse for '..'
tok := p.getToken()
switch tok.typ {
case tokenKey:
p.appendPath(&matchKeyFn{tok.val})
return parseMatchExpr
case tokenLBracket:
return parseBracketExpr
case tokenStar:
// do nothing - the recursive predicate is enough
return parseMatchExpr
}
case tokenDot:
// nested parse for '.'
tok := p.getToken()
switch tok.typ {
case tokenKey:
p.appendPath(&matchKeyFn{tok.val})
return parseMatchExpr
case tokenStar:
p.appendPath(&matchAnyFn{})
return parseMatchExpr
}
case tokenLBracket:
return parseBracketExpr
case tokenStar:
p.appendPath(matchAnyFn())
return parseMatchExpr
case tokenEOF:
return nil // allow EOF at this stage
}
@@ -94,57 +135,40 @@ func parseMatchExpr(p *parser) parserStateFn {
return nil
}
func parseSimpleMatchExpr(p *parser) parserStateFn {
tok := p.getToken()
switch tok.typ {
case tokenLBracket:
return parseBracketExpr
case tokenKey:
p.appendPath(matchKeyFn(tok.val))
return parseMatchExpr
case tokenStar:
p.appendPath(matchAnyFn())
return parseMatchExpr
}
p.raiseError(tok, "expected match expression")
return nil
}
func parseBracketExpr(p *parser) parserStateFn {
tok := p.peek()
switch tok.typ {
case tokenInteger:
// look ahead for a ':'
p.getToken()
next := p.peek()
p.backup(tok)
if next.typ == tokenColon {
return parseSliceExpr
}
return parseUnionExpr
case tokenColon:
return parseSliceExpr
}
if p.lookahead(tokenInteger, tokenColon) {
return parseSliceExpr
}
if p.peek().typ == tokenColon {
return parseSliceExpr
}
return parseUnionExpr
}
func parseUnionExpr(p *parser) parserStateFn {
union := []PathFn{}
for {
// this state can be traversed after some sub-expressions
// so be careful when setting up state in the parser
if p.union == nil {
p.union = []PathFn{}
}
loop: // labeled loop for easy breaking
for {
// parse sub expression
tok := p.getToken()
switch tok.typ {
case tokenInteger:
idx, _ := strconv.Atoi(tok.val)
union = append(union, matchIndexFn(idx))
p.union = append(p.union, &matchIndexFn{tok.Int()})
case tokenKey:
union = append(union, matchKeyFn(tok.val))
p.union = append(p.union, &matchKeyFn{tok.val})
case tokenString:
p.union = append(p.union, &matchKeyFn{tok.val})
case tokenQuestion:
return parseFilterExpr
case tokenLParen:
return parseScriptExpr
default:
p.raiseError(tok, "expected union sub expression")
p.raiseError(tok, "expected union sub expression, not '%s'", tok.val)
}
// parse delimiter or terminator
tok = p.getToken()
@@ -152,12 +176,20 @@ func parseUnionExpr(p *parser) parserStateFn {
case tokenComma:
continue
case tokenRBracket:
break
break loop
default:
p.raiseError(tok, "expected ',' or ']'")
}
}
p.appendPath(matchUnionFn(union))
// if there is only one sub-expression, use that instead
if len(p.union) == 1 {
p.appendPath(p.union[0])
}else {
p.appendPath(&matchUnionFn{p.union})
}
p.union = nil // clear out state
return parseMatchExpr
}
@@ -168,7 +200,7 @@ func parseSliceExpr(p *parser) parserStateFn {
// parse optional start
tok := p.getToken()
if tok.typ == tokenInteger {
start, _ = strconv.Atoi(tok.val)
start = tok.Int()
tok = p.getToken()
}
if tok.typ != tokenColon {
@@ -178,17 +210,21 @@ func parseSliceExpr(p *parser) parserStateFn {
// parse optional end
tok = p.getToken()
if tok.typ == tokenInteger {
end, _ = strconv.Atoi(tok.val)
end = tok.Int()
tok = p.getToken()
}
if tok.typ != tokenColon || tok.typ != tokenRBracket {
if tok.typ == tokenRBracket {
p.appendPath(&matchSliceFn{start, end, step})
return parseMatchExpr
}
if tok.typ != tokenColon {
p.raiseError(tok, "expected ']' or ':'")
}
// parse optional step
tok = p.getToken()
if tok.typ == tokenInteger {
step, _ = strconv.Atoi(tok.val)
step = tok.Int()
if step < 0 {
p.raiseError(tok, "step must be a positive value")
}
@@ -198,7 +234,7 @@ func parseSliceExpr(p *parser) parserStateFn {
p.raiseError(tok, "expected ']'")
}
p.appendPath(matchSliceFn(start, end, step))
p.appendPath(&matchSliceFn{start, end, step})
return parseMatchExpr
}
@@ -213,12 +249,11 @@ func parseScriptExpr(p *parser) parserStateFn {
}
func parse(flow chan token) []PathFn {
result := []PathFn{}
parser := &parser{
flow: flow,
tokensBuffer: []token{},
path: result,
path: []PathFn{},
}
parser.run()
return result
return parser.path
}
+8 -3
View File
@@ -29,9 +29,14 @@ func assertValue(t *testing.T, result, ref interface{}, location string) {
t.Errorf("{%s} result value not of type %T: %T",
location, node, resultNode)
} else {
for i, v := range node {
assertValue(t, resultNode[i], v, fmt.Sprintf("%s[%d]", location, i))
}
if len(node) != len(resultNode) {
t.Errorf("{%s} lengths do not match: %v vs %v",
location, node, resultNode)
} else {
for i, v := range node {
assertValue(t, resultNode[i], v, fmt.Sprintf("%s[%d]", location, i))
}
}
}
case map[string]interface{}:
if resultNode, ok := result.(*TomlTree); !ok {