Changed match func strategy; added tests

As it turns out, closures are very hard to validate without running them.
Since table-driven tests tend to rely on value types that can be
compared directly, using structs that adhere to a generic callback
interface is more work, but is more easily tested.

* Changed jsonpath match functions to structs with Call() methods
* Added tests to verify the generation of jsonpath QueryPath data
* Added tests to verify jsonpath lexer
* Fixed jsonpath whitespace handling bug
* Fixed numerous flaws in jsonpath parser
This commit is contained in:
eanderton
2014-09-04 23:17:29 -04:00
parent 27416cc1b9
commit b74544d345
6 changed files with 447 additions and 156 deletions
+20 -14
View File
@@ -79,18 +79,26 @@ func (tt tokenType) String() string {
return "Unknown"
}
func (i token) String() string {
switch i.typ {
// Int converts the token's value to an integer. The lexer only emits
// integer tokens for digit runs, so a conversion failure indicates a
// programmer error and triggers a panic rather than returning an error.
func (t token) Int() int {
	n, err := strconv.Atoi(t.val)
	if err != nil {
		panic(err)
	}
	return n
}
func (t token) String() string {
switch t.typ {
case tokenEOF:
return "EOF"
case tokenError:
return i.val
return t.val
}
if len(i.val) > 10 {
return fmt.Sprintf("%.10q...", i.val)
if len(t.val) > 10 {
return fmt.Sprintf("%.10q...", t.val)
}
return fmt.Sprintf("%q", i.val)
return fmt.Sprintf("%q", t.val)
}
func isSpace(r rune) bool {
@@ -283,6 +291,12 @@ func lexVoid(l *lexer) stateFn {
return lexString
}
if isSpace(next) {
l.next()
l.ignore()
continue
}
if isAlphanumeric(next) {
return lexKey
}
@@ -291,14 +305,6 @@ func lexVoid(l *lexer) stateFn {
return lexNumber
}
if isAlphanumeric(next) {
return lexKey
}
if isSpace(next) {
l.ignore()
}
if l.next() == eof {
break
}
+31
View File
@@ -54,7 +54,9 @@ func TestLexString(t *testing.T) {
token{Position{1, 2}, tokenString, "foo"},
token{Position{1, 6}, tokenEOF, ""},
})
}
func TestLexDoubleString(t *testing.T) {
testFlow(t, `"bar"`, []token{
token{Position{1, 2}, tokenString, "bar"},
token{Position{1, 6}, tokenEOF, ""},
@@ -67,3 +69,32 @@ func TestLexKey(t *testing.T) {
token{Position{1, 4}, tokenEOF, ""},
})
}
func TestLexRecurse(t *testing.T) {
testFlow(t, "$..*", []token{
token{Position{1, 1}, tokenDollar, "$"},
token{Position{1, 2}, tokenDotDot, ".."},
token{Position{1, 4}, tokenStar, "*"},
token{Position{1, 5}, tokenEOF, ""},
})
}
func TestLexBracketKey(t *testing.T) {
testFlow(t, "$[foo]", []token{
token{Position{1, 1}, tokenDollar, "$"},
token{Position{1, 2}, tokenLBracket, "["},
token{Position{1, 3}, tokenKey, "foo"},
token{Position{1, 6}, tokenRBracket, "]"},
token{Position{1, 7}, tokenEOF, ""},
})
}
// TestLexSpace verifies that whitespace between bare keys is consumed and
// ignored rather than emitted as a token: the reported positions (1, 5, 9)
// show the spaces were skipped. Regression test for the lexer whitespace
// handling bug fixed in this commit.
func TestLexSpace(t *testing.T) {
testFlow(t, "foo bar baz", []token{
token{Position{1, 1}, tokenKey, "foo"},
token{Position{1, 5}, tokenKey, "bar"},
token{Position{1, 9}, tokenKey, "baz"},
token{Position{1, 12}, tokenEOF, ""},
})
}
+112 -84
View File
@@ -4,111 +4,139 @@ import (
. "github.com/pelletier/go-toml"
)
type QueryPath []PathFn
type PathFn func(context interface{}, next QueryPath)
func (path QueryPath) Fn(context interface{}) {
path[0](context, path[1:])
type PathFn interface{
Call(context interface{}, next QueryPath)
}
type QueryPath []PathFn
func (path QueryPath) Fn(context interface{}) {
path[0].Call(context, path[1:])
}
// treeValue is a shim to ease functor writing: it fetches the value stored
// under a single key of the tree. Callers (e.g. matchKeyFn.Call) treat a
// nil result as "key absent" — presumably GetPath returns nil for missing
// keys; confirm against the TomlTree API.
func treeValue(tree *TomlTree, key string) interface{} {
return tree.GetPath([]string{key})
}
func matchKeyFn(name string) PathFn {
return func(context interface{}, next QueryPath) {
if tree, ok := context.(*TomlTree); ok {
item := treeValue(tree, name)
if item != nil {
next.Fn(item)
}
}
}
// matchKeyFn matches a single named key in a tree context, as produced by
// query expressions such as "$.foo", "$[foo]" or "$['foo']".
type matchKeyFn struct {
Name string
}
func matchIndexFn(idx int) PathFn {
return func(context interface{}, next QueryPath) {
if arr, ok := context.([]interface{}); ok {
if idx < len(arr) && idx >= 0 {
next.Fn(arr[idx])
}
}
}
// Call looks up f.Name in a tree context and forwards the value, when
// present, to the rest of the query path. Non-tree contexts are ignored.
func (f *matchKeyFn) Call(context interface{}, next QueryPath) {
	tree, ok := context.(*TomlTree)
	if !ok {
		return
	}
	if item := treeValue(tree, f.Name); item != nil {
		next.Fn(item)
	}
}
func matchSliceFn(start, end, step int) PathFn {
return func(context interface{}, next QueryPath) {
if arr, ok := context.([]interface{}); ok {
// adjust indexes for negative values, reverse ordering
realStart, realEnd := start, end
if realStart < 0 {
realStart = len(arr) + realStart
}
if realEnd < 0 {
realEnd = len(arr) + realEnd
}
if realEnd < realStart {
realEnd, realStart = realStart, realEnd // swap
}
// loop and gather
for idx := realStart; idx < realEnd; idx += step {
next.Fn(arr[idx])
}
}
}
// match single index
type matchIndexFn struct {
Idx int
}
func matchAnyFn() PathFn {
return func(context interface{}, next QueryPath) {
if tree, ok := context.(*TomlTree); ok {
for _, key := range tree.Keys() {
item := treeValue(tree, key)
next.Fn(item)
}
}
}
// Call forwards the array element at f.Idx to the remaining path.
// Non-array contexts and out-of-range indexes are silently skipped.
func (f *matchIndexFn) Call(context interface{}, next QueryPath) {
	arr, ok := context.([]interface{})
	if !ok {
		return
	}
	if 0 <= f.Idx && f.Idx < len(arr) {
		next.Fn(arr[f.Idx])
	}
}
func matchUnionFn(union QueryPath) PathFn {
return func(context interface{}, next QueryPath) {
for _, fn := range union {
fn(context, next)
}
}
// filter by slicing
type matchSliceFn struct {
Start, End, Step int
}
func matchRecurseFn() PathFn {
return func(context interface{}, next QueryPath) {
if tree, ok := context.(*TomlTree); ok {
var visit func(tree *TomlTree)
visit = func(tree *TomlTree) {
for _, key := range tree.Keys() {
item := treeValue(tree, key)
next.Fn(item)
switch node := item.(type) {
case *TomlTree:
visit(node)
case []*TomlTree:
for _, subtree := range node {
visit(subtree)
}
}
}
}
visit(tree)
}
}
// Call applies the slice filter [Start:End:Step] to an array context and
// forwards each selected element to the remaining path.
//
// Negative Start/End are taken relative to the end of the array, and the
// two bounds are swapped if they arrive reversed. The bounds are then
// clamped to [0, len(arr)]: the parser encodes an open-ended slice such
// as "$[123:]" with End = math.MaxInt64 (see the parser tests), which
// would otherwise index past the end of the array and panic.
func (f *matchSliceFn) Call(context interface{}, next QueryPath) {
	arr, ok := context.([]interface{})
	if !ok {
		return
	}
	// guard: a non-positive step would make the gather loop spin forever
	if f.Step < 1 {
		return
	}
	// adjust indexes for negative values, reverse ordering
	realStart, realEnd := f.Start, f.End
	if realStart < 0 {
		realStart = len(arr) + realStart
	}
	if realEnd < 0 {
		realEnd = len(arr) + realEnd
	}
	if realEnd < realStart {
		realEnd, realStart = realStart, realEnd // swap
	}
	// clamp to the array bounds so open-ended slices cannot panic
	if realStart < 0 {
		realStart = 0
	}
	if realEnd > len(arr) {
		realEnd = len(arr)
	}
	// loop and gather
	for idx := realStart; idx < realEnd; idx += f.Step {
		next.Fn(arr[idx])
	}
}
// match anything
type matchAnyFn struct {
// empty
}
// Call fans out to every immediate child of a tree context (the "*"
// wildcard). Non-tree contexts are ignored.
func (f *matchAnyFn) Call(context interface{}, next QueryPath) {
	tree, ok := context.(*TomlTree)
	if !ok {
		return
	}
	for _, key := range tree.Keys() {
		next.Fn(treeValue(tree, key))
	}
}
// filter through union
type matchUnionFn struct {
Union QueryPath
}
// Call applies each member of the union to the same context; every member
// continues with the same remaining query path.
func (f *matchUnionFn) Call(context interface{}, next QueryPath) {
	for i := range f.Union {
		f.Union[i].Call(context, next)
	}
}
// match every single last node in the tree
type matchRecursiveFn struct {
// empty
}
// Call implements the recursive-descent operator ("$..*"): it walks the
// whole tree depth-first and forwards every value encountered — leaves,
// subtrees, and members of tree arrays — to the remaining path.
// Non-tree contexts are ignored.
func (f *matchRecursiveFn) Call(context interface{}, next QueryPath) {
if tree, ok := context.(*TomlTree); ok {
var visit func(tree *TomlTree)
visit = func(tree *TomlTree) {
for _, key := range tree.Keys() {
item := treeValue(tree, key)
// forward the node itself before descending into it
next.Fn(item)
switch node := item.(type) {
case *TomlTree:
visit(node)
case []*TomlTree:
// arrays of tables: recurse into each element
for _, subtree := range node {
visit(subtree)
}
}
}
}
visit(tree)
}
}
// matchEndFn terminates a query path: every context that reaches it is
// collected into Result. processPath appends one to the end of the path
// and reads Result back after execution.
type matchEndFn struct {
Result []interface{}
}
// Call records the context; next is intentionally unused, as this is
// always the final element of the path.
func (f *matchEndFn) Call(context interface{}, next QueryPath) {
f.Result = append(f.Result, context)
}
func processPath(path QueryPath, context interface{}) []interface{} {
// terminate the path with a collection function
result := []interface{}{}
newPath := append(path, func(context interface{}, next QueryPath) {
result = append(result, context)
})
endFn := &matchEndFn{ []interface{}{} }
newPath := append(path, endFn)
// execute the path
newPath.Fn(context)
return result
return endFn.Result
}
+186
View File
@@ -0,0 +1,186 @@
package jpath
import (
"fmt"
"math"
"testing"
)
// pathString renders a query path as "[type:value, ...]" for use in
// test failure messages.
func pathString(path QueryPath) string {
	out := "["
	for _, fn := range path {
		out += fmt.Sprintf("%T:%v, ", fn, fn)
	}
	out += "]"
	return out
}
// assertPathMatch compares a parsed query path against a reference path
// element by element, reporting a test error and returning false on the
// first mismatch. Matching is by concrete type and value; union nodes are
// compared recursively. A ref element of a type not listed in the switch
// leaves pass == false and is reported as a mismatch.
func assertPathMatch(t *testing.T, path, ref QueryPath) bool {
if len(path) != len(ref) {
t.Errorf("lengths do not match: %v vs %v",
pathString(path), pathString(ref))
return false
} else {
for i, v := range ref {
pass := false
node := path[i]
// compare by value
switch refNode := v.(type) {
case *matchKeyFn:
castNode, ok := node.(*matchKeyFn)
pass = ok && (*refNode == *castNode)
case *matchIndexFn:
castNode, ok := node.(*matchIndexFn)
pass = ok && (*refNode == *castNode)
case *matchSliceFn:
castNode, ok := node.(*matchSliceFn)
pass = ok && (*refNode == *castNode)
case *matchAnyFn:
castNode, ok := node.(*matchAnyFn)
pass = ok && (*refNode == *castNode)
case *matchUnionFn:
castNode, ok := node.(*matchUnionFn)
// special case - compare all contents recursively
pass = ok && assertPathMatch(t, castNode.Union, refNode.Union)
case *matchRecursiveFn:
castNode, ok := node.(*matchRecursiveFn)
pass = ok && (*refNode == *castNode)
}
if !pass {
t.Errorf("paths do not match at index %d: %v vs %v",
i, pathString(path), pathString(ref))
return false
}
}
}
return true
}
// assertPath lexes and parses query, then checks the resulting path
// against the reference path ref.
func assertPath(t *testing.T, query string, ref QueryPath) {
	_, tokens := lex(query)
	assertPathMatch(t, parse(tokens), ref)
}
func TestPathRoot(t *testing.T) {
assertPath(t,
"$",
QueryPath{
// empty
})
}
func TestPathKey(t *testing.T) {
assertPath(t,
"$.foo",
QueryPath{
&matchKeyFn{ "foo" },
})
}
func TestPathBracketKey(t *testing.T) {
assertPath(t,
"$[foo]",
QueryPath{
&matchKeyFn{ "foo" },
})
}
func TestPathBracketStringKey(t *testing.T) {
assertPath(t,
"$['foo']",
QueryPath{
&matchKeyFn{ "foo" },
})
}
func TestPathIndex(t *testing.T) {
assertPath(t,
"$[123]",
QueryPath{
&matchIndexFn{ 123 },
})
}
func TestPathSliceStart(t *testing.T) {
assertPath(t,
"$[123:]",
QueryPath{
&matchSliceFn{ 123, math.MaxInt64, 1 },
})
}
func TestPathSliceStartEnd(t *testing.T) {
assertPath(t,
"$[123:456]",
QueryPath{
&matchSliceFn{ 123, 456, 1 },
})
}
func TestPathSliceStartEndColon(t *testing.T) {
assertPath(t,
"$[123:456:]",
QueryPath{
&matchSliceFn{ 123, 456, 1 },
})
}
func TestPathSliceStartStep(t *testing.T) {
assertPath(t,
"$[123::7]",
QueryPath{
&matchSliceFn{ 123, math.MaxInt64, 7 },
})
}
func TestPathSliceEndStep(t *testing.T) {
assertPath(t,
"$[:456:7]",
QueryPath{
&matchSliceFn{ 0, 456, 7 },
})
}
func TestPathSliceStep(t *testing.T) {
assertPath(t,
"$[::7]",
QueryPath{
&matchSliceFn{ 0, math.MaxInt64, 7 },
})
}
func TestPathSliceAll(t *testing.T) {
assertPath(t,
"$[123:456:7]",
QueryPath{
&matchSliceFn{ 123, 456, 7 },
})
}
func TestPathAny(t *testing.T) {
assertPath(t,
"$.*",
QueryPath{
&matchAnyFn{},
})
}
func TestPathUnion(t *testing.T) {
assertPath(t,
"$[foo, bar, baz]",
QueryPath{
&matchUnionFn{ []PathFn {
&matchKeyFn{ "foo" },
&matchKeyFn{ "bar" },
&matchKeyFn{ "baz" },
}},
})
}
func TestPathRecurse(t *testing.T) {
assertPath(t,
"$..*",
QueryPath{
&matchRecursiveFn{},
})
}
+90 -55
View File
@@ -3,13 +3,13 @@ package jpath
import (
"fmt"
"math"
"strconv"
)
type parser struct {
flow chan token
tokensBuffer []token
path []PathFn
union []PathFn
}
type parserStateFn func(*parser) parserStateFn
@@ -42,6 +42,27 @@ func (p *parser) peek() *token {
return &tok
}
// lookahead reports whether the next tokens in the stream match the given
// types, in order. It stops at the first mismatch or at end of stream.
// Every token it consumed is pushed back so the stream is left unchanged.
func (p *parser) lookahead(types ...tokenType) bool {
	result := true
	buffer := []token{}

	for _, typ := range types {
		tok := p.getToken()
		if tok == nil {
			result = false
			break
		}
		buffer = append(buffer, *tok)
		if tok.typ != typ {
			result = false
			break
		}
	}
	// Restore the consumed tokens to the FRONT of tokensBuffer: getToken
	// pops from index 0, so appending them at the back would reorder the
	// stream whenever tokensBuffer still held other buffered tokens.
	p.tokensBuffer = append(buffer, p.tokensBuffer...)
	return result
}
func (p *parser) getToken() *token {
if len(p.tokensBuffer) != 0 {
tok := p.tokensBuffer[0]
@@ -73,20 +94,40 @@ func parseStart(p *parser) parserStateFn {
return parseMatchExpr
}
// handle '.' prefix, '[]', and '..'
func parseMatchExpr(p *parser) parserStateFn {
tok := p.getToken()
switch tok.typ {
case tokenDot:
p.appendPath(matchKeyFn(tok.val))
return parseMatchExpr
case tokenDotDot:
p.appendPath(matchRecurseFn())
return parseSimpleMatchExpr
p.appendPath(&matchRecursiveFn{})
// nested parse for '..'
tok := p.getToken()
switch tok.typ {
case tokenKey:
p.appendPath(&matchKeyFn{tok.val})
return parseMatchExpr
case tokenLBracket:
return parseBracketExpr
case tokenStar:
// do nothing - the recursive predicate is enough
return parseMatchExpr
}
case tokenDot:
// nested parse for '.'
tok := p.getToken()
switch tok.typ {
case tokenKey:
p.appendPath(&matchKeyFn{tok.val})
return parseMatchExpr
case tokenStar:
p.appendPath(&matchAnyFn{})
return parseMatchExpr
}
case tokenLBracket:
return parseBracketExpr
case tokenStar:
p.appendPath(matchAnyFn())
return parseMatchExpr
case tokenEOF:
return nil // allow EOF at this stage
}
@@ -94,57 +135,40 @@ func parseMatchExpr(p *parser) parserStateFn {
return nil
}
func parseSimpleMatchExpr(p *parser) parserStateFn {
tok := p.getToken()
switch tok.typ {
case tokenLBracket:
return parseBracketExpr
case tokenKey:
p.appendPath(matchKeyFn(tok.val))
return parseMatchExpr
case tokenStar:
p.appendPath(matchAnyFn())
return parseMatchExpr
}
p.raiseError(tok, "expected match expression")
return nil
}
func parseBracketExpr(p *parser) parserStateFn {
tok := p.peek()
switch tok.typ {
case tokenInteger:
// look ahead for a ':'
p.getToken()
next := p.peek()
p.backup(tok)
if next.typ == tokenColon {
return parseSliceExpr
}
return parseUnionExpr
case tokenColon:
return parseSliceExpr
}
if p.lookahead(tokenInteger, tokenColon) {
return parseSliceExpr
}
if p.peek().typ == tokenColon {
return parseSliceExpr
}
return parseUnionExpr
}
func parseUnionExpr(p *parser) parserStateFn {
union := []PathFn{}
for {
// this state can be traversed after some sub-expressions
// so be careful when setting up state in the parser
if p.union == nil {
p.union = []PathFn{}
}
loop: // labeled loop for easy breaking
for {
// parse sub expression
tok := p.getToken()
switch tok.typ {
case tokenInteger:
idx, _ := strconv.Atoi(tok.val)
union = append(union, matchIndexFn(idx))
p.union = append(p.union, &matchIndexFn{tok.Int()})
case tokenKey:
union = append(union, matchKeyFn(tok.val))
p.union = append(p.union, &matchKeyFn{tok.val})
case tokenString:
p.union = append(p.union, &matchKeyFn{tok.val})
case tokenQuestion:
return parseFilterExpr
case tokenLParen:
return parseScriptExpr
default:
p.raiseError(tok, "expected union sub expression")
p.raiseError(tok, "expected union sub expression, not '%s'", tok.val)
}
// parse delimiter or terminator
tok = p.getToken()
@@ -152,12 +176,20 @@ func parseUnionExpr(p *parser) parserStateFn {
case tokenComma:
continue
case tokenRBracket:
break
break loop
default:
p.raiseError(tok, "expected ',' or ']'")
}
}
p.appendPath(matchUnionFn(union))
// if there is only one sub-expression, use that instead
if len(p.union) == 1 {
p.appendPath(p.union[0])
}else {
p.appendPath(&matchUnionFn{p.union})
}
p.union = nil // clear out state
return parseMatchExpr
}
@@ -168,7 +200,7 @@ func parseSliceExpr(p *parser) parserStateFn {
// parse optional start
tok := p.getToken()
if tok.typ == tokenInteger {
start, _ = strconv.Atoi(tok.val)
start = tok.Int()
tok = p.getToken()
}
if tok.typ != tokenColon {
@@ -178,17 +210,21 @@ func parseSliceExpr(p *parser) parserStateFn {
// parse optional end
tok = p.getToken()
if tok.typ == tokenInteger {
end, _ = strconv.Atoi(tok.val)
end = tok.Int()
tok = p.getToken()
}
if tok.typ != tokenColon || tok.typ != tokenRBracket {
if tok.typ == tokenRBracket {
p.appendPath(&matchSliceFn{start, end, step})
return parseMatchExpr
}
if tok.typ != tokenColon {
p.raiseError(tok, "expected ']' or ':'")
}
// parse optional step
tok = p.getToken()
if tok.typ == tokenInteger {
step, _ = strconv.Atoi(tok.val)
step = tok.Int()
if step < 0 {
p.raiseError(tok, "step must be a positive value")
}
@@ -198,7 +234,7 @@ func parseSliceExpr(p *parser) parserStateFn {
p.raiseError(tok, "expected ']'")
}
p.appendPath(matchSliceFn(start, end, step))
p.appendPath(&matchSliceFn{start, end, step})
return parseMatchExpr
}
@@ -213,12 +249,11 @@ func parseScriptExpr(p *parser) parserStateFn {
}
func parse(flow chan token) []PathFn {
result := []PathFn{}
parser := &parser{
flow: flow,
tokensBuffer: []token{},
path: result,
path: []PathFn{},
}
parser.run()
return result
return parser.path
}
+8 -3
View File
@@ -29,9 +29,14 @@ func assertValue(t *testing.T, result, ref interface{}, location string) {
t.Errorf("{%s} result value not of type %T: %T",
location, node, resultNode)
} else {
for i, v := range node {
assertValue(t, resultNode[i], v, fmt.Sprintf("%s[%d]", location, i))
}
if len(node) != len(resultNode) {
t.Errorf("{%s} lengths do not match: %v vs %v",
location, node, resultNode)
} else {
for i, v := range node {
assertValue(t, resultNode[i], v, fmt.Sprintf("%s[%d]", location, i))
}
}
}
case map[string]interface{}:
if resultNode, ok := result.(*TomlTree); !ok {