Add \U support to query lexer (#88)

This commit is contained in:
Cameron Moore
2016-08-22 03:55:12 -05:00
committed by Thomas Pelletier
parent 31c735e72c
commit 887411a2a8
2 changed files with 105 additions and 7 deletions
+17
View File
@@ -272,6 +272,23 @@ func (l *queryLexer) lexString() queryLexStateFn {
return l.errorf("invalid unicode escape: \\u" + code)
}
growingString += string(rune(intcode))
} else if l.follow("\\U") {
l.pos += 2
code := ""
for i := 0; i < 8; i++ {
c := l.peek()
l.pos++
if !isHexDigit(c) {
return l.errorf("unfinished unicode escape")
}
code = code + string(c)
}
l.pos--
intcode, err := strconv.ParseInt(code, 16, 32)
if err != nil {
return l.errorf("invalid unicode escape: \\u" + code)
}
growingString += string(rune(intcode))
} else if l.follow("\\") {
l.pos++
return l.errorf("invalid escape sequence: \\" + string(l.peek()))
+88 -7
View File
@@ -10,11 +10,13 @@ func testQLFlow(t *testing.T, input string, expectedFlow []token) {
token := <-ch
if token != expected {
t.Log("While testing #", idx, ":", input)
t.Log("compared (got)", token, "to (expected)", expected)
t.Log("\tvalue:", token.val, "<->", expected.val)
t.Log("\tvalue as bytes:", []byte(token.val), "<->", []byte(expected.val))
t.Log("\ttype:", token.typ.String(), "<->", expected.typ.String())
t.Log("\tline:", token.Line, "<->", expected.Line)
t.Log("\tcolumn:", token.Col, "<->", expected.Col)
t.Log("compared", token, "to", expected)
t.Log(token.val, "<->", expected.val)
t.Log(token.typ, "<->", expected.typ)
t.Log(token.Line, "<->", expected.Line)
t.Log(token.Col, "<->", expected.Col)
t.FailNow()
}
}
@@ -48,9 +50,9 @@ func TestLexSpecialChars(t *testing.T) {
}
func TestLexString(t *testing.T) {
testQLFlow(t, "'foo'", []token{
token{Position{1, 2}, tokenString, "foo"},
token{Position{1, 6}, tokenEOF, ""},
testQLFlow(t, "'foo\n'", []token{
token{Position{1, 2}, tokenString, "foo\n"},
token{Position{2, 2}, tokenEOF, ""},
})
}
@@ -61,6 +63,37 @@ func TestLexDoubleString(t *testing.T) {
})
}
func TestLexStringEscapes(t *testing.T) {
testQLFlow(t, `"foo \" \' \b \f \/ \t \r \\ \u03A9 \U00012345 \n bar"`, []token{
token{Position{1, 2}, tokenString, "foo \" ' \b \f / \t \r \\ \u03A9 \U00012345 \n bar"},
token{Position{1, 55}, tokenEOF, ""},
})
}
func TestLexStringUnfinishedUnicode4(t *testing.T) {
testQLFlow(t, `"\u000"`, []token{
token{Position{1, 2}, tokenError, "unfinished unicode escape"},
})
}
func TestLexStringUnfinishedUnicode8(t *testing.T) {
testQLFlow(t, `"\U0000"`, []token{
token{Position{1, 2}, tokenError, "unfinished unicode escape"},
})
}
func TestLexStringInvalidEscape(t *testing.T) {
testQLFlow(t, `"\x"`, []token{
token{Position{1, 2}, tokenError, "invalid escape sequence: \\x"},
})
}
func TestLexStringUnfinished(t *testing.T) {
testQLFlow(t, `"bar`, []token{
token{Position{1, 2}, tokenError, "unclosed string"},
})
}
func TestLexKey(t *testing.T) {
testQLFlow(t, "foo", []token{
token{Position{1, 1}, tokenKey, "foo"},
@@ -95,3 +128,51 @@ func TestLexSpace(t *testing.T) {
token{Position{1, 12}, tokenEOF, ""},
})
}
func TestLexInteger(t *testing.T) {
testQLFlow(t, "100 +200 -300", []token{
token{Position{1, 1}, tokenInteger, "100"},
token{Position{1, 5}, tokenInteger, "+200"},
token{Position{1, 10}, tokenInteger, "-300"},
token{Position{1, 14}, tokenEOF, ""},
})
}
func TestLexFloat(t *testing.T) {
testQLFlow(t, "100.0 +200.0 -300.0", []token{
token{Position{1, 1}, tokenFloat, "100.0"},
token{Position{1, 7}, tokenFloat, "+200.0"},
token{Position{1, 14}, tokenFloat, "-300.0"},
token{Position{1, 20}, tokenEOF, ""},
})
}
func TestLexFloatWithMultipleDots(t *testing.T) {
testQLFlow(t, "4.2.", []token{
token{Position{1, 1}, tokenError, "cannot have two dots in one float"},
})
}
func TestLexFloatLeadingDot(t *testing.T) {
testQLFlow(t, "+.1", []token{
token{Position{1, 1}, tokenError, "cannot start float with a dot"},
})
}
func TestLexFloatWithTrailingDot(t *testing.T) {
testQLFlow(t, "42.", []token{
token{Position{1, 1}, tokenError, "float cannot end with a dot"},
})
}
func TestLexNumberWithoutDigit(t *testing.T) {
testQLFlow(t, "+", []token{
token{Position{1, 1}, tokenError, "no digit in that number"},
})
}
func TestLexUnknown(t *testing.T) {
testQLFlow(t, "^", []token{
token{Position{1, 1}, tokenError, "unexpected char: '94'"},
})
}