Added line/col support to lexer

This commit is contained in:
eanderton
2014-08-06 07:13:15 -04:00
parent e493544dfd
commit dd04a2f3cd
2 changed files with 235 additions and 206 deletions
+29 -5
View File
@@ -46,6 +46,8 @@ const (
// token is one unit handed from the lexer to its consumer: a type tag, the
// associated text, and the line/column the lexer recorded for it.
type token struct {
	typ       tokenType // kind of token
	val       string    // text carried by the token
	line, col int       // position recorded by the lexer when the token was sent
}
func (i token) String() string {
@@ -93,6 +95,8 @@ type lexer struct {
width int
tokens chan token
depth int
line int
col int
}
func (l *lexer) run() {
@@ -102,14 +106,32 @@ func (l *lexer) run() {
close(l.tokens)
}
// nextStart folds the pending text between l.start and l.pos into the
// line/column counters and then moves l.start up to l.pos.
//
// The text is walked rune by rune (not byte by byte): a newline bumps the
// line counter and resets the column to zero, any other rune advances the
// column by one.
func (l *lexer) nextStart() {
	offset := l.start
	for offset < l.pos {
		r, size := utf8.DecodeRuneInString(l.input[offset:])
		offset += size
		switch r {
		case '\n':
			l.line++
			l.col = 0
		default:
			l.col++
		}
	}
	// The next token begins where this one ended.
	l.start = l.pos
}
// emit sends the pending input text l.input[l.start:l.pos] on the token
// channel as a single token of type t, tagged with the lexer's current line
// and column, and then advances the start position past it.
func (l *lexer) emit(t tokenType) {
	// Send exactly once, before nextStart updates line/col, so the token
	// carries the position of its first character.
	l.tokens <- token{t, l.input[l.start:l.pos], l.line, l.col}
	l.nextStart()
}
// emitWithValue sends a single token of type t on the token channel whose
// text is the supplied value rather than the raw input slice, tagged with the
// lexer's current line and column, and then advances the start position past
// the pending input.
func (l *lexer) emitWithValue(t tokenType, value string) {
	// Send exactly once, before nextStart updates line/col, so the token
	// carries the position recorded at l.start.
	l.tokens <- token{t, value, l.line, l.col}
	l.nextStart()
}
func (l *lexer) next() rune {
@@ -124,7 +146,7 @@ func (l *lexer) next() rune {
}
// ignore discards the pending input between l.start and l.pos without
// sending a token.
func (l *lexer) ignore() {
	// Go through nextStart rather than assigning l.start directly: the
	// skipped text must still be counted, otherwise line/col would not
	// advance over it and later tokens would report wrong positions.
	l.nextStart()
}
func (l *lexer) backup() {
@@ -135,6 +157,8 @@ func (l *lexer) errorf(format string, args ...interface{}) stateFn {
l.tokens <- token{
tokenError,
fmt.Sprintf(format, args...),
l.line,
l.col,
}
return nil
}
+206 -201
View File
@@ -7,9 +7,12 @@ func testFlow(t *testing.T, input string, expectedFlow []token) {
for _, expected := range expectedFlow {
token := <-ch
if token != expected {
t.Log("While testing: ", input)
t.Log("compared", token, "to", expected)
t.Log(token.val, "<->", expected.val)
t.Log(token.typ, "<->", expected.typ)
t.Log(token.val, "<->", expected.val)
t.Log(token.typ, "<->", expected.typ)
t.Log(token.line, "<->", expected.line)
t.Log(token.col, "<->", expected.col)
t.FailNow()
}
}
@@ -29,244 +32,246 @@ func testFlow(t *testing.T, input string, expectedFlow []token) {
// TestValidKeyGroup checks the tokens, with their line/column positions,
// expected for a closed key group.
func TestValidKeyGroup(t *testing.T) {
	testFlow(t, "[hello world]", []token{
		{tokenLeftBracket, "[", 0, 0},
		{tokenKeyGroup, "hello world", 0, 1},
		{tokenRightBracket, "]", 0, 12},
		{tokenEOF, "", 0, 13},
	})
}
// TestUnclosedKeyGroup checks that a key group without a closing bracket
// ends the flow with a positioned "unclosed key group" error token.
func TestUnclosedKeyGroup(t *testing.T) {
	testFlow(t, "[hello world", []token{
		{tokenLeftBracket, "[", 0, 0},
		{tokenError, "unclosed key group", 0, 1},
	})
}
// TestComment checks that a comment-only input yields just an EOF token,
// positioned after the comment text.
func TestComment(t *testing.T) {
	testFlow(t, "# blahblah", []token{
		{tokenEOF, "", 0, 10},
	})
}
// TestKeyGroupComment checks the positioned tokens expected for a key group
// followed by a trailing comment on the same line.
func TestKeyGroupComment(t *testing.T) {
	testFlow(t, "[hello world] # blahblah", []token{
		{tokenLeftBracket, "[", 0, 0},
		{tokenKeyGroup, "hello world", 0, 1},
		{tokenRightBracket, "]", 0, 12},
		{tokenEOF, "", 0, 24},
	})
}
// TestMultipleKeyGroupsComment checks two key groups on separate lines with
// a comment after the first; tokens of the second group are expected on
// line 1.
func TestMultipleKeyGroupsComment(t *testing.T) {
	testFlow(t, "[hello world] # blahblah\n[test]", []token{
		{tokenLeftBracket, "[", 0, 0},
		{tokenKeyGroup, "hello world", 0, 1},
		{tokenRightBracket, "]", 0, 12},
		{tokenLeftBracket, "[", 1, 0},
		{tokenKeyGroup, "test", 1, 1},
		{tokenRightBracket, "]", 1, 5},
		{tokenEOF, "", 1, 6},
	})
}
// TestBasicKey checks the positioned tokens expected for a bare key.
func TestBasicKey(t *testing.T) {
	testFlow(t, "hello", []token{
		{tokenKey, "hello", 0, 0},
		{tokenEOF, "", 0, 5},
	})
}
// TestBasicKeyWithUnderscore checks the positioned tokens expected for a key
// containing an underscore.
func TestBasicKeyWithUnderscore(t *testing.T) {
	testFlow(t, "hello_hello", []token{
		{tokenKey, "hello_hello", 0, 0},
		{tokenEOF, "", 0, 11},
	})
}
// TestBasicKeyWithDash checks the positioned tokens expected for a key
// containing a dash.
func TestBasicKeyWithDash(t *testing.T) {
	testFlow(t, "hello-world", []token{
		{tokenKey, "hello-world", 0, 0},
		{tokenEOF, "", 0, 11},
	})
}
// TestBasicKeyWithUppercaseMix checks the positioned tokens expected for a
// key mixing lower- and upper-case letters.
func TestBasicKeyWithUppercaseMix(t *testing.T) {
	testFlow(t, "helloHELLOHello", []token{
		{tokenKey, "helloHELLOHello", 0, 0},
		{tokenEOF, "", 0, 15},
	})
}
// TestBasicKeyWithInternationalCharacters checks a key with non-ASCII
// letters; the expected EOF column (5) counts characters, not bytes.
func TestBasicKeyWithInternationalCharacters(t *testing.T) {
	testFlow(t, "héllÖ", []token{
		{tokenKey, "héllÖ", 0, 0},
		{tokenEOF, "", 0, 5},
	})
}
// TestBasicKeyAndEqual checks the positioned tokens expected for a key
// followed by an equal sign and no value.
func TestBasicKeyAndEqual(t *testing.T) {
	testFlow(t, "hello =", []token{
		{tokenKey, "hello", 0, 0},
		{tokenEqual, "=", 0, 6},
		{tokenEOF, "", 0, 7},
	})
}
// TestKeyWithSharpAndEqual checks that a '#' inside a key is expected to be
// part of the key token rather than the start of a comment.
func TestKeyWithSharpAndEqual(t *testing.T) {
	testFlow(t, "key#name = 5", []token{
		{tokenKey, "key#name", 0, 0},
		{tokenEqual, "=", 0, 9},
		{tokenInteger, "5", 0, 11},
		{tokenEOF, "", 0, 12},
	})
}
// TestKeyWithSymbolsAndEqual checks the positioned tokens expected for a key
// made of punctuation and digits, assigned an integer.
func TestKeyWithSymbolsAndEqual(t *testing.T) {
	testFlow(t, "~!@#$^&*()_+-`1234567890[]\\|/?><.,;:' = 5", []token{
		{tokenKey, "~!@#$^&*()_+-`1234567890[]\\|/?><.,;:'", 0, 0},
		{tokenEqual, "=", 0, 38},
		{tokenInteger, "5", 0, 40},
		{tokenEOF, "", 0, 41},
	})
}
func TestKeyEqualStringEscape(t *testing.T) {
testFlow(t, "foo = \"hello\\\"\"", []token{
token{tokenKey, "foo"},
token{tokenEqual, "="},
token{tokenString, "hello\""},
token{tokenEOF, ""},
testFlow(t, `foo = "hello\""`, []token{
token{tokenKey, "foo", 0, 0},
token{tokenEqual, "=", 0, 4},
token{tokenString, "hello\"" ,0, 7},
token{tokenEOF, "", 0, 15},
})
}
func TestKeyEqualStringUnfinished(t *testing.T) {
testFlow(t, "foo = \"bar", []token{
token{tokenKey, "foo"},
token{tokenEqual, "="},
token{tokenError, "unclosed string"},
testFlow(t, `foo = "bar`, []token{
token{tokenKey, "foo", 0, 0},
token{tokenEqual, "=", 0, 4},
token{tokenError, "unclosed string", 0, 7},
})
}
func TestKeyEqualString(t *testing.T) {
testFlow(t, "foo = \"bar\"", []token{
token{tokenKey, "foo"},
token{tokenEqual, "="},
token{tokenString, "bar"},
token{tokenEOF, ""},
testFlow(t, `foo = "bar"`, []token{
token{tokenKey, "foo", 0, 0},
token{tokenEqual, "=", 0, 4},
token{tokenString, "bar", 0, 7},
token{tokenEOF, "", 0, 11},
})
}
// TestKeyEqualTrue checks the positioned tokens expected for a key assigned
// the boolean literal true.
func TestKeyEqualTrue(t *testing.T) {
	testFlow(t, "foo = true", []token{
		{tokenKey, "foo", 0, 0},
		{tokenEqual, "=", 0, 4},
		{tokenTrue, "true", 0, 6},
		{tokenEOF, "", 0, 10},
	})
}
// TestKeyEqualFalse checks the positioned tokens expected for a key assigned
// the boolean literal false.
func TestKeyEqualFalse(t *testing.T) {
	testFlow(t, "foo = false", []token{
		{tokenKey, "foo", 0, 0},
		{tokenEqual, "=", 0, 4},
		{tokenFalse, "false", 0, 6},
		{tokenEOF, "", 0, 11},
	})
}
func TestArrayNestedString(t *testing.T) {
testFlow(t, "a = [ [\"hello\", \"world\"] ]", []token{
token{tokenKey, "a"},
token{tokenEqual, "="},
token{tokenLeftBracket, "["},
token{tokenLeftBracket, "["},
token{tokenString, "hello"},
token{tokenComma, ","},
token{tokenString, "world"},
token{tokenRightBracket, "]"},
token{tokenRightBracket, "]"},
token{tokenEOF, ""},
testFlow(t, `a = [ ["hello", "world"] ]`, []token{
token{tokenKey, "a", 0, 0},
token{tokenEqual, "=", 0, 2},
token{tokenLeftBracket, "[", 0, 4},
token{tokenLeftBracket, "[", 0, 6},
token{tokenString, "hello", 0, 8},
token{tokenComma, ",", 0, 14},
token{tokenString, "world", 0, 17},
token{tokenRightBracket, "]", 0, 23},
token{tokenRightBracket, "]", 0, 25},
token{tokenEOF, "", 0, 26},
})
}
// TestArrayNestedInts checks the positioned tokens expected for an array
// that contains two nested arrays of integers.
func TestArrayNestedInts(t *testing.T) {
	testFlow(t, "a = [ [42, 21], [10] ]", []token{
		{tokenKey, "a", 0, 0},
		{tokenEqual, "=", 0, 2},
		{tokenLeftBracket, "[", 0, 4},
		{tokenLeftBracket, "[", 0, 6},
		{tokenInteger, "42", 0, 7},
		{tokenComma, ",", 0, 9},
		{tokenInteger, "21", 0, 11},
		{tokenRightBracket, "]", 0, 13},
		{tokenComma, ",", 0, 14},
		{tokenLeftBracket, "[", 0, 16},
		{tokenInteger, "10", 0, 17},
		{tokenRightBracket, "]", 0, 19},
		{tokenRightBracket, "]", 0, 21},
		{tokenEOF, "", 0, 22},
	})
}
// TestArrayInts checks the positioned tokens expected for a flat integer
// array with a trailing comma.
func TestArrayInts(t *testing.T) {
	testFlow(t, "a = [ 42, 21, 10, ]", []token{
		{tokenKey, "a", 0, 0},
		{tokenEqual, "=", 0, 2},
		{tokenLeftBracket, "[", 0, 4},
		{tokenInteger, "42", 0, 6},
		{tokenComma, ",", 0, 8},
		{tokenInteger, "21", 0, 10},
		{tokenComma, ",", 0, 12},
		{tokenInteger, "10", 0, 14},
		{tokenComma, ",", 0, 16},
		{tokenRightBracket, "]", 0, 18},
		{tokenEOF, "", 0, 19},
	})
}
// TestMultilineArrayComments checks an array spread over several lines with
// a comment after each element; expected line numbers increase per input
// line and columns restart at 0.
func TestMultilineArrayComments(t *testing.T) {
	testFlow(t, "a = [1, # wow\n2, # such items\n3, # so array\n]", []token{
		{tokenKey, "a", 0, 0},
		{tokenEqual, "=", 0, 2},
		{tokenLeftBracket, "[", 0, 4},
		{tokenInteger, "1", 0, 5},
		{tokenComma, ",", 0, 6},
		{tokenInteger, "2", 1, 0},
		{tokenComma, ",", 1, 1},
		{tokenInteger, "3", 2, 0},
		{tokenComma, ",", 2, 1},
		{tokenRightBracket, "]", 3, 0},
		{tokenEOF, "", 3, 1},
	})
}
// TestKeyEqualArrayBools checks the positioned tokens expected for a key
// assigned an array of boolean literals.
func TestKeyEqualArrayBools(t *testing.T) {
	testFlow(t, "foo = [true, false, true]", []token{
		{tokenKey, "foo", 0, 0},
		{tokenEqual, "=", 0, 4},
		{tokenLeftBracket, "[", 0, 6},
		{tokenTrue, "true", 0, 7},
		{tokenComma, ",", 0, 11},
		{tokenFalse, "false", 0, 13},
		{tokenComma, ",", 0, 18},
		{tokenTrue, "true", 0, 20},
		{tokenRightBracket, "]", 0, 24},
		{tokenEOF, "", 0, 25},
	})
}
// TestKeyEqualArrayBoolsWithComments checks a boolean array followed by a
// trailing comment; the expected EOF column lies after the comment text.
func TestKeyEqualArrayBoolsWithComments(t *testing.T) {
	testFlow(t, "foo = [true, false, true] # YEAH", []token{
		{tokenKey, "foo", 0, 0},
		{tokenEqual, "=", 0, 4},
		{tokenLeftBracket, "[", 0, 6},
		{tokenTrue, "true", 0, 7},
		{tokenComma, ",", 0, 11},
		{tokenFalse, "false", 0, 13},
		{tokenComma, ",", 0, 18},
		{tokenTrue, "true", 0, 20},
		{tokenRightBracket, "]", 0, 24},
		{tokenEOF, "", 0, 32},
	})
}
@@ -278,138 +283,138 @@ func TestDateRegexp(t *testing.T) {
// TestKeyEqualDate checks the positioned tokens expected for a key assigned
// a date-time literal.
func TestKeyEqualDate(t *testing.T) {
	testFlow(t, "foo = 1979-05-27T07:32:00Z", []token{
		{tokenKey, "foo", 0, 0},
		{tokenEqual, "=", 0, 4},
		{tokenDate, "1979-05-27T07:32:00Z", 0, 6},
		{tokenEOF, "", 0, 26},
	})
}
// TestFloatEndingWithDot checks that a number ending in a dot ends the flow
// with a positioned error token.
func TestFloatEndingWithDot(t *testing.T) {
	testFlow(t, "foo = 42.", []token{
		{tokenKey, "foo", 0, 0},
		{tokenEqual, "=", 0, 4},
		{tokenError, "float cannot end with a dot", 0, 6},
	})
}
// TestFloatWithTwoDots checks that a number containing two dots ends the
// flow with a positioned error token.
func TestFloatWithTwoDots(t *testing.T) {
	testFlow(t, "foo = 4.2.", []token{
		{tokenKey, "foo", 0, 0},
		{tokenEqual, "=", 0, 4},
		{tokenError, "cannot have two dots in one float", 0, 6},
	})
}
// TestDoubleEqualKey checks that a second equal sign for the same key ends
// the flow with a positioned error token.
func TestDoubleEqualKey(t *testing.T) {
	testFlow(t, "foo= = 2", []token{
		{tokenKey, "foo", 0, 0},
		{tokenEqual, "=", 0, 3},
		{tokenError, "cannot have multiple equals for the same key", 0, 4},
	})
}
func TestInvalidEsquapeSequence(t *testing.T) {
testFlow(t, "foo = \"\\x\"", []token{
token{tokenKey, "foo"},
token{tokenEqual, "="},
token{tokenError, "invalid escape sequence: \\x"},
testFlow(t, `foo = "\x"`, []token{
token{tokenKey, "foo", 0, 0},
token{tokenEqual, "=", 0, 4},
token{tokenError, "invalid escape sequence: \\x", 0, 7},
})
}
// TestNestedArrays checks the positioned tokens expected for three levels of
// empty nested arrays.
func TestNestedArrays(t *testing.T) {
	testFlow(t, "foo = [[[]]]", []token{
		{tokenKey, "foo", 0, 0},
		{tokenEqual, "=", 0, 4},
		{tokenLeftBracket, "[", 0, 6},
		{tokenLeftBracket, "[", 0, 7},
		{tokenLeftBracket, "[", 0, 8},
		{tokenRightBracket, "]", 0, 9},
		{tokenRightBracket, "]", 0, 10},
		{tokenRightBracket, "]", 0, 11},
		{tokenEOF, "", 0, 12},
	})
}
// TestKeyEqualNumber checks the positioned tokens expected for a key
// assigned integer and float literals, each unsigned, with '+' and with '-'.
func TestKeyEqualNumber(t *testing.T) {
	testFlow(t, "foo = 42", []token{
		{tokenKey, "foo", 0, 0},
		{tokenEqual, "=", 0, 4},
		{tokenInteger, "42", 0, 6},
		{tokenEOF, "", 0, 8},
	})

	testFlow(t, "foo = +42", []token{
		{tokenKey, "foo", 0, 0},
		{tokenEqual, "=", 0, 4},
		{tokenInteger, "+42", 0, 6},
		{tokenEOF, "", 0, 9},
	})

	testFlow(t, "foo = -42", []token{
		{tokenKey, "foo", 0, 0},
		{tokenEqual, "=", 0, 4},
		{tokenInteger, "-42", 0, 6},
		{tokenEOF, "", 0, 9},
	})

	testFlow(t, "foo = 4.2", []token{
		{tokenKey, "foo", 0, 0},
		{tokenEqual, "=", 0, 4},
		{tokenFloat, "4.2", 0, 6},
		{tokenEOF, "", 0, 9},
	})

	testFlow(t, "foo = +4.2", []token{
		{tokenKey, "foo", 0, 0},
		{tokenEqual, "=", 0, 4},
		{tokenFloat, "+4.2", 0, 6},
		{tokenEOF, "", 0, 10},
	})

	testFlow(t, "foo = -4.2", []token{
		{tokenKey, "foo", 0, 0},
		{tokenEqual, "=", 0, 4},
		{tokenFloat, "-4.2", 0, 6},
		{tokenEOF, "", 0, 10},
	})
}
// TestMultiline checks two assignments on consecutive lines; tokens of the
// second assignment are expected on line 1 with columns restarting at 0.
func TestMultiline(t *testing.T) {
	testFlow(t, "foo = 42\nbar=21", []token{
		{tokenKey, "foo", 0, 0},
		{tokenEqual, "=", 0, 4},
		{tokenInteger, "42", 0, 6},
		{tokenKey, "bar", 1, 0},
		{tokenEqual, "=", 1, 3},
		{tokenInteger, "21", 1, 4},
		{tokenEOF, "", 1, 6},
	})
}
func TestKeyEqualStringUnicodeEscape(t *testing.T) {
testFlow(t, "foo = \"hello \\u2665\"", []token{
token{tokenKey, "foo"},
token{tokenEqual, "="},
token{tokenString, "hello ♥"},
token{tokenEOF, ""},
testFlow(t, `foo = "hello \u2665"`, []token{
token{tokenKey, "foo", 0, 0},
token{tokenEqual, "=", 0, 4},
token{tokenString, "hello ♥", 0, 7},
token{tokenEOF, "", 0, 20},
})
}
func TestUnicodeString(t *testing.T) {
testFlow(t, "foo = \"hello ♥ world\"", []token{
token{tokenKey, "foo"},
token{tokenEqual, "="},
token{tokenString, "hello ♥ world"},
token{tokenEOF, ""},
testFlow(t, `foo = "hello ♥ world"`, []token{
token{tokenKey, "foo", 0, 0},
token{tokenEqual, "=", 0, 4},
token{tokenString, "hello ♥ world", 0, 7},
token{tokenEOF, "", 0, 21},
})
}
// TestKeyGroupArray checks the positioned tokens expected for a
// double-bracketed key group array header.
func TestKeyGroupArray(t *testing.T) {
	testFlow(t, "[[foo]]", []token{
		{tokenDoubleLeftBracket, "[[", 0, 0},
		{tokenKeyGroupArray, "foo", 0, 2},
		{tokenDoubleRightBracket, "]]", 0, 5},
		{tokenEOF, "", 0, 7},
	})
}