Fixes #6: implement \uXXXX escaping

This commit is contained in:
Thomas Pelletier
2013-07-01 21:38:00 +02:00
parent f2a1344e12
commit 71141675e5
2 changed files with 32 additions and 0 deletions
+23
View File
@@ -6,6 +6,7 @@ package toml
import (
"fmt"
"regexp"
"strconv"
"strings"
"unicode"
"unicode/utf8"
@@ -70,6 +71,11 @@ func isDigit(r rune) bool {
return unicode.IsNumber(r)
}
// isHexDigit reports whether r is an ASCII hexadecimal digit: 0-9, a-f or A-F.
//
// Both letter cases are accepted because the hex digits of a \uXXXX escape
// are case-insensitive. The check is intentionally restricted to ASCII:
// other Unicode number runes (for example Arabic-Indic digits or vulgar
// fractions) are not valid base-16 digits and cannot be parsed by
// strconv.ParseInt, so they must not be reported as hex digits here.
func isHexDigit(r rune) bool {
	switch {
	case '0' <= r && r <= '9':
		return true
	case 'a' <= r && r <= 'f', 'A' <= r && r <= 'F':
		return true
	}
	return false
}
// Define lexer
type lexer struct {
input string
@@ -323,6 +329,23 @@ func lexString(l *lexer) stateFn {
} else if l.follow("\\\\") {
l.pos += 1
growing_string += "\\"
} else if l.follow("\\u") {
l.pos += 2
code := ""
for i := 0; i < 4; i++ {
c := l.peek()
l.pos += 1
if !isHexDigit(c) {
return l.errorf("unfinished unicode escape")
}
code = code + string(c)
}
l.pos -= 1
intcode, err := strconv.ParseInt(code, 16, 32)
if err != nil {
return l.errorf("invalid unicode escape: \\u" + code)
}
growing_string += string(rune(intcode))
} else {
growing_string += string(l.peek())
}
+9
View File
@@ -300,3 +300,12 @@ func TestMultiline(t *testing.T) {
token{tokenEOF, ""},
})
}
func TestKeyEqualStringUnicodeEscape(t *testing.T) {
testFlow(t, "foo = \"hello \\u2665\"", []token{
token{tokenKey, "foo"},
token{tokenEqual, "="},
token{tokenString, "hello ♥"},
token{tokenEOF, ""},
})
}