From 222e90a7d3d7016b4a95a3de6cefe26d40653af1 Mon Sep 17 00:00:00 2001 From: Thomas Pelletier Date: Thu, 21 May 2015 18:52:26 -0700 Subject: [PATCH] Parse long unicode --- lexer.go | 17 +++++++++++++++++ lexer_test.go | 6 ++++++ 2 files changed, 23 insertions(+) diff --git a/lexer.go b/lexer.go index abb6551..e6c3566 100644 --- a/lexer.go +++ b/lexer.go @@ -395,6 +395,23 @@ func (l *tomlLexer) lexString() tomlLexStateFn { return l.errorf("invalid unicode escape: \\u" + code) } growingString += string(rune(intcode)) + case 'U': + l.pos++ + code := "" + for i := 0; i < 8; i++ { + c := l.peek() + l.pos++ + if !isHexDigit(c) { + return l.errorf("unfinished unicode escape") + } + code = code + string(c) + } + l.pos-- + intcode, err := strconv.ParseInt(code, 16, 64) + if err != nil { + return l.errorf("invalid unicode escape: \\U" + code) + } + growingString += string(rune(intcode)) default: return l.errorf("invalid escape sequence: \\" + string(l.peek())) } diff --git a/lexer_test.go b/lexer_test.go index e9d0f14..683d7f7 100644 --- a/lexer_test.go +++ b/lexer_test.go @@ -458,6 +458,12 @@ func TestKeyEqualStringUnicodeEscape(t *testing.T) { token{Position{1, 8}, tokenString, "hello ♥"}, token{Position{1, 21}, tokenEOF, ""}, }) + testFlow(t, `foo = "hello \U000003B4"`, []token{ + token{Position{1, 1}, tokenKey, "foo"}, + token{Position{1, 5}, tokenEqual, "="}, + token{Position{1, 8}, tokenString, "hello δ"}, + token{Position{1, 25}, tokenEOF, ""}, + }) } func TestLiteralString(t *testing.T) {