From 861c4734acb84ea440f819d95197f1c24c9975fb Mon Sep 17 00:00:00 2001
From: Thomas Pelletier
Date: Fri, 22 Dec 2017 12:24:26 +0100
Subject: [PATCH] Support for hex, oct, and bin integers (#205)

Add support for non-decimal integers. At the time of writing, this is an
unreleased backward-compatible feature of TOML:

```
Non-negative integer values may also be expressed in hexadecimal, octal, or
binary. In these formats, leading zeros are allowed (after the prefix). Hex
values are case insensitive. Underscores are allowed between digits (but not
between the prefix and the value).

# hexadecimal with prefix `0x`
hex1 = 0xDEADBEEF
hex2 = 0xdeadbeef
hex3 = 0xdead_beef

# octal with prefix `0o`
oct1 = 0o01234567
oct2 = 0o755 # useful for Unix file permissions

# binary with prefix `0b`
bin1 = 0b11010110
```

Fixes #204
---
Review notes (this section is dropped by git am):
 * hexNumberUnderscoreInvalidRegexp now treats A-F as hex digits, so
   0xDEAD_BEEF is accepted; a regression case is added to TestHexIntegers.
 * TestBadIntegerBase reports the unexpected message like its sibling tests.
 * Hunk offsets and the diffstat are recomputed for the added lines; the blob
   ids on the index lines are those of the original submission.

 lexer.go       | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++
 parser.go      | 65 +++++++++++++++++++++++++++++++++++++++++++-------
 parser_test.go | 58 +++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 178 insertions(+), 10 deletions(-)

diff --git a/lexer.go b/lexer.go
index 1b6647d..2096656 100644
--- a/lexer.go
+++ b/lexer.go
@@ -575,8 +575,73 @@ func (l *tomlLexer) lexRightBracket() tomlLexStateFn {
 	return l.lexRvalue
 }
 
+// validRuneFn reports whether r may appear in the digits of a number literal.
+type validRuneFn func(r rune) bool
+
+// isValidHexRune accepts hexadecimal digits of either case and the underscore.
+func isValidHexRune(r rune) bool {
+	return r >= 'a' && r <= 'f' ||
+		r >= 'A' && r <= 'F' ||
+		r >= '0' && r <= '9' ||
+		r == '_'
+}
+
+// isValidOctalRune accepts the digits 0 through 7 and the underscore.
+func isValidOctalRune(r rune) bool {
+	return r >= '0' && r <= '7' || r == '_'
+}
+
+// isValidBinaryRune accepts the digits 0 and 1 and the underscore.
+func isValidBinaryRune(r rune) bool {
+	return r == '0' || r == '1' || r == '_'
+}
+
 func (l *tomlLexer) lexNumber() tomlLexStateFn {
 	r := l.peek()
+
+	// A leading 0 followed by x, o or b introduces a hex, octal or binary literal.
+	if r == '0' {
+		follow := l.peekString(2)
+		if len(follow) == 2 {
+			var isValidRune validRuneFn
+			switch follow[1] {
+			case 'x':
+				isValidRune = isValidHexRune
+			case 'o':
+				isValidRune = isValidOctalRune
+			case 'b':
+				isValidRune = isValidBinaryRune
+			default:
+				if follow[1] >= 'a' && follow[1] <= 'z' || follow[1] >= 'A' && follow[1] <= 'Z' {
+					return l.errorf("unknown number base: %s. possible options are x (hex) o (octal) b (binary)", string(follow[1]))
+				}
+			}
+
+			if isValidRune != nil {
+				// Skip the two-rune prefix, then consume every rune valid for the base.
+				l.next()
+				l.next()
+				digitSeen := false
+				for {
+					next := l.peek()
+					if !isValidRune(next) {
+						break
+					}
+					digitSeen = true
+					l.next()
+				}
+
+				if !digitSeen {
+					return l.errorf("number needs at least one digit")
+				}
+
+				l.emit(tokenInteger)
+
+				return l.lexRvalue
+			}
+		}
+	}
+
 	if r == '+' || r == '-' {
 		l.next()
 	}
diff --git a/parser.go b/parser.go
index d492a1e..0f2ab7a 100644
--- a/parser.go
+++ b/parser.go
@@ -212,13 +212,28 @@ func (p *tomlParser) parseAssign() tomlParserStateFn {
 }
 
 var numberUnderscoreInvalidRegexp *regexp.Regexp
+var hexNumberUnderscoreInvalidRegexp *regexp.Regexp
 
-func cleanupNumberToken(value string) (string, error) {
+// numberContainsInvalidUnderscore errors when an underscore in value is not between decimal digits.
+func numberContainsInvalidUnderscore(value string) error {
 	if numberUnderscoreInvalidRegexp.MatchString(value) {
-		return "", errors.New("invalid use of _ in number")
+		return errors.New("invalid use of _ in number")
 	}
+	return nil
+}
+
+// hexNumberContainsInvalidUnderscore errors when an underscore in value is not between hex digits.
+func hexNumberContainsInvalidUnderscore(value string) error {
+	if hexNumberUnderscoreInvalidRegexp.MatchString(value) {
+		return errors.New("invalid use of _ in hex number")
+	}
+	return nil
+}
+
+// cleanupNumberToken returns value with every underscore removed.
+func cleanupNumberToken(value string) string {
 	cleanedVal := strings.Replace(value, "_", "", -1)
-	return cleanedVal, nil
+	return cleanedVal
 }
 
 func (p *tomlParser) parseRvalue() interface{} {
@@ -235,20 +250,50 @@ func (p *tomlParser) parseRvalue() interface{} {
 	case tokenFalse:
 		return false
 	case tokenInteger:
-		cleanedVal, err := cleanupNumberToken(tok.val)
-		if err != nil {
-			p.raiseError(tok, "%s", err)
+		cleanedVal := cleanupNumberToken(tok.val)
+		var err error
+		var val int64
+		// Prefixed literals are validated on the raw token, then parsed in their own base without the prefix.
+		if len(cleanedVal) >= 3 && cleanedVal[0] == '0' {
+			switch cleanedVal[1] {
+			case 'x':
+				err = hexNumberContainsInvalidUnderscore(tok.val)
+				if err != nil {
+					p.raiseError(tok, "%s", err)
+				}
+				val, err = strconv.ParseInt(cleanedVal[2:], 16, 64)
+			case 'o':
+				err = numberContainsInvalidUnderscore(tok.val)
+				if err != nil {
+					p.raiseError(tok, "%s", err)
+				}
+				val, err = strconv.ParseInt(cleanedVal[2:], 8, 64)
+			case 'b':
+				err = numberContainsInvalidUnderscore(tok.val)
+				if err != nil {
+					p.raiseError(tok, "%s", err)
+				}
+				val, err = strconv.ParseInt(cleanedVal[2:], 2, 64)
+			default:
+				panic("invalid base") // the lexer rejects unknown base letters; NOTE(review): confirm it also rejects leading zeros such as 012
+			}
+		} else {
+			err = numberContainsInvalidUnderscore(tok.val)
+			if err != nil {
+				p.raiseError(tok, "%s", err)
+			}
+			val, err = strconv.ParseInt(cleanedVal, 10, 64)
 		}
-		val, err := strconv.ParseInt(cleanedVal, 10, 64)
 		if err != nil {
 			p.raiseError(tok, "%s", err)
 		}
 		return val
 	case tokenFloat:
-		cleanedVal, err := cleanupNumberToken(tok.val)
+		err := numberContainsInvalidUnderscore(tok.val)
 		if err != nil {
 			p.raiseError(tok, "%s", err)
 		}
+		cleanedVal := cleanupNumberToken(tok.val)
 		val, err := strconv.ParseFloat(cleanedVal, 64)
 		if err != nil {
 			p.raiseError(tok, "%s", err)
@@ -379,5 +424,7 @@ func parseToml(flow []token) *Tree {
 }
 
 func init() {
-	numberUnderscoreInvalidRegexp = regexp.MustCompile(`([^\d]_|_[^\d]|_$|^_)`)
+	numberUnderscoreInvalidRegexp = regexp.MustCompile(`([^\d]_|_[^\d])|_$|^_`)
+	// Hex digits are case insensitive, so A-F must be accepted next to an underscore as well as a-f.
+	hexNumberUnderscoreInvalidRegexp = regexp.MustCompile(`(^0x_)|([^\da-fA-F]_|_[^\da-fA-F])|_$|^_`)
 }
diff --git a/parser_test.go b/parser_test.go
index bc7903c..6c8eec6 100644
--- a/parser_test.go
+++ b/parser_test.go
@@ -82,6 +82,62 @@ func TestSimpleNumbers(t *testing.T) {
 	})
 }
 
+func TestHexIntegers(t *testing.T) {
+	tree, err := Load(`a = 0xDEADBEEF`)
+	assertTree(t, tree, err, map[string]interface{}{"a": int64(3735928559)})
+
+	tree, err = Load(`a = 0xdeadbeef`)
+	assertTree(t, tree, err, map[string]interface{}{"a": int64(3735928559)})
+
+	tree, err = Load(`a = 0xdead_beef`)
+	assertTree(t, tree, err, map[string]interface{}{"a": int64(3735928559)})
+
+	tree, err = Load(`a = 0xDEAD_BEEF`)
+	assertTree(t, tree, err, map[string]interface{}{"a": int64(3735928559)})
+
+	_, err = Load(`a = 0x_1`)
+	if err.Error() != "(1, 5): invalid use of _ in hex number" {
+		t.Error("Bad error message:", err.Error())
+	}
+}
+
+func TestOctIntegers(t *testing.T) {
+	tree, err := Load(`a = 0o01234567`)
+	assertTree(t, tree, err, map[string]interface{}{"a": int64(342391)})
+
+	tree, err = Load(`a = 0o755`)
+	assertTree(t, tree, err, map[string]interface{}{"a": int64(493)})
+
+	_, err = Load(`a = 0o_1`)
+	if err.Error() != "(1, 5): invalid use of _ in number" {
+		t.Error("Bad error message:", err.Error())
+	}
+}
+
+func TestBinIntegers(t *testing.T) {
+	tree, err := Load(`a = 0b11010110`)
+	assertTree(t, tree, err, map[string]interface{}{"a": int64(214)})
+
+	_, err = Load(`a = 0b_1`)
+	if err.Error() != "(1, 5): invalid use of _ in number" {
+		t.Error("Bad error message:", err.Error())
+	}
+}
+
+func TestBadIntegerBase(t *testing.T) {
+	_, err := Load(`a = 0k1`)
+	if err.Error() != "(1, 5): unknown number base: k. possible options are x (hex) o (octal) b (binary)" {
+		t.Error("Bad error message:", err.Error())
+	}
+}
+
+func TestIntegerNoDigit(t *testing.T) {
+	_, err := Load(`a = 0b`)
+	if err.Error() != "(1, 5): number needs at least one digit" {
+		t.Error("Bad error message:", err.Error())
+	}
+}
+
 func TestNumbersWithUnderscores(t *testing.T) {
 	tree, err := Load("a = 1_000")
 	assertTree(t, tree, err, map[string]interface{}{
@@ -642,7 +698,7 @@ func TestTomlValueStringRepresentation(t *testing.T) {
 		{int64(12345), "12345"},
 		{uint64(50), "50"},
 		{float64(123.45), "123.45"},
-		{bool(true), "true"},
+		{true, "true"},
 		{"hello world", "\"hello world\""},
 		{"\b\t\n\f\r\"\\", "\"\\b\\t\\n\\f\\r\\\"\\\\\""},
 		{"\x05", "\"\\u0005\""},