Support for hex, oct, and bin integers (#205)

Add support for non-decimal integers. At the time of writing, this is an
unreleased backward-compatible feature of TOML:

```
  Non-negative integer values may also be expressed in hexadecimal, octal, or
  binary. In these formats, leading zeros are allowed (after the prefix). Hex
  values are case insensitive. Underscores are allowed between digits (but
  not between the prefix and the value).

  # hexadecimal with prefix `0x`
  hex1 = 0xDEADBEEF
  hex2 = 0xdeadbeef
  hex3 = 0xdead_beef

  # octal with prefix `0o`
  oct1 = 0o01234567
  oct2 = 0o755 # useful for Unix file permissions

  # binary with prefix `0b`
  bin1 = 0b11010110
```

Fixes #204
This commit is contained in:
Thomas Pelletier
2017-12-22 12:24:26 +01:00
committed by GitHub
parent b8b5e76965
commit 861c4734ac
3 changed files with 164 additions and 10 deletions
+59
View File
@@ -575,8 +575,67 @@ func (l *tomlLexer) lexRightBracket() tomlLexStateFn {
return l.lexRvalue
}
type validRuneFn func(r rune) bool
func isValidHexRune(r rune) bool {
return r >= 'a' && r <= 'f' ||
r >= 'A' && r <= 'F' ||
r >= '0' && r <= '9' ||
r == '_'
}
func isValidOctalRune(r rune) bool {
return r >= '0' && r <= '7' || r == '_'
}
func isValidBinaryRune(r rune) bool {
return r == '0' || r == '1' || r == '_'
}
func (l *tomlLexer) lexNumber() tomlLexStateFn {
r := l.peek()
if r == '0' {
follow := l.peekString(2)
if len(follow) == 2 {
var isValidRune validRuneFn
switch follow[1] {
case 'x':
isValidRune = isValidHexRune
case 'o':
isValidRune = isValidOctalRune
case 'b':
isValidRune = isValidBinaryRune
default:
if follow[1] >= 'a' && follow[1] <= 'z' || follow[1] >= 'A' && follow[1] <= 'Z' {
return l.errorf("unknown number base: %s. possible options are x (hex) o (octal) b (binary)", string(follow[1]))
}
}
if isValidRune != nil {
l.next()
l.next()
digitSeen := false
for {
next := l.peek()
if !isValidRune(next) {
break
}
digitSeen = true
l.next()
}
if !digitSeen {
return l.errorf("number needs at least one digit")
}
l.emit(tokenInteger)
return l.lexRvalue
}
}
}
if r == '+' || r == '-' {
l.next()
}
+51 -9
View File
@@ -212,13 +212,25 @@ func (p *tomlParser) parseAssign() tomlParserStateFn {
}
var numberUnderscoreInvalidRegexp *regexp.Regexp
var hexNumberUnderscoreInvalidRegexp *regexp.Regexp
func cleanupNumberToken(value string) (string, error) {
func numberContainsInvalidUnderscore(value string) error {
if numberUnderscoreInvalidRegexp.MatchString(value) {
return "", errors.New("invalid use of _ in number")
return errors.New("invalid use of _ in number")
}
return nil
}
func hexNumberContainsInvalidUnderscore(value string) error {
if hexNumberUnderscoreInvalidRegexp.MatchString(value) {
return errors.New("invalid use of _ in hex number")
}
return nil
}
func cleanupNumberToken(value string) string {
cleanedVal := strings.Replace(value, "_", "", -1)
return cleanedVal, nil
return cleanedVal
}
func (p *tomlParser) parseRvalue() interface{} {
@@ -235,20 +247,49 @@ func (p *tomlParser) parseRvalue() interface{} {
case tokenFalse:
return false
case tokenInteger:
cleanedVal, err := cleanupNumberToken(tok.val)
if err != nil {
p.raiseError(tok, "%s", err)
cleanedVal := cleanupNumberToken(tok.val)
var err error
var val int64
if len(cleanedVal) >= 3 && cleanedVal[0] == '0' {
switch cleanedVal[1] {
case 'x':
err = hexNumberContainsInvalidUnderscore(tok.val)
if err != nil {
p.raiseError(tok, "%s", err)
}
val, err = strconv.ParseInt(cleanedVal[2:], 16, 64)
case 'o':
err = numberContainsInvalidUnderscore(tok.val)
if err != nil {
p.raiseError(tok, "%s", err)
}
val, err = strconv.ParseInt(cleanedVal[2:], 8, 64)
case 'b':
err = numberContainsInvalidUnderscore(tok.val)
if err != nil {
p.raiseError(tok, "%s", err)
}
val, err = strconv.ParseInt(cleanedVal[2:], 2, 64)
default:
panic("invalid base") // the lexer should catch this first
}
} else {
err = numberContainsInvalidUnderscore(tok.val)
if err != nil {
p.raiseError(tok, "%s", err)
}
val, err = strconv.ParseInt(cleanedVal, 10, 64)
}
val, err := strconv.ParseInt(cleanedVal, 10, 64)
if err != nil {
p.raiseError(tok, "%s", err)
}
return val
case tokenFloat:
cleanedVal, err := cleanupNumberToken(tok.val)
err := numberContainsInvalidUnderscore(tok.val)
if err != nil {
p.raiseError(tok, "%s", err)
}
cleanedVal := cleanupNumberToken(tok.val)
val, err := strconv.ParseFloat(cleanedVal, 64)
if err != nil {
p.raiseError(tok, "%s", err)
@@ -379,5 +420,6 @@ func parseToml(flow []token) *Tree {
}
func init() {
numberUnderscoreInvalidRegexp = regexp.MustCompile(`([^\d]_|_[^\d]|_$|^_)`)
numberUnderscoreInvalidRegexp = regexp.MustCompile(`([^\d]_|_[^\d])|_$|^_`)
hexNumberUnderscoreInvalidRegexp = regexp.MustCompile(`(^0x_)|([^\da-f]_|_[^\da-f])|_$|^_`)
}
+54 -1
View File
@@ -82,6 +82,59 @@ func TestSimpleNumbers(t *testing.T) {
})
}
func TestHexIntegers(t *testing.T) {
tree, err := Load(`a = 0xDEADBEEF`)
assertTree(t, tree, err, map[string]interface{}{"a": int64(3735928559)})
tree, err = Load(`a = 0xdeadbeef`)
assertTree(t, tree, err, map[string]interface{}{"a": int64(3735928559)})
tree, err = Load(`a = 0xdead_beef`)
assertTree(t, tree, err, map[string]interface{}{"a": int64(3735928559)})
_, err = Load(`a = 0x_1`)
if err.Error() != "(1, 5): invalid use of _ in hex number" {
t.Error("Bad error message:", err.Error())
}
}
func TestOctIntegers(t *testing.T) {
tree, err := Load(`a = 0o01234567`)
assertTree(t, tree, err, map[string]interface{}{"a": int64(342391)})
tree, err = Load(`a = 0o755`)
assertTree(t, tree, err, map[string]interface{}{"a": int64(493)})
_, err = Load(`a = 0o_1`)
if err.Error() != "(1, 5): invalid use of _ in number" {
t.Error("Bad error message:", err.Error())
}
}
func TestBinIntegers(t *testing.T) {
tree, err := Load(`a = 0b11010110`)
assertTree(t, tree, err, map[string]interface{}{"a": int64(214)})
_, err = Load(`a = 0b_1`)
if err.Error() != "(1, 5): invalid use of _ in number" {
t.Error("Bad error message:", err.Error())
}
}
func TestBadIntegerBase(t *testing.T) {
_, err := Load(`a = 0k1`)
if err.Error() != "(1, 5): unknown number base: k. possible options are x (hex) o (octal) b (binary)" {
t.Error("Error should have been returned.")
}
}
func TestIntegerNoDigit(t *testing.T) {
_, err := Load(`a = 0b`)
if err.Error() != "(1, 5): number needs at least one digit" {
t.Error("Bad error message:", err.Error())
}
}
func TestNumbersWithUnderscores(t *testing.T) {
tree, err := Load("a = 1_000")
assertTree(t, tree, err, map[string]interface{}{
@@ -642,7 +695,7 @@ func TestTomlValueStringRepresentation(t *testing.T) {
{int64(12345), "12345"},
{uint64(50), "50"},
{float64(123.45), "123.45"},
{bool(true), "true"},
{true, "true"},
{"hello world", "\"hello world\""},
{"\b\t\n\f\r\"\\", "\"\\b\\t\\n\\f\\r\\\"\\\\\""},
{"\x05", "\"\\u0005\""},