v2: errors (#534)

```
name                              old time/op    new time/op    delta
UnmarshalDataset/config-32          86.7ms ± 2%    87.5ms ± 2%     ~     (p=0.113 n=9+10)
UnmarshalDataset/canada-32           129ms ± 4%     106ms ± 3%  -17.94%  (p=0.000 n=10+10)
UnmarshalDataset/citm_catalog-32    59.4ms ± 5%    58.7ms ± 5%     ~     (p=0.393 n=10+10)
UnmarshalDataset/twitter-32         27.0ms ± 7%    26.9ms ± 6%     ~     (p=0.720 n=10+9)
UnmarshalDataset/code-32             326ms ± 4%     322ms ± 7%     ~     (p=0.661 n=9+10)
UnmarshalDataset/example-32          510µs ±11%     526µs ± 7%     ~     (p=0.182 n=10+9)
UnmarshalSimple-32                  1.41µs ± 6%    1.41µs ± 4%     ~     (p=0.736 n=10+9)
ReferenceFile-32                    45.6µs ± 3%    43.9µs ±10%     ~     (p=0.089 n=10+10)

name                              old speed      new speed      delta
UnmarshalDataset/config-32        12.1MB/s ± 2%  12.0MB/s ± 2%     ~     (p=0.108 n=9+10)
UnmarshalDataset/canada-32        17.1MB/s ± 4%  20.9MB/s ± 3%  +21.86%  (p=0.000 n=10+10)
UnmarshalDataset/citm_catalog-32  9.41MB/s ± 5%  9.51MB/s ± 5%     ~     (p=0.362 n=10+10)
UnmarshalDataset/twitter-32       16.4MB/s ± 8%  16.5MB/s ± 6%     ~     (p=0.704 n=10+9)
UnmarshalDataset/code-32          8.24MB/s ± 4%  8.34MB/s ± 7%     ~     (p=0.675 n=9+10)
UnmarshalDataset/example-32       15.9MB/s ±11%  15.4MB/s ± 7%     ~     (p=0.182 n=10+9)
ReferenceFile-32                   115MB/s ± 4%   120MB/s ±10%     ~     (p=0.085 n=10+10)

name                              old alloc/op   new alloc/op   delta
UnmarshalDataset/config-32          16.9MB ± 0%    16.9MB ± 0%   -0.02%  (p=0.000 n=10+10)
UnmarshalDataset/canada-32          76.8MB ± 0%    74.3MB ± 0%   -3.31%  (p=0.000 n=10+10)
UnmarshalDataset/citm_catalog-32    37.3MB ± 0%    37.1MB ± 0%   -0.60%  (p=0.000 n=9+10)
UnmarshalDataset/twitter-32         15.6MB ± 0%    15.6MB ± 0%   -0.09%  (p=0.000 n=10+10)
UnmarshalDataset/code-32            60.2MB ± 0%    59.3MB ± 0%   -1.51%  (p=0.000 n=10+9)
UnmarshalDataset/example-32          238kB ± 0%     238kB ± 0%   -0.18%  (p=0.000 n=10+10)
ReferenceFile-32                    11.8kB ± 0%    11.8kB ± 0%     ~     (all equal)

name                              old allocs/op  new allocs/op  delta
UnmarshalDataset/config-32            653k ± 0%      645k ± 0%   -1.20%  (p=0.000 n=10+6)
UnmarshalDataset/canada-32           1.01M ± 0%     0.90M ± 0%  -11.04%  (p=0.000 n=9+10)
UnmarshalDataset/citm_catalog-32      384k ± 0%      370k ± 0%   -3.75%  (p=0.000 n=10+10)
UnmarshalDataset/twitter-32           160k ± 0%      157k ± 0%   -1.32%  (p=0.000 n=10+10)
UnmarshalDataset/code-32             2.97M ± 0%     2.91M ± 0%   -2.15%  (p=0.000 n=10+7)
UnmarshalDataset/example-32          3.69k ± 0%     3.63k ± 0%   -1.52%  (p=0.000 n=10+10)
ReferenceFile-32                       253 ± 0%       253 ± 0%     ~     (all equal)
```
This commit is contained in:
Thomas Pelletier
2021-05-08 16:04:25 -04:00
committed by GitHub
parent 4545a3e94b
commit ea225df3ed
12 changed files with 325 additions and 656 deletions
+66 -111
View File
@@ -1,11 +1,8 @@
package toml
import (
"errors"
"fmt"
"math"
"strconv"
"strings"
"time"
)
@@ -59,14 +56,12 @@ func parseLocalDate(b []byte) (LocalDate, error) {
return date, nil
}
var errNotDigit = errors.New("not a digit")
func parseDecimalDigits(b []byte) (int, error) {
v := 0
for _, c := range b {
for i, c := range b {
if !isDigit(c) {
return 0, fmt.Errorf("%s: %w", b, errNotDigit)
return 0, newDecodeError(b[i:i+1], "should be a digit (0-9)")
}
v *= 10
@@ -76,13 +71,14 @@ func parseDecimalDigits(b []byte) (int, error) {
return v, nil
}
var errParseDateTimeMissingInfo = errors.New("date-time missing timezone information")
func parseDateTime(b []byte) (time.Time, error) {
// offset-date-time = full-date time-delim full-time
// full-time = partial-time time-offset
// time-offset = "Z" / time-numoffset
// time-numoffset = ( "+" / "-" ) time-hour ":" time-minute
originalBytes := b
dt, b, err := parseLocalDateTime(b)
if err != nil {
return time.Time{}, err
@@ -91,7 +87,7 @@ func parseDateTime(b []byte) (time.Time, error) {
var zone *time.Location
if len(b) == 0 {
return time.Time{}, errParseDateTimeMissingInfo
return time.Time{}, newDecodeError(originalBytes, "date-time is missing timezone")
}
if b[0] == 'Z' {
@@ -134,19 +130,12 @@ func parseDateTime(b []byte) (time.Time, error) {
return t, nil
}
var (
errParseLocalDateTimeWrongLength = errors.New(
"local datetimes are expected to have the format YYYY-MM-DDTHH:MM:SS[.NNNNNNNNN]",
)
errParseLocalDateTimeWrongSeparator = errors.New("datetime separator is expected to be T or a space")
)
func parseLocalDateTime(b []byte) (LocalDateTime, []byte, error) {
var dt LocalDateTime
const localDateTimeByteMinLen = 11
if len(b) < localDateTimeByteMinLen {
return dt, nil, errParseLocalDateTimeWrongLength
return dt, nil, newDecodeError(b, "local datetimes are expected to have the format YYYY-MM-DDTHH:MM:SS[.NNNNNNNNN]")
}
date, err := parseLocalDate(b[:10])
@@ -157,7 +146,7 @@ func parseLocalDateTime(b []byte) (LocalDateTime, []byte, error) {
sep := b[10]
if sep != 'T' && sep != ' ' {
return dt, nil, errParseLocalDateTimeWrongSeparator
return dt, nil, newDecodeError(b[10:11], "datetime separator is expected to be T or a space")
}
t, rest, err := parseLocalTime(b[11:])
@@ -169,8 +158,6 @@ func parseLocalDateTime(b []byte) (LocalDateTime, []byte, error) {
return dt, rest, nil
}
var errParseLocalTimeWrongLength = errors.New("times are expected to have the format HH:MM:SS[.NNNNNN]")
// parseLocalTime is a bit different because it also returns the remaining
// []byte that is didn't need. This is to allow parseDateTime to parse those
// remaining bytes as a timezone.
@@ -183,7 +170,7 @@ func parseLocalTime(b []byte) (LocalTime, []byte, error) {
const localTimeByteLen = 8
if len(b) < localTimeByteLen {
return t, nil, errParseLocalTimeWrongLength
return t, nil, newDecodeError(b, "times are expected to have the format HH:MM:SS[.NNNNNN]")
}
var err error
@@ -242,11 +229,6 @@ func parseLocalTime(b []byte) (LocalTime, []byte, error) {
return t, b[8:], nil
}
var (
errParseFloatStartDot = errors.New("float cannot start with a dot")
errParseFloatEndDot = errors.New("float cannot end with a dot")
)
//nolint:cyclop
func parseFloat(b []byte) (float64, error) {
//nolint:godox
@@ -255,150 +237,123 @@ func parseFloat(b []byte) (float64, error) {
return math.NaN(), nil
}
tok := string(b)
err := numberContainsInvalidUnderscore(tok)
cleaned, err := checkAndRemoveUnderscores(b)
if err != nil {
return 0, err
}
cleanedVal := cleanupNumberToken(tok)
if cleanedVal[0] == '.' {
return 0, errParseFloatStartDot
if cleaned[0] == '.' {
return 0, newDecodeError(b, "float cannot start with a dot")
}
if cleanedVal[len(cleanedVal)-1] == '.' {
return 0, errParseFloatEndDot
if cleaned[len(cleaned)-1] == '.' {
return 0, newDecodeError(b, "float cannot end with a dot")
}
f, err := strconv.ParseFloat(cleanedVal, 64)
f, err := strconv.ParseFloat(string(cleaned), 64)
if err != nil {
return 0, fmt.Errorf("coudn't ParseFloat %w", err)
return 0, newDecodeError(b, "coudn't parse float: %w", err)
}
return f, nil
}
func parseIntHex(b []byte) (int64, error) {
tok := string(b)
cleanedVal := cleanupNumberToken(tok)
err := hexNumberContainsInvalidUnderscore(cleanedVal)
cleaned, err := checkAndRemoveUnderscores(b[2:])
if err != nil {
return 0, err
}
i, err := strconv.ParseInt(cleanedVal[2:], 16, 64)
i, err := strconv.ParseInt(string(cleaned), 16, 64)
if err != nil {
return 0, fmt.Errorf("coudn't ParseIntHex %w", err)
return 0, newDecodeError(b, "couldn't parse hexadecimal number: %w", err)
}
return i, nil
}
func parseIntOct(b []byte) (int64, error) {
tok := string(b)
cleanedVal := cleanupNumberToken(tok)
err := numberContainsInvalidUnderscore(cleanedVal)
cleaned, err := checkAndRemoveUnderscores(b[2:])
if err != nil {
return 0, err
}
i, err := strconv.ParseInt(cleanedVal[2:], 8, 64)
i, err := strconv.ParseInt(string(cleaned), 8, 64)
if err != nil {
return 0, fmt.Errorf("coudn't ParseIntOct %w", err)
return 0, newDecodeError(b, "couldn't parse octal number: %w", err)
}
return i, nil
}
func parseIntBin(b []byte) (int64, error) {
tok := string(b)
cleanedVal := cleanupNumberToken(tok)
err := numberContainsInvalidUnderscore(cleanedVal)
cleaned, err := checkAndRemoveUnderscores(b[2:])
if err != nil {
return 0, err
}
i, err := strconv.ParseInt(cleanedVal[2:], 2, 64)
i, err := strconv.ParseInt(string(cleaned), 2, 64)
if err != nil {
return 0, fmt.Errorf("coudn't ParseIntBin %w", err)
return 0, newDecodeError(b, "couldn't parse binary number: %w", err)
}
return i, nil
}
func parseIntDec(b []byte) (int64, error) {
tok := string(b)
cleanedVal := cleanupNumberToken(tok)
err := numberContainsInvalidUnderscore(cleanedVal)
cleaned, err := checkAndRemoveUnderscores(b)
if err != nil {
return 0, err
}
i, err := strconv.ParseInt(cleanedVal, 10, 64)
i, err := strconv.ParseInt(string(cleaned), 10, 64)
if err != nil {
return 0, fmt.Errorf("coudn't parseIntDec %w", err)
return 0, newDecodeError(b, "couldn't parse decimal number: %w", err)
}
return i, nil
}
func numberContainsInvalidUnderscore(value string) error {
// For large numbers, you may use underscores between digits to enhance
// readability. Each underscore must be surrounded by at least one digit on
// each side.
hasBefore := false
for idx, r := range value {
if r == '_' {
if !hasBefore || idx+1 >= len(value) {
// can't end with an underscore
return errInvalidUnderscore
}
}
hasBefore = isDigitRune(r)
func checkAndRemoveUnderscores(b []byte) ([]byte, error) {
if len(b) == 0 {
return b, nil
}
return nil
}
func hexNumberContainsInvalidUnderscore(value string) error {
hasBefore := false
for idx, r := range value {
if r == '_' {
if !hasBefore || idx+1 >= len(value) {
// can't end with an underscore
return errInvalidUnderscoreHex
}
}
hasBefore = isHexDigit(r)
if b[0] == '_' {
return nil, newDecodeError(b[0:1], "number cannot start with underscore")
}
return nil
if b[len(b)-1] == '_' {
return nil, newDecodeError(b[len(b)-1:], "number cannot end with underscore")
}
// fast path
i := 0
for ; i < len(b); i++ {
if b[i] == '_' {
break
}
}
if i == len(b) {
return b, nil
}
before := false
cleaned := make([]byte, i, len(b))
copy(cleaned, b)
for i++; i < len(b); i++ {
c := b[i]
if c == '_' {
if !before {
return nil, newDecodeError(b[i-1:i+1], "number must have at least one digit between underscores")
}
before = false
} else {
before = true
cleaned = append(cleaned, c)
}
}
return cleaned, nil
}
func cleanupNumberToken(value string) string {
cleanedVal := strings.ReplaceAll(value, "_", "")
return cleanedVal
}
func isHexDigit(r rune) bool {
return isDigitRune(r) ||
(r >= 'a' && r <= 'f') ||
(r >= 'A' && r <= 'F')
}
func isDigitRune(r rune) bool {
return r >= '0' && r <= '9'
}
var (
errInvalidUnderscore = errors.New("invalid use of _ in number")
errInvalidUnderscoreHex = errors.New("invalid use of _ in hex number")
)