ea225df3ed
``` name old time/op new time/op delta UnmarshalDataset/config-32 86.7ms ± 2% 87.5ms ± 2% ~ (p=0.113 n=9+10) UnmarshalDataset/canada-32 129ms ± 4% 106ms ± 3% -17.94% (p=0.000 n=10+10) UnmarshalDataset/citm_catalog-32 59.4ms ± 5% 58.7ms ± 5% ~ (p=0.393 n=10+10) UnmarshalDataset/twitter-32 27.0ms ± 7% 26.9ms ± 6% ~ (p=0.720 n=10+9) UnmarshalDataset/code-32 326ms ± 4% 322ms ± 7% ~ (p=0.661 n=9+10) UnmarshalDataset/example-32 510µs ±11% 526µs ± 7% ~ (p=0.182 n=10+9) UnmarshalSimple-32 1.41µs ± 6% 1.41µs ± 4% ~ (p=0.736 n=10+9) ReferenceFile-32 45.6µs ± 3% 43.9µs ±10% ~ (p=0.089 n=10+10) name old speed new speed delta UnmarshalDataset/config-32 12.1MB/s ± 2% 12.0MB/s ± 2% ~ (p=0.108 n=9+10) UnmarshalDataset/canada-32 17.1MB/s ± 4% 20.9MB/s ± 3% +21.86% (p=0.000 n=10+10) UnmarshalDataset/citm_catalog-32 9.41MB/s ± 5% 9.51MB/s ± 5% ~ (p=0.362 n=10+10) UnmarshalDataset/twitter-32 16.4MB/s ± 8% 16.5MB/s ± 6% ~ (p=0.704 n=10+9) UnmarshalDataset/code-32 8.24MB/s ± 4% 8.34MB/s ± 7% ~ (p=0.675 n=9+10) UnmarshalDataset/example-32 15.9MB/s ±11% 15.4MB/s ± 7% ~ (p=0.182 n=10+9) ReferenceFile-32 115MB/s ± 4% 120MB/s ±10% ~ (p=0.085 n=10+10) name old alloc/op new alloc/op delta UnmarshalDataset/config-32 16.9MB ± 0% 16.9MB ± 0% -0.02% (p=0.000 n=10+10) UnmarshalDataset/canada-32 76.8MB ± 0% 74.3MB ± 0% -3.31% (p=0.000 n=10+10) UnmarshalDataset/citm_catalog-32 37.3MB ± 0% 37.1MB ± 0% -0.60% (p=0.000 n=9+10) UnmarshalDataset/twitter-32 15.6MB ± 0% 15.6MB ± 0% -0.09% (p=0.000 n=10+10) UnmarshalDataset/code-32 60.2MB ± 0% 59.3MB ± 0% -1.51% (p=0.000 n=10+9) UnmarshalDataset/example-32 238kB ± 0% 238kB ± 0% -0.18% (p=0.000 n=10+10) ReferenceFile-32 11.8kB ± 0% 11.8kB ± 0% ~ (all equal) name old allocs/op new allocs/op delta UnmarshalDataset/config-32 653k ± 0% 645k ± 0% -1.20% (p=0.000 n=10+6) UnmarshalDataset/canada-32 1.01M ± 0% 0.90M ± 0% -11.04% (p=0.000 n=9+10) UnmarshalDataset/citm_catalog-32 384k ± 0% 370k ± 0% -3.75% (p=0.000 n=10+10) UnmarshalDataset/twitter-32 160k ± 0% 157k ± 0% -1.32% (p=0.000 n=10+10) UnmarshalDataset/code-32 2.97M ± 0% 2.91M ± 0% -2.15% (p=0.000 n=10+7) UnmarshalDataset/example-32 3.69k ± 0% 3.63k ± 0% -1.52% (p=0.000 n=10+10) ReferenceFile-32 253 ± 0% 253 ± 0% ~ (all equal) ```
360 lines
7.2 KiB
Go
360 lines
7.2 KiB
Go
package toml
|
|
|
|
import (
|
|
"math"
|
|
"strconv"
|
|
"time"
|
|
)
|
|
|
|
func parseInteger(b []byte) (int64, error) {
|
|
if len(b) > 2 && b[0] == '0' {
|
|
switch b[1] {
|
|
case 'x':
|
|
return parseIntHex(b)
|
|
case 'b':
|
|
return parseIntBin(b)
|
|
case 'o':
|
|
return parseIntOct(b)
|
|
default:
|
|
return 0, newDecodeError(b[1:2], "invalid base: '%c'", b[1])
|
|
}
|
|
}
|
|
|
|
return parseIntDec(b)
|
|
}
|
|
|
|
func parseLocalDate(b []byte) (LocalDate, error) {
|
|
// full-date = date-fullyear "-" date-month "-" date-mday
|
|
// date-fullyear = 4DIGIT
|
|
// date-month = 2DIGIT ; 01-12
|
|
// date-mday = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on month/year
|
|
var date LocalDate
|
|
|
|
if len(b) != 10 || b[4] != '-' || b[7] != '-' {
|
|
return date, newDecodeError(b, "dates are expected to have the format YYYY-MM-DD")
|
|
}
|
|
|
|
var err error
|
|
|
|
date.Year, err = parseDecimalDigits(b[0:4])
|
|
if err != nil {
|
|
return date, err
|
|
}
|
|
|
|
v, err := parseDecimalDigits(b[5:7])
|
|
if err != nil {
|
|
return date, err
|
|
}
|
|
|
|
date.Month = time.Month(v)
|
|
|
|
date.Day, err = parseDecimalDigits(b[8:10])
|
|
if err != nil {
|
|
return date, err
|
|
}
|
|
|
|
return date, nil
|
|
}
|
|
|
|
func parseDecimalDigits(b []byte) (int, error) {
|
|
v := 0
|
|
|
|
for i, c := range b {
|
|
if !isDigit(c) {
|
|
return 0, newDecodeError(b[i:i+1], "should be a digit (0-9)")
|
|
}
|
|
|
|
v *= 10
|
|
v += int(c - '0')
|
|
}
|
|
|
|
return v, nil
|
|
}
|
|
|
|
func parseDateTime(b []byte) (time.Time, error) {
|
|
// offset-date-time = full-date time-delim full-time
|
|
// full-time = partial-time time-offset
|
|
// time-offset = "Z" / time-numoffset
|
|
// time-numoffset = ( "+" / "-" ) time-hour ":" time-minute
|
|
|
|
originalBytes := b
|
|
|
|
dt, b, err := parseLocalDateTime(b)
|
|
if err != nil {
|
|
return time.Time{}, err
|
|
}
|
|
|
|
var zone *time.Location
|
|
|
|
if len(b) == 0 {
|
|
return time.Time{}, newDecodeError(originalBytes, "date-time is missing timezone")
|
|
}
|
|
|
|
if b[0] == 'Z' {
|
|
b = b[1:]
|
|
zone = time.UTC
|
|
} else {
|
|
const dateTimeByteLen = 6
|
|
if len(b) != dateTimeByteLen {
|
|
return time.Time{}, newDecodeError(b, "invalid date-time timezone")
|
|
}
|
|
direction := 1
|
|
switch b[0] {
|
|
case '+':
|
|
case '-':
|
|
direction = -1
|
|
default:
|
|
return time.Time{}, newDecodeError(b[0:1], "invalid timezone offset character")
|
|
}
|
|
|
|
hours := digitsToInt(b[1:3])
|
|
minutes := digitsToInt(b[4:6])
|
|
seconds := direction * (hours*3600 + minutes*60)
|
|
zone = time.FixedZone("", seconds)
|
|
}
|
|
|
|
if len(b) > 0 {
|
|
return time.Time{}, newDecodeError(b, "extra bytes at the end of the timezone")
|
|
}
|
|
|
|
t := time.Date(
|
|
dt.Date.Year,
|
|
dt.Date.Month,
|
|
dt.Date.Day,
|
|
dt.Time.Hour,
|
|
dt.Time.Minute,
|
|
dt.Time.Second,
|
|
dt.Time.Nanosecond,
|
|
zone)
|
|
|
|
return t, nil
|
|
}
|
|
|
|
func parseLocalDateTime(b []byte) (LocalDateTime, []byte, error) {
|
|
var dt LocalDateTime
|
|
|
|
const localDateTimeByteMinLen = 11
|
|
if len(b) < localDateTimeByteMinLen {
|
|
return dt, nil, newDecodeError(b, "local datetimes are expected to have the format YYYY-MM-DDTHH:MM:SS[.NNNNNNNNN]")
|
|
}
|
|
|
|
date, err := parseLocalDate(b[:10])
|
|
if err != nil {
|
|
return dt, nil, err
|
|
}
|
|
dt.Date = date
|
|
|
|
sep := b[10]
|
|
if sep != 'T' && sep != ' ' {
|
|
return dt, nil, newDecodeError(b[10:11], "datetime separator is expected to be T or a space")
|
|
}
|
|
|
|
t, rest, err := parseLocalTime(b[11:])
|
|
if err != nil {
|
|
return dt, nil, err
|
|
}
|
|
dt.Time = t
|
|
|
|
return dt, rest, nil
|
|
}
|
|
|
|
// parseLocalTime is a bit different because it also returns the remaining
|
|
// []byte that is didn't need. This is to allow parseDateTime to parse those
|
|
// remaining bytes as a timezone.
|
|
//nolint:cyclop,funlen
|
|
func parseLocalTime(b []byte) (LocalTime, []byte, error) {
|
|
var (
|
|
nspow = [10]int{0, 1e8, 1e7, 1e6, 1e5, 1e4, 1e3, 1e2, 1e1, 1e0}
|
|
t LocalTime
|
|
)
|
|
|
|
const localTimeByteLen = 8
|
|
if len(b) < localTimeByteLen {
|
|
return t, nil, newDecodeError(b, "times are expected to have the format HH:MM:SS[.NNNNNN]")
|
|
}
|
|
|
|
var err error
|
|
|
|
t.Hour, err = parseDecimalDigits(b[0:2])
|
|
if err != nil {
|
|
return t, nil, err
|
|
}
|
|
|
|
if b[2] != ':' {
|
|
return t, nil, newDecodeError(b[2:3], "expecting colon between hours and minutes")
|
|
}
|
|
|
|
t.Minute, err = parseDecimalDigits(b[3:5])
|
|
if err != nil {
|
|
return t, nil, err
|
|
}
|
|
|
|
if b[5] != ':' {
|
|
return t, nil, newDecodeError(b[5:6], "expecting colon between minutes and seconds")
|
|
}
|
|
|
|
t.Second, err = parseDecimalDigits(b[6:8])
|
|
if err != nil {
|
|
return t, nil, err
|
|
}
|
|
|
|
if len(b) >= 9 && b[8] == '.' {
|
|
frac := 0
|
|
digits := 0
|
|
|
|
for i, c := range b[9:] {
|
|
if !isDigit(c) {
|
|
if i == 0 {
|
|
return t, nil, newDecodeError(b[i:i+1], "need at least one digit after fraction point")
|
|
}
|
|
|
|
break
|
|
}
|
|
|
|
//nolint:gomnd
|
|
if i >= 9 {
|
|
return t, nil, newDecodeError(b[i:i+1], "maximum precision for date time is nanosecond")
|
|
}
|
|
|
|
frac *= 10
|
|
frac += int(c - '0')
|
|
digits++
|
|
}
|
|
|
|
t.Nanosecond = frac * nspow[digits]
|
|
|
|
return t, b[9+digits:], nil
|
|
}
|
|
|
|
return t, b[8:], nil
|
|
}
|
|
|
|
//nolint:cyclop
|
|
func parseFloat(b []byte) (float64, error) {
|
|
//nolint:godox
|
|
// TODO: inefficient
|
|
if len(b) == 4 && (b[0] == '+' || b[0] == '-') && b[1] == 'n' && b[2] == 'a' && b[3] == 'n' {
|
|
return math.NaN(), nil
|
|
}
|
|
|
|
cleaned, err := checkAndRemoveUnderscores(b)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
if cleaned[0] == '.' {
|
|
return 0, newDecodeError(b, "float cannot start with a dot")
|
|
}
|
|
|
|
if cleaned[len(cleaned)-1] == '.' {
|
|
return 0, newDecodeError(b, "float cannot end with a dot")
|
|
}
|
|
|
|
f, err := strconv.ParseFloat(string(cleaned), 64)
|
|
if err != nil {
|
|
return 0, newDecodeError(b, "coudn't parse float: %w", err)
|
|
}
|
|
|
|
return f, nil
|
|
}
|
|
|
|
func parseIntHex(b []byte) (int64, error) {
|
|
cleaned, err := checkAndRemoveUnderscores(b[2:])
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
i, err := strconv.ParseInt(string(cleaned), 16, 64)
|
|
if err != nil {
|
|
return 0, newDecodeError(b, "couldn't parse hexadecimal number: %w", err)
|
|
}
|
|
|
|
return i, nil
|
|
}
|
|
|
|
func parseIntOct(b []byte) (int64, error) {
|
|
cleaned, err := checkAndRemoveUnderscores(b[2:])
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
i, err := strconv.ParseInt(string(cleaned), 8, 64)
|
|
if err != nil {
|
|
return 0, newDecodeError(b, "couldn't parse octal number: %w", err)
|
|
}
|
|
|
|
return i, nil
|
|
}
|
|
|
|
func parseIntBin(b []byte) (int64, error) {
|
|
cleaned, err := checkAndRemoveUnderscores(b[2:])
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
i, err := strconv.ParseInt(string(cleaned), 2, 64)
|
|
if err != nil {
|
|
return 0, newDecodeError(b, "couldn't parse binary number: %w", err)
|
|
}
|
|
|
|
return i, nil
|
|
}
|
|
|
|
func parseIntDec(b []byte) (int64, error) {
|
|
cleaned, err := checkAndRemoveUnderscores(b)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
i, err := strconv.ParseInt(string(cleaned), 10, 64)
|
|
if err != nil {
|
|
return 0, newDecodeError(b, "couldn't parse decimal number: %w", err)
|
|
}
|
|
|
|
return i, nil
|
|
}
|
|
|
|
func checkAndRemoveUnderscores(b []byte) ([]byte, error) {
|
|
if len(b) == 0 {
|
|
return b, nil
|
|
}
|
|
|
|
if b[0] == '_' {
|
|
return nil, newDecodeError(b[0:1], "number cannot start with underscore")
|
|
}
|
|
|
|
if b[len(b)-1] == '_' {
|
|
return nil, newDecodeError(b[len(b)-1:], "number cannot end with underscore")
|
|
}
|
|
|
|
// fast path
|
|
i := 0
|
|
for ; i < len(b); i++ {
|
|
if b[i] == '_' {
|
|
break
|
|
}
|
|
}
|
|
if i == len(b) {
|
|
return b, nil
|
|
}
|
|
|
|
before := false
|
|
cleaned := make([]byte, i, len(b))
|
|
copy(cleaned, b)
|
|
|
|
for i++; i < len(b); i++ {
|
|
c := b[i]
|
|
if c == '_' {
|
|
if !before {
|
|
return nil, newDecodeError(b[i-1:i+1], "number must have at least one digit between underscores")
|
|
}
|
|
before = false
|
|
} else {
|
|
before = true
|
|
cleaned = append(cleaned, c)
|
|
}
|
|
}
|
|
|
|
return cleaned, nil
|
|
}
|