ea225df3ed
```
name old time/op new time/op delta
UnmarshalDataset/config-32 86.7ms ± 2% 87.5ms ± 2% ~ (p=0.113 n=9+10)
UnmarshalDataset/canada-32 129ms ± 4% 106ms ± 3% -17.94% (p=0.000 n=10+10)
UnmarshalDataset/citm_catalog-32 59.4ms ± 5% 58.7ms ± 5% ~ (p=0.393 n=10+10)
UnmarshalDataset/twitter-32 27.0ms ± 7% 26.9ms ± 6% ~ (p=0.720 n=10+9)
UnmarshalDataset/code-32 326ms ± 4% 322ms ± 7% ~ (p=0.661 n=9+10)
UnmarshalDataset/example-32 510µs ±11% 526µs ± 7% ~ (p=0.182 n=10+9)
UnmarshalSimple-32 1.41µs ± 6% 1.41µs ± 4% ~ (p=0.736 n=10+9)
ReferenceFile-32 45.6µs ± 3% 43.9µs ±10% ~ (p=0.089 n=10+10)

name old speed new speed delta
UnmarshalDataset/config-32 12.1MB/s ± 2% 12.0MB/s ± 2% ~ (p=0.108 n=9+10)
UnmarshalDataset/canada-32 17.1MB/s ± 4% 20.9MB/s ± 3% +21.86% (p=0.000 n=10+10)
UnmarshalDataset/citm_catalog-32 9.41MB/s ± 5% 9.51MB/s ± 5% ~ (p=0.362 n=10+10)
UnmarshalDataset/twitter-32 16.4MB/s ± 8% 16.5MB/s ± 6% ~ (p=0.704 n=10+9)
UnmarshalDataset/code-32 8.24MB/s ± 4% 8.34MB/s ± 7% ~ (p=0.675 n=9+10)
UnmarshalDataset/example-32 15.9MB/s ±11% 15.4MB/s ± 7% ~ (p=0.182 n=10+9)
ReferenceFile-32 115MB/s ± 4% 120MB/s ±10% ~ (p=0.085 n=10+10)

name old alloc/op new alloc/op delta
UnmarshalDataset/config-32 16.9MB ± 0% 16.9MB ± 0% -0.02% (p=0.000 n=10+10)
UnmarshalDataset/canada-32 76.8MB ± 0% 74.3MB ± 0% -3.31% (p=0.000 n=10+10)
UnmarshalDataset/citm_catalog-32 37.3MB ± 0% 37.1MB ± 0% -0.60% (p=0.000 n=9+10)
UnmarshalDataset/twitter-32 15.6MB ± 0% 15.6MB ± 0% -0.09% (p=0.000 n=10+10)
UnmarshalDataset/code-32 60.2MB ± 0% 59.3MB ± 0% -1.51% (p=0.000 n=10+9)
UnmarshalDataset/example-32 238kB ± 0% 238kB ± 0% -0.18% (p=0.000 n=10+10)
ReferenceFile-32 11.8kB ± 0% 11.8kB ± 0% ~ (all equal)

name old allocs/op new allocs/op delta
UnmarshalDataset/config-32 653k ± 0% 645k ± 0% -1.20% (p=0.000 n=10+6)
UnmarshalDataset/canada-32 1.01M ± 0% 0.90M ± 0% -11.04% (p=0.000 n=9+10)
UnmarshalDataset/citm_catalog-32 384k ± 0% 370k ± 0% -3.75% (p=0.000 n=10+10)
UnmarshalDataset/twitter-32 160k ± 0% 157k ± 0% -1.32% (p=0.000 n=10+10)
UnmarshalDataset/code-32 2.97M ± 0% 2.91M ± 0% -2.15% (p=0.000 n=10+7)
UnmarshalDataset/example-32 3.69k ± 0% 3.63k ± 0% -1.52% (p=0.000 n=10+10)
ReferenceFile-32 253 ± 0% 253 ± 0% ~ (all equal)
```
174 lines
4.6 KiB
Go
174 lines
4.6 KiB
Go
package toml
|
|
|
|
// scanFollows reports whether b begins with the bytes of pattern.
//
// An empty pattern matches any input, including a nil slice. Input shorter
// than pattern never matches.
func scanFollows(b []byte, pattern string) bool {
	if len(b) < len(pattern) {
		return false
	}

	// The conversion is used only as a comparison operand.
	return string(b[:len(pattern)]) == pattern
}
|
|
|
|
func scanFollowsMultilineBasicStringDelimiter(b []byte) bool {
|
|
return scanFollows(b, `"""`)
|
|
}
|
|
|
|
func scanFollowsMultilineLiteralStringDelimiter(b []byte) bool {
|
|
return scanFollows(b, `'''`)
|
|
}
|
|
|
|
func scanFollowsTrue(b []byte) bool {
|
|
return scanFollows(b, `true`)
|
|
}
|
|
|
|
func scanFollowsFalse(b []byte) bool {
|
|
return scanFollows(b, `false`)
|
|
}
|
|
|
|
func scanFollowsInf(b []byte) bool {
|
|
return scanFollows(b, `inf`)
|
|
}
|
|
|
|
func scanFollowsNan(b []byte) bool {
|
|
return scanFollows(b, `nan`)
|
|
}
|
|
|
|
func scanUnquotedKey(b []byte) ([]byte, []byte, error) {
|
|
// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
|
|
for i := 0; i < len(b); i++ {
|
|
if !isUnquotedKeyChar(b[i]) {
|
|
return b[:i], b[i:], nil
|
|
}
|
|
}
|
|
|
|
return b, b[len(b):], nil
|
|
}
|
|
|
|
// isUnquotedKeyChar reports whether r is an ASCII letter, an ASCII digit,
// a hyphen, or an underscore.
func isUnquotedKeyChar(r byte) bool {
	switch {
	case 'A' <= r && r <= 'Z':
		return true
	case 'a' <= r && r <= 'z':
		return true
	case '0' <= r && r <= '9':
		return true
	default:
		return r == '-' || r == '_'
	}
}
|
|
|
|
func scanLiteralString(b []byte) ([]byte, []byte, error) {
|
|
// literal-string = apostrophe *literal-char apostrophe
|
|
// apostrophe = %x27 ; ' apostrophe
|
|
// literal-char = %x09 / %x20-26 / %x28-7E / non-ascii
|
|
for i := 1; i < len(b); i++ {
|
|
switch b[i] {
|
|
case '\'':
|
|
return b[:i+1], b[i+1:], nil
|
|
case '\n':
|
|
return nil, nil, newDecodeError(b[i:i+1], "literal strings cannot have new lines")
|
|
}
|
|
}
|
|
|
|
return nil, nil, newDecodeError(b[len(b):], "unterminated literal string")
|
|
}
|
|
|
|
func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) {
|
|
// ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body
|
|
// ml-literal-string-delim
|
|
// ml-literal-string-delim = 3apostrophe
|
|
// ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ]
|
|
//
|
|
// mll-content = mll-char / newline
|
|
// mll-char = %x09 / %x20-26 / %x28-7E / non-ascii
|
|
// mll-quotes = 1*2apostrophe
|
|
for i := 3; i < len(b); i++ {
|
|
if b[i] == '\'' && scanFollowsMultilineLiteralStringDelimiter(b[i:]) {
|
|
return b[:i+3], b[i+3:], nil
|
|
}
|
|
}
|
|
|
|
return nil, nil, newDecodeError(b[len(b):], `multiline literal string not terminated by '''`)
|
|
}
|
|
|
|
func scanWindowsNewline(b []byte) ([]byte, []byte, error) {
|
|
const lenCRLF = 2
|
|
if len(b) < lenCRLF {
|
|
return nil, nil, newDecodeError(b, "windows new line expected")
|
|
}
|
|
|
|
if b[1] != '\n' {
|
|
return nil, nil, newDecodeError(b, `windows new line should be \r\n`)
|
|
}
|
|
|
|
return b[:lenCRLF], b[lenCRLF:], nil
|
|
}
|
|
|
|
// scanWhitespace splits b into its leading run of spaces and tabs and the
// remaining input. Either part may be empty.
func scanWhitespace(b []byte) ([]byte, []byte) {
	end := 0
	for end < len(b) && (b[end] == ' ' || b[end] == '\t') {
		end++
	}

	return b[:end], b[end:]
}
|
|
|
|
// scanComment splits b into a comment and the remaining input.
//
// Scanning starts at index 1, so the first byte is skipped without being
// inspected (presumably the '#' already seen by the caller; confirm against
// the call site). The comment runs up to, but does not include, the first
// line feed; that line feed is the first byte of the remaining input.
//
// When no line feed is found the whole of b is the comment and the
// remainder is the empty slice b[len(b):] rather than nil. This matches
// scanWhitespace and scanUnquotedKey, and keeps the remainder a sub-slice
// of the input.
//
//nolint:unparam
func scanComment(b []byte) ([]byte, []byte) {
	// comment-start-symbol = %x23 ; #
	// non-ascii = %x80-D7FF / %xE000-10FFFF
	// non-eol = %x09 / %x20-7F / non-ascii
	//
	// comment = comment-start-symbol *non-eol
	for i := 1; i < len(b); i++ {
		if b[i] == '\n' {
			return b[:i], b[i:]
		}
	}

	return b, b[len(b):]
}
|
|
|
|
func scanBasicString(b []byte) ([]byte, []byte, error) {
|
|
// basic-string = quotation-mark *basic-char quotation-mark
|
|
// quotation-mark = %x22 ; "
|
|
// basic-char = basic-unescaped / escaped
|
|
// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
|
|
// escaped = escape escape-seq-char
|
|
for i := 1; i < len(b); i++ {
|
|
switch b[i] {
|
|
case '"':
|
|
return b[:i+1], b[i+1:], nil
|
|
case '\n':
|
|
return nil, nil, newDecodeError(b[i:i+1], "basic strings cannot have new lines")
|
|
case '\\':
|
|
if len(b) < i+2 {
|
|
return nil, nil, newDecodeError(b[i:i+1], "need a character after \\")
|
|
}
|
|
i++ // skip the next character
|
|
}
|
|
}
|
|
|
|
return nil, nil, newDecodeError(b[len(b):], `basic string not terminated by "`)
|
|
}
|
|
|
|
func scanMultilineBasicString(b []byte) ([]byte, []byte, error) {
|
|
// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
|
|
// ml-basic-string-delim
|
|
// ml-basic-string-delim = 3quotation-mark
|
|
// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
|
|
//
|
|
// mlb-content = mlb-char / newline / mlb-escaped-nl
|
|
// mlb-char = mlb-unescaped / escaped
|
|
// mlb-quotes = 1*2quotation-mark
|
|
// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
|
|
// mlb-escaped-nl = escape ws newline *( wschar / newline )
|
|
for i := 3; i < len(b); i++ {
|
|
switch b[i] {
|
|
case '"':
|
|
if scanFollowsMultilineBasicStringDelimiter(b[i:]) {
|
|
return b[:i+3], b[i+3:], nil
|
|
}
|
|
case '\\':
|
|
if len(b) < i+2 {
|
|
return nil, nil, newDecodeError(b[len(b):], "need a character after \\")
|
|
}
|
|
i++ // skip the next character
|
|
}
|
|
}
|
|
|
|
return nil, nil, newDecodeError(b[len(b):], `multiline basic string not terminated by """`)
|
|
}
|