utf8: use lookup table to validate ASCII (#654)

This commit is contained in:
Thomas Pelletier
2021-11-04 16:05:36 -04:00
committed by GitHub
parent 3dbca20bc9
commit 6617e7e73d
2 changed files with 57 additions and 1 deletions
+19
View File
@@ -1,6 +1,8 @@
package toml package toml
import ( import (
"strconv"
"strings"
"testing" "testing"
"github.com/pelletier/go-toml/v2/internal/ast" "github.com/pelletier/go-toml/v2/internal/ast"
@@ -371,6 +373,23 @@ func BenchmarkParseBasicStringWithUnicode(b *testing.B) {
}) })
} }
func BenchmarkParseBasicStringsEasy(b *testing.B) {
p := &parser{}
for _, size := range []int{1, 4, 8, 16, 21} {
b.Run(strconv.Itoa(size), func(b *testing.B) {
input := []byte(`"` + strings.Repeat("A", size) + `"`)
b.ReportAllocs()
b.SetBytes(int64(len(input)))
for i := 0; i < b.N; i++ {
p.parseBasicString(input)
}
})
}
}
func TestParser_AST_DateTimes(t *testing.T) { func TestParser_AST_DateTimes(t *testing.T) {
examples := []struct { examples := []struct {
desc string desc string
+38 -1
View File
@@ -140,8 +140,45 @@ func utf8ValidNext(p []byte) int {
return size return size
} }
var invalidAsciiTable = [256]bool{
0x00: true,
0x01: true,
0x02: true,
0x03: true,
0x04: true,
0x05: true,
0x06: true,
0x07: true,
0x08: true,
// 0x09 TAB
// 0x0A LF
0x0B: true,
0x0C: true,
// 0x0D CR
0x0E: true,
0x0F: true,
0x10: true,
0x11: true,
0x12: true,
0x13: true,
0x14: true,
0x15: true,
0x16: true,
0x17: true,
0x18: true,
0x19: true,
0x1A: true,
0x1B: true,
0x1C: true,
0x1D: true,
0x1E: true,
0x1F: true,
// 0x20 - 0x7E Printable ASCII characters
0x7F: true,
}
func invalidAscii(b byte) bool { func invalidAscii(b byte) bool {
return b <= 0x08 || (b > 0x0A && b < 0x0D) || (b > 0x0D && b <= 0x1F) || b == 0x7F return invalidAsciiTable[b]
} }
// acceptRange gives the range of valid values for the second byte in a UTF-8 // acceptRange gives the range of valid values for the second byte in a UTF-8