// invalidAsciiTable is a lookup table indexed by byte value. An entry is true
// for the ASCII bytes treated as invalid: every control character in the
// range 0x00-0x1F except TAB (0x09), LF (0x0A) and CR (0x0D), plus DEL (0x7F).
// Every other entry, including all bytes in 0x80-0xFF, is left at the zero
// value (false).
var invalidAsciiTable = [256]bool{
	0x00: true,
	0x01: true,
	0x02: true,
	0x03: true,
	0x04: true,
	0x05: true,
	0x06: true,
	0x07: true,
	0x08: true,
	// 0x09 TAB
	// 0x0A LF
	0x0B: true,
	0x0C: true,
	// 0x0D CR
	0x0E: true,
	0x0F: true,
	0x10: true,
	0x11: true,
	0x12: true,
	0x13: true,
	0x14: true,
	0x15: true,
	0x16: true,
	0x17: true,
	0x18: true,
	0x19: true,
	0x1A: true,
	0x1B: true,
	0x1C: true,
	0x1D: true,
	0x1E: true,
	0x1F: true,
	// 0x20 - 0x7E Printable ASCII characters
	0x7F: true,
}

// invalidAscii reports whether b is marked as invalid in invalidAsciiTable.
//
// The answer is the same as the comparison chain
//
//	b <= 0x08 || (b > 0x0A && b < 0x0D) || (b > 0x0D && b <= 0x1F) || b == 0x7F
//
// expressed as a single table load. A byte can only hold 0-255, so the index
// is always within the 256-entry array.
func invalidAscii(b byte) bool {
	return invalidAsciiTable[b]
}