Default to use bytes instead of runes

benchmark               old ns/op     new ns/op     delta
BenchmarkParseAll-8     3238          1941          -40.06%
This commit is contained in:
Thomas Pelletier
2021-02-01 20:20:24 -05:00
parent b96c535061
commit 2ab0f8c733
+69 -83
View File
@@ -110,7 +110,33 @@ type parser struct {
lookahead lookahead lookahead lookahead
} }
func (p *parser) peek() (rune, error) { func (p *parser) peek() rune {
if p.end >= len(p.data) {
return eof
}
return rune(p.data[p.end])
}
// next returns whatever peek reports at the cursor and, unless that value is
// eof, moves p.end forward by one so the following call sees the next
// position. At eof the cursor is left untouched, so repeated calls keep
// returning eof.
func (p *parser) next() rune {
	r := p.peek()
	if r == eof {
		// Nothing left to consume; do not move past the end of the input.
		return r
	}
	p.end++
	return r
}
// expect reads one value with next and checks it against expected.
//
// It returns nil on a match. On a mismatch it returns an *UnexpectedCharacter
// carrying both the value that was read and the one that was wanted. The read
// happens before the comparison, so the cursor has already been moved by next
// (when not at eof) even if an error is returned.
func (p *parser) expect(expected rune) error {
	if got := p.next(); got != expected {
		return &UnexpectedCharacter{
			r:        got,
			expected: expected,
		}
	}
	return nil
}
func (p *parser) peekRune() (rune, error) {
if p.lookahead.empty() { if p.lookahead.empty() {
p.lookahead.r, p.lookahead.size = utf8.DecodeRune(p.data[p.end:]) p.lookahead.r, p.lookahead.size = utf8.DecodeRune(p.data[p.end:])
if p.lookahead.r == utf8.RuneError { if p.lookahead.r == utf8.RuneError {
@@ -129,8 +155,8 @@ func (p *parser) peek() (rune, error) {
return p.lookahead.r, nil return p.lookahead.r, nil
} }
func (p *parser) next() (rune, error) { func (p *parser) nextRune() (rune, error) {
r, err := p.peek() r, err := p.peekRune()
if err == nil { if err == nil {
p.end += p.lookahead.size p.end += p.lookahead.size
p.lookahead.r = 0 p.lookahead.r = 0
@@ -139,8 +165,8 @@ func (p *parser) next() (rune, error) {
return r, err return r, err
} }
func (p *parser) sureNext() { func (p *parser) sureNextRune() {
_, err := p.next() _, err := p.nextRune()
if err != nil { if err != nil {
panic(err) panic(err)
} }
@@ -162,8 +188,8 @@ func (p *parser) accept() []byte {
return x return x
} }
func (p *parser) expect(expected rune) error { func (p *parser) expectRune(expected rune) error {
r, err := p.next() r, err := p.nextRune()
if err != nil { if err != nil {
return err return err
} }
@@ -205,10 +231,7 @@ func (p *parser) parse() error {
} }
// new lines between expressions // new lines between expressions
r, err := p.next() r := p.next()
if err != nil {
return err
}
switch r { switch r {
case eof: case eof:
return nil return nil
@@ -216,10 +239,7 @@ func (p *parser) parse() error {
p.ignore() p.ignore()
continue continue
case '\r': case '\r':
r, err = p.next() r = p.next()
if err != nil {
return err
}
if r == '\n' { if r == '\n' {
p.ignore() p.ignore()
continue continue
@@ -235,10 +255,7 @@ func (p *parser) parseExpression() error {
return err return err
} }
r, err := p.peek() r := p.peek()
if err != nil {
return err
}
// Line with just whitespace and a comment. We can exit early. // Line with just whitespace and a comment. We can exit early.
if r == '#' { if r == '#' {
@@ -267,10 +284,7 @@ func (p *parser) parseExpression() error {
return err return err
} }
r, err = p.peek() r = p.peek()
if err != nil {
return err
}
if r == '#' { if r == '#' {
return p.parseComment() return p.parseComment()
} }
@@ -310,10 +324,7 @@ func (p *parser) parseVal() error {
//val = string / boolean / array / inline-table / date-time / float / integer //val = string / boolean / array / inline-table / date-time / float / integer
// string = ml-basic-string / basic-string / ml-literal-string / literal-string // string = ml-basic-string / basic-string / ml-literal-string / literal-string
r, err := p.peek() r := p.peek()
if err != nil {
return err
}
switch r { switch r {
case 't', 'f': case 't', 'f':
@@ -325,14 +336,11 @@ func (p *parser) parseVal() error {
} }
func (p *parser) parseBool() error { func (p *parser) parseBool() error {
r, err := p.peek() r := p.peek()
if err != nil {
return err
}
if r == 't' { if r == 't' {
p.sureNext() p.next()
err = p.expect('r') err := p.expect('r')
if err != nil { if err != nil {
return err return err
} }
@@ -345,8 +353,8 @@ func (p *parser) parseBool() error {
return err return err
} }
} else if r == 'f' { } else if r == 'f' {
p.sureNext() p.next()
err = p.expect('a') err := p.expect('a')
if err != nil { if err != nil {
return err return err
} }
@@ -386,16 +394,12 @@ func (p *parser) parseKey() error {
return err return err
} }
r, err := p.peek() r := p.peek()
if err != nil {
return err
}
if r != '.' { if r != '.' {
break break
} }
p.sureNext() p.next()
p.builder.Dot(p.accept()) p.builder.Dot(p.accept())
err = p.parseWhitespace() err = p.parseWhitespace()
@@ -423,10 +427,7 @@ func (p *parser) parseSimpleKey() error {
// basic-string = quotation-mark *basic-char quotation-mark // basic-string = quotation-mark *basic-char quotation-mark
// literal-string = apostrophe *literal-char apostrophe // literal-string = apostrophe *literal-char apostrophe
r, err := p.peek() r := p.peek()
if err != nil {
return err
}
switch r { switch r {
case '\'': case '\'':
@@ -439,24 +440,20 @@ func (p *parser) parseSimpleKey() error {
} }
func (p *parser) parseUnquotedKey() error { func (p *parser) parseUnquotedKey() error {
r, err := p.next() // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
if err != nil {
return err r := p.next()
}
if !isUnquotedKeyRune(r) { if !isUnquotedKeyRune(r) {
return &InvalidCharacter{r: r} return &InvalidCharacter{r: r}
} }
for { for {
r, err := p.peek() r := p.peek()
if err != nil {
return err
}
if !isUnquotedKeyRune(r) { if !isUnquotedKeyRune(r) {
break break
} }
p.sureNext() p.next()
} }
p.builder.UnquotedKey(p.accept()) p.builder.UnquotedKey(p.accept())
return nil return nil
@@ -468,25 +465,17 @@ func (p *parser) parseComment() error {
} }
for { for {
r, err := p.peek() r := p.peek()
if err != nil {
return err
}
if r == eof || r == '\n' { if r == eof || r == '\n' {
p.builder.Comment(p.accept()) p.builder.Comment(p.accept())
return nil return nil
} }
p.sureNext() p.next()
} }
} }
func isWhitespace(r rune) bool { func isWhitespace(r rune) bool {
switch r { return r == 0x20 || r == 0x09
case 0x20, 0x09:
return true
default:
return false
}
} }
type InvalidUnicodeError struct { type InvalidUnicodeError struct {
@@ -499,12 +488,9 @@ func (e *InvalidUnicodeError) Error() string {
func (p *parser) parseWhitespace() error { func (p *parser) parseWhitespace() error {
for { for {
r, err := p.peek() r := p.peek()
if err != nil {
return err
}
if isWhitespace(r) { if isWhitespace(r) {
p.sureNext() p.next()
} else { } else {
if !p.empty() { if !p.empty() {
p.builder.Whitespace(p.accept()) p.builder.Whitespace(p.accept())
@@ -534,20 +520,20 @@ func (p *parser) parseLiteralString() error {
p.ignore() p.ignore()
for { for {
r, err := p.peek() r, err := p.peekRune()
if err != nil { if err != nil {
return err return err
} }
if r == '\'' { if r == '\'' {
p.builder.LiteralString(p.accept()) p.builder.LiteralString(p.accept())
p.sureNext() p.sureNextRune()
p.ignore() p.ignore()
return nil return nil
} }
if !isLiteralChar(r) { if !isLiteralChar(r) {
return &InvalidCharacter{r: r} return &InvalidCharacter{r: r}
} }
p.sureNext() p.sureNextRune()
} }
} }
@@ -587,33 +573,33 @@ func (p *parser) parseBasicString() error {
p.ignore() p.ignore()
for { for {
r, err := p.peek() r, err := p.peekRune()
if err != nil { if err != nil {
return err return err
} }
if r == '"' { if r == '"' {
p.builder.BasicString(p.accept()) p.builder.BasicString(p.accept())
p.sureNext() p.sureNextRune()
p.ignore() p.ignore()
return nil return nil
} }
if r == '\\' { if r == '\\' {
p.sureNext() p.sureNextRune()
r, err := p.peek() r, err := p.peekRune()
if err != nil { if err != nil {
return err return err
} }
if isEscapeChar(r) { if isEscapeChar(r) {
p.sureNext() p.sureNextRune()
continue continue
} }
if r == 'u' { if r == 'u' {
p.sureNext() p.sureNextRune()
for i := 0; i < 4; i++ { for i := 0; i < 4; i++ {
r, err := p.next() r, err := p.nextRune()
if err != nil { if err != nil {
return err return err
} }
@@ -625,9 +611,9 @@ func (p *parser) parseBasicString() error {
} }
if r == 'U' { if r == 'U' {
p.sureNext() p.sureNextRune()
for i := 0; i < 8; i++ { for i := 0; i < 8; i++ {
r, err := p.next() r, err := p.nextRune()
if err != nil { if err != nil {
return err return err
} }
@@ -642,7 +628,7 @@ func (p *parser) parseBasicString() error {
} }
if isBasicStringChar(r) { if isBasicStringChar(r) {
p.sureNext() p.sureNextRune()
continue continue
} }
} }