Lex performance improvement (#176)

* Use []token instead of chan token

name             old time/op    new time/op    delta
ParseToml-8        1.18ms ± 0%    0.91ms ± 0%  -22.98%
UnmarshalToml-8    1.29ms ± 0%    0.95ms ± 0%  -25.96%

name             old alloc/op   new alloc/op   delta
ParseToml-8         429kB ± 0%     444kB ± 0%   +3.49%
UnmarshalToml-8     451kB ± 0%     466kB ± 0%   +3.32%

name             old allocs/op  new allocs/op  delta
ParseToml-8         14.1k ± 0%     13.7k ± 0%   -2.31%
UnmarshalToml-8     15.1k ± 0%     14.7k ± 0%   -2.16%

* Lex on []byte instead of io.Reader (deltas below are measured against the original baseline, not against the previous step)

name             old time/op    new time/op    delta
ParseToml-8        1.18ms ± 0%    0.29ms ± 0%  -75.18%
UnmarshalToml-8    1.27ms ± 0%    0.38ms ± 0%  -70.38%

name             old alloc/op   new alloc/op   delta
ParseToml-8         429kB ± 0%     135kB ± 0%  -68.53%
UnmarshalToml-8     451kB ± 0%     157kB ± 0%  -65.22%

name             old allocs/op  new allocs/op  delta
ParseToml-8         14.1k ± 0%      3.2k ± 0%  -77.20%
UnmarshalToml-8     15.1k ± 0%      4.2k ± 0%  -72.00%
This commit is contained in:
Thomas Pelletier
2017-06-27 18:26:37 -07:00
committed by GitHub
parent ef23ce9e92
commit 69d355db53
4 changed files with 70 additions and 105 deletions
+35 -42
View File
@@ -9,12 +9,9 @@ import (
"bytes" "bytes"
"errors" "errors"
"fmt" "fmt"
"io"
"regexp" "regexp"
"strconv" "strconv"
"strings" "strings"
"github.com/pelletier/go-buffruneio"
) )
var dateRegexp *regexp.Regexp var dateRegexp *regexp.Regexp
@@ -24,9 +21,11 @@ type tomlLexStateFn func() tomlLexStateFn
// Define lexer // Define lexer
type tomlLexer struct { type tomlLexer struct {
input *buffruneio.Reader // Textual source inputIdx int
buffer bytes.Buffer // Runes composing the current token input []rune // Textual source
tokens chan token currentTokenStart int
currentTokenStop int
tokens []token
depth int depth int
line int line int
col int col int
@@ -37,16 +36,14 @@ type tomlLexer struct {
// Basic read operations on input // Basic read operations on input
func (l *tomlLexer) read() rune { func (l *tomlLexer) read() rune {
r, _, err := l.input.ReadRune() r := l.peek()
if err != nil {
panic(err)
}
if r == '\n' { if r == '\n' {
l.endbufferLine++ l.endbufferLine++
l.endbufferCol = 1 l.endbufferCol = 1
} else { } else {
l.endbufferCol++ l.endbufferCol++
} }
l.inputIdx++
return r return r
} }
@@ -54,13 +51,13 @@ func (l *tomlLexer) next() rune {
r := l.read() r := l.read()
if r != eof { if r != eof {
l.buffer.WriteRune(r) l.currentTokenStop++
} }
return r return r
} }
func (l *tomlLexer) ignore() { func (l *tomlLexer) ignore() {
l.buffer.Reset() l.currentTokenStart = l.currentTokenStop
l.line = l.endbufferLine l.line = l.endbufferLine
l.col = l.endbufferCol l.col = l.endbufferCol
} }
@@ -77,49 +74,46 @@ func (l *tomlLexer) fastForward(n int) {
} }
func (l *tomlLexer) emitWithValue(t tokenType, value string) { func (l *tomlLexer) emitWithValue(t tokenType, value string) {
l.tokens <- token{ l.tokens = append(l.tokens, token{
Position: Position{l.line, l.col}, Position: Position{l.line, l.col},
typ: t, typ: t,
val: value, val: value,
} })
l.ignore() l.ignore()
} }
func (l *tomlLexer) emit(t tokenType) { func (l *tomlLexer) emit(t tokenType) {
l.emitWithValue(t, l.buffer.String()) l.emitWithValue(t, string(l.input[l.currentTokenStart:l.currentTokenStop]))
} }
func (l *tomlLexer) peek() rune { func (l *tomlLexer) peek() rune {
r, _, err := l.input.ReadRune() if l.inputIdx >= len(l.input) {
if err != nil { return eof
panic(err)
} }
l.input.UnreadRune() return l.input[l.inputIdx]
return r }
func (l *tomlLexer) peekString(size int) string {
maxIdx := len(l.input)
upperIdx := l.inputIdx + size // FIXME: potential overflow
if upperIdx > maxIdx {
upperIdx = maxIdx
}
return string(l.input[l.inputIdx:upperIdx])
} }
func (l *tomlLexer) follow(next string) bool { func (l *tomlLexer) follow(next string) bool {
for _, expectedRune := range next { return next == l.peekString(len(next))
r, _, err := l.input.ReadRune()
defer l.input.UnreadRune()
if err != nil {
panic(err)
}
if expectedRune != r {
return false
}
}
return true
} }
// Error management // Error management
func (l *tomlLexer) errorf(format string, args ...interface{}) tomlLexStateFn { func (l *tomlLexer) errorf(format string, args ...interface{}) tomlLexStateFn {
l.tokens <- token{ l.tokens = append(l.tokens, token{
Position: Position{l.line, l.col}, Position: Position{l.line, l.col},
typ: tokenError, typ: tokenError,
val: fmt.Sprintf(format, args...), val: fmt.Sprintf(format, args...),
} })
return nil return nil
} }
@@ -220,7 +214,7 @@ func (l *tomlLexer) lexRvalue() tomlLexStateFn {
break break
} }
possibleDate := string(l.input.PeekRunes(35)) possibleDate := l.peekString(35)
dateMatch := dateRegexp.FindString(possibleDate) dateMatch := dateRegexp.FindString(possibleDate)
if dateMatch != "" { if dateMatch != "" {
l.fastForward(len(dateMatch)) l.fastForward(len(dateMatch))
@@ -537,7 +531,7 @@ func (l *tomlLexer) lexInsideTableArrayKey() tomlLexStateFn {
for r := l.peek(); r != eof; r = l.peek() { for r := l.peek(); r != eof; r = l.peek() {
switch r { switch r {
case ']': case ']':
if l.buffer.Len() > 0 { if l.currentTokenStop > l.currentTokenStart {
l.emit(tokenKeyGroupArray) l.emit(tokenKeyGroupArray)
} }
l.next() l.next()
@@ -560,7 +554,7 @@ func (l *tomlLexer) lexInsideTableKey() tomlLexStateFn {
for r := l.peek(); r != eof; r = l.peek() { for r := l.peek(); r != eof; r = l.peek() {
switch r { switch r {
case ']': case ']':
if l.buffer.Len() > 0 { if l.currentTokenStop > l.currentTokenStart {
l.emit(tokenKeyGroup) l.emit(tokenKeyGroup)
} }
l.next() l.next()
@@ -635,7 +629,6 @@ func (l *tomlLexer) run() {
for state := l.lexVoid; state != nil; { for state := l.lexVoid; state != nil; {
state = state() state = state()
} }
close(l.tokens)
} }
func init() { func init() {
@@ -643,16 +636,16 @@ func init() {
} }
// Entry point // Entry point
func lexToml(input io.Reader) chan token { func lexToml(inputBytes []byte) []token {
bufferedInput := buffruneio.NewReader(input) runes := bytes.Runes(inputBytes)
l := &tomlLexer{ l := &tomlLexer{
input: bufferedInput, input: runes,
tokens: make(chan token), tokens: make([]token, 0, 256),
line: 1, line: 1,
col: 1, col: 1,
endbufferLine: 1, endbufferLine: 1,
endbufferCol: 1, endbufferCol: 1,
} }
go l.run() l.run()
return l.tokens return l.tokens
} }
+5 -34
View File
@@ -1,38 +1,14 @@
package toml package toml
import ( import (
"os" "reflect"
"strings"
"testing" "testing"
) )
func testFlow(t *testing.T, input string, expectedFlow []token) { func testFlow(t *testing.T, input string, expectedFlow []token) {
ch := lexToml(strings.NewReader(input)) tokens := lexToml([]byte(input))
for _, expected := range expectedFlow { if !reflect.DeepEqual(tokens, expectedFlow) {
token := <-ch t.Fatal("Different flows. Expected\n", expectedFlow, "\nGot:\n", tokens)
if token != expected {
t.Log("While testing: ", input)
t.Log("compared (got)", token, "to (expected)", expected)
t.Log("\tvalue:", token.val, "<->", expected.val)
t.Log("\tvalue as bytes:", []byte(token.val), "<->", []byte(expected.val))
t.Log("\ttype:", token.typ.String(), "<->", expected.typ.String())
t.Log("\tline:", token.Line, "<->", expected.Line)
t.Log("\tcolumn:", token.Col, "<->", expected.Col)
t.Log("compared", token, "to", expected)
t.FailNow()
}
}
tok, ok := <-ch
if ok {
t.Log("channel is not closed!")
t.Log(len(ch)+1, "tokens remaining:")
t.Log("token ->", tok)
for token := range ch {
t.Log("token ->", token)
}
t.FailNow()
} }
} }
@@ -767,13 +743,8 @@ pluralizeListTitles = false
url = "https://github.com/spf13/hugo/releases" url = "https://github.com/spf13/hugo/releases"
weight = -200 weight = -200
` `
rd := strings.NewReader(sample)
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
rd.Seek(0, os.SEEK_SET) lexToml([]byte(sample))
ch := lexToml(rd)
for _ = range ch {
}
} }
} }
+10 -20
View File
@@ -13,9 +13,9 @@ import (
) )
type tomlParser struct { type tomlParser struct {
flow chan token flowIdx int
flow []token
tree *Tree tree *Tree
tokensBuffer []token
currentTable []string currentTable []string
seenTableKeys []string seenTableKeys []string
} }
@@ -34,16 +34,10 @@ func (p *tomlParser) run() {
} }
func (p *tomlParser) peek() *token { func (p *tomlParser) peek() *token {
if len(p.tokensBuffer) != 0 { if p.flowIdx >= len(p.flow) {
return &(p.tokensBuffer[0])
}
tok, ok := <-p.flow
if !ok {
return nil return nil
} }
p.tokensBuffer = append(p.tokensBuffer, tok) return &p.flow[p.flowIdx]
return &tok
} }
func (p *tomlParser) assume(typ tokenType) { func (p *tomlParser) assume(typ tokenType) {
@@ -57,16 +51,12 @@ func (p *tomlParser) assume(typ tokenType) {
} }
func (p *tomlParser) getToken() *token { func (p *tomlParser) getToken() *token {
if len(p.tokensBuffer) != 0 { tok := p.peek()
tok := p.tokensBuffer[0] if tok == nil {
p.tokensBuffer = p.tokensBuffer[1:]
return &tok
}
tok, ok := <-p.flow
if !ok {
return nil return nil
} }
return &tok p.flowIdx++
return tok
} }
func (p *tomlParser) parseStart() tomlParserStateFn { func (p *tomlParser) parseStart() tomlParserStateFn {
@@ -374,13 +364,13 @@ func (p *tomlParser) parseArray() interface{} {
return array return array
} }
func parseToml(flow chan token) *Tree { func parseToml(flow []token) *Tree {
result := newTree() result := newTree()
result.position = Position{1, 1} result.position = Position{1, 1}
parser := &tomlParser{ parser := &tomlParser{
flowIdx: 0,
flow: flow, flow: flow,
tree: result, tree: result,
tokensBuffer: make([]token, 0),
currentTable: make([]string, 0), currentTable: make([]string, 0),
seenTableKeys: make([]string, 0), seenTableKeys: make([]string, 0),
} }
+15 -4
View File
@@ -4,6 +4,7 @@ import (
"errors" "errors"
"fmt" "fmt"
"io" "io"
"io/ioutil"
"os" "os"
"runtime" "runtime"
"strings" "strings"
@@ -251,8 +252,8 @@ func (t *Tree) createSubTree(keys []string, pos Position) error {
return nil return nil
} }
// LoadReader creates a Tree from any io.Reader. // LoadBytes creates a Tree from a []byte.
func LoadReader(reader io.Reader) (tree *Tree, err error) { func LoadBytes(b []byte) (tree *Tree, err error) {
defer func() { defer func() {
if r := recover(); r != nil { if r := recover(); r != nil {
if _, ok := r.(runtime.Error); ok { if _, ok := r.(runtime.Error); ok {
@@ -261,13 +262,23 @@ func LoadReader(reader io.Reader) (tree *Tree, err error) {
err = errors.New(r.(string)) err = errors.New(r.(string))
} }
}() }()
tree = parseToml(lexToml(reader)) tree = parseToml(lexToml(b))
return
}
// LoadReader creates a Tree from any io.Reader.
func LoadReader(reader io.Reader) (tree *Tree, err error) {
inputBytes, err := ioutil.ReadAll(reader)
if err != nil {
return
}
tree, err = LoadBytes(inputBytes)
return return
} }
// Load creates a Tree from a string. // Load creates a Tree from a string.
func Load(content string) (tree *Tree, err error) { func Load(content string) (tree *Tree, err error) {
return LoadReader(strings.NewReader(content)) return LoadBytes([]byte(content))
} }
// LoadFile creates a Tree from a file. // LoadFile creates a Tree from a file.