Lex performance improvement (#176)

* Use []token instead of chan token

name             old time/op    new time/op    delta
ParseToml-8        1.18ms ± 0%    0.91ms ± 0%  -22.98%
UnmarshalToml-8    1.29ms ± 0%    0.95ms ± 0%  -25.96%

name             old alloc/op   new alloc/op   delta
ParseToml-8         429kB ± 0%     444kB ± 0%   +3.49%
UnmarshalToml-8     451kB ± 0%     466kB ± 0%   +3.32%

name             old allocs/op  new allocs/op  delta
ParseToml-8         14.1k ± 0%     13.7k ± 0%   -2.31%
UnmarshalToml-8     15.1k ± 0%     14.7k ± 0%   -2.16%

* Lex on []byte instead of io.Reader

name             old time/op    new time/op    delta
ParseToml-8        1.18ms ± 0%    0.29ms ± 0%  -75.18%
UnmarshalToml-8    1.27ms ± 0%    0.38ms ± 0%  -70.38%

name             old alloc/op   new alloc/op   delta
ParseToml-8         429kB ± 0%     135kB ± 0%  -68.53%
UnmarshalToml-8     451kB ± 0%     157kB ± 0%  -65.22%

name             old allocs/op  new allocs/op  delta
ParseToml-8         14.1k ± 0%      3.2k ± 0%  -77.20%
UnmarshalToml-8     15.1k ± 0%      4.2k ± 0%  -72.00%
This commit is contained in:
Thomas Pelletier
2017-06-27 18:26:37 -07:00
committed by GitHub
parent ef23ce9e92
commit 69d355db53
4 changed files with 70 additions and 105 deletions
+40 -47
View File
@@ -9,12 +9,9 @@ import (
"bytes"
"errors"
"fmt"
"io"
"regexp"
"strconv"
"strings"
"github.com/pelletier/go-buffruneio"
)
var dateRegexp *regexp.Regexp
@@ -24,29 +21,29 @@ type tomlLexStateFn func() tomlLexStateFn
// Define lexer
type tomlLexer struct {
input *buffruneio.Reader // Textual source
buffer bytes.Buffer // Runes composing the current token
tokens chan token
depth int
line int
col int
endbufferLine int
endbufferCol int
inputIdx int
input []rune // Textual source
currentTokenStart int
currentTokenStop int
tokens []token
depth int
line int
col int
endbufferLine int
endbufferCol int
}
// Basic read operations on input
func (l *tomlLexer) read() rune {
r, _, err := l.input.ReadRune()
if err != nil {
panic(err)
}
r := l.peek()
if r == '\n' {
l.endbufferLine++
l.endbufferCol = 1
} else {
l.endbufferCol++
}
l.inputIdx++
return r
}
@@ -54,13 +51,13 @@ func (l *tomlLexer) next() rune {
r := l.read()
if r != eof {
l.buffer.WriteRune(r)
l.currentTokenStop++
}
return r
}
func (l *tomlLexer) ignore() {
l.buffer.Reset()
l.currentTokenStart = l.currentTokenStop
l.line = l.endbufferLine
l.col = l.endbufferCol
}
@@ -77,49 +74,46 @@ func (l *tomlLexer) fastForward(n int) {
}
func (l *tomlLexer) emitWithValue(t tokenType, value string) {
l.tokens <- token{
l.tokens = append(l.tokens, token{
Position: Position{l.line, l.col},
typ: t,
val: value,
}
})
l.ignore()
}
func (l *tomlLexer) emit(t tokenType) {
l.emitWithValue(t, l.buffer.String())
l.emitWithValue(t, string(l.input[l.currentTokenStart:l.currentTokenStop]))
}
func (l *tomlLexer) peek() rune {
r, _, err := l.input.ReadRune()
if err != nil {
panic(err)
if l.inputIdx >= len(l.input) {
return eof
}
l.input.UnreadRune()
return r
return l.input[l.inputIdx]
}
func (l *tomlLexer) peekString(size int) string {
maxIdx := len(l.input)
upperIdx := l.inputIdx + size // FIXME: potential overflow
if upperIdx > maxIdx {
upperIdx = maxIdx
}
return string(l.input[l.inputIdx:upperIdx])
}
func (l *tomlLexer) follow(next string) bool {
for _, expectedRune := range next {
r, _, err := l.input.ReadRune()
defer l.input.UnreadRune()
if err != nil {
panic(err)
}
if expectedRune != r {
return false
}
}
return true
return next == l.peekString(len(next))
}
// Error management
func (l *tomlLexer) errorf(format string, args ...interface{}) tomlLexStateFn {
l.tokens <- token{
l.tokens = append(l.tokens, token{
Position: Position{l.line, l.col},
typ: tokenError,
val: fmt.Sprintf(format, args...),
}
})
return nil
}
@@ -220,7 +214,7 @@ func (l *tomlLexer) lexRvalue() tomlLexStateFn {
break
}
possibleDate := string(l.input.PeekRunes(35))
possibleDate := l.peekString(35)
dateMatch := dateRegexp.FindString(possibleDate)
if dateMatch != "" {
l.fastForward(len(dateMatch))
@@ -537,7 +531,7 @@ func (l *tomlLexer) lexInsideTableArrayKey() tomlLexStateFn {
for r := l.peek(); r != eof; r = l.peek() {
switch r {
case ']':
if l.buffer.Len() > 0 {
if l.currentTokenStop > l.currentTokenStart {
l.emit(tokenKeyGroupArray)
}
l.next()
@@ -560,7 +554,7 @@ func (l *tomlLexer) lexInsideTableKey() tomlLexStateFn {
for r := l.peek(); r != eof; r = l.peek() {
switch r {
case ']':
if l.buffer.Len() > 0 {
if l.currentTokenStop > l.currentTokenStart {
l.emit(tokenKeyGroup)
}
l.next()
@@ -635,7 +629,6 @@ func (l *tomlLexer) run() {
for state := l.lexVoid; state != nil; {
state = state()
}
close(l.tokens)
}
func init() {
@@ -643,16 +636,16 @@ func init() {
}
// Entry point
func lexToml(input io.Reader) chan token {
bufferedInput := buffruneio.NewReader(input)
func lexToml(inputBytes []byte) []token {
runes := bytes.Runes(inputBytes)
l := &tomlLexer{
input: bufferedInput,
tokens: make(chan token),
input: runes,
tokens: make([]token, 0, 256),
line: 1,
col: 1,
endbufferLine: 1,
endbufferCol: 1,
}
go l.run()
l.run()
return l.tokens
}
+5 -34
View File
@@ -1,38 +1,14 @@
package toml
import (
"os"
"strings"
"reflect"
"testing"
)
func testFlow(t *testing.T, input string, expectedFlow []token) {
ch := lexToml(strings.NewReader(input))
for _, expected := range expectedFlow {
token := <-ch
if token != expected {
t.Log("While testing: ", input)
t.Log("compared (got)", token, "to (expected)", expected)
t.Log("\tvalue:", token.val, "<->", expected.val)
t.Log("\tvalue as bytes:", []byte(token.val), "<->", []byte(expected.val))
t.Log("\ttype:", token.typ.String(), "<->", expected.typ.String())
t.Log("\tline:", token.Line, "<->", expected.Line)
t.Log("\tcolumn:", token.Col, "<->", expected.Col)
t.Log("compared", token, "to", expected)
t.FailNow()
}
}
tok, ok := <-ch
if ok {
t.Log("channel is not closed!")
t.Log(len(ch)+1, "tokens remaining:")
t.Log("token ->", tok)
for token := range ch {
t.Log("token ->", token)
}
t.FailNow()
tokens := lexToml([]byte(input))
if !reflect.DeepEqual(tokens, expectedFlow) {
t.Fatal("Different flows. Expected\n", expectedFlow, "\nGot:\n", tokens)
}
}
@@ -767,13 +743,8 @@ pluralizeListTitles = false
url = "https://github.com/spf13/hugo/releases"
weight = -200
`
rd := strings.NewReader(sample)
b.ResetTimer()
for i := 0; i < b.N; i++ {
rd.Seek(0, os.SEEK_SET)
ch := lexToml(rd)
for _ = range ch {
}
lexToml([]byte(sample))
}
}
+10 -20
View File
@@ -13,9 +13,9 @@ import (
)
type tomlParser struct {
flow chan token
flowIdx int
flow []token
tree *Tree
tokensBuffer []token
currentTable []string
seenTableKeys []string
}
@@ -34,16 +34,10 @@ func (p *tomlParser) run() {
}
func (p *tomlParser) peek() *token {
if len(p.tokensBuffer) != 0 {
return &(p.tokensBuffer[0])
}
tok, ok := <-p.flow
if !ok {
if p.flowIdx >= len(p.flow) {
return nil
}
p.tokensBuffer = append(p.tokensBuffer, tok)
return &tok
return &p.flow[p.flowIdx]
}
func (p *tomlParser) assume(typ tokenType) {
@@ -57,16 +51,12 @@ func (p *tomlParser) assume(typ tokenType) {
}
func (p *tomlParser) getToken() *token {
if len(p.tokensBuffer) != 0 {
tok := p.tokensBuffer[0]
p.tokensBuffer = p.tokensBuffer[1:]
return &tok
}
tok, ok := <-p.flow
if !ok {
tok := p.peek()
if tok == nil {
return nil
}
return &tok
p.flowIdx++
return tok
}
func (p *tomlParser) parseStart() tomlParserStateFn {
@@ -374,13 +364,13 @@ func (p *tomlParser) parseArray() interface{} {
return array
}
func parseToml(flow chan token) *Tree {
func parseToml(flow []token) *Tree {
result := newTree()
result.position = Position{1, 1}
parser := &tomlParser{
flowIdx: 0,
flow: flow,
tree: result,
tokensBuffer: make([]token, 0),
currentTable: make([]string, 0),
seenTableKeys: make([]string, 0),
}
+15 -4
View File
@@ -4,6 +4,7 @@ import (
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"runtime"
"strings"
@@ -251,8 +252,8 @@ func (t *Tree) createSubTree(keys []string, pos Position) error {
return nil
}
// LoadReader creates a Tree from any io.Reader.
func LoadReader(reader io.Reader) (tree *Tree, err error) {
// LoadBytes creates a Tree from a []byte.
func LoadBytes(b []byte) (tree *Tree, err error) {
defer func() {
if r := recover(); r != nil {
if _, ok := r.(runtime.Error); ok {
@@ -261,13 +262,23 @@ func LoadReader(reader io.Reader) (tree *Tree, err error) {
err = errors.New(r.(string))
}
}()
tree = parseToml(lexToml(reader))
tree = parseToml(lexToml(b))
return
}
// LoadReader creates a Tree from any io.Reader.
func LoadReader(reader io.Reader) (tree *Tree, err error) {
inputBytes, err := ioutil.ReadAll(reader)
if err != nil {
return
}
tree, err = LoadBytes(inputBytes)
return
}
// Load creates a Tree from a string.
func Load(content string) (tree *Tree, err error) {
return LoadReader(strings.NewReader(content))
return LoadBytes([]byte(content))
}
// LoadFile creates a Tree from a file.