Lex performance improvement (#176)
* Use []token instead of chan token

  name             old time/op    new time/op    delta
  ParseToml-8      1.18ms ± 0%    0.91ms ± 0%    -22.98%
  UnmarshalToml-8  1.29ms ± 0%    0.95ms ± 0%    -25.96%

  name             old alloc/op   new alloc/op   delta
  ParseToml-8      429kB ± 0%     444kB ± 0%     +3.49%
  UnmarshalToml-8  451kB ± 0%     466kB ± 0%     +3.32%

  name             old allocs/op  new allocs/op  delta
  ParseToml-8      14.1k ± 0%     13.7k ± 0%     -2.31%
  UnmarshalToml-8  15.1k ± 0%     14.7k ± 0%     -2.16%

* Lex on []byte instead of io.Reader

  name             old time/op    new time/op    delta
  ParseToml-8      1.18ms ± 0%    0.29ms ± 0%    -75.18%
  UnmarshalToml-8  1.27ms ± 0%    0.38ms ± 0%    -70.38%

  name             old alloc/op   new alloc/op   delta
  ParseToml-8      429kB ± 0%     135kB ± 0%     -68.53%
  UnmarshalToml-8  451kB ± 0%     157kB ± 0%     -65.22%

  name             old allocs/op  new allocs/op  delta
  ParseToml-8      14.1k ± 0%     3.2k ± 0%      -77.20%
  UnmarshalToml-8  15.1k ± 0%     4.2k ± 0%      -72.00%
This commit is contained in:
@@ -9,12 +9,9 @@ import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/pelletier/go-buffruneio"
|
||||
)
|
||||
|
||||
var dateRegexp *regexp.Regexp
|
||||
@@ -24,29 +21,29 @@ type tomlLexStateFn func() tomlLexStateFn
|
||||
|
||||
// Define lexer
|
||||
type tomlLexer struct {
|
||||
input *buffruneio.Reader // Textual source
|
||||
buffer bytes.Buffer // Runes composing the current token
|
||||
tokens chan token
|
||||
depth int
|
||||
line int
|
||||
col int
|
||||
endbufferLine int
|
||||
endbufferCol int
|
||||
inputIdx int
|
||||
input []rune // Textual source
|
||||
currentTokenStart int
|
||||
currentTokenStop int
|
||||
tokens []token
|
||||
depth int
|
||||
line int
|
||||
col int
|
||||
endbufferLine int
|
||||
endbufferCol int
|
||||
}
|
||||
|
||||
// Basic read operations on input
|
||||
|
||||
func (l *tomlLexer) read() rune {
|
||||
r, _, err := l.input.ReadRune()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
r := l.peek()
|
||||
if r == '\n' {
|
||||
l.endbufferLine++
|
||||
l.endbufferCol = 1
|
||||
} else {
|
||||
l.endbufferCol++
|
||||
}
|
||||
l.inputIdx++
|
||||
return r
|
||||
}
|
||||
|
||||
@@ -54,13 +51,13 @@ func (l *tomlLexer) next() rune {
|
||||
r := l.read()
|
||||
|
||||
if r != eof {
|
||||
l.buffer.WriteRune(r)
|
||||
l.currentTokenStop++
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
func (l *tomlLexer) ignore() {
|
||||
l.buffer.Reset()
|
||||
l.currentTokenStart = l.currentTokenStop
|
||||
l.line = l.endbufferLine
|
||||
l.col = l.endbufferCol
|
||||
}
|
||||
@@ -77,49 +74,46 @@ func (l *tomlLexer) fastForward(n int) {
|
||||
}
|
||||
|
||||
func (l *tomlLexer) emitWithValue(t tokenType, value string) {
|
||||
l.tokens <- token{
|
||||
l.tokens = append(l.tokens, token{
|
||||
Position: Position{l.line, l.col},
|
||||
typ: t,
|
||||
val: value,
|
||||
}
|
||||
})
|
||||
l.ignore()
|
||||
}
|
||||
|
||||
func (l *tomlLexer) emit(t tokenType) {
|
||||
l.emitWithValue(t, l.buffer.String())
|
||||
l.emitWithValue(t, string(l.input[l.currentTokenStart:l.currentTokenStop]))
|
||||
}
|
||||
|
||||
func (l *tomlLexer) peek() rune {
|
||||
r, _, err := l.input.ReadRune()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
if l.inputIdx >= len(l.input) {
|
||||
return eof
|
||||
}
|
||||
l.input.UnreadRune()
|
||||
return r
|
||||
return l.input[l.inputIdx]
|
||||
}
|
||||
|
||||
func (l *tomlLexer) peekString(size int) string {
|
||||
maxIdx := len(l.input)
|
||||
upperIdx := l.inputIdx + size // FIXME: potential overflow
|
||||
if upperIdx > maxIdx {
|
||||
upperIdx = maxIdx
|
||||
}
|
||||
return string(l.input[l.inputIdx:upperIdx])
|
||||
}
|
||||
|
||||
func (l *tomlLexer) follow(next string) bool {
|
||||
for _, expectedRune := range next {
|
||||
r, _, err := l.input.ReadRune()
|
||||
defer l.input.UnreadRune()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if expectedRune != r {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
return next == l.peekString(len(next))
|
||||
}
|
||||
|
||||
// Error management
|
||||
|
||||
func (l *tomlLexer) errorf(format string, args ...interface{}) tomlLexStateFn {
|
||||
l.tokens <- token{
|
||||
l.tokens = append(l.tokens, token{
|
||||
Position: Position{l.line, l.col},
|
||||
typ: tokenError,
|
||||
val: fmt.Sprintf(format, args...),
|
||||
}
|
||||
})
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -220,7 +214,7 @@ func (l *tomlLexer) lexRvalue() tomlLexStateFn {
|
||||
break
|
||||
}
|
||||
|
||||
possibleDate := string(l.input.PeekRunes(35))
|
||||
possibleDate := l.peekString(35)
|
||||
dateMatch := dateRegexp.FindString(possibleDate)
|
||||
if dateMatch != "" {
|
||||
l.fastForward(len(dateMatch))
|
||||
@@ -537,7 +531,7 @@ func (l *tomlLexer) lexInsideTableArrayKey() tomlLexStateFn {
|
||||
for r := l.peek(); r != eof; r = l.peek() {
|
||||
switch r {
|
||||
case ']':
|
||||
if l.buffer.Len() > 0 {
|
||||
if l.currentTokenStop > l.currentTokenStart {
|
||||
l.emit(tokenKeyGroupArray)
|
||||
}
|
||||
l.next()
|
||||
@@ -560,7 +554,7 @@ func (l *tomlLexer) lexInsideTableKey() tomlLexStateFn {
|
||||
for r := l.peek(); r != eof; r = l.peek() {
|
||||
switch r {
|
||||
case ']':
|
||||
if l.buffer.Len() > 0 {
|
||||
if l.currentTokenStop > l.currentTokenStart {
|
||||
l.emit(tokenKeyGroup)
|
||||
}
|
||||
l.next()
|
||||
@@ -635,7 +629,6 @@ func (l *tomlLexer) run() {
|
||||
for state := l.lexVoid; state != nil; {
|
||||
state = state()
|
||||
}
|
||||
close(l.tokens)
|
||||
}
|
||||
|
||||
func init() {
|
||||
@@ -643,16 +636,16 @@ func init() {
|
||||
}
|
||||
|
||||
// Entry point
|
||||
func lexToml(input io.Reader) chan token {
|
||||
bufferedInput := buffruneio.NewReader(input)
|
||||
func lexToml(inputBytes []byte) []token {
|
||||
runes := bytes.Runes(inputBytes)
|
||||
l := &tomlLexer{
|
||||
input: bufferedInput,
|
||||
tokens: make(chan token),
|
||||
input: runes,
|
||||
tokens: make([]token, 0, 256),
|
||||
line: 1,
|
||||
col: 1,
|
||||
endbufferLine: 1,
|
||||
endbufferCol: 1,
|
||||
}
|
||||
go l.run()
|
||||
l.run()
|
||||
return l.tokens
|
||||
}
|
||||
|
||||
+5
-34
@@ -1,38 +1,14 @@
|
||||
package toml
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strings"
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func testFlow(t *testing.T, input string, expectedFlow []token) {
|
||||
ch := lexToml(strings.NewReader(input))
|
||||
for _, expected := range expectedFlow {
|
||||
token := <-ch
|
||||
if token != expected {
|
||||
t.Log("While testing: ", input)
|
||||
t.Log("compared (got)", token, "to (expected)", expected)
|
||||
t.Log("\tvalue:", token.val, "<->", expected.val)
|
||||
t.Log("\tvalue as bytes:", []byte(token.val), "<->", []byte(expected.val))
|
||||
t.Log("\ttype:", token.typ.String(), "<->", expected.typ.String())
|
||||
t.Log("\tline:", token.Line, "<->", expected.Line)
|
||||
t.Log("\tcolumn:", token.Col, "<->", expected.Col)
|
||||
t.Log("compared", token, "to", expected)
|
||||
t.FailNow()
|
||||
}
|
||||
}
|
||||
|
||||
tok, ok := <-ch
|
||||
if ok {
|
||||
t.Log("channel is not closed!")
|
||||
t.Log(len(ch)+1, "tokens remaining:")
|
||||
|
||||
t.Log("token ->", tok)
|
||||
for token := range ch {
|
||||
t.Log("token ->", token)
|
||||
}
|
||||
t.FailNow()
|
||||
tokens := lexToml([]byte(input))
|
||||
if !reflect.DeepEqual(tokens, expectedFlow) {
|
||||
t.Fatal("Different flows. Expected\n", expectedFlow, "\nGot:\n", tokens)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -767,13 +743,8 @@ pluralizeListTitles = false
|
||||
url = "https://github.com/spf13/hugo/releases"
|
||||
weight = -200
|
||||
`
|
||||
rd := strings.NewReader(sample)
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
rd.Seek(0, os.SEEK_SET)
|
||||
ch := lexToml(rd)
|
||||
for _ = range ch {
|
||||
}
|
||||
lexToml([]byte(sample))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,9 +13,9 @@ import (
|
||||
)
|
||||
|
||||
type tomlParser struct {
|
||||
flow chan token
|
||||
flowIdx int
|
||||
flow []token
|
||||
tree *Tree
|
||||
tokensBuffer []token
|
||||
currentTable []string
|
||||
seenTableKeys []string
|
||||
}
|
||||
@@ -34,16 +34,10 @@ func (p *tomlParser) run() {
|
||||
}
|
||||
|
||||
func (p *tomlParser) peek() *token {
|
||||
if len(p.tokensBuffer) != 0 {
|
||||
return &(p.tokensBuffer[0])
|
||||
}
|
||||
|
||||
tok, ok := <-p.flow
|
||||
if !ok {
|
||||
if p.flowIdx >= len(p.flow) {
|
||||
return nil
|
||||
}
|
||||
p.tokensBuffer = append(p.tokensBuffer, tok)
|
||||
return &tok
|
||||
return &p.flow[p.flowIdx]
|
||||
}
|
||||
|
||||
func (p *tomlParser) assume(typ tokenType) {
|
||||
@@ -57,16 +51,12 @@ func (p *tomlParser) assume(typ tokenType) {
|
||||
}
|
||||
|
||||
func (p *tomlParser) getToken() *token {
|
||||
if len(p.tokensBuffer) != 0 {
|
||||
tok := p.tokensBuffer[0]
|
||||
p.tokensBuffer = p.tokensBuffer[1:]
|
||||
return &tok
|
||||
}
|
||||
tok, ok := <-p.flow
|
||||
if !ok {
|
||||
tok := p.peek()
|
||||
if tok == nil {
|
||||
return nil
|
||||
}
|
||||
return &tok
|
||||
p.flowIdx++
|
||||
return tok
|
||||
}
|
||||
|
||||
func (p *tomlParser) parseStart() tomlParserStateFn {
|
||||
@@ -374,13 +364,13 @@ func (p *tomlParser) parseArray() interface{} {
|
||||
return array
|
||||
}
|
||||
|
||||
func parseToml(flow chan token) *Tree {
|
||||
func parseToml(flow []token) *Tree {
|
||||
result := newTree()
|
||||
result.position = Position{1, 1}
|
||||
parser := &tomlParser{
|
||||
flowIdx: 0,
|
||||
flow: flow,
|
||||
tree: result,
|
||||
tokensBuffer: make([]token, 0),
|
||||
currentTable: make([]string, 0),
|
||||
seenTableKeys: make([]string, 0),
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"runtime"
|
||||
"strings"
|
||||
@@ -251,8 +252,8 @@ func (t *Tree) createSubTree(keys []string, pos Position) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// LoadReader creates a Tree from any io.Reader.
|
||||
func LoadReader(reader io.Reader) (tree *Tree, err error) {
|
||||
// LoadBytes creates a Tree from a []byte.
|
||||
func LoadBytes(b []byte) (tree *Tree, err error) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
if _, ok := r.(runtime.Error); ok {
|
||||
@@ -261,13 +262,23 @@ func LoadReader(reader io.Reader) (tree *Tree, err error) {
|
||||
err = errors.New(r.(string))
|
||||
}
|
||||
}()
|
||||
tree = parseToml(lexToml(reader))
|
||||
tree = parseToml(lexToml(b))
|
||||
return
|
||||
}
|
||||
|
||||
// LoadReader creates a Tree from any io.Reader.
|
||||
func LoadReader(reader io.Reader) (tree *Tree, err error) {
|
||||
inputBytes, err := ioutil.ReadAll(reader)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
tree, err = LoadBytes(inputBytes)
|
||||
return
|
||||
}
|
||||
|
||||
// Load creates a Tree from a string.
|
||||
func Load(content string) (tree *Tree, err error) {
|
||||
return LoadReader(strings.NewReader(content))
|
||||
return LoadBytes([]byte(content))
|
||||
}
|
||||
|
||||
// LoadFile creates a Tree from a file.
|
||||
|
||||
Reference in New Issue
Block a user