diff --git a/lexer.go b/lexer.go
index db3ab4e..1b6647d 100644
--- a/lexer.go
+++ b/lexer.go
@@ -9,12 +9,9 @@ import (
 	"bytes"
 	"errors"
 	"fmt"
-	"io"
 	"regexp"
 	"strconv"
 	"strings"
-
-	"github.com/pelletier/go-buffruneio"
 )
 
 var dateRegexp *regexp.Regexp
@@ -24,29 +21,29 @@ type tomlLexStateFn func() tomlLexStateFn
 
 // Define lexer
 type tomlLexer struct {
-	input         *buffruneio.Reader // Textual source
-	buffer        bytes.Buffer       // Runes composing the current token
-	tokens        chan token
-	depth         int
-	line          int
-	col           int
-	endbufferLine int
-	endbufferCol  int
+	inputIdx          int
+	input             []rune // Textual source
+	currentTokenStart int
+	currentTokenStop  int
+	tokens            []token
+	depth             int
+	line              int
+	col               int
+	endbufferLine     int
+	endbufferCol      int
 }
 
 // Basic read operations on input
 
 func (l *tomlLexer) read() rune {
-	r, _, err := l.input.ReadRune()
-	if err != nil {
-		panic(err)
-	}
+	r := l.peek()
 	if r == '\n' {
 		l.endbufferLine++
 		l.endbufferCol = 1
 	} else {
 		l.endbufferCol++
 	}
+	l.inputIdx++
 	return r
 }
 
@@ -54,13 +51,13 @@ func (l *tomlLexer) next() rune {
 	r := l.read()
 
 	if r != eof {
-		l.buffer.WriteRune(r)
+		l.currentTokenStop++
 	}
 	return r
 }
 
 func (l *tomlLexer) ignore() {
-	l.buffer.Reset()
+	l.currentTokenStart = l.currentTokenStop
 	l.line = l.endbufferLine
 	l.col = l.endbufferCol
 }
@@ -77,49 +74,46 @@ func (l *tomlLexer) fastForward(n int) {
 }
 
 func (l *tomlLexer) emitWithValue(t tokenType, value string) {
-	l.tokens <- token{
+	l.tokens = append(l.tokens, token{
 		Position: Position{l.line, l.col},
 		typ:      t,
 		val:      value,
-	}
+	})
 	l.ignore()
 }
 
 func (l *tomlLexer) emit(t tokenType) {
-	l.emitWithValue(t, l.buffer.String())
+	l.emitWithValue(t, string(l.input[l.currentTokenStart:l.currentTokenStop]))
 }
 
 func (l *tomlLexer) peek() rune {
-	r, _, err := l.input.ReadRune()
-	if err != nil {
-		panic(err)
+	if l.inputIdx >= len(l.input) {
+		return eof
 	}
-	l.input.UnreadRune()
-	return r
+	return l.input[l.inputIdx]
+}
+
+func (l *tomlLexer) peekString(size int) string {
+	maxIdx := len(l.input)
+	upperIdx := l.inputIdx + size // FIXME: potential overflow
+	if upperIdx > maxIdx {
+		upperIdx = maxIdx
+	}
+	return string(l.input[l.inputIdx:upperIdx])
 }
 
 func (l *tomlLexer) follow(next string) bool {
-	for _, expectedRune := range next {
-		r, _, err := l.input.ReadRune()
-		defer l.input.UnreadRune()
-		if err != nil {
-			panic(err)
-		}
-		if expectedRune != r {
-			return false
-		}
-	}
-	return true
+	return next == l.peekString(len(next))
 }
 
 // Error management
 
 func (l *tomlLexer) errorf(format string, args ...interface{}) tomlLexStateFn {
-	l.tokens <- token{
+	l.tokens = append(l.tokens, token{
 		Position: Position{l.line, l.col},
 		typ:      tokenError,
 		val:      fmt.Sprintf(format, args...),
-	}
+	})
 	return nil
 }
 
@@ -220,7 +214,7 @@ func (l *tomlLexer) lexRvalue() tomlLexStateFn {
 			break
 		}
 
-		possibleDate := string(l.input.PeekRunes(35))
+		possibleDate := l.peekString(35)
 		dateMatch := dateRegexp.FindString(possibleDate)
 		if dateMatch != "" {
 			l.fastForward(len(dateMatch))
@@ -537,7 +531,7 @@ func (l *tomlLexer) lexInsideTableArrayKey() tomlLexStateFn {
 	for r := l.peek(); r != eof; r = l.peek() {
 		switch r {
 		case ']':
-			if l.buffer.Len() > 0 {
+			if l.currentTokenStop > l.currentTokenStart {
 				l.emit(tokenKeyGroupArray)
 			}
 			l.next()
@@ -560,7 +554,7 @@ func (l *tomlLexer) lexInsideTableKey() tomlLexStateFn {
 	for r := l.peek(); r != eof; r = l.peek() {
 		switch r {
 		case ']':
-			if l.buffer.Len() > 0 {
+			if l.currentTokenStop > l.currentTokenStart {
 				l.emit(tokenKeyGroup)
 			}
 			l.next()
@@ -635,7 +629,6 @@ func (l *tomlLexer) run() {
 	for state := l.lexVoid; state != nil; {
 		state = state()
 	}
-	close(l.tokens)
 }
 
 func init() {
@@ -643,16 +636,16 @@
 }
 
 // Entry point
-func lexToml(input io.Reader) chan token {
-	bufferedInput := buffruneio.NewReader(input)
+func lexToml(inputBytes []byte) []token {
+	runes := bytes.Runes(inputBytes)
 	l := &tomlLexer{
-		input:         bufferedInput,
-		tokens:        make(chan token),
+		input:         runes,
+		tokens:        make([]token, 0, 256),
 		line:          1,
 		col:           1,
 		endbufferLine: 1,
 		endbufferCol:  1,
 	}
-	go l.run()
+	l.run()
 	return l.tokens
 }
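Note on the lexer refactor above: the buffruneio.Reader and the token channel are replaced by a []rune indexed with inputIdx, so peek() becomes a bounds check, read() an index increment, and tokens accumulate in a slice instead of being sent across a goroutine boundary. A minimal standalone sketch of that peek/read pattern follows; runeScanner and eofRune are illustrative names, not identifiers from this patch.

	package main

	import (
		"bytes"
		"fmt"
	)

	// eofRune stands in for the lexer's private eof sentinel (illustrative).
	const eofRune rune = -1

	// runeScanner mirrors the index-based pattern in the patch: peek never
	// advances, read advances by exactly one rune.
	type runeScanner struct {
		input []rune
		idx   int
	}

	func (s *runeScanner) peek() rune {
		if s.idx >= len(s.input) {
			return eofRune
		}
		return s.input[s.idx]
	}

	func (s *runeScanner) read() rune {
		r := s.peek()
		s.idx++
		return r
	}

	func main() {
		s := &runeScanner{input: bytes.Runes([]byte("a = 1"))}
		for r := s.read(); r != eofRune; r = s.read() {
			fmt.Printf("%q ", r)
		}
		fmt.Println()
	}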
diff --git a/lexer_test.go b/lexer_test.go
index dce7a63..313b83c 100644
--- a/lexer_test.go
+++ b/lexer_test.go
@@ -1,38 +1,14 @@
 package toml
 
 import (
-	"os"
-	"strings"
+	"reflect"
 	"testing"
 )
 
 func testFlow(t *testing.T, input string, expectedFlow []token) {
-	ch := lexToml(strings.NewReader(input))
-	for _, expected := range expectedFlow {
-		token := <-ch
-		if token != expected {
-			t.Log("While testing: ", input)
-			t.Log("compared (got)", token, "to (expected)", expected)
-			t.Log("\tvalue:", token.val, "<->", expected.val)
-			t.Log("\tvalue as bytes:", []byte(token.val), "<->", []byte(expected.val))
-			t.Log("\ttype:", token.typ.String(), "<->", expected.typ.String())
-			t.Log("\tline:", token.Line, "<->", expected.Line)
-			t.Log("\tcolumn:", token.Col, "<->", expected.Col)
-			t.Log("compared", token, "to", expected)
-			t.FailNow()
-		}
-	}
-
-	tok, ok := <-ch
-	if ok {
-		t.Log("channel is not closed!")
-		t.Log(len(ch)+1, "tokens remaining:")
-
-		t.Log("token ->", tok)
-		for token := range ch {
-			t.Log("token ->", token)
-		}
-		t.FailNow()
+	tokens := lexToml([]byte(input))
+	if !reflect.DeepEqual(tokens, expectedFlow) {
+		t.Fatal("Different flows. Expected\n", expectedFlow, "\nGot:\n", tokens)
 	}
 }
 
@@ -767,13 +743,8 @@ pluralizeListTitles = false
 url = "https://github.com/spf13/hugo/releases"
 weight = -200
 `
-	rd := strings.NewReader(sample)
-	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
-		rd.Seek(0, os.SEEK_SET)
-		ch := lexToml(rd)
-		for _ = range ch {
-		}
+		lexToml([]byte(sample))
 	}
 }
diff --git a/parser.go b/parser.go
index 64eb0e5..8ee49cb 100644
--- a/parser.go
+++ b/parser.go
@@ -13,9 +13,9 @@ import (
 )
 
 type tomlParser struct {
-	flow          chan token
+	flowIdx       int
+	flow          []token
 	tree          *Tree
-	tokensBuffer  []token
 	currentTable  []string
 	seenTableKeys []string
 }
@@ -34,16 +34,10 @@
 }
 
 func (p *tomlParser) peek() *token {
-	if len(p.tokensBuffer) != 0 {
-		return &(p.tokensBuffer[0])
-	}
-
-	tok, ok := <-p.flow
-	if !ok {
+	if p.flowIdx >= len(p.flow) {
 		return nil
 	}
-	p.tokensBuffer = append(p.tokensBuffer, tok)
-	return &tok
+	return &p.flow[p.flowIdx]
 }
 
 func (p *tomlParser) assume(typ tokenType) {
@@ -57,16 +51,12 @@
 }
 
 func (p *tomlParser) getToken() *token {
-	if len(p.tokensBuffer) != 0 {
-		tok := p.tokensBuffer[0]
-		p.tokensBuffer = p.tokensBuffer[1:]
-		return &tok
-	}
-	tok, ok := <-p.flow
-	if !ok {
+	tok := p.peek()
+	if tok == nil {
 		return nil
 	}
-	return &tok
+	p.flowIdx++
+	return tok
 }
 
 func (p *tomlParser) parseStart() tomlParserStateFn {
@@ -374,13 +364,13 @@
 	return array
 }
 
-func parseToml(flow chan token) *Tree {
+func parseToml(flow []token) *Tree {
 	result := newTree()
 	result.position = Position{1, 1}
 	parser := &tomlParser{
+		flowIdx:       0,
 		flow:          flow,
 		tree:          result,
-		tokensBuffer:  make([]token, 0),
 		currentTable:  make([]string, 0),
 		seenTableKeys: make([]string, 0),
 	}
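The parser-side counterpart is a plain slice cursor: peek returns a pointer to the current token (or nil past the end) without consuming it, and getToken advances flowIdx. A small self-contained sketch of the same cursor shape, with illustrative names (cursor, next) that are not identifiers from this patch:

	package main

	import "fmt"

	// cursor mirrors the parser's slice-backed token stream: peek inspects
	// the current element, next consumes it.
	type cursor struct {
		items []string
		idx   int
	}

	func (c *cursor) peek() *string {
		if c.idx >= len(c.items) {
			return nil
		}
		return &c.items[c.idx]
	}

	func (c *cursor) next() *string {
		it := c.peek()
		if it == nil {
			return nil
		}
		c.idx++
		return it
	}

	func main() {
		c := &cursor{items: []string{"key", "=", "value"}}
		for it := c.next(); it != nil; it = c.next() {
			fmt.Println(*it)
		}
	}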
"errors" "fmt" "io" + "io/ioutil" "os" "runtime" "strings" @@ -251,8 +252,8 @@ func (t *Tree) createSubTree(keys []string, pos Position) error { return nil } -// LoadReader creates a Tree from any io.Reader. -func LoadReader(reader io.Reader) (tree *Tree, err error) { +// LoadBytes creates a Tree from a []byte. +func LoadBytes(b []byte) (tree *Tree, err error) { defer func() { if r := recover(); r != nil { if _, ok := r.(runtime.Error); ok { @@ -261,13 +262,23 @@ func LoadReader(reader io.Reader) (tree *Tree, err error) { err = errors.New(r.(string)) } }() - tree = parseToml(lexToml(reader)) + tree = parseToml(lexToml(b)) + return +} + +// LoadReader creates a Tree from any io.Reader. +func LoadReader(reader io.Reader) (tree *Tree, err error) { + inputBytes, err := ioutil.ReadAll(reader) + if err != nil { + return + } + tree, err = LoadBytes(inputBytes) return } // Load creates a Tree from a string. func Load(content string) (tree *Tree, err error) { - return LoadReader(strings.NewReader(content)) + return LoadBytes([]byte(content)) } // LoadFile creates a Tree from a file.