From 5c66c78bc578ce3d4a07c91769e87c15fd545c34 Mon Sep 17 00:00:00 2001 From: Thomas Pelletier Date: Sun, 11 Oct 2020 15:31:33 -0400 Subject: [PATCH] Remove date regexp (#447) * Remove date regexp Hand-roll the date matching logic to avoid trying to match a regexp on every integer. ``` benchmark old ns/op new ns/op delta BenchmarkUnmarshalToml-8 293449 272134 -7.26% benchmark old allocs new allocs delta BenchmarkUnmarshalToml-8 2746 2650 -3.50% benchmark old bytes new bytes delta BenchmarkUnmarshalToml-8 133604 127548 -4.53% ``` * Remove fuzzit The company has been acquired by GitLab and is shutting down. --- azure-pipelines.yml | 42 ---- benchmark/benchmark_test.go | 1 + fuzzit.sh | 26 --- lexer.go | 304 ++++++++++++++++++++++++---- lexer_test.go | 384 +++++++++++++++++++++++++++++------- parser.go | 56 +++--- parser_test.go | 28 +++ token.go | 6 +- token_test.go | 3 +- 9 files changed, 643 insertions(+), 207 deletions(-) delete mode 100755 fuzzit.sh diff --git a/azure-pipelines.yml b/azure-pipelines.yml index ff5376b..bd22e7d 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -2,30 +2,6 @@ trigger: - master stages: -- stage: fuzzit - displayName: "Run Fuzzit" - dependsOn: [] - condition: and(succeeded(), eq(variables['Build.SourceBranchName'], 'master')) - jobs: - - job: submit - displayName: "Submit" - pool: - vmImage: ubuntu-latest - steps: - - task: GoTool@0 - displayName: "Install Go 1.15" - inputs: - version: "1.15" - - script: echo "##vso[task.setvariable variable=PATH]${PATH}:/home/vsts/go/bin/" - - script: mkdir -p ${HOME}/go/src/github.com/pelletier/go-toml - - script: cp -R . 
${HOME}/go/src/github.com/pelletier/go-toml - - task: Bash@3 - inputs: - filePath: './fuzzit.sh' - env: - TYPE: fuzzing - FUZZIT_API_KEY: $(FUZZIT_API_KEY) - - stage: run_checks displayName: "Check" dependsOn: [] @@ -80,24 +56,6 @@ stages: filePath: './benchmark.sh' arguments: "master $(Build.Repository.Uri)" - - job: fuzzing - displayName: "fuzzing" - pool: - vmImage: ubuntu-latest - steps: - - task: GoTool@0 - displayName: "Install Go 1.15" - inputs: - version: "1.15" - - script: echo "##vso[task.setvariable variable=PATH]${PATH}:/home/vsts/go/bin/" - - script: mkdir -p ${HOME}/go/src/github.com/pelletier/go-toml - - script: cp -R . ${HOME}/go/src/github.com/pelletier/go-toml - - task: Bash@3 - inputs: - filePath: './fuzzit.sh' - env: - TYPE: local-regression - - job: go_unit_tests displayName: "unit tests" strategy: diff --git a/benchmark/benchmark_test.go b/benchmark/benchmark_test.go index 8709b3b..faf2da8 100644 --- a/benchmark/benchmark_test.go +++ b/benchmark/benchmark_test.go @@ -137,6 +137,7 @@ func BenchmarkUnmarshalToml(b *testing.B) { if err != nil { b.Fatal(err) } + b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { target := benchmarkDoc{} diff --git a/fuzzit.sh b/fuzzit.sh deleted file mode 100755 index b575a60..0000000 --- a/fuzzit.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -set -xe - -# go-fuzz doesn't support modules yet, so ensure we do everything -# in the old style GOPATH way -export GO111MODULE="off" - -# install go-fuzz -go get -u github.com/dvyukov/go-fuzz/go-fuzz github.com/dvyukov/go-fuzz/go-fuzz-build - -# target name can only contain lower-case letters (a-z), digits (0-9) and a dash (-) -# to add another target, make sure to create it with `fuzzit create target` -# before using `fuzzit create job` -TARGET=toml-fuzzer - -go-fuzz-build -libfuzzer -o ${TARGET}.a github.com/pelletier/go-toml -clang -fsanitize=fuzzer ${TARGET}.a -o ${TARGET} - -# install fuzzit for talking to fuzzit.dev service -# or latest version: -# 
https://github.com/fuzzitdev/fuzzit/releases/latest/download/fuzzit_Linux_x86_64 -wget -q -O fuzzit https://github.com/fuzzitdev/fuzzit/releases/download/v2.4.52/fuzzit_Linux_x86_64 -chmod a+x fuzzit - -# TODO: change kkowalczyk to go-toml and create toml-fuzzer target there -./fuzzit create job --type $TYPE go-toml/${TARGET} ${TARGET} diff --git a/lexer.go b/lexer.go index 4b14956..313908e 100644 --- a/lexer.go +++ b/lexer.go @@ -9,13 +9,10 @@ import ( "bytes" "errors" "fmt" - "regexp" "strconv" "strings" ) -var dateRegexp *regexp.Regexp - // Define state functions type tomlLexStateFn func() tomlLexStateFn @@ -216,18 +213,12 @@ func (l *tomlLexer) lexRvalue() tomlLexStateFn { break } - possibleDate := l.peekString(35) - dateSubmatches := dateRegexp.FindStringSubmatch(possibleDate) - if dateSubmatches != nil && dateSubmatches[0] != "" { - l.fastForward(len(dateSubmatches[0])) - if dateSubmatches[2] == "" { // no timezone information => local date - return l.lexLocalDate - } - return l.lexDate + if next == '+' || next == '-' { + return l.lexNumber } - if next == '+' || next == '-' || isDigit(next) { - return l.lexNumber + if isDigit(next) { + return l.lexDateTimeOrNumber } return l.errorf("no value can start with %c", next) @@ -237,6 +228,32 @@ func (l *tomlLexer) lexRvalue() tomlLexStateFn { return nil } +func (l *tomlLexer) lexDateTimeOrNumber() tomlLexStateFn { + // Could be either a date/time, or a digit. + // The options for date/times are: + // YYYY-... => date or date-time + // HH:... => time + // Anything else should be a number. 
+ + lookAhead := l.peekString(5) + if len(lookAhead) < 3 { + return l.lexNumber() + } + + for idx, r := range lookAhead { + if !isDigit(r) { + if idx == 2 && r == ':' { + return l.lexDateTimeOrTime() + } + if idx == 4 && r == '-' { + return l.lexDateTimeOrTime() + } + return l.lexNumber() + } + } + return l.lexNumber() +} + func (l *tomlLexer) lexLeftCurlyBrace() tomlLexStateFn { l.next() l.emit(tokenLeftCurlyBrace) @@ -254,14 +271,245 @@ func (l *tomlLexer) lexRightCurlyBrace() tomlLexStateFn { return l.lexRvalue } -func (l *tomlLexer) lexDate() tomlLexStateFn { - l.emit(tokenDate) +func (l *tomlLexer) lexDateTimeOrTime() tomlLexStateFn { + // Example matches: + // 1979-05-27T07:32:00Z + // 1979-05-27T00:32:00-07:00 + // 1979-05-27T00:32:00.999999-07:00 + // 1979-05-27 07:32:00Z + // 1979-05-27 00:32:00-07:00 + // 1979-05-27 00:32:00.999999-07:00 + // 1979-05-27T07:32:00 + // 1979-05-27T00:32:00.999999 + // 1979-05-27 07:32:00 + // 1979-05-27 00:32:00.999999 + // 1979-05-27 + // 07:32:00 + // 00:32:00.999999 + + // we already know those two are digits + l.next() + l.next() + + // Got 2 digits. At that point it could be either a time or a date(-time). + + r := l.next() + if r == ':' { + return l.lexTime() + } + + return l.lexDateTime() +} + +func (l *tomlLexer) lexDateTime() tomlLexStateFn { + // This state accepts an offset date-time, a local date-time, or a local date. 
+ // + // v--- cursor + // 1979-05-27T07:32:00Z + // 1979-05-27T00:32:00-07:00 + // 1979-05-27T00:32:00.999999-07:00 + // 1979-05-27 07:32:00Z + // 1979-05-27 00:32:00-07:00 + // 1979-05-27 00:32:00.999999-07:00 + // 1979-05-27T07:32:00 + // 1979-05-27T00:32:00.999999 + // 1979-05-27 07:32:00 + // 1979-05-27 00:32:00.999999 + // 1979-05-27 + + // date + + // already checked by lexRvalue + l.next() // digit + l.next() // - + + for i := 0; i < 2; i++ { + r := l.next() + if !isDigit(r) { + return l.errorf("invalid month digit in date: %c", r) + } + } + + r := l.next() + if r != '-' { + return l.errorf("expected - to separate month of a date, not %c", r) + } + + for i := 0; i < 2; i++ { + r := l.next() + if !isDigit(r) { + return l.errorf("invalid day digit in date: %c", r) + } + } + + l.emit(tokenLocalDate) + + r = l.peek() + + if r == eof { + + return l.lexRvalue + } + + if r != ' ' && r != 'T' { + return l.errorf("incorrect date/time separation character: %c", r) + } + + if r == ' ' { + lookAhead := l.peekString(3)[1:] + if len(lookAhead) < 2 { + return l.lexRvalue + } + for _, r := range lookAhead { + if !isDigit(r) { + return l.lexRvalue + } + } + } + + l.skip() // skip the T or ' ' + + // time + + for i := 0; i < 2; i++ { + r := l.next() + if !isDigit(r) { + return l.errorf("invalid hour digit in time: %c", r) + } + } + + r = l.next() + if r != ':' { + return l.errorf("time hour/minute separator should be :, not %c", r) + } + + for i := 0; i < 2; i++ { + r := l.next() + if !isDigit(r) { + return l.errorf("invalid minute digit in time: %c", r) + } + } + + r = l.next() + if r != ':' { + return l.errorf("time minute/second separator should be :, not %c", r) + } + + for i := 0; i < 2; i++ { + r := l.next() + if !isDigit(r) { + return l.errorf("invalid second digit in time: %c", r) + } + } + + r = l.peek() + if r == '.' 
{ + l.next() + r := l.next() + if !isDigit(r) { + return l.errorf("expected at least one digit in time's fraction, not %c", r) + } + + for { + r := l.peek() + if !isDigit(r) { + break + } + l.next() + } + } + + l.emit(tokenLocalTime) + + return l.lexTimeOffset + +} + +func (l *tomlLexer) lexTimeOffset() tomlLexStateFn { + // potential offset + + // Z + // -07:00 + // +07:00 + // nothing + + r := l.peek() + + if r == 'Z' { + l.next() + l.emit(tokenTimeOffset) + } else if r == '+' || r == '-' { + l.next() + + for i := 0; i < 2; i++ { + r := l.next() + if !isDigit(r) { + return l.errorf("invalid hour digit in time offset: %c", r) + } + } + + r = l.next() + if r != ':' { + return l.errorf("time offset hour/minute separator should be :, not %c", r) + } + + for i := 0; i < 2; i++ { + r := l.next() + if !isDigit(r) { + return l.errorf("invalid minute digit in time offset: %c", r) + } + } + + l.emit(tokenTimeOffset) + } + return l.lexRvalue } -func (l *tomlLexer) lexLocalDate() tomlLexStateFn { - l.emit(tokenLocalDate) +func (l *tomlLexer) lexTime() tomlLexStateFn { + // v--- cursor + // 07:32:00 + // 00:32:00.999999 + + for i := 0; i < 2; i++ { + r := l.next() + if !isDigit(r) { + return l.errorf("invalid minute digit in time: %c", r) + } + } + + r := l.next() + if r != ':' { + return l.errorf("time minute/second separator should be :, not %c", r) + } + + for i := 0; i < 2; i++ { + r := l.next() + if !isDigit(r) { + return l.errorf("invalid second digit in time: %c", r) + } + } + + r = l.peek() + if r == '.' { + l.next() + r := l.next() + if !isDigit(r) { + return l.errorf("expected at least one digit in time's fraction, not %c", r) + } + + for { + r := l.peek() + if !isDigit(r) { + break + } + l.next() + } + } + + l.emit(tokenLocalTime) return l.lexRvalue + } func (l *tomlLexer) lexTrue() tomlLexStateFn { @@ -767,28 +1015,6 @@ func (l *tomlLexer) run() { } } -func init() { - // Regexp for all date/time formats supported by TOML. 
- // Group 1: nano precision - // Group 2: timezone - // - // Example matches: - // 1979-05-27T07:32:00Z - // 1979-05-27T00:32:00-07:00 - // 1979-05-27T00:32:00.999999-07:00 - // 1979-05-27 07:32:00Z - // 1979-05-27 00:32:00-07:00 - // 1979-05-27 00:32:00.999999-07:00 - // 1979-05-27T07:32:00 - // 1979-05-27T00:32:00.999999 - // 1979-05-27 07:32:00 - // 1979-05-27 00:32:00.999999 - // 1979-05-27 - // 07:32:00 - // 00:32:00.999999 - dateRegexp = regexp.MustCompile(`^(?:\d{4}-\d{2}-\d{2}[T\s]?)?(?:\d{2}:\d{2}:\d{2}(\.\d{1,9})?(Z|[+-]\d{2}:\d{2})?)?`) -} - // Entry point func lexToml(inputBytes []byte) []token { runes := bytes.Runes(inputBytes) diff --git a/lexer_test.go b/lexer_test.go index d999f99..016e122 100644 --- a/lexer_test.go +++ b/lexer_test.go @@ -1,18 +1,63 @@ package toml import ( + "bytes" + "fmt" "reflect" - "strings" "testing" + "text/tabwriter" ) func testFlow(t *testing.T, input string, expectedFlow []token) { tokens := lexToml([]byte(input)) + if !reflect.DeepEqual(tokens, expectedFlow) { - t.Fatalf("Different flows.\nExpected:\n%v\nGot:\n%v", expectedFlow, tokens) + diffFlowsColumnsFatal(t, expectedFlow, tokens) } } +func diffFlowsColumnsFatal(t *testing.T, expectedFlow []token, actualFlow []token) { + max := len(expectedFlow) + if len(actualFlow) > max { + max = len(actualFlow) + } + + b := &bytes.Buffer{} + w := tabwriter.NewWriter(b, 0, 0, 1, ' ', tabwriter.Debug) + + fmt.Fprintln(w, "expected\tT\tP\tactual\tT\tP\tdiff") + + for i := 0; i < max; i++ { + expected := "" + expectedType := "" + expectedPos := "" + if i < len(expectedFlow) { + expected = fmt.Sprintf("%s", expectedFlow[i]) + expectedType = fmt.Sprintf("%s", expectedFlow[i].typ) + expectedPos = expectedFlow[i].Position.String() + } + actual := "" + actualType := "" + actualPos := "" + if i < len(actualFlow) { + actual = fmt.Sprintf("%s", actualFlow[i]) + actualType = fmt.Sprintf("%s", actualFlow[i].typ) + actualPos = actualFlow[i].Position.String() + } + different := "" + if i >= 
len(expectedFlow) { + different = "+" + } else if i >= len(actualFlow) { + different = "-" + } else if !reflect.DeepEqual(expectedFlow[i], actualFlow[i]) { + different = "x" + } + fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\t%s\t%s\n", expected, expectedType, expectedPos, actual, actualType, actualPos, different) + } + w.Flush() + t.Errorf("Different flows:\n%s", b.String()) +} + func TestValidKeyGroup(t *testing.T) { testFlow(t, "[hello world]", []token{ {Position{1, 1}, tokenLeftBracket, "["}, @@ -299,81 +344,280 @@ func TestKeyEqualArrayBoolsWithComments(t *testing.T) { }) } -func TestDateRegexp(t *testing.T) { - cases := map[string]string{ - "basic": "1979-05-27T07:32:00Z", - "offset": "1979-05-27T00:32:00-07:00", - "nano precision": "1979-05-27T00:32:00.999999-07:00", - "basic-no-T": "1979-05-27 07:32:00Z", - "offset-no-T": "1979-05-27 00:32:00-07:00", - "nano precision-no-T": "1979-05-27 00:32:00.999999-07:00", - "no-tz": "1979-05-27T07:32:00", - "no-tz-nano": "1979-05-27T00:32:00.999999", - "no-tz-no-t": "1979-05-27 07:32:00", - "no-tz-no-t-nano": "1979-05-27 00:32:00.999999", - "date-no-tz": "1979-05-27", - "time-no-tz": "07:32:00", - "time-no-tz-nano": "00:32:00.999999", - "err:date-1year": "9-05-27", - "err:date-2year": "79-05-27", - "err:date-3year": "979-05-27", - "err:date-T-prefix": "T07:32:00", - } - - for name, value := range cases { - res := dateRegexp.FindString(value) - if strings.HasPrefix(name, "err:") { - if res != "" { - t.Error("failed date regexp test", name) - } - } else if res == "" { - t.Error("failed date regexp test", name) - } - } -} - func TestKeyEqualDate(t *testing.T) { - testFlow(t, "foo = 1979-05-27T07:32:00Z", []token{ - {Position{1, 1}, tokenKey, "foo"}, - {Position{1, 5}, tokenEqual, "="}, - {Position{1, 7}, tokenDate, "1979-05-27T07:32:00Z"}, - {Position{1, 27}, tokenEOF, ""}, + t.Run("local date time", func(t *testing.T) { + testFlow(t, "foo = 1979-05-27T07:32:00", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, 
tokenEqual, "="}, + {Position{1, 7}, tokenLocalDate, "1979-05-27"}, + {Position{1, 18}, tokenLocalTime, "07:32:00"}, + {Position{1, 26}, tokenEOF, ""}, + }) }) - testFlow(t, "foo = 1979-05-27T00:32:00-07:00", []token{ - {Position{1, 1}, tokenKey, "foo"}, - {Position{1, 5}, tokenEqual, "="}, - {Position{1, 7}, tokenDate, "1979-05-27T00:32:00-07:00"}, - {Position{1, 32}, tokenEOF, ""}, + + t.Run("local date time space", func(t *testing.T) { + testFlow(t, "foo = 1979-05-27 07:32:00", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenLocalDate, "1979-05-27"}, + {Position{1, 18}, tokenLocalTime, "07:32:00"}, + {Position{1, 26}, tokenEOF, ""}, + }) }) - testFlow(t, "foo = 1979-05-27T00:32:00.999999-07:00", []token{ - {Position{1, 1}, tokenKey, "foo"}, - {Position{1, 5}, tokenEqual, "="}, - {Position{1, 7}, tokenDate, "1979-05-27T00:32:00.999999-07:00"}, - {Position{1, 39}, tokenEOF, ""}, + + t.Run("local date time fraction", func(t *testing.T) { + testFlow(t, "foo = 1979-05-27T00:32:00.999999", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenLocalDate, "1979-05-27"}, + {Position{1, 18}, tokenLocalTime, "00:32:00.999999"}, + {Position{1, 33}, tokenEOF, ""}, + }) }) - testFlow(t, "foo = 1979-05-27 07:32:00Z", []token{ - {Position{1, 1}, tokenKey, "foo"}, - {Position{1, 5}, tokenEqual, "="}, - {Position{1, 7}, tokenDate, "1979-05-27 07:32:00Z"}, - {Position{1, 27}, tokenEOF, ""}, + + t.Run("local date time fraction space", func(t *testing.T) { + testFlow(t, "foo = 1979-05-27 00:32:00.999999", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenLocalDate, "1979-05-27"}, + {Position{1, 18}, tokenLocalTime, "00:32:00.999999"}, + {Position{1, 33}, tokenEOF, ""}, + }) }) - testFlow(t, "foo = 07:32:00", []token{ - {Position{1, 1}, tokenKey, "foo"}, - {Position{1, 5}, tokenEqual, "="}, - {Position{1, 7}, 
tokenLocalDate, "07:32:00"}, - {Position{1, 15}, tokenEOF, ""}, + + t.Run("offset date-time utc", func(t *testing.T) { + testFlow(t, "foo = 1979-05-27T07:32:00Z", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenLocalDate, "1979-05-27"}, + {Position{1, 18}, tokenLocalTime, "07:32:00"}, + {Position{1, 26}, tokenTimeOffset, "Z"}, + {Position{1, 27}, tokenEOF, ""}, + }) }) - testFlow(t, "foo = 07:32:00Z", []token{ - {Position{1, 1}, tokenKey, "foo"}, - {Position{1, 5}, tokenEqual, "="}, - {Position{1, 7}, tokenDate, "07:32:00Z"}, - {Position{1, 16}, tokenEOF, ""}, + + t.Run("offset date-time -07:00", func(t *testing.T) { + testFlow(t, "foo = 1979-05-27T00:32:00-07:00", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenLocalDate, "1979-05-27"}, + {Position{1, 18}, tokenLocalTime, "00:32:00"}, + {Position{1, 26}, tokenTimeOffset, "-07:00"}, + {Position{1, 32}, tokenEOF, ""}, + }) }) - testFlow(t, "foo = 00:32:00.999999-07:00", []token{ - {Position{1, 1}, tokenKey, "foo"}, - {Position{1, 5}, tokenEqual, "="}, - {Position{1, 7}, tokenDate, "00:32:00.999999-07:00"}, - {Position{1, 28}, tokenEOF, ""}, + + t.Run("offset date-time fractions -07:00", func(t *testing.T) { + testFlow(t, "foo = 1979-05-27T00:32:00.999999-07:00", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenLocalDate, "1979-05-27"}, + {Position{1, 18}, tokenLocalTime, "00:32:00.999999"}, + {Position{1, 33}, tokenTimeOffset, "-07:00"}, + {Position{1, 39}, tokenEOF, ""}, + }) + }) + + t.Run("offset date-time space separated utc", func(t *testing.T) { + testFlow(t, "foo = 1979-05-27 07:32:00Z", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenLocalDate, "1979-05-27"}, + {Position{1, 18}, tokenLocalTime, "07:32:00"}, + {Position{1, 26}, tokenTimeOffset, "Z"}, + {Position{1, 27}, 
tokenEOF, ""}, + }) + }) + + t.Run("offset date-time space separated offset", func(t *testing.T) { + testFlow(t, "foo = 1979-05-27 00:32:00-07:00", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenLocalDate, "1979-05-27"}, + {Position{1, 18}, tokenLocalTime, "00:32:00"}, + {Position{1, 26}, tokenTimeOffset, "-07:00"}, + {Position{1, 32}, tokenEOF, ""}, + }) + }) + + t.Run("offset date-time space separated fraction offset", func(t *testing.T) { + testFlow(t, "foo = 1979-05-27 00:32:00.999999-07:00", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenLocalDate, "1979-05-27"}, + {Position{1, 18}, tokenLocalTime, "00:32:00.999999"}, + {Position{1, 33}, tokenTimeOffset, "-07:00"}, + {Position{1, 39}, tokenEOF, ""}, + }) + }) + + t.Run("local date", func(t *testing.T) { + testFlow(t, "foo = 1979-05-27", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenLocalDate, "1979-05-27"}, + {Position{1, 17}, tokenEOF, ""}, + }) + }) + + t.Run("local time", func(t *testing.T) { + testFlow(t, "foo = 07:32:00", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenLocalTime, "07:32:00"}, + {Position{1, 15}, tokenEOF, ""}, + }) + }) + + t.Run("local time fraction", func(t *testing.T) { + testFlow(t, "foo = 00:32:00.999999", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenLocalTime, "00:32:00.999999"}, + {Position{1, 22}, tokenEOF, ""}, + }) + }) + + t.Run("local time invalid minute digit", func(t *testing.T) { + testFlow(t, "foo = 00:3x:00.999999", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenError, "invalid minute digit in time: x"}, + }) + }) + + t.Run("local time invalid minute/second digit", func(t *testing.T) { + testFlow(t, "foo = 
00:30x00.999999", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenError, "time minute/second separator should be :, not x"}, + }) + }) + + t.Run("local time invalid second digit", func(t *testing.T) { + testFlow(t, "foo = 00:30:x0.999999", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenError, "invalid second digit in time: x"}, + }) + }) + + t.Run("local time invalid second digit", func(t *testing.T) { + testFlow(t, "foo = 00:30:00.F", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenError, "expected at least one digit in time's fraction, not F"}, + }) + }) + + t.Run("local date-time invalid minute digit", func(t *testing.T) { + testFlow(t, "foo = 1979-05-27 00:3x:00.999999", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenLocalDate, "1979-05-27"}, + {Position{1, 18}, tokenError, "invalid minute digit in time: x"}, + }) + }) + + t.Run("local date-time invalid hour digit", func(t *testing.T) { + testFlow(t, "foo = 1979-05-27T0x:30:00.999999", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenLocalDate, "1979-05-27"}, + {Position{1, 18}, tokenError, "invalid hour digit in time: x"}, + }) + }) + + t.Run("local date-time invalid hour digit", func(t *testing.T) { + testFlow(t, "foo = 1979-05-27T00x30:00.999999", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenLocalDate, "1979-05-27"}, + {Position{1, 18}, tokenError, "time hour/minute separator should be :, not x"}, + }) + }) + + t.Run("local date-time invalid minute/second digit", func(t *testing.T) { + testFlow(t, "foo = 1979-05-27 00:30x00.999999", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenLocalDate, 
"1979-05-27"}, + {Position{1, 18}, tokenError, "time minute/second separator should be :, not x"}, + }) + }) + + t.Run("local date-time invalid second digit", func(t *testing.T) { + testFlow(t, "foo = 1979-05-27 00:30:x0.999999", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenLocalDate, "1979-05-27"}, + {Position{1, 18}, tokenError, "invalid second digit in time: x"}, + }) + }) + + t.Run("local date-time invalid fraction", func(t *testing.T) { + testFlow(t, "foo = 1979-05-27 00:30:00.F", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenLocalDate, "1979-05-27"}, + {Position{1, 18}, tokenError, "expected at least one digit in time's fraction, not F"}, + }) + }) + + t.Run("local date-time invalid month-date separator", func(t *testing.T) { + testFlow(t, "foo = 1979-05X27 00:30:00.F", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenError, "expected - to separate month of a date, not X"}, + }) + }) + + t.Run("local date-time extra whitespace", func(t *testing.T) { + testFlow(t, "foo = 1979-05-27 ", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenLocalDate, "1979-05-27"}, + {Position{1, 19}, tokenEOF, ""}, + }) + }) + + t.Run("local date-time extra whitespace", func(t *testing.T) { + testFlow(t, "foo = 1979-05-27 ", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenLocalDate, "1979-05-27"}, + {Position{1, 22}, tokenEOF, ""}, + }) + }) + + t.Run("offset date-time space separated offset", func(t *testing.T) { + testFlow(t, "foo = 1979-05-27 00:32:00-0x:00", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenLocalDate, "1979-05-27"}, + {Position{1, 18}, tokenLocalTime, "00:32:00"}, + {Position{1, 26}, tokenError, "invalid hour 
digit in time offset: x"}, + }) + }) + + t.Run("offset date-time space separated offset", func(t *testing.T) { + testFlow(t, "foo = 1979-05-27 00:32:00-07x00", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenLocalDate, "1979-05-27"}, + {Position{1, 18}, tokenLocalTime, "00:32:00"}, + {Position{1, 26}, tokenError, "time offset hour/minute separator should be :, not x"}, + }) + }) + + t.Run("offset date-time space separated offset", func(t *testing.T) { + testFlow(t, "foo = 1979-05-27 00:32:00-07:x0", []token{ + {Position{1, 1}, tokenKey, "foo"}, + {Position{1, 5}, tokenEqual, "="}, + {Position{1, 7}, tokenLocalDate, "1979-05-27"}, + {Position{1, 18}, tokenLocalTime, "00:32:00"}, + {Position{1, 26}, tokenError, "invalid minute digit in time offset: x"}, + }) }) } diff --git a/parser.go b/parser.go index 7bf40bb..7eb672e 100644 --- a/parser.go +++ b/parser.go @@ -322,42 +322,44 @@ func (p *tomlParser) parseRvalue() interface{} { p.raiseError(tok, "%s", err) } return val - case tokenDate: - layout := time.RFC3339Nano - if !strings.Contains(tok.val, "T") { - layout = strings.Replace(layout, "T", " ", 1) - } - val, err := time.ParseInLocation(layout, tok.val, time.UTC) + case tokenLocalTime: + val, err := ParseLocalTime(tok.val) if err != nil { p.raiseError(tok, "%s", err) } return val case tokenLocalDate: - v := strings.Replace(tok.val, " ", "T", -1) - isDateTime := false - isTime := false - for _, c := range v { - if c == 'T' || c == 't' { - isDateTime = true - break - } - if c == ':' { - isTime = true - break + // a local date may be followed by: + // * nothing: this is a local date + // * a local time: this is a local date-time + + next := p.peek() + if next == nil || next.typ != tokenLocalTime { + val, err := ParseLocalDate(tok.val) + if err != nil { + p.raiseError(tok, "%s", err) } + return val } - var val interface{} - var err error + localDate := tok + localTime := p.getToken() - if isDateTime { - val, 
err = ParseLocalDateTime(v) - } else if isTime { - val, err = ParseLocalTime(v) - } else { - val, err = ParseLocalDate(v) + next = p.peek() + if next == nil || next.typ != tokenTimeOffset { + v := localDate.val + "T" + localTime.val + val, err := ParseLocalDateTime(v) + if err != nil { + p.raiseError(tok, "%s", err) + } + return val } + offset := p.getToken() + + layout := time.RFC3339Nano + v := localDate.val + "T" + localTime.val + offset.val + val, err := time.ParseInLocation(layout, v, time.UTC) if err != nil { p.raiseError(tok, "%s", err) } @@ -370,10 +372,10 @@ func (p *tomlParser) parseRvalue() interface{} { p.raiseError(tok, "cannot have multiple equals for the same key") case tokenError: p.raiseError(tok, "%s", tok) + default: + panic(fmt.Errorf("unhandled token: %v", tok)) } - p.raiseError(tok, "never reached") - return nil } diff --git a/parser_test.go b/parser_test.go index 5e96b84..60fa375 100644 --- a/parser_test.go +++ b/parser_test.go @@ -274,6 +274,34 @@ func TestLocalDate(t *testing.T) { }) } +func TestLocalDateError(t *testing.T) { + _, err := Load("a = 2020-09-31") + if err == nil { + t.Fatalf("should error") + } +} + +func TestLocalTimeError(t *testing.T) { + _, err := Load("a = 07:99:00") + if err == nil { + t.Fatalf("should error") + } +} + +func TestLocalDateTimeError(t *testing.T) { + _, err := Load("a = 2020-09-31T07:99:00") + if err == nil { + t.Fatalf("should error") + } +} + +func TestDateTimeOffsetError(t *testing.T) { + _, err := Load("a = 2020-09-31T07:99:00Z") + if err == nil { + t.Fatalf("should error") + } +} + func TestLocalTime(t *testing.T) { tree, err := Load("a = 07:32:00") assertTree(t, tree, err, map[string]interface{}{ diff --git a/token.go b/token.go index 6af4ec4..b437fdd 100644 --- a/token.go +++ b/token.go @@ -30,8 +30,9 @@ const ( tokenRightParen tokenDoubleLeftBracket tokenDoubleRightBracket - tokenDate tokenLocalDate + tokenLocalTime + tokenTimeOffset tokenKeyGroup tokenKeyGroupArray tokenComma @@ -66,7 +67,8 @@ var 
tokenTypeNames = []string{ "]]", "[[", "LocalDate", - "LocalDate", + "LocalTime", + "TimeOffset", "KeyGroup", "KeyGroupArray", ",", diff --git a/token_test.go b/token_test.go index 13aad28..4508225 100644 --- a/token_test.go +++ b/token_test.go @@ -25,8 +25,9 @@ func TestTokenStringer(t *testing.T) { {tokenRightParen, ")"}, {tokenDoubleLeftBracket, "]]"}, {tokenDoubleRightBracket, "[["}, - {tokenDate, "LocalDate"}, {tokenLocalDate, "LocalDate"}, + {tokenLocalTime, "LocalTime"}, + {tokenTimeOffset, "TimeOffset"}, {tokenKeyGroup, "KeyGroup"}, {tokenKeyGroupArray, "KeyGroupArray"}, {tokenComma, ","},