diff --git a/parser.go b/parser.go index 67353c8..7439cd2 100644 --- a/parser.go +++ b/parser.go @@ -570,13 +570,25 @@ func (p *parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, er // When the last non-whitespace character on a line is an unescaped \, // it will be trimmed along with all whitespace (including newlines) up // to the next non-whitespace character or closing delimiter. - if token[i+1] == '\n' || (token[i+1] == '\r' && token[i+2] == '\n') { - i++ // skip the \ + + isLastNonWhitespaceOnLine := false + j := 1 + findEOLLoop: + for ; j < len(token)-3-i; j++ { + switch token[i+j] { + case ' ', '\t': + continue + case '\n': + isLastNonWhitespaceOnLine = true + } + break findEOLLoop + } + if isLastNonWhitespaceOnLine { + i += j for ; i < len(token)-3; i++ { c := token[i] if !(c == '\n' || c == '\r' || c == ' ' || c == '\t') { i-- - break } } diff --git a/scanner.go b/scanner.go index 4a5ccda..870c960 100644 --- a/scanner.go +++ b/scanner.go @@ -76,8 +76,30 @@ func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) { // mll-char = %x09 / %x20-26 / %x28-7E / non-ascii // mll-quotes = 1*2apostrophe for i := 3; i < len(b); { - if b[i] == '\'' && scanFollowsMultilineLiteralStringDelimiter(b[i:]) { - return b[:i+3], b[i+3:], nil + if scanFollowsMultilineLiteralStringDelimiter(b[i:]) { + i += 3 + + // At that point we found 3 apostrophes, and i is the + // index of the byte after the third one. The scanner + // needs to be eager, because there can be an extra 2 + // apostrophes that can be accepted at the end of the + // string. 
+ + if i >= len(b) || b[i] != '\'' { + return b[:i], b[i:], nil + } + i++ + + if i >= len(b) || b[i] != '\'' { + return b[:i], b[i:], nil + } + i++ + + if i < len(b) && b[i] == '\'' { + return nil, nil, newDecodeError(b[i-3:i+1], "''' not allowed in multiline literal string") + } + + return b[:i], b[i:], nil } size := utf8ValidNext(b[i:]) if size == 0 { @@ -201,7 +223,29 @@ loop: switch b[i] { case '"': if scanFollowsMultilineBasicStringDelimiter(b[i:]) { - return b[:i+3], escaped, b[i+3:], nil + i += 3 + + // At that point we found 3 double quotes, and i is the + // index of the byte after the third one. The scanner + // needs to be eager, because there can be an extra 2 + // double quotes that can be accepted at the end of the + // string. + + if i >= len(b) || b[i] != '"' { + return b[:i], escaped, b[i:], nil + } + i++ + + if i >= len(b) || b[i] != '"' { + return b[:i], escaped, b[i:], nil + } + i++ + + if i < len(b) && b[i] == '"' { + return nil, escaped, nil, newDecodeError(b[i-3:i+1], `""" not allowed in multiline basic string`) + } + + return b[:i], escaped, b[i:], nil } case '\\': if len(b) < i+2 { diff --git a/toml_testgen_support_test.go b/toml_testgen_support_test.go index b1e2b4c..6ac2579 100644 --- a/toml_testgen_support_test.go +++ b/toml_testgen_support_test.go @@ -7,6 +7,7 @@ import ( "encoding/json" "testing" + "github.com/pelletier/go-toml/v2" "github.com/pelletier/go-toml/v2/testsuite" "github.com/stretchr/testify/require" ) @@ -37,6 +38,9 @@ func testgenValid(t *testing.T, input string, jsonRef string) { err := testsuite.Unmarshal([]byte(input), &doc) if err != nil { + if de, ok := err.(*toml.DecodeError); ok { + t.Logf("%s\n%s", err, de) + } t.Fatalf("failed parsing toml: %s", err) } j, err := testsuite.ValueToTaggedJSON(doc) diff --git a/toml_testgen_test.go b/toml_testgen_test.go index 61985ff..5718419 100644 --- a/toml_testgen_test.go +++ b/toml_testgen_test.go @@ -1379,14 +1379,12 @@ func TestTOMLTest_Valid_String_Escapes(t *testing.T) { } func 
TestTOMLTest_Valid_String_MultilineQuotes(t *testing.T) { - t.Skip("FIXME") input := "# Make sure that quotes inside multiline strings are allowed, including right\n# after the opening '''/\"\"\" and before the closing '''/\"\"\"\n\nlit_one = ''''one quote''''\nlit_two = '''''two quotes'''''\nlit_one_space = ''' 'one quote' '''\nlit_two_space = ''' ''two quotes'' '''\n\none = \"\"\"\"one quote\"\"\"\"\ntwo = \"\"\"\"\"two quotes\"\"\"\"\"\none_space = \"\"\" \"one quote\" \"\"\"\ntwo_space = \"\"\" \"\"two quotes\"\" \"\"\"\n\nmismatch1 = \"\"\"aaa'''bbb\"\"\"\nmismatch2 = '''aaa\"\"\"bbb'''\n" jsonRef := "{\n \"lit_one\": {\n \"type\": \"string\",\n \"value\": \"'one quote'\"\n },\n \"lit_one_space\": {\n \"type\": \"string\",\n \"value\": \" 'one quote' \"\n },\n \"lit_two\": {\n \"type\": \"string\",\n \"value\": \"''two quotes''\"\n },\n \"lit_two_space\": {\n \"type\": \"string\",\n \"value\": \" ''two quotes'' \"\n },\n \"mismatch1\": {\n \"type\": \"string\",\n \"value\": \"aaa'''bbb\"\n },\n \"mismatch2\": {\n \"type\": \"string\",\n \"value\": \"aaa\\\"\\\"\\\"bbb\"\n },\n \"one\": {\n \"type\": \"string\",\n \"value\": \"\\\"one quote\\\"\"\n },\n \"one_space\": {\n \"type\": \"string\",\n \"value\": \" \\\"one quote\\\" \"\n },\n \"two\": {\n \"type\": \"string\",\n \"value\": \"\\\"\\\"two quotes\\\"\\\"\"\n },\n \"two_space\": {\n \"type\": \"string\",\n \"value\": \" \\\"\\\"two quotes\\\"\\\" \"\n }\n}\n" testgenValid(t, input, jsonRef) } func TestTOMLTest_Valid_String_Multiline(t *testing.T) { - t.Skip("FIXME") input := "# NOTE: this file includes some literal tab characters.\n\nmultiline_empty_one = \"\"\"\"\"\"\nmultiline_empty_two = \"\"\"\n\"\"\"\nmultiline_empty_three = \"\"\"\\\n \"\"\"\nmultiline_empty_four = \"\"\"\\\n \\\n \\ \n \"\"\"\n\nequivalent_one = \"The quick brown fox jumps over the lazy dog.\"\nequivalent_two = \"\"\"\nThe quick brown \\\n\n\n fox jumps over \\\n the lazy dog.\"\"\"\n\nequivalent_three = \"\"\"\\\n The quick brown 
\\\n fox jumps over \\\n the lazy dog.\\\n \"\"\"\n\nwhitespace-after-bs = \"\"\"\\\n The quick brown \\\n fox jumps over \\ \n the lazy dog.\\\t\n \"\"\"\n\nno-space = \"\"\"a\\\n b\"\"\"\n\nkeep-ws-before = \"\"\"a \t\\\n b\"\"\"\n\nescape-bs-1 = \"\"\"a \\\\\nb\"\"\"\n\nescape-bs-2 = \"\"\"a \\\\\\\nb\"\"\"\n\nescape-bs-3 = \"\"\"a \\\\\\\\\n b\"\"\"\n" jsonRef := "{\n \"equivalent_one\": {\n \"type\": \"string\",\n \"value\": \"The quick brown fox jumps over the lazy dog.\"\n },\n \"equivalent_three\": {\n \"type\": \"string\",\n \"value\": \"The quick brown fox jumps over the lazy dog.\"\n },\n \"equivalent_two\": {\n \"type\": \"string\",\n \"value\": \"The quick brown fox jumps over the lazy dog.\"\n },\n \"escape-bs-1\": {\n \"type\": \"string\",\n \"value\": \"a \\\\\\nb\"\n },\n \"escape-bs-2\": {\n \"type\": \"string\",\n \"value\": \"a \\\\b\"\n },\n \"escape-bs-3\": {\n \"type\": \"string\",\n \"value\": \"a \\\\\\\\\\n b\"\n },\n \"keep-ws-before\": {\n \"type\": \"string\",\n \"value\": \"a \\tb\"\n },\n \"multiline_empty_four\": {\n \"type\": \"string\",\n \"value\": \"\"\n },\n \"multiline_empty_one\": {\n \"type\": \"string\",\n \"value\": \"\"\n },\n \"multiline_empty_three\": {\n \"type\": \"string\",\n \"value\": \"\"\n },\n \"multiline_empty_two\": {\n \"type\": \"string\",\n \"value\": \"\"\n },\n \"no-space\": {\n \"type\": \"string\",\n \"value\": \"ab\"\n },\n \"whitespace-after-bs\": {\n \"type\": \"string\",\n \"value\": \"The quick brown fox jumps over the lazy dog.\"\n }\n}\n" testgenValid(t, input, jsonRef)