From ede644560890e63f58ed159bab1a28ed7f3f888b Mon Sep 17 00:00:00 2001 From: Thomas Pelletier Date: Tue, 30 Nov 2021 10:44:48 -0500 Subject: [PATCH] Decoder: flag bad \r in literal multiline strings (#687) Fixes #685 --- scanner.go | 48 ++++++++++++++++++++++++++++----------------- unmarshaler_test.go | 12 ++++++++++-- 2 files changed, 40 insertions(+), 20 deletions(-) diff --git a/scanner.go b/scanner.go index 4884713..bb445fa 100644 --- a/scanner.go +++ b/scanner.go @@ -76,30 +76,42 @@ func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) { // mll-char = %x09 / %x20-26 / %x28-7E / non-ascii // mll-quotes = 1*2apostrophe for i := 3; i < len(b); { - if scanFollowsMultilineLiteralStringDelimiter(b[i:]) { - i += 3 + switch b[i] { + case '\'': + if scanFollowsMultilineLiteralStringDelimiter(b[i:]) { + i += 3 - // At that point we found 3 apostrophe, and i is the - // index of the byte after the third one. The scanner - // needs to be eager, because there can be an extra 2 - // apostrophe that can be accepted at the end of the - // string. + // At that point we found 3 apostrophe, and i is the + // index of the byte after the third one. The scanner + // needs to be eager, because there can be an extra 2 + // apostrophe that can be accepted at the end of the + // string. + + if i >= len(b) || b[i] != '\'' { + return b[:i], b[i:], nil + } + i++ + + if i >= len(b) || b[i] != '\'' { + return b[:i], b[i:], nil + } + i++ + + if i < len(b) && b[i] == '\'' { + return nil, nil, newDecodeError(b[i-3:i+1], "''' not allowed in multiline literal string") + } - if i >= len(b) || b[i] != '\'' { return b[:i], b[i:], nil } - i++ - - if i >= len(b) || b[i] != '\'' { - return b[:i], b[i:], nil + case '\r': + if len(b) < i+2 { + return nil, nil, newDecodeError(b[len(b):], `need a \n after \r`) } - i++ - - if i < len(b) && b[i] == '\'' { - return nil, nil, newDecodeError(b[i-3:i+1], "''' not allowed in multiline literal string") + if b[i+1] != '\n' { + return nil, nil, newDecodeError(b[i:i+2], `need a \n after \r`) } - - return b[:i], b[i:], nil + i += 2 // skip the \n + continue } size := utf8ValidNext(b[i:]) if size == 0 { diff --git a/unmarshaler_test.go b/unmarshaler_test.go index b10d703..6f0b910 100644 --- a/unmarshaler_test.go +++ b/unmarshaler_test.go @@ -571,7 +571,7 @@ func TestUnmarshal(t *testing.T) { }, { desc: "multiline basic string with windows newline", - input: "A = \"\"\"\r\nTest\"\"\"", + input: "A = \"\"\"\r\nTe\r\nst\"\"\"", gen: func() test { type doc struct { A string @@ -579,7 +579,7 @@ func TestUnmarshal(t *testing.T) { return test{ target: &doc{}, - expected: &doc{A: "Test"}, + expected: &doc{A: "Te\r\nst"}, } }, }, @@ -2652,6 +2652,14 @@ world'`, desc: `carriage return inside literal string`, data: "A = '\r'", }, + { + desc: `carriage return inside multiline literal string`, + data: "a='''\r'''", + }, + { + desc: `carriage return at trail of multiline literal string`, + data: "a='''\r", + }, { desc: `carriage return in comment`, data: "# this is a test\ra=1",