Decoder: flag bad \r in literal multiline strings (#687)

Fixes #685
2021-11-30 10:44:48 -05:00
parent b226db6a29
commit ede6445608
2 changed files with 40 additions and 20 deletions
@@ -76,30 +76,42 @@ func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) {
 	// mll-char = %x09 / %x20-26 / %x28-7E / non-ascii
 	// mll-quotes = 1*2apostrophe
 	for i := 3; i < len(b); {
-		if scanFollowsMultilineLiteralStringDelimiter(b[i:]) {
+		switch b[i] {
-			i += 3
+		case '\'':
 			if scanFollowsMultilineLiteralStringDelimiter(b[i:]) {
 				i += 3
-			// At that point we found 3 apostrophe, and i is the
+				// At that point we found 3 apostrophes, and i is the
-			// index of the byte after the third one. The scanner
+				// index of the byte after the third one. The scanner
-			// needs to be eager, because there can be an extra 2
+				// needs to be eager, because there can be an extra 2
-			// apostrophe that can be accepted at the end of the
+				// apostrophes that can be accepted at the end of the
-			// string.
+				// string.
 				if i >= len(b) || b[i] != '\'' {
 					return b[:i], b[i:], nil
 				}
 				i++
 				if i >= len(b) || b[i] != '\'' {
 					return b[:i], b[i:], nil
 				}
 				i++
 				if i < len(b) && b[i] == '\'' {
 					return nil, nil, newDecodeError(b[i-3:i+1], "''' not allowed in multiline literal string")
 				}
 			if i >= len(b) || b[i] != '\'' {
 				return b[:i], b[i:], nil
 			}
-			i++
+		case '\r':
-
+			if len(b) < i+2 {
-			if i >= len(b) || b[i] != '\'' {
+				return nil, nil, newDecodeError(b[len(b):], `need a \n after \r`)
 				return b[:i], b[i:], nil
 			}
-			i++
+			if b[i+1] != '\n' {
-
+				return nil, nil, newDecodeError(b[i:i+2], `need a \n after \r`)
 			if i < len(b) && b[i] == '\'' {
 				return nil, nil, newDecodeError(b[i-3:i+1], "''' not allowed in multiline literal string")
 			}
-
+			i += 2 // skip the \n
-			return b[:i], b[i:], nil
+			continue
 		}
 		size := utf8ValidNext(b[i:])
 		if size == 0 {
@@ -571,7 +571,7 @@ func TestUnmarshal(t *testing.T) {
 		},
 		{
 			desc:  "multiline basic string with windows newline",
-			input: "A = \"\"\"\r\nTest\"\"\"",
+			input: "A = \"\"\"\r\nTe\r\nst\"\"\"",
 			gen: func() test {
 				type doc struct {
 					A string
@@ -579,7 +579,7 @@ func TestUnmarshal(t *testing.T) {
 				return test{
 					target:   &doc{},
-					expected: &doc{A: "Test"},
+					expected: &doc{A: "Te\r\nst"},
 				}
 			},
 		},
@@ -2652,6 +2652,14 @@ world'`,
 			desc: `carriage return inside literal string`,
 			data: "A = '\r'",
 		},
 		{
 			desc: `carriage return inside multiline literal string`,
 			data: "a='''\r'''",
 		},
 		{
 			desc: `carriage return at trail of multiline literal string`,
 			data: "a='''\r",
 		},
 		{
 			desc: `carriage return in comment`,
 			data: "# this is a test\ra=1",