Decoder: flag invalid carriage returns in strings (#652)

Fixes #651
This commit is contained in:
Thomas Pelletier
2021-11-02 10:02:25 -04:00
committed by GitHub
parent 85c0658984
commit 3dbca20bc9
3 changed files with 17 additions and 41 deletions
+2 -2
View File
@@ -549,7 +549,7 @@ func (p *parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, er
startIdx := i startIdx := i
endIdx := len(token) - len(`"""`) endIdx := len(token) - len(`"""`)
if escaped < 0 { if !escaped {
str := token[startIdx:endIdx] str := token[startIdx:endIdx]
verr := utf8TomlValidAlreadyEscaped(str) verr := utf8TomlValidAlreadyEscaped(str)
if verr.Zero() { if verr.Zero() {
@@ -736,7 +736,7 @@ func (p *parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
// Fast path. If there is no escape sequence, the string should just be // Fast path. If there is no escape sequence, the string should just be
// an UTF-8 encoded string, which is the same as Go. In that case, // an UTF-8 encoded string, which is the same as Go. In that case,
// validate the string and return a direct reference to the buffer. // validate the string and return a direct reference to the buffer.
if escaped < 0 { if !escaped {
str := token[startIdx:endIdx] str := token[startIdx:endIdx]
verr := utf8TomlValidAlreadyEscaped(str) verr := utf8TomlValidAlreadyEscaped(str)
if verr.Zero() { if verr.Zero() {
+7 -39
View File
@@ -160,42 +160,26 @@ func scanComment(b []byte) ([]byte, []byte, error) {
return b, b[len(b):], nil return b, b[len(b):], nil
} }
func scanBasicString(b []byte) ([]byte, int, []byte, error) { func scanBasicString(b []byte) ([]byte, bool, []byte, error) {
// basic-string = quotation-mark *basic-char quotation-mark // basic-string = quotation-mark *basic-char quotation-mark
// quotation-mark = %x22 ; " // quotation-mark = %x22 ; "
// basic-char = basic-unescaped / escaped // basic-char = basic-unescaped / escaped
// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii // basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
// escaped = escape escape-seq-char // escaped = escape escape-seq-char
escaped := -1 // index of the first \. -1 means no escape character in there. escaped := false
i := 1 i := 1
loop:
for ; i < len(b); i++ { for ; i < len(b); i++ {
switch b[i] { switch b[i] {
case '"': case '"':
return b[:i+1], escaped, b[i+1:], nil return b[:i+1], escaped, b[i+1:], nil
case '\n': case '\n', '\r':
return nil, escaped, nil, newDecodeError(b[i:i+1], "basic strings cannot have new lines")
case '\\':
if len(b) < i+2 {
return nil, escaped, nil, newDecodeError(b[i:i+1], "need a character after \\")
}
escaped = i
i += 2 // skip the next character
break loop
}
}
for ; i < len(b); i++ {
switch b[i] {
case '"':
return b[:i+1], escaped, b[i+1:], nil
case '\n':
return nil, escaped, nil, newDecodeError(b[i:i+1], "basic strings cannot have new lines") return nil, escaped, nil, newDecodeError(b[i:i+1], "basic strings cannot have new lines")
case '\\': case '\\':
if len(b) < i+2 { if len(b) < i+2 {
return nil, escaped, nil, newDecodeError(b[i:i+1], "need a character after \\") return nil, escaped, nil, newDecodeError(b[i:i+1], "need a character after \\")
} }
escaped = true
i++ // skip the next character i++ // skip the next character
} }
} }
@@ -203,7 +187,7 @@ loop:
return nil, escaped, nil, newDecodeError(b[len(b):], `basic string not terminated by "`) return nil, escaped, nil, newDecodeError(b[len(b):], `basic string not terminated by "`)
} }
func scanMultilineBasicString(b []byte) ([]byte, int, []byte, error) { func scanMultilineBasicString(b []byte) ([]byte, bool, []byte, error) {
// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body // ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
// ml-basic-string-delim // ml-basic-string-delim
// ml-basic-string-delim = 3quotation-mark // ml-basic-string-delim = 3quotation-mark
@@ -215,10 +199,9 @@ func scanMultilineBasicString(b []byte) ([]byte, int, []byte, error) {
// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii // mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
// mlb-escaped-nl = escape ws newline *( wschar / newline ) // mlb-escaped-nl = escape ws newline *( wschar / newline )
escaped := -1 escaped := false
i := 3 i := 3
loop:
for ; i < len(b); i++ { for ; i < len(b); i++ {
switch b[i] { switch b[i] {
case '"': case '"':
@@ -251,22 +234,7 @@ loop:
if len(b) < i+2 { if len(b) < i+2 {
return nil, escaped, nil, newDecodeError(b[len(b):], "need a character after \\") return nil, escaped, nil, newDecodeError(b[len(b):], "need a character after \\")
} }
escaped = i escaped = true
i += 2 // skip the next character
break loop
}
}
for ; i < len(b); i++ {
switch b[i] {
case '"':
if scanFollowsMultilineBasicStringDelimiter(b[i:]) {
return b[:i+3], escaped, b[i+3:], nil
}
case '\\':
if len(b) < i+2 {
return nil, escaped, nil, newDecodeError(b[len(b):], "need a character after \\")
}
i++ // skip the next character i++ // skip the next character
} }
} }
+8
View File
@@ -2211,6 +2211,14 @@ world'`,
desc: `invalid month`, desc: `invalid month`,
data: `a=2021-0--29`, data: `a=2021-0--29`,
}, },
{
desc: `carriage return inside basic key`,
data: "\"\r\"=42",
},
{
desc: `carriage return inside basic string`,
data: "A = \"\r\"",
},
} }
for _, e := range examples { for _, e := range examples {