Validate UTF-8 (#629)
This commit is contained in:
+75
-17
@@ -49,13 +49,18 @@ func scanLiteralString(b []byte) ([]byte, []byte, error) {
|
||||
// literal-string = apostrophe *literal-char apostrophe
|
||||
// apostrophe = %x27 ; ' apostrophe
|
||||
// literal-char = %x09 / %x20-26 / %x28-7E / non-ascii
|
||||
for i := 1; i < len(b); i++ {
|
||||
for i := 1; i < len(b); {
|
||||
switch b[i] {
|
||||
case '\'':
|
||||
return b[:i+1], b[i+1:], nil
|
||||
case '\n':
|
||||
return nil, nil, newDecodeError(b[i:i+1], "literal strings cannot have new lines")
|
||||
}
|
||||
size := utf8ValidNext(b[i:])
|
||||
if size == 0 {
|
||||
return nil, nil, newDecodeError(b[i:i+1], "invalid character")
|
||||
}
|
||||
i += size
|
||||
}
|
||||
|
||||
return nil, nil, newDecodeError(b[len(b):], "unterminated literal string")
|
||||
@@ -70,10 +75,15 @@ func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) {
|
||||
// mll-content = mll-char / newline
|
||||
// mll-char = %x09 / %x20-26 / %x28-7E / non-ascii
|
||||
// mll-quotes = 1*2apostrophe
|
||||
for i := 3; i < len(b); i++ {
|
||||
for i := 3; i < len(b); {
|
||||
if b[i] == '\'' && scanFollowsMultilineLiteralStringDelimiter(b[i:]) {
|
||||
return b[:i+3], b[i+3:], nil
|
||||
}
|
||||
size := utf8ValidNext(b[i:])
|
||||
if size == 0 {
|
||||
return nil, nil, newDecodeError(b[i:i+1], "invalid character")
|
||||
}
|
||||
i += size
|
||||
}
|
||||
|
||||
return nil, nil, newDecodeError(b[len(b):], `multiline literal string not terminated by '''`)
|
||||
@@ -106,45 +116,72 @@ func scanWhitespace(b []byte) ([]byte, []byte) {
|
||||
}
|
||||
|
||||
//nolint:unparam
|
||||
func scanComment(b []byte) ([]byte, []byte) {
|
||||
func scanComment(b []byte) ([]byte, []byte, error) {
|
||||
// comment-start-symbol = %x23 ; #
|
||||
// non-ascii = %x80-D7FF / %xE000-10FFFF
|
||||
// non-eol = %x09 / %x20-7F / non-ascii
|
||||
//
|
||||
// comment = comment-start-symbol *non-eol
|
||||
for i := 1; i < len(b); i++ {
|
||||
|
||||
for i := 1; i < len(b); {
|
||||
if b[i] == '\n' {
|
||||
return b[:i], b[i:]
|
||||
return b[:i], b[i:], nil
|
||||
}
|
||||
size := utf8ValidNext(b[i:])
|
||||
if size == 0 {
|
||||
return nil, nil, newDecodeError(b[i:i+1], "invalid character in comment")
|
||||
}
|
||||
|
||||
i += size
|
||||
}
|
||||
|
||||
return b, b[len(b):]
|
||||
return b, b[len(b):], nil
|
||||
}
|
||||
|
||||
func scanBasicString(b []byte) ([]byte, []byte, error) {
|
||||
func scanBasicString(b []byte) ([]byte, int, []byte, error) {
|
||||
// basic-string = quotation-mark *basic-char quotation-mark
|
||||
// quotation-mark = %x22 ; "
|
||||
// basic-char = basic-unescaped / escaped
|
||||
// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
|
||||
// escaped = escape escape-seq-char
|
||||
for i := 1; i < len(b); i++ {
|
||||
escaped := -1 // index of the first \. -1 means no escape character in there.
|
||||
i := 1
|
||||
|
||||
loop:
|
||||
for ; i < len(b); i++ {
|
||||
switch b[i] {
|
||||
case '"':
|
||||
return b[:i+1], b[i+1:], nil
|
||||
return b[:i+1], escaped, b[i+1:], nil
|
||||
case '\n':
|
||||
return nil, nil, newDecodeError(b[i:i+1], "basic strings cannot have new lines")
|
||||
return nil, escaped, nil, newDecodeError(b[i:i+1], "basic strings cannot have new lines")
|
||||
case '\\':
|
||||
if len(b) < i+2 {
|
||||
return nil, nil, newDecodeError(b[i:i+1], "need a character after \\")
|
||||
return nil, escaped, nil, newDecodeError(b[i:i+1], "need a character after \\")
|
||||
}
|
||||
escaped = i
|
||||
i += 2 // skip the next character
|
||||
break loop
|
||||
}
|
||||
}
|
||||
|
||||
for ; i < len(b); i++ {
|
||||
switch b[i] {
|
||||
case '"':
|
||||
return b[:i+1], escaped, b[i+1:], nil
|
||||
case '\n':
|
||||
return nil, escaped, nil, newDecodeError(b[i:i+1], "basic strings cannot have new lines")
|
||||
case '\\':
|
||||
if len(b) < i+2 {
|
||||
return nil, escaped, nil, newDecodeError(b[i:i+1], "need a character after \\")
|
||||
}
|
||||
i++ // skip the next character
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil, newDecodeError(b[len(b):], `basic string not terminated by "`)
|
||||
return nil, escaped, nil, newDecodeError(b[len(b):], `basic string not terminated by "`)
|
||||
}
|
||||
|
||||
func scanMultilineBasicString(b []byte) ([]byte, []byte, error) {
|
||||
func scanMultilineBasicString(b []byte) ([]byte, int, []byte, error) {
|
||||
// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
|
||||
// ml-basic-string-delim
|
||||
// ml-basic-string-delim = 3quotation-mark
|
||||
@@ -155,19 +192,40 @@ func scanMultilineBasicString(b []byte) ([]byte, []byte, error) {
|
||||
// mlb-quotes = 1*2quotation-mark
|
||||
// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
|
||||
// mlb-escaped-nl = escape ws newline *( wschar / newline )
|
||||
for i := 3; i < len(b); i++ {
|
||||
|
||||
escaped := -1
|
||||
i := 3
|
||||
|
||||
loop:
|
||||
for ; i < len(b); i++ {
|
||||
switch b[i] {
|
||||
case '"':
|
||||
if scanFollowsMultilineBasicStringDelimiter(b[i:]) {
|
||||
return b[:i+3], b[i+3:], nil
|
||||
return b[:i+3], escaped, b[i+3:], nil
|
||||
}
|
||||
case '\\':
|
||||
if len(b) < i+2 {
|
||||
return nil, nil, newDecodeError(b[len(b):], "need a character after \\")
|
||||
return nil, escaped, nil, newDecodeError(b[len(b):], "need a character after \\")
|
||||
}
|
||||
escaped = i
|
||||
i += 2 // skip the next character
|
||||
break loop
|
||||
}
|
||||
}
|
||||
|
||||
for ; i < len(b); i++ {
|
||||
switch b[i] {
|
||||
case '"':
|
||||
if scanFollowsMultilineBasicStringDelimiter(b[i:]) {
|
||||
return b[:i+3], escaped, b[i+3:], nil
|
||||
}
|
||||
case '\\':
|
||||
if len(b) < i+2 {
|
||||
return nil, escaped, nil, newDecodeError(b[len(b):], "need a character after \\")
|
||||
}
|
||||
i++ // skip the next character
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil, newDecodeError(b[len(b):], `multiline basic string not terminated by """`)
|
||||
return nil, escaped, nil, newDecodeError(b[len(b):], `multiline basic string not terminated by """`)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user