Multiline basic string parsing
This commit is contained in:
+2
-1
@@ -58,7 +58,8 @@ func scan(b []byte) ([]byte, []byte, error) {
|
||||
case comment:
|
||||
return scanComment(b)
|
||||
case ' ', '\t':
|
||||
return scanWhitespace(b)
|
||||
data, rest := scanWhitespace(b)
|
||||
return data, rest, nil
|
||||
case '\r':
|
||||
return scanWindowsNewline(b)
|
||||
case '\n':
|
||||
|
||||
@@ -92,6 +92,126 @@ func parseKeyval(b []byte) ([]byte, error) {
|
||||
func parseVal(b []byte) ([]byte, error) {
|
||||
// val = string / boolean / array / inline-table / date-time / float / integer
|
||||
|
||||
c := b[0]
|
||||
|
||||
switch c {
|
||||
// strings
|
||||
case '"':
|
||||
var rest []byte
|
||||
var err error
|
||||
if scanFollowsMultilineBasicStringDelimiter(b) {
|
||||
_, rest, err = parseMultilineBasicString(b)
|
||||
} else {
|
||||
_, rest, err = parseBasicString(b)
|
||||
}
|
||||
return rest, err
|
||||
case '\'':
|
||||
if scanFollowsMultilineLiteralStringDelimiter(b) {
|
||||
return parseMultilineLiteralString(b)
|
||||
}
|
||||
_, rest, err := scanLiteralString(b)
|
||||
return rest, err
|
||||
// TODO boolean
|
||||
|
||||
// TODO array
|
||||
|
||||
// TODO inline-table
|
||||
|
||||
// TODO date-time
|
||||
|
||||
// TODO float
|
||||
|
||||
// TODO integer
|
||||
default:
|
||||
return nil, fmt.Errorf("unexpected char")
|
||||
}
|
||||
}
|
||||
|
||||
func parseMultilineBasicString(b []byte) (string, []byte, error) {
|
||||
//ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
|
||||
//ml-basic-string-delim
|
||||
//ml-basic-string-delim = 3quotation-mark
|
||||
//ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
|
||||
//
|
||||
//mlb-content = mlb-char / newline / mlb-escaped-nl
|
||||
//mlb-char = mlb-unescaped / escaped
|
||||
//mlb-quotes = 1*2quotation-mark
|
||||
//mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
|
||||
//mlb-escaped-nl = escape ws newline *( wschar / newline )
|
||||
|
||||
token, rest, err := scanMultilineBasicString(b)
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
var builder strings.Builder
|
||||
|
||||
i := 3
|
||||
|
||||
// skip the immediate new line
|
||||
if token[i] == '\n' {
|
||||
i++
|
||||
} else if token[i] == '\r' && token[i+1] == '\n' {
|
||||
i += 2
|
||||
}
|
||||
|
||||
// The scanner ensures that the token starts and ends with quotes and that
|
||||
// escapes are balanced.
|
||||
for ; i < len(token)-3; i++ {
|
||||
c := token[i]
|
||||
if c == '\\' {
|
||||
// When the last non-whitespace character on a line is an unescaped \,
|
||||
// it will be trimmed along with all whitespace (including newlines) up
|
||||
// to the next non-whitespace character or closing delimiter.
|
||||
if token[i+1] == '\n' || (token[i+1] == '\r' && token[i+2] == '\n') {
|
||||
i++ // skip the \
|
||||
for ; i < len(token)-3; i++ {
|
||||
c := token[i]
|
||||
if !(c == '\n' || c == '\r' || c == ' ' || c == '\t') {
|
||||
break
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// handle escaping
|
||||
i++
|
||||
c = token[i]
|
||||
switch c {
|
||||
case '"', '\\':
|
||||
builder.WriteByte(c)
|
||||
case 'b':
|
||||
builder.WriteByte('\b')
|
||||
case 'f':
|
||||
builder.WriteByte('\f')
|
||||
case 'n':
|
||||
builder.WriteByte('\n')
|
||||
case 'r':
|
||||
builder.WriteByte('\r')
|
||||
case 't':
|
||||
builder.WriteByte('\t')
|
||||
case 'u':
|
||||
x, err := hexToString(token[i+3:len(token)-3], 4)
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
builder.WriteString(x)
|
||||
i += 4
|
||||
case 'U':
|
||||
x, err := hexToString(token[i+3:len(token)-3], 8)
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
builder.WriteString(x)
|
||||
i += 8
|
||||
default:
|
||||
return "", nil, fmt.Errorf("invalid escaped character: %#U", c)
|
||||
}
|
||||
} else {
|
||||
builder.WriteByte(c)
|
||||
}
|
||||
}
|
||||
|
||||
return builder.String(), rest, nil
|
||||
}
|
||||
|
||||
func parseKey(b []byte) ([]byte, error) {
|
||||
|
||||
Reference in New Issue
Block a user