diff --git a/scanner.go b/scanner.go
index 4bd0291..5ffb7c4 100644
--- a/scanner.go
+++ b/scanner.go
@@ -58,7 +58,8 @@ func scan(b []byte) ([]byte, []byte, error) {
 	case comment:
 		return scanComment(b)
 	case ' ', '\t':
-		return scanWhitespace(b)
+		data, rest := scanWhitespace(b)
+		return data, rest, nil
 	case '\r':
 		return scanWindowsNewline(b)
 	case '\n':
diff --git a/toml.go b/toml.go
index 32c03ad..bf6d290 100644
--- a/toml.go
+++ b/toml.go
@@ -92,6 +92,150 @@ func parseKeyval(b []byte) ([]byte, error) {
 
+// parseVal parses the value at the start of b by dispatching on its first
+// byte, and returns the rest slice reported by the parser it delegates to.
+// Only the string forms are handled so far; empty input or any other leading
+// byte is an error.
 func parseVal(b []byte) ([]byte, error) {
 	// val = string / boolean / array / inline-table / date-time / float / integer
+	if len(b) == 0 {
+		// Guard the b[0] access below so that an empty slice yields an error
+		// instead of an index-out-of-range panic.
+		return nil, fmt.Errorf("unexpected end of input")
+	}
+
+	switch b[0] {
+	// strings
+	case '"':
+		var rest []byte
+		var err error
+		if scanFollowsMultilineBasicStringDelimiter(b) {
+			_, rest, err = parseMultilineBasicString(b)
+		} else {
+			_, rest, err = parseBasicString(b)
+		}
+		return rest, err
+	case '\'':
+		if scanFollowsMultilineLiteralStringDelimiter(b) {
+			return parseMultilineLiteralString(b)
+		}
+		_, rest, err := scanLiteralString(b)
+		return rest, err
+	// TODO boolean
+
+	// TODO array
+
+	// TODO inline-table
+
+	// TODO date-time
+
+	// TODO float
+
+	// TODO integer
+	default:
+		return nil, fmt.Errorf("unexpected char")
+	}
+}
+
+// parseMultilineBasicString decodes the multi-line basic string at the start
+// of b. It returns the unescaped contents together with the rest slice
+// reported by scanMultilineBasicString, or the first error hit while scanning
+// or unescaping.
+func parseMultilineBasicString(b []byte) (string, []byte, error) {
+	//ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
+	//                  ml-basic-string-delim
+	//ml-basic-string-delim = 3quotation-mark
+	//ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
+	//
+	//mlb-content = mlb-char / newline / mlb-escaped-nl
+	//mlb-char = mlb-unescaped / escaped
+	//mlb-quotes = 1*2quotation-mark
+	//mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
+	//mlb-escaped-nl = escape ws newline *( wschar / newline )
+
+	token, rest, err := scanMultilineBasicString(b)
+	if err != nil {
+		return "", nil, err
+	}
+
+	// The scanner ensures that the token starts and ends with quotes and that
+	// escapes are balanced, so the content to decode is token[3:end].
+	end := len(token) - 3
+	i := 3
+
+	// skip the immediate new line
+	if token[i] == '\n' {
+		i++
+	} else if token[i] == '\r' && token[i+1] == '\n' {
+		i += 2
+	}
+
+	var builder strings.Builder
+	for ; i < end; i++ {
+		c := token[i]
+		if c != '\\' {
+			builder.WriteByte(c)
+			continue
+		}
+
+		// When the last non-whitespace character on a line is an unescaped \,
+		// it will be trimmed along with all whitespace (including newlines) up
+		// to the next non-whitespace character or closing delimiter. Per
+		// mlb-escaped-nl, spaces and tabs may sit between the \ and the newline.
+		j := i + 1
+		for j < end && (token[j] == ' ' || token[j] == '\t') {
+			j++
+		}
+		if j < end && (token[j] == '\n' || (token[j] == '\r' && token[j+1] == '\n')) {
+			for j < end {
+				if w := token[j]; w != '\n' && w != '\r' && w != ' ' && w != '\t' {
+					break
+				}
+				j++
+			}
+			// Park i on the last trimmed byte: the loop's i++ then lands on the
+			// first character to keep instead of stepping over it.
+			i = j - 1
+			continue
+		}
+
+		// handle escaping
+		i++
+		c = token[i]
+		switch c {
+		case '"', '\\':
+			builder.WriteByte(c)
+		case 'b':
+			builder.WriteByte('\b')
+		case 'f':
+			builder.WriteByte('\f')
+		case 'n':
+			builder.WriteByte('\n')
+		case 'r':
+			builder.WriteByte('\r')
+		case 't':
+			builder.WriteByte('\t')
+		case 'u':
+			// i sits on the 'u', so the hex digits begin at i+1.
+			// NOTE(review): assumes hexToString reads its digits from the front
+			// of the slice it is given; confirm against its definition.
+			x, err := hexToString(token[i+1:end], 4)
+			if err != nil {
+				return "", nil, err
+			}
+			builder.WriteString(x)
+			i += 4
+		case 'U':
+			x, err := hexToString(token[i+1:end], 8)
+			if err != nil {
+				return "", nil, err
+			}
+			builder.WriteString(x)
+			i += 8
+		default:
+			return "", nil, fmt.Errorf("invalid escaped character: %#U", c)
+		}
+	}
+
+	return builder.String(), rest, nil
 }
 
 func parseKey(b []byte) ([]byte, error) {