Files
go-toml/unstable/scanner.go
Cursor Agent a646ffd9fa Make error position tracking explicit with Offset field on ParserError
Thread byte offset information through all error creation sites,
eliminating the need for SubsliceOffset to recover position from
pointer comparison.

Changes:
- Add Offset field to ParserError struct
- Add offset parameter to NewParserError
- Add Parser.offsetOf helper for suffix-length arithmetic
- Thread base offset through scanner functions (scanComment,
  scanBasicString, scanMultilineBasicString, scanLiteralString,
  scanMultilineLiteralString, scanWindowsNewline)
- Thread base offset through standalone functions (expect, hexToRune)
- Thread base offset through all decode functions (parseInteger,
  parseFloat, parseLocalDate, parseLocalTime, parseLocalDateTime,
  parseDateTime, checkAndRemoveUnderscores*)
- Update all unmarshaler call sites to pass value.Raw.Offset
- Update localtime.go UnmarshalText methods with base=0
- Update strict.go to populate Offset from key ranges
- Change wrapDecodeError to read de.Offset directly
- Change Utf8TomlValidAlreadyEscaped to return int index (-1 if valid)
  instead of a byte subslice
- Unexport SubsliceOffset (now only used internally by Range())

This makes error positions self-describing: each ParserError carries its
own byte offset, so callers no longer need the original document slice
and address arithmetic to determine where an error occurred.

Co-authored-by: Thomas Pelletier <thomas@pelletier.dev>
2026-04-12 19:08:55 +00:00

226 lines
5.4 KiB
Go

package unstable
import "github.com/pelletier/go-toml/v2/internal/characters"
func scanFollows(b []byte, pattern string) bool {
n := len(pattern)
return len(b) >= n && string(b[:n]) == pattern
}
func scanFollowsMultilineBasicStringDelimiter(b []byte) bool {
return scanFollows(b, `"""`)
}
func scanFollowsMultilineLiteralStringDelimiter(b []byte) bool {
return scanFollows(b, `'''`)
}
func scanFollowsTrue(b []byte) bool {
return scanFollows(b, `true`)
}
func scanFollowsFalse(b []byte) bool {
return scanFollows(b, `false`)
}
func scanFollowsInf(b []byte) bool {
return scanFollows(b, `inf`)
}
func scanFollowsNan(b []byte) bool {
return scanFollows(b, `nan`)
}
func scanUnquotedKey(b []byte) ([]byte, []byte) {
// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
for i := 0; i < len(b); i++ {
if !isUnquotedKeyChar(b[i]) {
return b[:i], b[i:]
}
}
return b, b[len(b):]
}
func isUnquotedKeyChar(r byte) bool {
return (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' || r == '_'
}
func scanLiteralString(b []byte, base int) ([]byte, []byte, error) {
for i := 1; i < len(b); {
switch b[i] {
case '\'':
return b[:i+1], b[i+1:], nil
case '\n', '\r':
return nil, nil, NewParserError(b[i:i+1], base+i, "literal strings cannot have new lines")
}
size := characters.Utf8ValidNext(b[i:])
if size == 0 {
return nil, nil, NewParserError(b[i:i+1], base+i, "invalid character")
}
i += size
}
return nil, nil, NewParserError(b[len(b):], base+len(b), "unterminated literal string")
}
func scanMultilineLiteralString(b []byte, base int) ([]byte, []byte, error) {
for i := 3; i < len(b); {
switch b[i] {
case '\'':
if scanFollowsMultilineLiteralStringDelimiter(b[i:]) {
i += 3
if i >= len(b) || b[i] != '\'' {
return b[:i], b[i:], nil
}
i++
if i >= len(b) || b[i] != '\'' {
return b[:i], b[i:], nil
}
i++
if i < len(b) && b[i] == '\'' {
return nil, nil, NewParserError(b[i-3:i+1], base+i-3, "''' not allowed in multiline literal string")
}
return b[:i], b[i:], nil
}
case '\r':
if len(b) < i+2 {
return nil, nil, NewParserError(b[len(b):], base+len(b), `need a \n after \r`)
}
if b[i+1] != '\n' {
return nil, nil, NewParserError(b[i:i+2], base+i, `need a \n after \r`)
}
i += 2
continue
}
size := characters.Utf8ValidNext(b[i:])
if size == 0 {
return nil, nil, NewParserError(b[i:i+1], base+i, "invalid character")
}
i += size
}
return nil, nil, NewParserError(b[len(b):], base+len(b), `multiline literal string not terminated by '''`)
}
func scanWindowsNewline(b []byte, base int) ([]byte, []byte, error) {
const lenCRLF = 2
if len(b) < lenCRLF {
return nil, nil, NewParserError(b, base, "windows new line expected")
}
if b[1] != '\n' {
return nil, nil, NewParserError(b, base, `windows new line should be \r\n`)
}
return b[:lenCRLF], b[lenCRLF:], nil
}
func scanWhitespace(b []byte) ([]byte, []byte) {
for i := 0; i < len(b); i++ {
switch b[i] {
case ' ', '\t':
continue
default:
return b[:i], b[i:]
}
}
return b, b[len(b):]
}
func scanComment(b []byte, base int) ([]byte, []byte, error) {
for i := 1; i < len(b); {
if b[i] == '\n' {
return b[:i], b[i:], nil
}
if b[i] == '\r' {
if i+1 < len(b) && b[i+1] == '\n' {
return b[:i+1], b[i+1:], nil
}
return nil, nil, NewParserError(b[i:i+1], base+i, "invalid character in comment")
}
size := characters.Utf8ValidNext(b[i:])
if size == 0 {
return nil, nil, NewParserError(b[i:i+1], base+i, "invalid character in comment")
}
i += size
}
return b, b[len(b):], nil
}
func scanBasicString(b []byte, base int) ([]byte, bool, []byte, error) {
escaped := false
i := 1
for ; i < len(b); i++ {
switch b[i] {
case '"':
return b[:i+1], escaped, b[i+1:], nil
case '\n', '\r':
return nil, escaped, nil, NewParserError(b[i:i+1], base+i, "basic strings cannot have new lines")
case '\\':
if len(b) < i+2 {
return nil, escaped, nil, NewParserError(b[i:i+1], base+i, "need a character after \\")
}
escaped = true
i++ // skip the next character
}
}
return nil, escaped, nil, NewParserError(b[len(b):], base+len(b), `basic string not terminated by "`)
}
func scanMultilineBasicString(b []byte, base int) ([]byte, bool, []byte, error) {
escaped := false
i := 3
for ; i < len(b); i++ {
switch b[i] {
case '"':
if scanFollowsMultilineBasicStringDelimiter(b[i:]) {
i += 3
if i >= len(b) || b[i] != '"' {
return b[:i], escaped, b[i:], nil
}
i++
if i >= len(b) || b[i] != '"' {
return b[:i], escaped, b[i:], nil
}
i++
if i < len(b) && b[i] == '"' {
return nil, escaped, nil, NewParserError(b[i-3:i+1], base+i-3, `""" not allowed in multiline basic string`)
}
return b[:i], escaped, b[i:], nil
}
case '\\':
if len(b) < i+2 {
return nil, escaped, nil, NewParserError(b[len(b):], base+len(b), "need a character after \\")
}
escaped = true
i++ // skip the next character
case '\r':
if len(b) < i+2 {
return nil, escaped, nil, NewParserError(b[len(b):], base+len(b), `need a \n after \r`)
}
if b[i+1] != '\n' {
return nil, escaped, nil, NewParserError(b[i:i+2], base+i, `need a \n after \r`)
}
i++ // skip the \n
}
}
return nil, escaped, nil, NewParserError(b[len(b):], base+len(b), `multiline basic string not terminated by """`)
}