diff --git a/decode.go b/decode.go index f3f14ef..180a6ed 100644 --- a/decode.go +++ b/decode.go @@ -9,64 +9,60 @@ import ( "github.com/pelletier/go-toml/v2/unstable" ) -func parseInteger(b []byte) (int64, error) { +func parseInteger(b []byte, base int) (int64, error) { if len(b) > 2 && b[0] == '0' { switch b[1] { case 'x': - return parseIntHex(b) + return parseIntHex(b, base) case 'b': - return parseIntBin(b) + return parseIntBin(b, base) case 'o': - return parseIntOct(b) + return parseIntOct(b, base) default: panic(fmt.Errorf("invalid base '%c', should have been checked by scanIntOrFloat", b[1])) } } - return parseIntDec(b) + return parseIntDec(b, base) } -func parseLocalDate(b []byte) (LocalDate, error) { - // full-date = date-fullyear "-" date-month "-" date-mday - // date-fullyear = 4DIGIT - // date-month = 2DIGIT ; 01-12 - // date-mday = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on month/year +func parseLocalDate(b []byte, base int) (LocalDate, error) { var date LocalDate if len(b) != 10 || b[4] != '-' || b[7] != '-' { - return date, unstable.NewParserError(b, "dates are expected to have the format YYYY-MM-DD") + return date, unstable.NewParserError(b, base, "dates are expected to have the format YYYY-MM-DD") } var err error - date.Year, err = parseDecimalDigits(b[0:4]) + date.Year, err = parseDecimalDigits(b[0:4], base) if err != nil { return LocalDate{}, err } - date.Month, err = parseDecimalDigits(b[5:7]) + date.Month, err = parseDecimalDigits(b[5:7], base+5) if err != nil { return LocalDate{}, err } - date.Day, err = parseDecimalDigits(b[8:10]) + date.Day, err = parseDecimalDigits(b[8:10], base+8) if err != nil { return LocalDate{}, err } if !isValidDate(date.Year, date.Month, date.Day) { - return LocalDate{}, unstable.NewParserError(b, "impossible date") + return LocalDate{}, unstable.NewParserError(b, base, "impossible date") } return date, nil } -func parseDecimalDigits(b []byte) (int, error) { +func parseDecimalDigits(b []byte, base int) (int, error) 
{ v := 0 for i, c := range b { if c < '0' || c > '9' { - return 0, unstable.NewParserError(b[i:i+1], "expected digit (0-9)") + return 0, unstable.NewParserError(b[i:i+1], base+i, "expected digit (0-9)") } v *= 10 v += int(c - '0') @@ -75,21 +71,18 @@ func parseDecimalDigits(b []byte) (int, error) { return v, nil } -func parseDateTime(b []byte) (time.Time, error) { - // offset-date-time = full-date time-delim full-time - // full-time = partial-time time-offset - // time-offset = "Z" / time-numoffset - // time-numoffset = ( "+" / "-" ) time-hour ":" time-minute - - dt, b, err := parseLocalDateTime(b) +func parseDateTime(b []byte, base int) (time.Time, error) { + origLen := len(b) + dt, b, err := parseLocalDateTime(b, base) if err != nil { return time.Time{}, err } + tzBase := base + origLen - len(b) + var zone *time.Location if len(b) == 0 { - // parser should have checked that when assigning the date time node panic("date time should have a timezone") } @@ -99,7 +92,7 @@ func parseDateTime(b []byte) (time.Time, error) { } else { const dateTimeByteLen = 6 if len(b) != dateTimeByteLen { - return time.Time{}, unstable.NewParserError(b, "invalid date-time timezone") + return time.Time{}, unstable.NewParserError(b, tzBase, "invalid date-time timezone") } var direction int switch b[0] { @@ -108,27 +101,27 @@ func parseDateTime(b []byte) (time.Time, error) { case '+': direction = +1 default: - return time.Time{}, unstable.NewParserError(b[:1], "invalid timezone offset character") + return time.Time{}, unstable.NewParserError(b[:1], tzBase, "invalid timezone offset character") } if b[3] != ':' { - return time.Time{}, unstable.NewParserError(b[3:4], "expected a : separator") + return time.Time{}, unstable.NewParserError(b[3:4], tzBase+3, "expected a : separator") } - hours, err := parseDecimalDigits(b[1:3]) + hours, err := parseDecimalDigits(b[1:3], tzBase+1) if err != nil { return time.Time{}, err } if hours > 23 { - return time.Time{}, unstable.NewParserError(b[:1], 
"invalid timezone offset hours") + return time.Time{}, unstable.NewParserError(b[:1], tzBase, "invalid timezone offset hours") } - minutes, err := parseDecimalDigits(b[4:6]) + minutes, err := parseDecimalDigits(b[4:6], tzBase+4) if err != nil { return time.Time{}, err } if minutes > 59 { - return time.Time{}, unstable.NewParserError(b[:1], "invalid timezone offset minutes") + return time.Time{}, unstable.NewParserError(b[:1], tzBase, "invalid timezone offset minutes") } seconds := direction * (hours*3600 + minutes*60) @@ -141,7 +134,7 @@ func parseDateTime(b []byte) (time.Time, error) { } if len(b) > 0 { - return time.Time{}, unstable.NewParserError(b, "extra bytes at the end of the timezone") + return time.Time{}, unstable.NewParserError(b, tzBase, "extra bytes at the end of the timezone") } t := time.Date( @@ -157,15 +150,15 @@ func parseDateTime(b []byte) (time.Time, error) { return t, nil } -func parseLocalDateTime(b []byte) (LocalDateTime, []byte, error) { +func parseLocalDateTime(b []byte, base int) (LocalDateTime, []byte, error) { var dt LocalDateTime const localDateTimeByteMinLen = 11 if len(b) < localDateTimeByteMinLen { - return dt, nil, unstable.NewParserError(b, "local datetimes are expected to have the format YYYY-MM-DDTHH:MM:SS[.NNNNNNNNN]") + return dt, nil, unstable.NewParserError(b, base, "local datetimes are expected to have the format YYYY-MM-DDTHH:MM:SS[.NNNNNNNNN]") } - date, err := parseLocalDate(b[:10]) + date, err := parseLocalDate(b[:10], base) if err != nil { return dt, nil, err } @@ -173,10 +166,10 @@ func parseLocalDateTime(b []byte) (LocalDateTime, []byte, error) { sep := b[10] if sep != 'T' && sep != ' ' && sep != 't' { - return dt, nil, unstable.NewParserError(b[10:11], "datetime separator is expected to be T or a space") + return dt, nil, unstable.NewParserError(b[10:11], base+10, "datetime separator is expected to be T or a space") } - t, rest, err := parseLocalTime(b[11:]) + t, rest, err := parseLocalTime(b[11:], base+11) if err != 
nil { return dt, nil, err } @@ -188,53 +181,53 @@ func parseLocalDateTime(b []byte) (LocalDateTime, []byte, error) { // parseLocalTime is a bit different because it also returns the remaining // []byte that is didn't need. This is to allow parseDateTime to parse those // remaining bytes as a timezone. -func parseLocalTime(b []byte) (LocalTime, []byte, error) { +func parseLocalTime(b []byte, base int) (LocalTime, []byte, error) { var ( nspow = [10]int{0, 1e8, 1e7, 1e6, 1e5, 1e4, 1e3, 1e2, 1e1, 1e0} t LocalTime ) - // check if b matches to have expected format HH:MM:SS[.NNNNNN] const localTimeByteLen = 8 if len(b) < localTimeByteLen { - return t, nil, unstable.NewParserError(b, "times are expected to have the format HH:MM:SS[.NNNNNN]") + return t, nil, unstable.NewParserError(b, base, "times are expected to have the format HH:MM:SS[.NNNNNN]") } var err error - t.Hour, err = parseDecimalDigits(b[0:2]) + t.Hour, err = parseDecimalDigits(b[0:2], base) if err != nil { return t, nil, err } if t.Hour > 23 { - return t, nil, unstable.NewParserError(b[0:2], "hour cannot be greater 23") + return t, nil, unstable.NewParserError(b[0:2], base, "hour cannot be greater 23") } if b[2] != ':' { - return t, nil, unstable.NewParserError(b[2:3], "expecting colon between hours and minutes") + return t, nil, unstable.NewParserError(b[2:3], base+2, "expecting colon between hours and minutes") } - t.Minute, err = parseDecimalDigits(b[3:5]) + t.Minute, err = parseDecimalDigits(b[3:5], base+3) if err != nil { return t, nil, err } if t.Minute > 59 { - return t, nil, unstable.NewParserError(b[3:5], "minutes cannot be greater 59") + return t, nil, unstable.NewParserError(b[3:5], base+3, "minutes cannot be greater 59") } if b[5] != ':' { - return t, nil, unstable.NewParserError(b[5:6], "expecting colon between minutes and seconds") + return t, nil, unstable.NewParserError(b[5:6], base+5, "expecting colon between minutes and seconds") } - t.Second, err = parseDecimalDigits(b[6:8]) + t.Second, err 
= parseDecimalDigits(b[6:8], base+6) if err != nil { return t, nil, err } if t.Second > 59 { - return t, nil, unstable.NewParserError(b[6:8], "seconds cannot be greater than 59") + return t, nil, unstable.NewParserError(b[6:8], base+6, "seconds cannot be greater than 59") } b = b[8:] + base += 8 if len(b) >= 1 && b[0] == '.' { frac := 0 @@ -244,7 +237,7 @@ func parseLocalTime(b []byte) (LocalTime, []byte, error) { for i, c := range b[1:] { if !isDigit(c) { if i == 0 { - return t, nil, unstable.NewParserError(b[0:1], "need at least one digit after fraction point") + return t, nil, unstable.NewParserError(b[0:1], base, "need at least one digit after fraction point") } break } @@ -252,13 +245,6 @@ func parseLocalTime(b []byte) (LocalTime, []byte, error) { const maxFracPrecision = 9 if i >= maxFracPrecision { - // go-toml allows decoding fractional seconds - // beyond the supported precision of 9 - // digits. It truncates the fractional component - // to the supported precision and ignores the - // remaining digits. - // - // https://github.com/pelletier/go-toml/discussions/707 continue } @@ -268,7 +254,7 @@ func parseLocalTime(b []byte) (LocalTime, []byte, error) { } if precision == 0 { - return t, nil, unstable.NewParserError(b[:1], "nanoseconds need at least one digit") + return t, nil, unstable.NewParserError(b[:1], base, "nanoseconds need at least one digit") } t.Nanosecond = frac * nspow[precision] @@ -279,35 +265,35 @@ func parseLocalTime(b []byte) (LocalTime, []byte, error) { return t, b, nil } -func parseFloat(b []byte) (float64, error) { +func parseFloat(b []byte, base int) (float64, error) { if len(b) == 4 && (b[0] == '+' || b[0] == '-') && b[1] == 'n' && b[2] == 'a' && b[3] == 'n' { return math.NaN(), nil } - cleaned, err := checkAndRemoveUnderscoresFloats(b) + cleaned, err := checkAndRemoveUnderscoresFloats(b, base) if err != nil { return 0, err } if cleaned[0] == '.' 
{ - return 0, unstable.NewParserError(b, "float cannot start with a dot") + return 0, unstable.NewParserError(b, base, "float cannot start with a dot") } if cleaned[len(cleaned)-1] == '.' { - return 0, unstable.NewParserError(b, "float cannot end with a dot") + return 0, unstable.NewParserError(b, base, "float cannot end with a dot") } dotAlreadySeen := false for i, c := range cleaned { if c == '.' { if dotAlreadySeen { - return 0, unstable.NewParserError(b[i:i+1], "float can have at most one decimal point") + return 0, unstable.NewParserError(b[i:i+1], base+i, "float can have at most one decimal point") } if !isDigit(cleaned[i-1]) { - return 0, unstable.NewParserError(b[i-1:i+1], "float decimal point must be preceded by a digit") + return 0, unstable.NewParserError(b[i-1:i+1], base+i-1, "float decimal point must be preceded by a digit") } if !isDigit(cleaned[i+1]) { - return 0, unstable.NewParserError(b[i:i+2], "float decimal point must be followed by a digit") + return 0, unstable.NewParserError(b[i:i+2], base+i, "float decimal point must be followed by a digit") } dotAlreadySeen = true } @@ -318,54 +304,54 @@ func parseFloat(b []byte) (float64, error) { start = 1 } if cleaned[start] == '0' && len(cleaned) > start+1 && isDigit(cleaned[start+1]) { - return 0, unstable.NewParserError(b, "float integer part cannot have leading zeroes") + return 0, unstable.NewParserError(b, base, "float integer part cannot have leading zeroes") } f, err := strconv.ParseFloat(string(cleaned), 64) if err != nil { - return 0, unstable.NewParserError(b, "unable to parse float: %w", err) + return 0, unstable.NewParserError(b, base, "unable to parse float: %w", err) } return f, nil } -func parseIntHex(b []byte) (int64, error) { - cleaned, err := checkAndRemoveUnderscoresIntegers(b[2:]) +func parseIntHex(b []byte, base int) (int64, error) { + cleaned, err := checkAndRemoveUnderscoresIntegers(b[2:], base+2) if err != nil { return 0, err } i, err := strconv.ParseInt(string(cleaned), 16, 64) 
if err != nil { - return 0, unstable.NewParserError(b, "couldn't parse hexadecimal number: %w", err) + return 0, unstable.NewParserError(b, base, "couldn't parse hexadecimal number: %w", err) } return i, nil } -func parseIntOct(b []byte) (int64, error) { - cleaned, err := checkAndRemoveUnderscoresIntegers(b[2:]) +func parseIntOct(b []byte, base int) (int64, error) { + cleaned, err := checkAndRemoveUnderscoresIntegers(b[2:], base+2) if err != nil { return 0, err } i, err := strconv.ParseInt(string(cleaned), 8, 64) if err != nil { - return 0, unstable.NewParserError(b, "couldn't parse octal number: %w", err) + return 0, unstable.NewParserError(b, base, "couldn't parse octal number: %w", err) } return i, nil } -func parseIntBin(b []byte) (int64, error) { - cleaned, err := checkAndRemoveUnderscoresIntegers(b[2:]) +func parseIntBin(b []byte, base int) (int64, error) { + cleaned, err := checkAndRemoveUnderscoresIntegers(b[2:], base+2) if err != nil { return 0, err } i, err := strconv.ParseInt(string(cleaned), 2, 64) if err != nil { - return 0, unstable.NewParserError(b, "couldn't parse binary number: %w", err) + return 0, unstable.NewParserError(b, base, "couldn't parse binary number: %w", err) } return i, nil @@ -375,8 +361,8 @@ func isSign(b byte) bool { return b == '+' || b == '-' } -func parseIntDec(b []byte) (int64, error) { - cleaned, err := checkAndRemoveUnderscoresIntegers(b) +func parseIntDec(b []byte, base int) (int64, error) { + cleaned, err := checkAndRemoveUnderscoresIntegers(b, base) if err != nil { return 0, err } @@ -388,18 +374,18 @@ func parseIntDec(b []byte) (int64, error) { } if len(cleaned) > startIdx+1 && cleaned[startIdx] == '0' { - return 0, unstable.NewParserError(b, "leading zero not allowed on decimal number") + return 0, unstable.NewParserError(b, base, "leading zero not allowed on decimal number") } i, err := strconv.ParseInt(string(cleaned), 10, 64) if err != nil { - return 0, unstable.NewParserError(b, "couldn't parse decimal number: %w", 
err) + return 0, unstable.NewParserError(b, base, "couldn't parse decimal number: %w", err) } return i, nil } -func checkAndRemoveUnderscoresIntegers(b []byte) ([]byte, error) { +func checkAndRemoveUnderscoresIntegers(b []byte, base int) ([]byte, error) { start := 0 if b[start] == '+' || b[start] == '-' { start++ @@ -410,11 +396,11 @@ func checkAndRemoveUnderscoresIntegers(b []byte) ([]byte, error) { } if b[start] == '_' { - return nil, unstable.NewParserError(b[start:start+1], "number cannot start with underscore") + return nil, unstable.NewParserError(b[start:start+1], base+start, "number cannot start with underscore") } if b[len(b)-1] == '_' { - return nil, unstable.NewParserError(b[len(b)-1:], "number cannot end with underscore") + return nil, unstable.NewParserError(b[len(b)-1:], base+len(b)-1, "number cannot end with underscore") } // fast path @@ -436,7 +422,7 @@ func checkAndRemoveUnderscoresIntegers(b []byte) ([]byte, error) { c := b[i] if c == '_' { if !before { - return nil, unstable.NewParserError(b[i-1:i+1], "number must have at least one digit between underscores") + return nil, unstable.NewParserError(b[i-1:i+1], base+i-1, "number must have at least one digit between underscores") } before = false } else { @@ -448,13 +434,13 @@ func checkAndRemoveUnderscoresIntegers(b []byte) ([]byte, error) { return cleaned, nil } -func checkAndRemoveUnderscoresFloats(b []byte) ([]byte, error) { +func checkAndRemoveUnderscoresFloats(b []byte, base int) ([]byte, error) { if b[0] == '_' { - return nil, unstable.NewParserError(b[0:1], "number cannot start with underscore") + return nil, unstable.NewParserError(b[0:1], base, "number cannot start with underscore") } if b[len(b)-1] == '_' { - return nil, unstable.NewParserError(b[len(b)-1:], "number cannot end with underscore") + return nil, unstable.NewParserError(b[len(b)-1:], base+len(b)-1, "number cannot end with underscore") } // fast path @@ -477,27 +463,26 @@ func checkAndRemoveUnderscoresFloats(b []byte) ([]byte, 
error) { switch c { case '_': if !before { - return nil, unstable.NewParserError(b[i-1:i+1], "number must have at least one digit between underscores") + return nil, unstable.NewParserError(b[i-1:i+1], base+i-1, "number must have at least one digit between underscores") } if i < len(b)-1 && (b[i+1] == 'e' || b[i+1] == 'E') { - return nil, unstable.NewParserError(b[i+1:i+2], "cannot have underscore before exponent") + return nil, unstable.NewParserError(b[i+1:i+2], base+i+1, "cannot have underscore before exponent") } before = false case '+', '-': - // signed exponents cleaned = append(cleaned, c) before = false case 'e', 'E': if i < len(b)-1 && b[i+1] == '_' { - return nil, unstable.NewParserError(b[i+1:i+2], "cannot have underscore after exponent") + return nil, unstable.NewParserError(b[i+1:i+2], base+i+1, "cannot have underscore after exponent") } cleaned = append(cleaned, c) case '.': if i < len(b)-1 && b[i+1] == '_' { - return nil, unstable.NewParserError(b[i+1:i+2], "cannot have underscore after decimal point") + return nil, unstable.NewParserError(b[i+1:i+2], base+i+1, "cannot have underscore after decimal point") } if i > 0 && b[i-1] == '_' { - return nil, unstable.NewParserError(b[i-1:i], "cannot have underscore before decimal point") + return nil, unstable.NewParserError(b[i-1:i], base+i-1, "cannot have underscore before decimal point") } cleaned = append(cleaned, c) default: diff --git a/errors.go b/errors.go index 718f8d9..38910d3 100644 --- a/errors.go +++ b/errors.go @@ -99,7 +99,7 @@ func (e *DecodeError) Key() Key { // //nolint:funlen func wrapDecodeError(document []byte, de *unstable.ParserError) *DecodeError { - offset := unstable.SubsliceOffset(document, de.Highlight) + offset := de.Offset errMessage := de.Error() errLine, errColumn := positionAtEnd(document[:offset]) diff --git a/errors_test.go b/errors_test.go index 22220a9..fe953ae 100644 --- a/errors_test.go +++ b/errors_test.go @@ -171,6 +171,7 @@ line 5`, err := wrapDecodeError(doc, 
&unstable.ParserError{ Highlight: hl, + Offset: start, Message: e.msg, }) diff --git a/internal/characters/utf8.go b/internal/characters/utf8.go index 7c5cb55..ce7ad1f 100644 --- a/internal/characters/utf8.go +++ b/internal/characters/utf8.go @@ -24,61 +24,57 @@ import ( // 0x9 => tab, ok // 0xA - 0x1F => invalid // 0x7F => invalid -func Utf8TomlValidAlreadyEscaped(p []byte) []byte { +func Utf8TomlValidAlreadyEscaped(p []byte) int { + consumed := 0 + // Fast path. Check for and skip 8 bytes of ASCII characters per iteration. for len(p) >= 8 { - // Combining two 32 bit loads allows the same code to be used - // for 32 and 64 bit platforms. - // The compiler can generate a 32bit load for first32 and second32 - // on many platforms. See test/codegen/memcombine.go. first32 := uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24 second32 := uint32(p[4]) | uint32(p[5])<<8 | uint32(p[6])<<16 | uint32(p[7])<<24 if (first32|second32)&0x80808080 != 0 { - // Found a non ASCII byte (>= RuneSelf). break } for i, b := range p[:8] { if InvalidASCII(b) { - return p[i : i+1] + return consumed + i } } p = p[8:] + consumed += 8 } n := len(p) for i := 0; i < n; { pi := p[i] if pi < utf8.RuneSelf { if InvalidASCII(pi) { - return p[i : i+1] + return consumed + i } i++ continue } x := first[pi] if x == xx { - // Illegal starter byte. - return p[i : i+1] + return consumed + i } size := int(x & 7) if i+size > n { - // Short or invalid. 
- return p[i:n] + return consumed + i } accept := acceptRanges[x>>4] if c := p[i+1]; c < accept.lo || accept.hi < c { - return p[i : i+2] + return consumed + i } else if size == 2 { //revive:disable:empty-block } else if c := p[i+2]; c < locb || hicb < c { - return p[i : i+3] + return consumed + i } else if size == 3 { //revive:disable:empty-block } else if c := p[i+3]; c < locb || hicb < c { - return p[i : i+4] + return consumed + i } i += size } - return nil + return -1 } // Utf8ValidNext returns the size of the next rune if valid, 0 otherwise. diff --git a/localtime.go b/localtime.go index 502ef2f..f55bbcc 100644 --- a/localtime.go +++ b/localtime.go @@ -32,7 +32,7 @@ func (d LocalDate) MarshalText() ([]byte, error) { // UnmarshalText parses b using RFC 3339 to fill d. func (d *LocalDate) UnmarshalText(b []byte) error { - res, err := parseLocalDate(b) + res, err := parseLocalDate(b, 0) if err != nil { return err } @@ -75,9 +75,9 @@ func (d LocalTime) MarshalText() ([]byte, error) { // UnmarshalText parses b using RFC 3339 to fill d. func (d *LocalTime) UnmarshalText(b []byte) error { - res, left, err := parseLocalTime(b) + res, left, err := parseLocalTime(b, 0) if err == nil && len(left) != 0 { - err = unstable.NewParserError(left, "extra characters") + err = unstable.NewParserError(left, len(b)-len(left), "extra characters") } if err != nil { return err @@ -109,9 +109,9 @@ func (d LocalDateTime) MarshalText() ([]byte, error) { // UnmarshalText parses b using RFC 3339 to fill d. 
func (d *LocalDateTime) UnmarshalText(data []byte) error { - res, left, err := parseLocalDateTime(data) + res, left, err := parseLocalDateTime(data, 0) if err == nil && len(left) != 0 { - err = unstable.NewParserError(left, "extra characters") + err = unstable.NewParserError(left, len(data)-len(left), "extra characters") } if err != nil { return err diff --git a/strict.go b/strict.go index f2fe9b2..da8d7ec 100644 --- a/strict.go +++ b/strict.go @@ -54,10 +54,12 @@ func (s *strict) MissingTable(node *unstable.Node) { return } + loc, offset := s.keyLocation(node) s.missing = append(s.missing, unstable.ParserError{ - Highlight: s.keyLocation(node), + Highlight: loc, Message: "missing table", Key: s.key.Key(), + Offset: offset, }) } @@ -66,10 +68,12 @@ func (s *strict) MissingField(node *unstable.Node) { return } + loc, offset := s.keyLocation(node) s.missing = append(s.missing, unstable.ParserError{ - Highlight: s.keyLocation(node), + Highlight: loc, Message: "missing field", Key: s.key.Key(), + Offset: offset, }) } @@ -90,7 +94,7 @@ func (s *strict) Error(doc []byte) error { return err } -func (s *strict) keyLocation(node *unstable.Node) []byte { +func (s *strict) keyLocation(node *unstable.Node) ([]byte, int) { k := node.Key() hasOne := k.Next() @@ -108,5 +112,5 @@ func (s *strict) keyLocation(node *unstable.Node) []byte { start := firstRaw.Offset end := lastRaw.Offset + lastRaw.Length - return s.doc[start:end] + return s.doc[start:end], int(start) } diff --git a/unmarshaler.go b/unmarshaler.go index e7db812..bfbc720 100644 --- a/unmarshaler.go +++ b/unmarshaler.go @@ -625,7 +625,7 @@ func (d *decoder) handleTable(key unstable.Iterator, v reflect.Value) (reflect.V } } } - return reflect.Value{}, unstable.NewParserError(key.Node().Data, "cannot store a table in a slice") + return reflect.Value{}, unstable.NewParserError(key.Node().Data, int(key.Node().Raw.Offset), "cannot store a table in a slice") } if key.Next() { // Still scoping the key @@ -748,7 +748,7 @@ func 
(d *decoder) tryTextUnmarshaler(node *unstable.Node, v reflect.Value) (bool if v.CanAddr() && v.Addr().Type().Implements(textUnmarshalerType) { err := v.Addr().Interface().(encoding.TextUnmarshaler).UnmarshalText(node.Data) if err != nil { - return false, unstable.NewParserError(d.p.Raw(node.Raw), "%w", err) + return false, unstable.NewParserError(d.p.Raw(node.Raw), int(node.Raw.Offset), "%w", err) } return true, nil @@ -896,7 +896,7 @@ func (d *decoder) unmarshalInlineTable(itable *unstable.Node, v reflect.Value) e } return d.unmarshalInlineTable(itable, elem) default: - return unstable.NewParserError(d.p.Raw(itable.Raw), "cannot store inline table in Go type %s", v.Kind()) + return unstable.NewParserError(d.p.Raw(itable.Raw), int(itable.Raw.Offset), "cannot store inline table in Go type %s", v.Kind()) } it := itable.Children() @@ -916,26 +916,26 @@ func (d *decoder) unmarshalInlineTable(itable *unstable.Node, v reflect.Value) e } func (d *decoder) unmarshalDateTime(value *unstable.Node, v reflect.Value) error { - dt, err := parseDateTime(value.Data) + dt, err := parseDateTime(value.Data, int(value.Raw.Offset)) if err != nil { return err } if v.Kind() != reflect.Interface && v.Type() != timeType { - return unstable.NewParserError(d.p.Raw(value.Raw), "%s", d.typeMismatchString("datetime", v.Type())) + return unstable.NewParserError(d.p.Raw(value.Raw), int(value.Raw.Offset), "%s", d.typeMismatchString("datetime", v.Type())) } v.Set(reflect.ValueOf(dt)) return nil } func (d *decoder) unmarshalLocalDate(value *unstable.Node, v reflect.Value) error { - ld, err := parseLocalDate(value.Data) + ld, err := parseLocalDate(value.Data, int(value.Raw.Offset)) if err != nil { return err } if v.Kind() != reflect.Interface && v.Type() != timeType { - return unstable.NewParserError(d.p.Raw(value.Raw), "%s", d.typeMismatchString("local date", v.Type())) + return unstable.NewParserError(d.p.Raw(value.Raw), int(value.Raw.Offset), "%s", d.typeMismatchString("local date", v.Type())) } 
if v.Type() == timeType { v.Set(reflect.ValueOf(ld.AsTime(time.Local))) @@ -946,34 +946,34 @@ func (d *decoder) unmarshalLocalDate(value *unstable.Node, v reflect.Value) erro } func (d *decoder) unmarshalLocalTime(value *unstable.Node, v reflect.Value) error { - lt, rest, err := parseLocalTime(value.Data) + lt, rest, err := parseLocalTime(value.Data, int(value.Raw.Offset)) if err != nil { return err } if len(rest) > 0 { - return unstable.NewParserError(rest, "extra characters at the end of a local time") + return unstable.NewParserError(rest, int(value.Raw.Offset)+len(value.Data)-len(rest), "extra characters at the end of a local time") } if v.Kind() != reflect.Interface { - return unstable.NewParserError(d.p.Raw(value.Raw), "%s", d.typeMismatchString("local time", v.Type())) + return unstable.NewParserError(d.p.Raw(value.Raw), int(value.Raw.Offset), "%s", d.typeMismatchString("local time", v.Type())) } v.Set(reflect.ValueOf(lt)) return nil } func (d *decoder) unmarshalLocalDateTime(value *unstable.Node, v reflect.Value) error { - ldt, rest, err := parseLocalDateTime(value.Data) + ldt, rest, err := parseLocalDateTime(value.Data, int(value.Raw.Offset)) if err != nil { return err } if len(rest) > 0 { - return unstable.NewParserError(rest, "extra characters at the end of a local date time") + return unstable.NewParserError(rest, int(value.Raw.Offset)+len(value.Data)-len(rest), "extra characters at the end of a local date time") } if v.Kind() != reflect.Interface && v.Type() != timeType { - return unstable.NewParserError(d.p.Raw(value.Raw), "%s", d.typeMismatchString("local datetime", v.Type())) + return unstable.NewParserError(d.p.Raw(value.Raw), int(value.Raw.Offset), "%s", d.typeMismatchString("local datetime", v.Type())) } if v.Type() == timeType { v.Set(reflect.ValueOf(ldt.AsTime(time.Local))) @@ -992,14 +992,14 @@ func (d *decoder) unmarshalBool(value *unstable.Node, v reflect.Value) error { case reflect.Interface: v.Set(reflect.ValueOf(b)) default: - return 
unstable.NewParserError(value.Data, "cannot assign boolean to a %t", b) + return unstable.NewParserError(value.Data, int(value.Raw.Offset), "cannot assign boolean to a %t", b) } return nil } func (d *decoder) unmarshalFloat(value *unstable.Node, v reflect.Value) error { - f, err := parseFloat(value.Data) + f, err := parseFloat(value.Data, int(value.Raw.Offset)) if err != nil { return err } @@ -1009,13 +1009,13 @@ func (d *decoder) unmarshalFloat(value *unstable.Node, v reflect.Value) error { v.SetFloat(f) case reflect.Float32: if f > math.MaxFloat32 { - return unstable.NewParserError(value.Data, "number %f does not fit in a float32", f) + return unstable.NewParserError(value.Data, int(value.Raw.Offset), "number %f does not fit in a float32", f) } v.SetFloat(f) case reflect.Interface: v.Set(reflect.ValueOf(f)) default: - return unstable.NewParserError(value.Data, "float cannot be assigned to %s", v.Kind()) + return unstable.NewParserError(value.Data, int(value.Raw.Offset), "float cannot be assigned to %s", v.Kind()) } return nil @@ -1048,7 +1048,7 @@ func (d *decoder) unmarshalInteger(value *unstable.Node, v reflect.Value) error return d.unmarshalFloat(value, v) } - i, err := parseInteger(value.Data) + i, err := parseInteger(value.Data, int(value.Raw.Offset)) if err != nil { return err } @@ -1116,7 +1116,7 @@ func (d *decoder) unmarshalInteger(value *unstable.Node, v reflect.Value) error case reflect.Interface: r = reflect.ValueOf(i) default: - return unstable.NewParserError(d.p.Raw(value.Raw), "%s", d.typeMismatchString("integer", v.Type())) + return unstable.NewParserError(d.p.Raw(value.Raw), int(value.Raw.Offset), "%s", d.typeMismatchString("integer", v.Type())) } if !r.Type().AssignableTo(v.Type()) { @@ -1135,7 +1135,7 @@ func (d *decoder) unmarshalString(value *unstable.Node, v reflect.Value) error { case reflect.Interface: v.Set(reflect.ValueOf(string(value.Data))) default: - return unstable.NewParserError(d.p.Raw(value.Raw), "%s", 
d.typeMismatchString("string", v.Type())) + return unstable.NewParserError(d.p.Raw(value.Raw), int(value.Raw.Offset), "%s", d.typeMismatchString("string", v.Type())) } return nil diff --git a/unstable/ast.go b/unstable/ast.go index fc3226a..a9de8c2 100644 --- a/unstable/ast.go +++ b/unstable/ast.go @@ -90,10 +90,7 @@ type Range struct { Length uint32 } -// SubsliceOffset returns the byte offset of subslice within data. -// Subslice must be a subslice of data, meaning it must point into the -// same backing array. Panics if subslice is not within data. -func SubsliceOffset(data []byte, subslice []byte) int { +func subsliceOffset(data []byte, subslice []byte) int { if len(subslice) == 0 { return len(data) } diff --git a/unstable/benchmark_test.go b/unstable/benchmark_test.go index d554d5a..f3aa084 100644 --- a/unstable/benchmark_test.go +++ b/unstable/benchmark_test.go @@ -35,7 +35,7 @@ func BenchmarkScanComments(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - _, _, _ = scanComment(input) + _, _, _ = scanComment(input, 0) } }) } diff --git a/unstable/parser.go b/unstable/parser.go index 82c85de..e915404 100644 --- a/unstable/parser.go +++ b/unstable/parser.go @@ -16,6 +16,7 @@ type ParserError struct { Highlight []byte Message string Key []string // optional + Offset int } // Error is the implementation of the error interface. @@ -27,9 +28,10 @@ func (e *ParserError) Error() string { // // Warning: Highlight needs to be a subslice of Parser.data, so only slices // returned by Parser.Raw are valid candidates. 
-func NewParserError(highlight []byte, format string, args ...interface{}) error { +func NewParserError(highlight []byte, offset int, format string, args ...interface{}) error { return &ParserError{ Highlight: highlight, + Offset: offset, Message: fmt.Errorf(format, args...).Error(), } } @@ -64,12 +66,18 @@ func (p *Parser) Data() []byte { return p.data } +func (p *Parser) offsetOf(b []byte) int { + return len(p.data) - len(b) +} + // Range returns a range description that corresponds to a given slice of the // input. If the argument is not a subslice of the parser input, this function // panics. +// +// Prefer using ParserError.Offset directly for error position information. func (p *Parser) Range(b []byte) Range { return Range{ - Offset: uint32(SubsliceOffset(p.data, b)), //nolint:gosec // TOML documents are small + Offset: uint32(subsliceOffset(p.data, b)), //nolint:gosec // TOML documents are small Length: uint32(len(b)), //nolint:gosec // TOML documents are small } } @@ -191,16 +199,16 @@ func (p *Parser) parseNewline(b []byte) ([]byte, error) { } if b[0] == '\r' { - _, rest, err := scanWindowsNewline(b) + _, rest, err := scanWindowsNewline(b, p.offsetOf(b)) return rest, err } - return nil, NewParserError(b[0:1], "expected newline but got %#U", b[0]) + return nil, NewParserError(b[0:1], p.offsetOf(b), "expected newline but got %#U", b[0]) } func (p *Parser) parseComment(b []byte) (reference, []byte, error) { ref := invalidReference - data, rest, err := scanComment(b) + data, rest, err := scanComment(b, p.offsetOf(b)) if p.KeepComments && err == nil { ref = p.builder.Push(Node{ Kind: Comment, @@ -284,12 +292,12 @@ func (p *Parser) parseArrayTable(b []byte) (reference, []byte, error) { p.builder.AttachChild(ref, k) b = p.parseWhitespace(b) - b, err = expect(']', b) + b, err = expect(']', b, p.offsetOf(b)) if err != nil { return ref, nil, err } - b, err = expect(']', b) + b, err = expect(']', b, p.offsetOf(b)) return ref, b, err } @@ -314,7 +322,7 @@ func (p 
*Parser) parseStdTable(b []byte) (reference, []byte, error) { b = p.parseWhitespace(b) - b, err = expect(']', b) + b, err = expect(']', b, p.offsetOf(b)) return ref, b, err } @@ -338,10 +346,10 @@ func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) { b = p.parseWhitespace(b) if len(b) == 0 { - return invalidReference, nil, NewParserError(startB[:len(startB)-len(b)], "expected = after a key, but the document ends there") + return invalidReference, nil, NewParserError(startB[:len(startB)-len(b)], p.offsetOf(startB), "expected = after a key, but the document ends there") } - b, err = expect('=', b) + b, err = expect('=', b, p.offsetOf(b)) if err != nil { return invalidReference, nil, err } @@ -370,7 +378,7 @@ func (p *Parser) parseVal(b []byte) (reference, []byte, error) { ref := invalidReference if len(b) == 0 { - return ref, nil, NewParserError(b, "expected value, not eof") + return ref, nil, NewParserError(b, p.offsetOf(b), "expected value, not eof") } var err error @@ -415,7 +423,7 @@ func (p *Parser) parseVal(b []byte) (reference, []byte, error) { return ref, b, err case 't': if !scanFollowsTrue(b) { - return ref, nil, NewParserError(atmost(b, 4), "expected 'true'") + return ref, nil, NewParserError(atmost(b, 4), p.offsetOf(b), "expected 'true'") } ref = p.builder.Push(Node{ @@ -426,7 +434,7 @@ func (p *Parser) parseVal(b []byte) (reference, []byte, error) { return ref, b[4:], nil case 'f': if !scanFollowsFalse(b) { - return ref, nil, NewParserError(atmost(b, 5), "expected 'false'") + return ref, nil, NewParserError(atmost(b, 5), p.offsetOf(b), "expected 'false'") } ref = p.builder.Push(Node{ @@ -453,7 +461,7 @@ func atmost(b []byte, n int) []byte { } func (p *Parser) parseLiteralString(b []byte) ([]byte, []byte, []byte, error) { - v, rest, err := scanLiteralString(b) + v, rest, err := scanLiteralString(b, p.offsetOf(b)) if err != nil { return nil, nil, nil, err } @@ -485,7 +493,7 @@ func (p *Parser) parseInlineTable(b []byte) (reference, []byte, 
error) { b = p.parseWhitespace(b) if len(b) == 0 { - return parent, nil, NewParserError(previousB[:1], "inline table is incomplete") + return parent, nil, NewParserError(previousB[:1], p.offsetOf(previousB), "inline table is incomplete") } if b[0] == '}' { @@ -493,7 +501,7 @@ func (p *Parser) parseInlineTable(b []byte) (reference, []byte, error) { } if !first { - b, err = expect(',', b) + b, err = expect(',', b, p.offsetOf(b)) if err != nil { return parent, nil, err } @@ -517,7 +525,7 @@ func (p *Parser) parseInlineTable(b []byte) (reference, []byte, error) { first = false } - rest, err := expect('}', b) + rest, err := expect('}', b, p.offsetOf(b)) return parent, rest, err } @@ -566,7 +574,7 @@ func (p *Parser) parseValArray(b []byte) (reference, []byte, error) { } if len(b) == 0 { - return parent, nil, NewParserError(arrayStart[:1], "array is incomplete") + return parent, nil, NewParserError(arrayStart[:1], p.offsetOf(arrayStart), "array is incomplete") } if b[0] == ']' { @@ -575,7 +583,7 @@ func (p *Parser) parseValArray(b []byte) (reference, []byte, error) { if b[0] == ',' { if first { - return parent, nil, NewParserError(b[0:1], "array cannot start with comma") + return parent, nil, NewParserError(b[0:1], p.offsetOf(b), "array cannot start with comma") } b = b[1:] @@ -587,7 +595,7 @@ func (p *Parser) parseValArray(b []byte) (reference, []byte, error) { addChild(cref) } } else if !first { - return parent, nil, NewParserError(b[0:1], "array elements must be separated by commas") + return parent, nil, NewParserError(b[0:1], p.offsetOf(b), "array elements must be separated by commas") } // TOML allows trailing commas in arrays. 
@@ -614,7 +622,7 @@ func (p *Parser) parseValArray(b []byte) (reference, []byte, error) { first = false } - rest, err := expect(']', b) + rest, err := expect(']', b, p.offsetOf(b)) return parent, rest, err } @@ -669,7 +677,7 @@ func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) (reference, []b } func (p *Parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) { - token, rest, err := scanMultilineLiteralString(b) + token, rest, err := scanMultilineLiteralString(b, p.offsetOf(b)) if err != nil { return nil, nil, nil, err } @@ -698,7 +706,7 @@ func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, er // mlb-quotes = 1*2quotation-mark // mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii // mlb-escaped-nl = escape ws newline *( wschar / newline ) - token, escaped, rest, err := scanMultilineBasicString(b) + token, escaped, rest, err := scanMultilineBasicString(b, p.offsetOf(b)) if err != nil { return nil, nil, nil, err } @@ -715,14 +723,15 @@ func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, er // fast path startIdx := i endIdx := len(token) - len(`"""`) + tokenBase := p.offsetOf(rest) - len(token) // offsetOf needs a suffix of p.data; token is a prefix that ends where rest begins if !escaped { str := token[startIdx:endIdx] - highlight := characters.Utf8TomlValidAlreadyEscaped(str) - if len(highlight) == 0 { + invalidIdx := characters.Utf8TomlValidAlreadyEscaped(str) + if invalidIdx < 0 { return token, str, rest, nil } - return nil, nil, nil, NewParserError(highlight, "invalid UTF-8") + return nil, nil, nil, NewParserError(str[invalidIdx:invalidIdx+1], tokenBase+startIdx+invalidIdx, "invalid UTF-8") } var builder bytes.Buffer @@ -787,14 +796,14 @@ func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, er case 'e': builder.WriteByte(0x1B) case 'u': - x, err := hexToRune(atmost(token[i+1:], 4), 4) + x, err := hexToRune(atmost(token[i+1:], 4), tokenBase+i+1, 4) if err != nil { return nil, nil, nil, err } builder.WriteRune(x) i += 4 case 'U': - x,
err := hexToRune(atmost(token[i+1:], 8), 8) + x, err := hexToRune(atmost(token[i+1:], 8), tokenBase+i+1, 8) if err != nil { return nil, nil, nil, err } @@ -802,13 +811,13 @@ func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, er builder.WriteRune(x) i += 8 default: - return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c) + return nil, nil, nil, NewParserError(token[i:i+1], tokenBase+i, "invalid escaped character %#U", c) } i++ } else { size := characters.Utf8ValidNext(token[i:]) if size == 0 { - return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c) + return nil, nil, nil, NewParserError(token[i:i+1], tokenBase+i, "invalid character %#U", c) } builder.Write(token[i : i+size]) i += size @@ -863,12 +872,9 @@ func (p *Parser) parseKey(b []byte) (reference, []byte, error) { func (p *Parser) parseSimpleKey(b []byte) (raw, key, rest []byte, err error) { if len(b) == 0 { - return nil, nil, nil, NewParserError(b, "expected key but found none") + return nil, nil, nil, NewParserError(b, p.offsetOf(b), "expected key but found none") } - // simple-key = quoted-key / unquoted-key - // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ - // quoted-key = basic-string / literal-string switch { case b[0] == '\'': return p.parseLiteralString(b) @@ -878,7 +884,7 @@ func (p *Parser) parseSimpleKey(b []byte) (raw, key, rest []byte, err error) { key, rest = scanUnquotedKey(b) return key, key, rest, nil default: - return nil, nil, nil, NewParserError(b[0:1], "invalid character at start of key: %c", b[0]) + return nil, nil, nil, NewParserError(b[0:1], p.offsetOf(b), "invalid character at start of key: %c", b[0]) } } @@ -898,7 +904,7 @@ func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) { // escape-seq-char =/ %x74 ; t tab U+0009 // escape-seq-char =/ %x75 4HEXDIG ; uXXXX U+XXXX // escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX U+XXXXXXXX - token, escaped, rest, 
err := scanBasicString(b) + token, escaped, rest, err := scanBasicString(b, p.offsetOf(b)) if err != nil { return nil, nil, nil, err } @@ -909,13 +915,15 @@ func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) { // Fast path. If there is no escape sequence, the string should just be // an UTF-8 encoded string, which is the same as Go. In that case, // validate the string and return a direct reference to the buffer. + tokenBase := p.offsetOf(rest) - len(token) // offsetOf needs a suffix of p.data; token is a prefix that ends where rest begins + if !escaped { str := token[startIdx:endIdx] - highlight := characters.Utf8TomlValidAlreadyEscaped(str) - if len(highlight) == 0 { + invalidIdx := characters.Utf8TomlValidAlreadyEscaped(str) + if invalidIdx < 0 { return token, str, rest, nil } - return nil, nil, nil, NewParserError(highlight, "invalid UTF-8") + return nil, nil, nil, NewParserError(str[invalidIdx:invalidIdx+1], tokenBase+startIdx+invalidIdx, "invalid UTF-8") } i := startIdx @@ -946,7 +954,7 @@ func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) { case 'e': builder.WriteByte(0x1B) case 'u': - x, err := hexToRune(token[i+1:len(token)-1], 4) + x, err := hexToRune(token[i+1:len(token)-1], tokenBase+i+1, 4) if err != nil { return nil, nil, nil, err } @@ -954,7 +962,7 @@ func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) { builder.WriteRune(x) i += 4 case 'U': - x, err := hexToRune(token[i+1:len(token)-1], 8) + x, err := hexToRune(token[i+1:len(token)-1], tokenBase+i+1, 8) if err != nil { return nil, nil, nil, err } @@ -962,13 +970,13 @@ func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) { builder.WriteRune(x) i += 8 default: - return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c) + return nil, nil, nil, NewParserError(token[i:i+1], tokenBase+i, "invalid escaped character %#U", c) } i++ } else { size := characters.Utf8ValidNext(token[i:]) if size == 0 { - return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
+ return nil, nil, nil, NewParserError(token[i:i+1], tokenBase+i, "invalid character %#U", c) } builder.Write(token[i : i+size]) i += size @@ -978,9 +986,9 @@ func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) { return token, builder.Bytes(), rest, nil } -func hexToRune(b []byte, length int) (rune, error) { +func hexToRune(b []byte, base int, length int) (rune, error) { if len(b) < length { - return -1, NewParserError(b, "unicode point needs %d character, not %d", length, len(b)) + return -1, NewParserError(b, base, "unicode point needs %d character, not %d", length, len(b)) } b = b[:length] @@ -995,13 +1003,13 @@ func hexToRune(b []byte, length int) (rune, error) { case 'A' <= c && c <= 'F': d = uint32(c - 'A' + 10) default: - return -1, NewParserError(b[i:i+1], "non-hex character") + return -1, NewParserError(b[i:i+1], base+i, "non-hex character") } r = r*16 + d } if r > unicode.MaxRune || 0xD800 <= r && r < 0xE000 { - return -1, NewParserError(b, "escape sequence is invalid Unicode code point") + return -1, NewParserError(b, base, "escape sequence is invalid Unicode code point") } return rune(r), nil @@ -1021,7 +1029,7 @@ func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error) switch b[0] { case 'i': if !scanFollowsInf(b) { - return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'inf'") + return invalidReference, nil, NewParserError(atmost(b, 3), p.offsetOf(b), "expected 'inf'") } return p.builder.Push(Node{ @@ -1031,7 +1039,7 @@ func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error) }), b[3:], nil case 'n': if !scanFollowsNan(b) { - return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'nan'") + return invalidReference, nil, NewParserError(atmost(b, 3), p.offsetOf(b), "expected 'nan'") } return p.builder.Push(Node{ @@ -1190,7 +1198,7 @@ func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) { }), b[i+3:], nil } - return invalidReference, nil, 
NewParserError(b[i:i+1], "unexpected character 'i' while scanning for a number") + return invalidReference, nil, NewParserError(b[i:i+1], p.offsetOf(b)+i, "unexpected character 'i' while scanning for a number") } if c == 'n' { @@ -1202,14 +1210,14 @@ func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) { }), b[i+3:], nil } - return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'n' while scanning for a number") + return invalidReference, nil, NewParserError(b[i:i+1], p.offsetOf(b)+i, "unexpected character 'n' while scanning for a number") } break } if i == 0 { - return invalidReference, b, NewParserError(b, "incomplete number") + return invalidReference, b, NewParserError(b, p.offsetOf(b), "incomplete number") } kind := Integer @@ -1246,13 +1254,13 @@ func isValidBinaryRune(r byte) bool { return r == '0' || r == '1' || r == '_' } -func expect(x byte, b []byte) ([]byte, error) { +func expect(x byte, b []byte, base int) ([]byte, error) { if len(b) == 0 { - return nil, NewParserError(b, "expected character %c but the document ended here", x) + return nil, NewParserError(b, base, "expected character %c but the document ended here", x) } if b[0] != x { - return nil, NewParserError(b[0:1], "expected character %c", x) + return nil, NewParserError(b[0:1], base, "expected character %c", x) } return b[1:], nil diff --git a/unstable/scanner.go b/unstable/scanner.go index 0512181..3eeabbb 100644 --- a/unstable/scanner.go +++ b/unstable/scanner.go @@ -47,48 +47,31 @@ func isUnquotedKeyChar(r byte) bool { return (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' || r == '_' } -func scanLiteralString(b []byte) ([]byte, []byte, error) { - // literal-string = apostrophe *literal-char apostrophe - // apostrophe = %x27 ; ' apostrophe - // literal-char = %x09 / %x20-26 / %x28-7E / non-ascii +func scanLiteralString(b []byte, base int) ([]byte, []byte, error) { for i := 1; i < len(b); { switch b[i] { case '\'': 
return b[:i+1], b[i+1:], nil case '\n', '\r': - return nil, nil, NewParserError(b[i:i+1], "literal strings cannot have new lines") + return nil, nil, NewParserError(b[i:i+1], base+i, "literal strings cannot have new lines") } size := characters.Utf8ValidNext(b[i:]) if size == 0 { - return nil, nil, NewParserError(b[i:i+1], "invalid character") + return nil, nil, NewParserError(b[i:i+1], base+i, "invalid character") } i += size } - return nil, nil, NewParserError(b[len(b):], "unterminated literal string") + return nil, nil, NewParserError(b[len(b):], base+len(b), "unterminated literal string") } -func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) { - // ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body - // ml-literal-string-delim - // ml-literal-string-delim = 3apostrophe - // ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ] - // - // mll-content = mll-char / newline - // mll-char = %x09 / %x20-26 / %x28-7E / non-ascii - // mll-quotes = 1*2apostrophe +func scanMultilineLiteralString(b []byte, base int) ([]byte, []byte, error) { for i := 3; i < len(b); { switch b[i] { case '\'': if scanFollowsMultilineLiteralStringDelimiter(b[i:]) { i += 3 - // At that point we found 3 apostrophe, and i is the - // index of the byte after the third one. The scanner - // needs to be eager, because there can be an extra 2 - // apostrophe that can be accepted at the end of the - // string. 
- if i >= len(b) || b[i] != '\'' { return b[:i], b[i:], nil } @@ -100,39 +83,39 @@ func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) { i++ if i < len(b) && b[i] == '\'' { - return nil, nil, NewParserError(b[i-3:i+1], "''' not allowed in multiline literal string") + return nil, nil, NewParserError(b[i-3:i+1], base+i-3, "''' not allowed in multiline literal string") } return b[:i], b[i:], nil } case '\r': if len(b) < i+2 { - return nil, nil, NewParserError(b[len(b):], `need a \n after \r`) + return nil, nil, NewParserError(b[len(b):], base+len(b), `need a \n after \r`) } if b[i+1] != '\n' { - return nil, nil, NewParserError(b[i:i+2], `need a \n after \r`) + return nil, nil, NewParserError(b[i:i+2], base+i, `need a \n after \r`) } - i += 2 // skip the \n + i += 2 continue } size := characters.Utf8ValidNext(b[i:]) if size == 0 { - return nil, nil, NewParserError(b[i:i+1], "invalid character") + return nil, nil, NewParserError(b[i:i+1], base+i, "invalid character") } i += size } - return nil, nil, NewParserError(b[len(b):], `multiline literal string not terminated by '''`) + return nil, nil, NewParserError(b[len(b):], base+len(b), `multiline literal string not terminated by '''`) } -func scanWindowsNewline(b []byte) ([]byte, []byte, error) { +func scanWindowsNewline(b []byte, base int) ([]byte, []byte, error) { const lenCRLF = 2 if len(b) < lenCRLF { - return nil, nil, NewParserError(b, "windows new line expected") + return nil, nil, NewParserError(b, base, "windows new line expected") } if b[1] != '\n' { - return nil, nil, NewParserError(b, `windows new line should be \r\n`) + return nil, nil, NewParserError(b, base, `windows new line should be \r\n`) } return b[:lenCRLF], b[lenCRLF:], nil @@ -151,13 +134,7 @@ func scanWhitespace(b []byte) ([]byte, []byte) { return b, b[len(b):] } -func scanComment(b []byte) ([]byte, []byte, error) { - // comment-start-symbol = %x23 ; # - // non-ascii = %x80-D7FF / %xE000-10FFFF - // non-eol = %x09 / %x20-7F / non-ascii 
- // - // comment = comment-start-symbol *non-eol - +func scanComment(b []byte, base int) ([]byte, []byte, error) { for i := 1; i < len(b); { if b[i] == '\n' { return b[:i], b[i:], nil @@ -166,11 +143,11 @@ func scanComment(b []byte) ([]byte, []byte, error) { if i+1 < len(b) && b[i+1] == '\n' { return b[:i+1], b[i+1:], nil } - return nil, nil, NewParserError(b[i:i+1], "invalid character in comment") + return nil, nil, NewParserError(b[i:i+1], base+i, "invalid character in comment") } size := characters.Utf8ValidNext(b[i:]) if size == 0 { - return nil, nil, NewParserError(b[i:i+1], "invalid character in comment") + return nil, nil, NewParserError(b[i:i+1], base+i, "invalid character in comment") } i += size @@ -179,12 +156,7 @@ func scanComment(b []byte) ([]byte, []byte, error) { return b, b[len(b):], nil } -func scanBasicString(b []byte) ([]byte, bool, []byte, error) { - // basic-string = quotation-mark *basic-char quotation-mark - // quotation-mark = %x22 ; " - // basic-char = basic-unescaped / escaped - // basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii - // escaped = escape escape-seq-char +func scanBasicString(b []byte, base int) ([]byte, bool, []byte, error) { escaped := false i := 1 @@ -193,31 +165,20 @@ func scanBasicString(b []byte) ([]byte, bool, []byte, error) { case '"': return b[:i+1], escaped, b[i+1:], nil case '\n', '\r': - return nil, escaped, nil, NewParserError(b[i:i+1], "basic strings cannot have new lines") + return nil, escaped, nil, NewParserError(b[i:i+1], base+i, "basic strings cannot have new lines") case '\\': if len(b) < i+2 { - return nil, escaped, nil, NewParserError(b[i:i+1], "need a character after \\") + return nil, escaped, nil, NewParserError(b[i:i+1], base+i, "need a character after \\") } escaped = true i++ // skip the next character } } - return nil, escaped, nil, NewParserError(b[len(b):], `basic string not terminated by "`) + return nil, escaped, nil, NewParserError(b[len(b):], base+len(b), `basic string not 
terminated by "`) } -func scanMultilineBasicString(b []byte) ([]byte, bool, []byte, error) { - // ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body - // ml-basic-string-delim - // ml-basic-string-delim = 3quotation-mark - // ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ] - // - // mlb-content = mlb-char / newline / mlb-escaped-nl - // mlb-char = mlb-unescaped / escaped - // mlb-quotes = 1*2quotation-mark - // mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii - // mlb-escaped-nl = escape ws newline *( wschar / newline ) - +func scanMultilineBasicString(b []byte, base int) ([]byte, bool, []byte, error) { escaped := false i := 3 @@ -227,12 +188,6 @@ func scanMultilineBasicString(b []byte) ([]byte, bool, []byte, error) { if scanFollowsMultilineBasicStringDelimiter(b[i:]) { i += 3 - // At that point we found 3 apostrophe, and i is the - // index of the byte after the third one. The scanner - // needs to be eager, because there can be an extra 2 - // apostrophe that can be accepted at the end of the - // string. 
- if i >= len(b) || b[i] != '"' { return b[:i], escaped, b[i:], nil } @@ -244,27 +199,27 @@ func scanMultilineBasicString(b []byte) ([]byte, bool, []byte, error) { i++ if i < len(b) && b[i] == '"' { - return nil, escaped, nil, NewParserError(b[i-3:i+1], `""" not allowed in multiline basic string`) + return nil, escaped, nil, NewParserError(b[i-3:i+1], base+i-3, `""" not allowed in multiline basic string`) } return b[:i], escaped, b[i:], nil } case '\\': if len(b) < i+2 { - return nil, escaped, nil, NewParserError(b[len(b):], "need a character after \\") + return nil, escaped, nil, NewParserError(b[len(b):], base+len(b), "need a character after \\") } escaped = true i++ // skip the next character case '\r': if len(b) < i+2 { - return nil, escaped, nil, NewParserError(b[len(b):], `need a \n after \r`) + return nil, escaped, nil, NewParserError(b[len(b):], base+len(b), `need a \n after \r`) } if b[i+1] != '\n' { - return nil, escaped, nil, NewParserError(b[i:i+2], `need a \n after \r`) + return nil, escaped, nil, NewParserError(b[i:i+2], base+i, `need a \n after \r`) } i++ // skip the \n } } - return nil, escaped, nil, NewParserError(b[len(b):], `multiline basic string not terminated by """`) + return nil, escaped, nil, NewParserError(b[len(b):], base+len(b), `multiline basic string not terminated by """`) }