From 85f5d567e4337d9180c28b30e93c9c3215c38f7e Mon Sep 17 00:00:00 2001 From: Thomas Pelletier Date: Sat, 16 Oct 2021 07:41:12 -0400 Subject: [PATCH] parser: validate invalid ASCII control characters --- parser.go | 30 ++++++++++++-------- unmarshaler_test.go | 67 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 12 deletions(-) diff --git a/parser.go b/parser.go index 014c4d9..0d2beb9 100644 --- a/parser.go +++ b/parser.go @@ -558,15 +558,11 @@ func (p *parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, er return nil, nil, nil, newDecodeError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8") } - i = escaped - var builder bytes.Buffer - // grow? - builder.Write(token[startIdx:i]) // The scanner ensures that the token starts and ends with quotes and that // escapes are balanced. - for ; i < len(token)-3; i++ { + for i < len(token)-3 { c := token[i] //nolint:nestif @@ -584,7 +580,7 @@ func (p *parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, er break } } - + i++ continue } @@ -623,8 +619,14 @@ func (p *parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, er default: return nil, nil, nil, newDecodeError(token[i:i+1], "invalid escaped character %#U", c) } + i++ } else { - builder.WriteByte(c) + size := utf8ValidNext(token[i:]) + if size == 0 { + return nil, nil, nil, newDecodeError(token[i:i+1], "invalid character %#U", c) + } + builder.Write(token[i : i+size]) + i += size } } @@ -731,15 +733,13 @@ func (p *parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) { return nil, nil, nil, newDecodeError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8") } - i := escaped + i := startIdx var builder bytes.Buffer - // grow? - builder.Write(token[startIdx:i]) // The scanner ensures that the token starts and ends with quotes and that // escapes are balanced. - for ; i < len(token)-1; i++ { + for i < len(token)-1 { c := token[i] if c == '\\' { i++ @@ -777,8 +777,14 @@ func (p *parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) { default: return nil, nil, nil, newDecodeError(token[i:i+1], "invalid escaped character %#U", c) } + i++ } else { - builder.WriteByte(c) + size := utf8ValidNext(token[i:]) + if size == 0 { + return nil, nil, nil, newDecodeError(token[i:i+1], "invalid character %#U", c) + } + builder.Write(token[i : i+size]) + i += size } } diff --git a/unmarshaler_test.go b/unmarshaler_test.go index 7b05afa..a33b555 100644 --- a/unmarshaler_test.go +++ b/unmarshaler_test.go @@ -2113,6 +2113,66 @@ world'`, } } +func TestASCIIControlCharacters(t *testing.T) { + invalidCharacters := []byte{0x7F} + for c := byte(0x0); c <= 0x08; c++ { + invalidCharacters = append(invalidCharacters, c) + } + for c := byte(0x0B); c <= 0x0C; c++ { + invalidCharacters = append(invalidCharacters, c) + } + for c := byte(0x0E); c <= 0x1F; c++ { + invalidCharacters = append(invalidCharacters, c) + } + + type stringType struct { + Delimiter string + CanEscape bool + } + + stringTypes := map[string]stringType{ + "basic": {Delimiter: "\"", CanEscape: true}, + "basicMultiline": {Delimiter: "\"\"\"", CanEscape: true}, + "literal": {Delimiter: "'", CanEscape: false}, + "literalMultiline": {Delimiter: "'''", CanEscape: false}, + } + + checkError := func(t *testing.T, input []byte) { + t.Helper() + m := map[string]interface{}{} + err := toml.Unmarshal(input, &m) + require.Error(t, err) + + var de *toml.DecodeError + if !errors.As(err, &de) { + t.Fatalf("err should have been a *toml.DecodeError, but got %s (%T)", err, err) + } + } + + for name, st := range stringTypes { + t.Run(name, func(t *testing.T) { + for _, c := range invalidCharacters { + name := fmt.Sprintf("%2X", c) + t.Run(name, func(t *testing.T) { + data := []byte("A = " + st.Delimiter + string(c) + st.Delimiter) + checkError(t, data) + + if st.CanEscape { + t.Run("withEscapeBefore", func(t *testing.T) { + data := []byte("A = " + st.Delimiter + "\\t" + string(c) + st.Delimiter) + checkError(t, data) + }) + t.Run("withEscapeAfter", func(t *testing.T) { + data := []byte("A = " + st.Delimiter + string(c) + "\\t" + st.Delimiter) + checkError(t, data) + }) + } + }) + } + }) + } +} + //nolint:funlen func TestLocalDateTime(t *testing.T) { examples := []struct { @@ -2268,6 +2328,13 @@ xz_hash = "1a48f723fea1f17d786ce6eadd9d00914d38062d28fd9c455ed3c3801905b388" require.Equal(t, expected, dist) } +func TestIssue631(t *testing.T) { + v := map[string]interface{}{} + + err := toml.Unmarshal([]byte("\"\\b\u007f\"= 2"), &v) + require.Error(t, err) +} + //nolint:funlen func TestDecoderStrict(t *testing.T) { examples := []struct {