Changes to support #29 - Support multi-line literal strings

* Added error output to test_program.go
* Added multi-line literal string support to lexer
* Added multi-line string support to lexer
* Added unit-test for new string support
* Modified test.sh to take an optional parameter to run an individual BurntSushi test suite.
* Fixed formatting
2015-02-26 17:43:47 -05:00
parent a56707c85f
commit 1f3d0e03c3
5 changed files with 186 additions and 61 deletions
@@ -13,10 +13,12 @@ import (
 func main() {
 	bytes, err := ioutil.ReadAll(os.Stdin)
 	if err != nil {
 		log.Fatalf("Error during TOML read: %s", err)
 		os.Exit(2)
 	}
 	tree, err := toml.Load(string(bytes))
 	if err != nil {
 		log.Fatalf("Error during TOML load: %s", err)
 		os.Exit(1)
 	}
@@ -24,6 +26,7 @@ func main() {
 	if err := json.NewEncoder(os.Stdout).Encode(typedTree); err != nil {
 		log.Fatalf("Error encoding JSON: %s", err)
 		os.Exit(3)
 	}
 	os.Exit(0)
@@ -254,7 +254,7 @@ func (l *tomlLexer) lexComma() tomlLexStateFn {
 func (l *tomlLexer) lexKey() tomlLexStateFn {
 	l.ignore()
 	for r := l.next(); isKeyChar(r); r = l.next() {
-		if (r == '#') {
+		if r == '#' {
 			return l.errorf("keys cannot contain # character")
 		}
 	}
@@ -286,11 +286,26 @@ func (l *tomlLexer) lexLiteralString() tomlLexStateFn {
 	l.ignore()
 	growingString := ""
-	for {
+	// handle special case for triple-quote
-		if l.peek() == '\'' {
+	terminator := "'"
-			l.emitWithValue(tokenString, growingString)
+	if l.follow("''") {
 		l.pos += 2
 		l.ignore()
 		terminator = "'''"
 		// special case: discard leading newline
 		if l.peek() == '\n' {
 			l.pos++
 			l.ignore()
 		}
 	}
 	// find end of string
 	for {
 		if l.follow(terminator) {
 			l.emitWithValue(tokenString, growingString)
 			l.pos += len(terminator)
 			l.ignore()
 			return l.lexRvalue
 		}
@@ -309,58 +324,80 @@ func (l *tomlLexer) lexString() tomlLexStateFn {
 	l.ignore()
 	growingString := ""
-	for {
+	// handle special case for triple-quote
-		if l.peek() == '"' {
+	terminator := "\""
-			l.emitWithValue(tokenString, growingString)
+	if l.follow("\"\"") {
 		l.pos += 2
 		l.ignore()
 		terminator = "\"\"\""
 		// special case: discard leading newline
 		if l.peek() == '\n' {
 			l.pos++
 			l.ignore()
 		}
 	}
 	for {
 		if l.follow(terminator) {
 			l.emitWithValue(tokenString, growingString)
 			l.pos += len(terminator)
 			l.ignore()
 			return l.lexRvalue
 		}
-		if l.follow("\\\"") {
+		if l.follow("\\") {
 			l.pos++
-			growingString += "\""
+			switch l.peek() {
-		} else if l.follow("\\n") {
+			case '\r':
-			l.pos++
+				fallthrough
-			growingString += "\n"
+			case '\n':
-		} else if l.follow("\\b") {
+				fallthrough
-			l.pos++
+			case '\t':
-			growingString += "\b"
+				fallthrough
-		} else if l.follow("\\f") {
+			case ' ':
-			l.pos++
+				// skip all whitespace chars following backslash
 			growingString += "\f"
 		} else if l.follow("\\/") {
 			l.pos++
 			growingString += "/"
 		} else if l.follow("\\t") {
 			l.pos++
 			growingString += "\t"
 		} else if l.follow("\\r") {
 			l.pos++
 			growingString += "\r"
 		} else if l.follow("\\\\") {
 			l.pos++
 			growingString += "\\"
 		} else if l.follow("\\u") {
 			l.pos += 2
 			code := ""
 			for i := 0; i < 4; i++ {
 				c := l.peek()
 				l.pos++
-				if !isHexDigit(c) {
+				for strings.ContainsRune("\r\n\t ", l.peek()) {
-					return l.errorf("unfinished unicode escape")
+					l.pos++
 				}
-				code = code + string(c)
+				l.pos--
 			case '"':
 				growingString += "\""
 			case 'n':
 				growingString += "\n"
 			case 'b':
 				growingString += "\b"
 			case 'f':
 				growingString += "\f"
 			case '/':
 				growingString += "/"
 			case 't':
 				growingString += "\t"
 			case 'r':
 				growingString += "\r"
 			case '\\':
 				growingString += "\\"
 			case 'u':
 				l.pos++
 				code := ""
 				for i := 0; i < 4; i++ {
 					c := l.peek()
 					l.pos++
 					if !isHexDigit(c) {
 						return l.errorf("unfinished unicode escape")
 					}
 					code = code + string(c)
 				}
 				l.pos--
 				intcode, err := strconv.ParseInt(code, 16, 32)
 				if err != nil {
 					return l.errorf("invalid unicode escape: \\u" + code)
 				}
 				growingString += string(rune(intcode))
 			default:
 				return l.errorf("invalid escape sequence: \\" + string(l.peek()))
 			}
 			l.pos--
 			intcode, err := strconv.ParseInt(code, 16, 32)
 			if err != nil {
 				return l.errorf("invalid unicode escape: \\u" + code)
 			}
 			growingString += string(rune(intcode))
 		} else if l.follow("\\") {
 			l.pos++
 			return l.errorf("invalid escape sequence: \\" + string(l.peek()))
 		} else {
 			growingString += string(l.peek())
 		}
@@ -487,6 +487,45 @@ func TestLiteralString(t *testing.T) {
 	})
 }
 // TestMultilineLiteralString feeds '''-delimited literal strings through
 // testFlow and compares the result against the expected token sequence.
 func TestMultilineLiteralString(t *testing.T) {
 	// Single-line form: the single quotes inside the delimiters are expected
 	// to appear unchanged in the string token's value.
 	testFlow(t, `foo = '''hello 'literal' world'''`, []token{
 		token{Position{1, 1}, tokenKey, "foo"},
 		token{Position{1, 5}, tokenEqual, "="},
 		token{Position{1, 10}, tokenString, `hello 'literal' world`},
 		token{Position{1, 34}, tokenEOF, ""},
 	})
 	// Multi-line form: the newline directly after the opening ''' is absent
 	// from the expected value, while the interior newlines are kept.
 	testFlow(t, "foo = '''\nhello\n'literal'\nworld'''", []token{
 		token{Position{1, 1}, tokenKey, "foo"},
 		token{Position{1, 5}, tokenEqual, "="},
 		token{Position{2, 1}, tokenString, "hello\n'literal'\nworld"},
 		token{Position{4, 9}, tokenEOF, ""},
 	})
 }
 // TestMultilineString feeds """-delimited strings through testFlow and
 // compares the result against the expected token sequence.
 func TestMultilineString(t *testing.T) {
 	// Single-line form: the double quotes inside the delimiters are expected
 	// to appear unchanged in the string token's value.
 	testFlow(t, `foo = """hello "literal" world"""`, []token{
 		token{Position{1, 1}, tokenKey, "foo"},
 		token{Position{1, 5}, tokenEqual, "="},
 		token{Position{1, 10}, tokenString, `hello "literal" world`},
 		token{Position{1, 34}, tokenEOF, ""},
 	})
 	// Multi-line form: the newline directly after the opening """ is absent
 	// from the expected value, while the interior newlines are kept.
 	testFlow(t, "foo = \"\"\"\nhello\n\"literal\"\nworld\"\"\"", []token{
 		token{Position{1, 1}, tokenKey, "foo"},
 		token{Position{1, 5}, tokenEqual, "="},
 		token{Position{2, 1}, tokenString, "hello\n\"literal\"\nworld"},
 		token{Position{4, 9}, tokenEOF, ""},
 	})
 	// Line-ending backslashes: each backslash and the newline/indentation
 	// after it are absent from the expected value, so the string starts at
 	// "hello".
 	testFlow(t, "foo = \"\"\"\\\n    \\\n    \\\n    hello\nmultiline\nworld\"\"\"", []token{
 		token{Position{1, 1}, tokenKey, "foo"},
 		token{Position{1, 5}, tokenEqual, "="},
 		token{Position{1, 10}, tokenString, "hello\nmultiline\nworld"},
 		token{Position{6, 9}, tokenEOF, ""},
 	})
 }
 func TestUnicodeString(t *testing.T) {
 	testFlow(t, `foo = "hello ♥ world"`, []token{
 		token{Position{1, 1}, tokenKey, "foo"},
@@ -91,18 +91,17 @@ func TestSimpleDate(t *testing.T) {
 func TestDateOffset(t *testing.T) {
 	tree, err := Load("a = 1979-05-27T00:32:00-07:00")
 	assertTree(t, tree, err, map[string]interface{}{
-		"a": time.Date(1979, time.May, 27, 0, 32, 0, 0, time.FixedZone("", -7 * 60 * 60)),
+		"a": time.Date(1979, time.May, 27, 0, 32, 0, 0, time.FixedZone("", -7*60*60)),
 	})
 }
 func TestDateNano(t *testing.T) {
 	tree, err := Load("a = 1979-05-27T00:32:00.999999999-07:00")
 	assertTree(t, tree, err, map[string]interface{}{
-		"a": time.Date(1979, time.May, 27, 0, 32, 0, 999999999, time.FixedZone("", -7 * 60 * 60)),
+		"a": time.Date(1979, time.May, 27, 0, 32, 0, 999999999, time.FixedZone("", -7*60*60)),
 	})
 }
 func TestSimpleString(t *testing.T) {
 	tree, err := Load("a = \"hello world\"")
 	assertTree(t, tree, err, map[string]interface{}{
@@ -5,15 +5,26 @@ set -e
 # set the path to the present working directory
 export GOPATH=`pwd`
-# Vendorize the BurntSushi test suite
+function git_clone() {
-# NOTE: this gets a specific release to avoid versioning issues
+  path=$1
-if [ ! -d 'src/github.com/BurntSushi/toml-test' ]; then
+  branch=$2
-  mkdir -p src/github.com/BurntSushi
+  version=$3
-  git clone https://github.com/BurntSushi/toml-test.git src/github.com/BurntSushi/toml-test
+  if [ ! -d "src/$path" ]; then
-fi
+    mkdir -p src/$path
-pushd src/github.com/BurntSushi/toml-test
+    git clone https://$path.git src/$path
-git reset --hard '0.2.0'  # use the released version, NOT tip
+  fi
-popd
+  pushd src/$path
  git checkout "$branch"
  git reset --hard "$version"
  popd
 }
 # get code for BurntSushi TOML validation
 # pinning all to 'HEAD' for version 0.3.x work (TODO: pin to commit hash when tests stabilize)
 git_clone github.com/BurntSushi/toml master HEAD
 git_clone github.com/BurntSushi/toml-test master HEAD #was: 0.2.0 HEAD
 # build the BurntSushi test application
 go build -o toml-test github.com/BurntSushi/toml-test
 # vendorize the current lib for testing
@@ -23,6 +34,42 @@ cp *.go *.toml src/github.com/pelletier/go-toml
 cp cmd/*.go src/github.com/pelletier/go-toml/cmd
 go build -o test_program_bin src/github.com/pelletier/go-toml/cmd/test_program.go
-# Run basic unit tests and then the BurntSushi test suite
+# Run basic unit tests
 go test -v github.com/pelletier/go-toml
-./toml-test ./test_program_bin | tee test_out
+
 # With no arguments, run the entire BurntSushi test suite and tee its output
 # to test_out. With one argument, treat it as a test name and print that
 # test's fixture files followed by test_program_bin's output for it.
 if [[ $# -eq 0 ]] ; then
  echo "Running all BurntSushi tests"
  ./toml-test ./test_program_bin | tee test_out
 else
  # run a specific test: $1 is the fixture base name (no extension), looked up
  # under both the valid/ and invalid/ fixture directories
  test=$1
  test_path='src/github.com/BurntSushi/toml-test/tests'
  valid_test="$test_path/valid/$test"
  invalid_test="$test_path/invalid/$test"
  # valid fixture: show the TOML input, the JSON file stored beside it, then
  # what test_program_bin prints when fed the TOML on stdin
  if [ -e "$valid_test.toml" ]; then
    echo "Valid Test TOML for $test:"
    echo "===="
    cat "$valid_test.toml"
    echo "Valid Test JSON for $test:"
    echo "===="
    cat "$valid_test.json"
    echo "Go-TOML Output for $test:"
    echo "===="
    cat "$valid_test.toml" | ./test_program_bin
  fi
  # invalid fixture: only a .toml file is read here, so show the input and
  # what test_program_bin prints for it
  # NOTE(review): set -e is active (see the hunk header), so a non-zero exit
  # from test_program_bin ends the script at the pipeline below.
  if [ -e "$invalid_test.toml" ]; then
    echo "Invalid Test TOML for $test:"
    echo "===="
    cat "$invalid_test.toml"
    echo "Go-TOML Output for $test:"
    echo "===="
    # NOTE(review): this prints a second output header right after the one above
    echo "go-toml Output:"
    cat "$invalid_test.toml" | ./test_program_bin
  fi
 fi