internal/encoding/json: initial commit of JSON parser/serializer

Package json provides a parser and serializer for the JSON format.
This focuses on the grammar of the format and is agnostic towards specific
semantics of protobuf types.

High-level API:
	func Marshal(v Value, indent string) ([]byte, error)
	func Unmarshal(b []byte) (Value, error)
	type Type uint8
	    const Null Type ...
	type Value struct{ ... }
	    func ValueOf(v interface{}) Value
	    func (v Value) Type() Type
	    func (v Value) Bool() bool
	    func (v Value) Number() float64
	    func (v Value) String() string
	    func (v Value) Array() []Value
	    func (v Value) Object() [][2]Value
	    func (v Value) Raw() []byte

Change-Id: I26422f6b3881ef1a11b8aa95160645b1384b27b8
Reviewed-on: https://go-review.googlesource.com/127824
Reviewed-by: Herbie Ong <herbie@google.com>
diff --git a/internal/encoding/json/string.go b/internal/encoding/json/string.go
new file mode 100644
index 0000000..ad80e85
--- /dev/null
+++ b/internal/encoding/json/string.go
@@ -0,0 +1,160 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+	"io"
+	"math/bits"
+	"strconv"
+	"unicode"
+	"unicode/utf16"
+	"unicode/utf8"
+
+	"google.golang.org/proto/internal/errors"
+)
+
+func (p *encoder) marshalString(v Value) error {
+	var err error
+	p.out, err = appendString(p.out, v)
+	return err
+}
+// appendString appends the JSON encoding of the string value v to out.
+//
+// It fails if v is not of type String. If v holds raw bytes they are appended
+// verbatim; otherwise v.String() is emitted in double quotes with control
+// characters, '"' and '\' escaped. Bytes that are not valid UTF-8 are copied
+// through unchanged and noted in the errors.NonFatal whose E is returned.
+func appendString(out []byte, v Value) ([]byte, error) {
+	if t := v.Type(); t != String {
+		return nil, errors.New("invalid type %v, expected string", t)
+	}
+	if len(v.raw) > 0 {
+		return append(out, v.raw...), nil
+	}
+
+	// short maps a character to the letter of its two-character escape, if any.
+	short := [...]byte{'"': '"', '\\': '\\', '\b': 'b', '\f': 'f', '\n': 'n', '\r': 'r', '\t': 't'}
+
+	var nerr errors.NonFatal
+	s := v.String()
+	out = append(out, '"')
+	for {
+		// Copy the longest prefix that can be emitted unmodified.
+		safe := indexNeedEscape(s)
+		out, s = append(out, s[:safe]...), s[safe:]
+		if len(s) == 0 {
+			break
+		}
+
+		// s now starts with a character that indexNeedEscape stopped at.
+		r, n := utf8.DecodeRuneInString(s)
+		switch {
+		case r == utf8.RuneError && n == 1:
+			nerr.AppendInvalidUTF8("")
+			out = append(out, s[0]) // preserve invalid byte
+		case r >= ' ' && r != '"' && r != '\\':
+			// A well-formed U+FFFD in the input: valid, so copy it as is.
+			out = append(out, s[:n]...)
+		case short[r] != 0:
+			out = append(out, '\\', short[r])
+		default:
+			// Other control characters become \u00XX: pad to four hex digits.
+			out = append(out, '\\', 'u')
+			out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
+			out = strconv.AppendUint(out, uint64(r), 16)
+		}
+		s = s[n:]
+	}
+	out = append(out, '"')
+	return out, nerr.E
+}
+
+// unmarshalString parses a JSON string from the front of p.in.
+//
+// The byte count reported by consumeString is handed to p.consume whether or
+// not an error is also returned (the count is zero on every fatal error path
+// of consumeString); the parsed value and error are returned unchanged.
+func (p *decoder) unmarshalString() (Value, error) {
+	val, size, err := consumeString(p.in)
+	p.consume(size)
+	return val, err
+}
+// consumeString parses a JSON string literal at the start of in.
+//
+// On success it returns the Value built by rawValueOf from the unescaped
+// contents and the raw bytes of the literal (both quotes included), plus the
+// number of input bytes the literal occupies. Invalid UTF-8 is not fatal: the
+// offending bytes are kept and noted in nerr, whose E field is returned with
+// the value. On failure it returns a zero Value and a zero length together
+// with io.ErrUnexpectedEOF when too few bytes remain to finish the literal,
+// or a syntax error otherwise.
+func consumeString(in []byte) (Value, int, error) {
+	var nerr errors.NonFatal
+	// Remember the full input so the consumed length and the raw bytes of the
+	// literal can be computed once the closing quote is found.
+	in0 := in
+	if len(in) == 0 {
+		return Value{}, 0, io.ErrUnexpectedEOF
+	}
+	if in[0] != '"' {
+		return Value{}, 0, newSyntaxError("invalid character %q at start of string", in[0])
+	}
+	in = in[1:]
+	// Skip the leading run of bytes that need no processing. out starts as a
+	// view of that run inside the input buffer; its capacity is capped at its
+	// length so that the first append copies instead of overwriting the input.
+	i := indexNeedEscape(string(in))
+	in, out := in[i:], in[:i:i] // set cap to prevent mutations
+	for len(in) > 0 {
+		switch r, n := utf8.DecodeRune(in); {
+		case r == utf8.RuneError && n == 1:
+			// Invalid UTF-8 byte: record a non-fatal error and carry on.
+			nerr.AppendInvalidUTF8("")
+			in, out = in[1:], append(out, in[0]) // preserve invalid byte
+		case r < ' ':
+			// Unescaped control characters are rejected.
+			return Value{}, 0, newSyntaxError("invalid character %q in string", r)
+		case r == '"':
+			// Closing quote: after stepping over it, n is the length of the
+			// whole literal measured from the start of the original input.
+			in = in[1:]
+			n := len(in0) - len(in)
+			v := rawValueOf(string(out), in0[:n:n])
+			return v, n, nerr.E
+		case r == '\\':
+			// Escape sequence; the byte after the backslash selects which one.
+			if len(in) < 2 {
+				return Value{}, 0, io.ErrUnexpectedEOF
+			}
+			switch r := in[1]; r {
+			case '"', '\\', '/':
+				in, out = in[2:], append(out, r)
+			case 'b':
+				in, out = in[2:], append(out, '\b')
+			case 'f':
+				in, out = in[2:], append(out, '\f')
+			case 'n':
+				in, out = in[2:], append(out, '\n')
+			case 'r':
+				in, out = in[2:], append(out, '\r')
+			case 't':
+				in, out = in[2:], append(out, '\t')
+			case 'u':
+				// \uXXXX: four hex digits follow the two-byte prefix.
+				if len(in) < 6 {
+					return Value{}, 0, io.ErrUnexpectedEOF
+				}
+				v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
+				if err != nil {
+					return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:6])
+				}
+				in = in[6:]
+
+				r := rune(v)
+				if utf16.IsSurrogate(r) {
+					// A surrogate must be followed by a second \uXXXX escape
+					// that completes the pair. The hex digits are parsed
+					// before the "\u" prefix is checked; a wrong prefix, a
+					// parse failure, or a pair that utf16.DecodeRune rejects
+					// (it then yields unicode.ReplacementChar) is a syntax
+					// error.
+					if len(in) < 6 {
+						return Value{}, 0, io.ErrUnexpectedEOF
+					}
+					v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
+					r = utf16.DecodeRune(r, rune(v))
+					if in[0] != '\\' || in[1] != 'u' || r == unicode.ReplacementChar || err != nil {
+						return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:6])
+					}
+					in = in[6:]
+				}
+				// Append the UTF-8 encoding of the decoded code point.
+				out = append(out, string(r)...)
+			default:
+				return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:2])
+			}
+		default:
+			// Ordinary rune: copy it together with the run of bytes after it
+			// that need no processing.
+			i := indexNeedEscape(string(in[n:]))
+			in, out = in[n+i:], append(out, in[:n+i]...)
+		}
+	}
+	// The input ended before a closing quote was found.
+	return Value{}, 0, io.ErrUnexpectedEOF
+}
+
+// indexNeedEscape reports the byte offset of the first rune in s that cannot
+// be copied verbatim into a JSON string: a control character below U+0020, a
+// backslash, a double quote, or utf8.RuneError (which ranging over a string
+// yields for invalid UTF-8 as well as for a literal U+FFFD).
+// It returns len(s) when s contains no such rune.
+func indexNeedEscape(s string) int {
+	for pos, c := range s {
+		switch c {
+		case '"', '\\', utf8.RuneError:
+			return pos
+		}
+		if c < ' ' {
+			return pos
+		}
+	}
+	return len(s)
+}