Blame - internal/encoding/json/decode.go - platform/external/golang-protobuf

blob: 769619b7f2d3fccfe3a10b40681ec447b79f34b9 [file] [log] [blame]

Joe Tsai	879b18d	2018-08-03 17:22:24 -0700	[diff] [blame]	1	// Copyright 2018 The Go Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style
				3	// license that can be found in the LICENSE file.
				4
				5	package json
				6
				7	import (
				8	"bytes"
				9	"io"
				10	"regexp"
				11	"unicode/utf8"
				12
Joe Tsai	01ab296	2018-09-21 17:44:00 -0700	[diff] [blame^]	13	"github.com/golang/protobuf/v2/internal/errors"
Joe Tsai	879b18d	2018-08-03 17:22:24 -0700	[diff] [blame]	14	)
				15
				16	type syntaxError struct{ error }
				17
				18	func newSyntaxError(f string, x ...interface{}) error {
				19	return syntaxError{errors.New(f, x...)}
				20	}
				21
				22	// Unmarshal parses b as the JSON format.
				23	// It returns a Value, which represents the input as an AST.
				24	func Unmarshal(b []byte) (Value, error) {
				25	p := decoder{in: b}
				26	p.consume(0) // trim leading spaces
				27	v, err := p.unmarshalValue()
				28	if !p.nerr.Merge(err) {
				29	if e, ok := err.(syntaxError); ok {
				30	b = b[:len(b)-len(p.in)] // consumed input
				31	line := bytes.Count(b, []byte("\n")) + 1
				32	if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
				33	b = b[i+1:]
				34	}
				35	column := utf8.RuneCount(b) + 1 // ignore multi-rune characters
				36	err = errors.New("syntax error (line %d:%d): %v", line, column, e.error)
				37	}
				38	return Value{}, err
				39	}
				40	if len(p.in) > 0 {
				41	return Value{}, errors.New("%d bytes of unconsumed input", len(p.in))
				42	}
				43	return v, p.nerr.E
				44	}
				45
				46	type decoder struct {
				47	nerr errors.NonFatal
				48	in []byte
				49	}
				50
				51	var literalRegexp = regexp.MustCompile("^(null\|true\|false)")
				52
				53	func (p *decoder) unmarshalValue() (Value, error) {
				54	if len(p.in) == 0 {
				55	return Value{}, io.ErrUnexpectedEOF
				56	}
				57	switch p.in[0] {
				58	case 'n', 't', 'f':
				59	if n := matchWithDelim(literalRegexp, p.in); n > 0 {
				60	var v Value
				61	switch p.in[0] {
				62	case 'n':
				63	v = rawValueOf(nil, p.in[:n:n])
				64	case 't':
				65	v = rawValueOf(true, p.in[:n:n])
				66	case 'f':
				67	v = rawValueOf(false, p.in[:n:n])
				68	}
				69	p.consume(n)
				70	return v, nil
				71	}
				72	return Value{}, newSyntaxError("invalid %q as literal", errRegexp.Find(p.in))
				73	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
				74	return p.unmarshalNumber()
				75	case '"':
				76	return p.unmarshalString()
				77	case '[':
				78	return p.unmarshalArray()
				79	case '{':
				80	return p.unmarshalObject()
				81	default:
				82	return Value{}, newSyntaxError("invalid %q as value", errRegexp.Find(p.in))
				83	}
				84	}
				85
				86	func (p *decoder) unmarshalArray() (Value, error) {
				87	b := p.in
				88	var elems []Value
				89	if err := p.consumeChar('[', "at start of array"); err != nil {
				90	return Value{}, err
				91	}
				92	if len(p.in) > 0 && p.in[0] != ']' {
				93	for len(p.in) > 0 {
				94	v, err := p.unmarshalValue()
				95	if !p.nerr.Merge(err) {
				96	return Value{}, err
				97	}
				98	elems = append(elems, v)
				99	if !p.tryConsumeChar(',') {
				100	break
				101	}
				102	}
				103	}
				104	if err := p.consumeChar(']', "at end of array"); err != nil {
				105	return Value{}, err
				106	}
				107	b = b[:len(b)-len(p.in)]
				108	return rawValueOf(elems, b[:len(b):len(b)]), nil
				109	}
				110
				111	func (p *decoder) unmarshalObject() (Value, error) {
				112	b := p.in
				113	var items [][2]Value
				114	if err := p.consumeChar('{', "at start of object"); err != nil {
				115	return Value{}, err
				116	}
				117	if len(p.in) > 0 && p.in[0] != '}' {
				118	for len(p.in) > 0 {
				119	k, err := p.unmarshalString()
				120	if !p.nerr.Merge(err) {
				121	return Value{}, err
				122	}
				123	if err := p.consumeChar(':', "in object"); err != nil {
				124	return Value{}, err
				125	}
				126	v, err := p.unmarshalValue()
				127	if !p.nerr.Merge(err) {
				128	return Value{}, err
				129	}
				130	items = append(items, [2]Value{k, v})
				131	if !p.tryConsumeChar(',') {
				132	break
				133	}
				134	}
				135	}
				136	if err := p.consumeChar('}', "at end of object"); err != nil {
				137	return Value{}, err
				138	}
				139	b = b[:len(b)-len(p.in)]
				140	return rawValueOf(items, b[:len(b):len(b)]), nil
				141	}
				142
				143	func (p *decoder) consumeChar(c byte, msg string) error {
				144	if p.tryConsumeChar(c) {
				145	return nil
				146	}
				147	if len(p.in) == 0 {
				148	return io.ErrUnexpectedEOF
				149	}
				150	return newSyntaxError("invalid character %q, expected %q %s", p.in[0], c, msg)
				151	}
				152
				153	func (p *decoder) tryConsumeChar(c byte) bool {
				154	if len(p.in) > 0 && p.in[0] == c {
				155	p.consume(1)
				156	return true
				157	}
				158	return false
				159	}
				160
				161	// consume consumes n bytes of input and any subsequent whitespace.
				162	func (p *decoder) consume(n int) {
				163	p.in = p.in[n:]
				164	for len(p.in) > 0 {
				165	switch p.in[0] {
				166	case ' ', '\n', '\r', '\t':
				167	p.in = p.in[1:]
				168	default:
				169	return
				170	}
				171	}
				172	}
				173
				174	// Any sequence that looks like a non-delimiter (for error reporting).
				175	var errRegexp = regexp.MustCompile("^([-+._a-zA-Z0-9]{1,32}\|.)")
				176
				177	// matchWithDelim matches r with the input b and verifies that the match
				178	// terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]").
				179	// As a special case, EOF is considered a delimiter.
				180	func matchWithDelim(r *regexp.Regexp, b []byte) int {
				181	n := len(r.Find(b))
				182	if n < len(b) {
				183	// Check that that the next character is a delimiter.
				184	c := b[n]
				185	notDelim := (c == '-' \|\| c == '+' \|\| c == '.' \|\| c == '_' \|\|
				186	('a' <= c && c <= 'z') \|\|
				187	('A' <= c && c <= 'Z') \|\|
				188	('0' <= c && c <= '9'))
				189	if notDelim {
				190	return 0
				191	}
				192	}
				193	return n
				194	}