Blame - internal/encoding/text/decode.go - platform/external/golang-protobuf

blob: 2b32ed9ee35e879c7b2a4e4cb84311eee8463a85 [file] [log] [blame]

Joe Tsai	27c2a76	2018-08-01 16:48:18 -0700	[diff] [blame]	1	// Copyright 2018 The Go Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style
				3	// license that can be found in the LICENSE file.
				4
				5	package text
				6
				7	import (
				8	"bytes"
				9	"io"
				10	"regexp"
				11	"unicode/utf8"
				12
Damien Neil	e89e624	2019-05-13 23:55:40 -0700	[diff] [blame]	13	"google.golang.org/protobuf/internal/errors"
				14	"google.golang.org/protobuf/reflect/protoreflect"
Joe Tsai	27c2a76	2018-08-01 16:48:18 -0700	[diff] [blame]	15	)
				16
				17	type syntaxError struct{ error }
				18
				19	func newSyntaxError(f string, x ...interface{}) error {
				20	return syntaxError{errors.New(f, x...)}
				21	}
				22
				23	// Unmarshal parses b as the proto text format.
				24	// It returns a Value, which is always of the Message type.
				25	func Unmarshal(b []byte) (Value, error) {
				26	p := decoder{in: b}
				27	p.consume(0) // trim leading spaces or comments
				28	v, err := p.unmarshalMessage(false)
Damien Neil	8c86fc5	2019-06-19 09:28:29 -0700	[diff] [blame]	29	if err != nil {
Joe Tsai	27c2a76	2018-08-01 16:48:18 -0700	[diff] [blame]	30	if e, ok := err.(syntaxError); ok {
				31	b = b[:len(b)-len(p.in)] // consumed input
				32	line := bytes.Count(b, []byte("\n")) + 1
				33	if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
				34	b = b[i+1:]
				35	}
				36	column := utf8.RuneCount(b) + 1 // ignore multi-rune characters
				37	err = errors.New("syntax error (line %d:%d): %v", line, column, e.error)
				38	}
				39	return Value{}, err
				40	}
				41	if len(p.in) > 0 {
				42	return Value{}, errors.New("%d bytes of unconsumed input", len(p.in))
				43	}
Damien Neil	8c86fc5	2019-06-19 09:28:29 -0700	[diff] [blame]	44	return v, nil
Joe Tsai	27c2a76	2018-08-01 16:48:18 -0700	[diff] [blame]	45	}
				46
// decoder holds the state of an in-progress parse of text-format input.
type decoder struct {
	// in is the input that has not been consumed yet. The decoding methods
	// advance through the input by re-slicing this field.
	in []byte
}
				50
				51	func (p *decoder) unmarshalList() (Value, error) {
				52	b := p.in
				53	var elems []Value
				54	if err := p.consumeChar('[', "at start of list"); err != nil {
				55	return Value{}, err
				56	}
				57	if len(p.in) > 0 && p.in[0] != ']' {
				58	for len(p.in) > 0 {
				59	v, err := p.unmarshalValue()
Damien Neil	8c86fc5	2019-06-19 09:28:29 -0700	[diff] [blame]	60	if err != nil {
Joe Tsai	27c2a76	2018-08-01 16:48:18 -0700	[diff] [blame]	61	return Value{}, err
				62	}
				63	elems = append(elems, v)
				64	if !p.tryConsumeChar(',') {
				65	break
				66	}
				67	}
				68	}
				69	if err := p.consumeChar(']', "at end of list"); err != nil {
				70	return Value{}, err
				71	}
				72	b = b[:len(b)-len(p.in)]
				73	return rawValueOf(elems, b[:len(b):len(b)]), nil
				74	}
				75
				76	func (p *decoder) unmarshalMessage(checkDelims bool) (Value, error) {
				77	b := p.in
				78	var items [][2]Value
				79	delims := [2]byte{'{', '}'}
				80	if len(p.in) > 0 && p.in[0] == '<' {
				81	delims = [2]byte{'<', '>'}
				82	}
				83	if checkDelims {
				84	if err := p.consumeChar(delims[0], "at start of message"); err != nil {
				85	return Value{}, err
				86	}
				87	}
				88	for len(p.in) > 0 {
				89	if p.in[0] == '}' \|\| p.in[0] == '>' {
				90	break
				91	}
				92	k, err := p.unmarshalKey()
Damien Neil	8c86fc5	2019-06-19 09:28:29 -0700	[diff] [blame]	93	if err != nil {
Joe Tsai	27c2a76	2018-08-01 16:48:18 -0700	[diff] [blame]	94	return Value{}, err
				95	}
				96	if !p.tryConsumeChar(':') && len(p.in) > 0 && p.in[0] != '{' && p.in[0] != '<' {
				97	return Value{}, newSyntaxError("expected ':' after message key")
				98	}
				99	v, err := p.unmarshalValue()
Damien Neil	8c86fc5	2019-06-19 09:28:29 -0700	[diff] [blame]	100	if err != nil {
Joe Tsai	27c2a76	2018-08-01 16:48:18 -0700	[diff] [blame]	101	return Value{}, err
				102	}
				103	if p.tryConsumeChar(';') \|\| p.tryConsumeChar(',') {
				104	// always optional
				105	}
				106	items = append(items, [2]Value{k, v})
				107	}
				108	if checkDelims {
				109	if err := p.consumeChar(delims[1], "at end of message"); err != nil {
				110	return Value{}, err
				111	}
				112	}
				113	b = b[:len(b)-len(p.in)]
				114	return rawValueOf(items, b[:len(b):len(b)]), nil
				115	}
				116
// urlRegexp matches the unquoted contents of a bracketed key (an extension
// name or an Any type URL) at the start of the input: one or more segments
// made of letters, digits, '-' or '_', separated by '.' or '/'.
//
// This expression is more liberal than ConsumeAnyTypeUrl in C++.
// However, the C++ parser does not handle many legal URL strings.
// The Go implementation is more liberal to be backwards compatible with
// the historical Go implementation which was overly liberal (and buggy).
var urlRegexp = regexp.MustCompile(`^[-_a-zA-Z0-9]+([./][-_a-zA-Z0-9]+)*`)
				122
				123	// unmarshalKey parses the key, which may be a Name, String, or Uint.
				124	func (p *decoder) unmarshalKey() (v Value, err error) {
				125	if p.tryConsumeChar('[') {
				126	if len(p.in) == 0 {
				127	return Value{}, io.ErrUnexpectedEOF
				128	}
				129	if p.in[0] == '\'' \|\| p.in[0] == '"' {
				130	// Historically, Go's parser allowed a string for the Any type URL.
				131	// This is specific to Go and contrary to the C++ implementation,
				132	// which does not support strings for the Any type URL.
				133	v, err = p.unmarshalString()
Damien Neil	8c86fc5	2019-06-19 09:28:29 -0700	[diff] [blame]	134	if err != nil {
Joe Tsai	27c2a76	2018-08-01 16:48:18 -0700	[diff] [blame]	135	return Value{}, err
				136	}
				137	} else if n := matchWithDelim(urlRegexp, p.in); n > 0 {
				138	v = rawValueOf(string(p.in[:n]), p.in[:n:n])
				139	p.consume(n)
				140	} else {
				141	return Value{}, newSyntaxError("invalid %q as identifier", errRegexp.Find(p.in))
				142	}
				143	if err := p.consumeChar(']', "at end of extension name"); err != nil {
				144	return Value{}, err
				145	}
				146	return v, nil
				147	}
				148	if matchWithDelim(intRegexp, p.in) > 0 && p.in[0] != '-' {
				149	return p.unmarshalNumber()
				150	}
				151	return p.unmarshalName()
				152	}
				153
				154	func (p *decoder) unmarshalValue() (Value, error) {
				155	if len(p.in) == 0 {
				156	return Value{}, io.ErrUnexpectedEOF
				157	}
				158	switch p.in[0] {
				159	case '"', '\'':
				160	return p.unmarshalStrings()
				161	case '[':
				162	return p.unmarshalList()
				163	case '{', '<':
				164	return p.unmarshalMessage(true)
				165	default:
				166	n := matchWithDelim(nameRegexp, p.in) // zero if no match
				167	if n > 0 && literals[string(p.in[:n])] == nil {
				168	return p.unmarshalName()
				169	}
				170	return p.unmarshalNumber()
				171	}
				172	}
				173
// nameRegexp matches a valid proto identifier at the start of the input:
// a letter or underscore followed by any number of letters, digits, or
// underscores.
var nameRegexp = regexp.MustCompile(`^[_a-zA-Z][_a-zA-Z0-9]*`)
				176
				177	// unmarshalName unmarshals an unquoted identifier.
				178	//
				179	// E.g., `field_name` => ValueOf(protoreflect.Name("field_name"))
				180	func (p *decoder) unmarshalName() (Value, error) {
				181	if n := matchWithDelim(nameRegexp, p.in); n > 0 {
				182	v := rawValueOf(protoreflect.Name(p.in[:n]), p.in[:n:n])
				183	p.consume(n)
				184	return v, nil
				185	}
				186	return Value{}, newSyntaxError("invalid %q as identifier", errRegexp.Find(p.in))
				187	}
				188
				189	func (p *decoder) consumeChar(c byte, msg string) error {
				190	if p.tryConsumeChar(c) {
				191	return nil
				192	}
				193	if len(p.in) == 0 {
				194	return io.ErrUnexpectedEOF
				195	}
				196	return newSyntaxError("invalid character %q, expected %q %s", p.in[0], c, msg)
				197	}
				198
				199	func (p *decoder) tryConsumeChar(c byte) bool {
				200	if len(p.in) > 0 && p.in[0] == c {
				201	p.consume(1)
				202	return true
				203	}
				204	return false
				205	}
				206
				207	// consume consumes n bytes of input and any subsequent whitespace or comments.
				208	func (p *decoder) consume(n int) {
				209	p.in = p.in[n:]
				210	for len(p.in) > 0 {
				211	switch p.in[0] {
				212	case ' ', '\n', '\r', '\t':
				213	p.in = p.in[1:]
				214	case '#':
				215	if i := bytes.IndexByte(p.in, '\n'); i >= 0 {
				216	p.in = p.in[i+len("\n"):]
				217	} else {
				218	p.in = nil
				219	}
				220	default:
				221	return
				222	}
				223	}
				224	}
				225
				226	// Any sequence that looks like a non-delimiter (for error reporting).
				227	var errRegexp = regexp.MustCompile("^([-+._a-zA-Z0-9]{1,32}\|.)")
				228
				229	// matchWithDelim matches r with the input b and verifies that the match
				230	// terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]").
				231	// As a special case, EOF is considered a delimiter.
				232	func matchWithDelim(r *regexp.Regexp, b []byte) int {
				233	n := len(r.Find(b))
				234	if n < len(b) {
				235	// Check that that the next character is a delimiter.
				236	c := b[n]
				237	notDelim := (c == '-' \|\| c == '+' \|\| c == '.' \|\| c == '_' \|\|
				238	('a' <= c && c <= 'z') \|\|
				239	('A' <= c && c <= 'Z') \|\|
				240	('0' <= c && c <= '9'))
				241	if notDelim {
				242	return 0
				243	}
				244	}
				245	return n
				246	}