blob: 8dddd78c33f6decc9eb66b23b940f7fa73b6577a [file] [log] [blame]
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
// Package text implements the text format for protocol buffers.
// This package has no semantic understanding for protocol buffers and is only
// a parser and composer for the format.
//
// There is no formal specification for the protobuf text format; as such, the
// C++ implementation (see google::protobuf::TextFormat) is the reference
// implementation of the text format.
//
// This package is neither a superset nor a subset of the C++ implementation.
// This implementation permits a more liberal grammar in some cases to be
// backwards compatible with the historical Go implementation.
// Future parsings unique to Go should not be added.
// Some grammars allowed by the C++ implementation are deliberately
// not implemented here because they are considered a bug by the protobuf team
// and should not be replicated.
//
// The Go implementation should implement a sufficient amount of the C++
// grammar such that the default text serialization by C++ can be parsed by Go.
// However, just because the C++ parser accepts some input does not mean that
// the Go implementation should as well.
//
// The text format is almost a superset of JSON except:
// * message keys are not quoted strings, but identifiers
// * the top-level value must be a message without the delimiters
29package text
30
31import (
32 "fmt"
33 "math"
34 "strings"
35
36 "google.golang.org/proto/internal/flags"
37 "google.golang.org/proto/reflect/protoreflect"
38)
39
// Type identifies the kind of a value expressible in the text format.
type Type uint8

// The zero Type is intentionally left unnamed; String reports it, and any
// other undeclared value, as "<invalid>".
const (
	_ Type = iota

	// Bool is a boolean (e.g., "true" or "false").
	Bool
	// Int is a signed integer (e.g., "-1423").
	Int
	// Uint is an unsigned integer (e.g., "0xdeadbeef").
	Uint
	// Float is a floating-point number (e.g., "1.234" or "1e100").
	Float
	// String is a quoted string (e.g., `"the quick brown fox"`).
	String
	// Name is a protocol buffer identifier (e.g., `field_name`).
	Name
	// List is an ordered list of values (e.g., `[0, "one", true]`).
	List
	// Message is an ordered map of values (e.g., `{key: "value"}`).
	Message
)

// String returns the lowercase name of t (for example "bool" or "message"),
// or "<invalid>" if t is not one of the declared Type constants.
func (t Type) String() string {
	// Indexed by Type; slot 0 stays empty because the zero Type is unnamed.
	names := [...]string{
		Bool:    "bool",
		Int:     "int",
		Uint:    "uint",
		Float:   "float",
		String:  "string",
		Name:    "name",
		List:    "list",
		Message: "message",
	}
	if int(t) < len(names) && names[t] != "" {
		return names[t]
	}
	return "<invalid>"
}
86
87// Value contains a value of a given Type.
88type Value struct {
89 typ Type
90 raw []byte // raw bytes of the serialized data
91 str string // only for String or Name
92 num uint64 // only for Bool, Int, Uint, or Float
93 arr []Value // only for List
94 obj [][2]Value // only for Message
95}
96
97// ValueOf returns a Value for a given Go value:
98// bool => Bool
99// int32, int64 => Int
100// uint32, uint64 => Uint
101// float32, float64 => Float
102// string, []byte => String
103// protoreflect.Name => Name
104// []Value => List
105// [][2]Value => Message
106//
107// ValueOf panics if the Go type is not one of the above.
108func ValueOf(v interface{}) Value {
109 switch v := v.(type) {
110 case bool:
111 if v {
112 return Value{typ: Bool, num: 1}
113 } else {
114 return Value{typ: Bool, num: 0}
115 }
116 case int32:
117 return Value{typ: Int, num: uint64(v)}
118 case int64:
119 return Value{typ: Int, num: uint64(v)}
120 case uint32:
121 return Value{typ: Uint, num: uint64(v)}
122 case uint64:
123 return Value{typ: Uint, num: uint64(v)}
124 case float32:
125 return Value{typ: Float, num: math.Float64bits(float64(v))}
126 case float64:
127 return Value{typ: Float, num: math.Float64bits(float64(v))}
128 case string:
129 return Value{typ: String, str: string(v)}
130 case []byte:
131 return Value{typ: String, str: string(v)}
132 case protoreflect.Name:
133 return Value{typ: Name, str: string(v)}
134 case []Value:
135 return Value{typ: List, arr: v}
136 case [][2]Value:
137 return Value{typ: Message, obj: v}
138 default:
139 panic(fmt.Sprintf("invalid type %T", v))
140 }
141}
142func rawValueOf(v interface{}, raw []byte) Value {
143 v2 := ValueOf(v)
144 v2.raw = raw
145 return v2
146}
147
148// Type is the type of the value. When parsing, this is a best-effort guess
149// at the resulting type. However, there are ambiguities as to the exact type
150// of the value (e.g., "false" is either a bool or a name).
151// Thus, some of the types are convertible with each other.
152// The Bool, Int, Uint, Float, and Name methods return a boolean to report
153// whether the conversion was successful.
154func (v Value) Type() Type {
155 return v.typ
156}
157
158// Bool returns v as a bool and reports whether the conversion succeeded.
159func (v Value) Bool() (x bool, ok bool) {
160 switch v.typ {
161 case Bool:
162 return v.num > 0, true
163 case Uint, Int:
164 // C++ allows a 1-bit unsigned integer (e.g., "0", "1", or "0x1").
165 if len(v.raw) > 0 && v.raw[0] != '-' && v.num < 2 {
166 return v.num > 0, true
167 }
168 }
169 return false, false
170}
171
172// Int returns v as an int64 of the specified precision and reports whether
173// the conversion succeeded.
174func (v Value) Int(b64 bool) (x int64, ok bool) {
175 switch v.typ {
176 case Int:
177 n := int64(v.num)
178 if b64 || (math.MinInt32 <= n && n <= math.MaxInt32) {
179 return int64(n), true
180 }
181 case Uint:
182 n := uint64(v.num)
183 if (!b64 && n <= math.MaxInt32) || (b64 && n <= math.MaxInt64) {
184 return int64(n), true
185 }
186 // C++ accepts large positive hex numbers as negative values.
187 // This feature is here for proto1 backwards compatibility purposes.
188 if flags.Proto1Legacy && len(v.raw) > 1 && v.raw[0] == '0' && v.raw[1] == 'x' {
189 if !b64 {
190 return int64(int32(n)), n <= math.MaxUint32
191 }
192 // if !b64 && n <= math.MaxUint32 {
193 // return int64(int32(n)), true
194 // }
195 return int64(n), true
196 }
197 }
198 return 0, false
199}
200
201// Uint returns v as an uint64 of the specified precision and reports whether
202// the conversion succeeded.
203func (v Value) Uint(b64 bool) (x uint64, ok bool) {
204 switch v.typ {
205 case Int:
206 n := int64(v.num)
207 if len(v.raw) > 0 && v.raw[0] != '-' && (b64 || n <= math.MaxUint32) {
208 return uint64(n), true
209 }
210 case Uint:
211 n := uint64(v.num)
212 if b64 || n <= math.MaxUint32 {
213 return uint64(n), true
214 }
215 }
216 return 0, false
217}
218
219// Float returns v as a float64 of the specified precision and reports whether
220// the conversion succeeded.
221func (v Value) Float(b64 bool) (x float64, ok bool) {
222 switch v.typ {
223 case Int:
224 return float64(int64(v.num)), true // possibly lossy, but allowed
225 case Uint:
226 return float64(uint64(v.num)), true // possibly lossy, but allowed
227 case Float:
228 n := math.Float64frombits(v.num)
229 if math.IsNaN(n) || math.IsInf(n, 0) {
230 return float64(n), true
231 }
232 if b64 || math.Abs(n) <= math.MaxFloat32 {
233 return float64(n), true
234 }
235 }
236 return 0, false
237}
238
239// String returns v as a string if the Type is String.
240// Otherwise, this returns a formatted string of v for debugging purposes.
241//
242// Since String is used to represent both text and binary, it is not validated
243// to contain valid UTF-8. When using this value with the string type in proto,
244// it is the user's responsibility perform additional UTF-8 validation.
245func (v Value) String() string {
246 if v.typ != String {
247 return v.stringValue()
248 }
249 return v.str
250}
251func (v Value) stringValue() string {
252 switch v.typ {
253 case Bool, Int, Uint, Float, Name:
254 return string(v.Raw())
255 case List:
256 var ss []string
257 for _, v := range v.List() {
258 ss = append(ss, v.String())
259 }
260 return "[" + strings.Join(ss, ",") + "]"
261 case Message:
262 var ss []string
263 for _, v := range v.Message() {
264 k := v[0].String()
265 if v[0].Type() == String {
266 k = "[" + k + "]"
267 }
268 ss = append(ss, k+":"+v[1].String())
269 }
270 return "{" + strings.Join(ss, ",") + "}"
271 default:
272 return "<invalid>"
273 }
274}
275
276// Name returns the field name or enum value name and reports whether the value
277// can be treated as an identifier.
278func (v Value) Name() (protoreflect.Name, bool) {
279 switch v.typ {
280 case Bool, Float:
281 // Ambiguity arises in unmarshalValue since "nan" may interpreted as
282 // either a Name type (for enum values) or a Float type.
283 // Similarly, "true" may be interpreted as either a Name or Bool type.
284 n := protoreflect.Name(v.raw)
285 if n.IsValid() {
286 return n, true
287 }
288 case Name:
289 return protoreflect.Name(v.str), true
290 }
291 return "", false
292}
293
294// List returns the elements of v and panics if the Type is not List.
295// Mutations on the return value may not be observable from the Raw method.
296func (v Value) List() []Value {
297 if v.typ != List {
298 panic("value is not a list")
299 }
300 return v.arr
301}
302
303// Message returns the items of v and panics if the Type is not Message.
304// The [2]Value represents a key and value pair, where the key is either
305// a Name (representing a field name), a String (representing extension field
306// names or the Any type URL), or an Uint for unknown fields.
307//
308// Mutations on the return value may not be observable from the Raw method.
309func (v Value) Message() [][2]Value {
310 if v.typ != Message {
311 panic("value is not a message")
312 }
313 return v.obj
314}
315
316// Raw returns the raw representation of the value.
317// The returned value may alias the input given to Unmarshal.
318func (v Value) Raw() []byte {
319 if len(v.raw) > 0 {
320 return v.raw
321 }
322 p := encoder{}
323 if err := p.marshalValue(v); !p.nerr.Merge(err) {
324 return []byte("<invalid>")
325 }
326 return p.out
327}