blob: 2498f66f4cc40732a171128fc58eeeedf894a799 [file] [log] [blame]
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package text implements the text format for protocol buffers.
// This package has no semantic understanding of protocol buffers and is only
// a parser and composer for the format.
//
// There is no formal specification for the protobuf text format; as such, the
// C++ implementation (see google::protobuf::TextFormat) is the reference
// implementation of the text format.
//
// This package is neither a superset nor a subset of the C++ implementation.
// This implementation permits a more liberal grammar in some cases to be
// backwards compatible with the historical Go implementation.
// New parsing behaviors unique to Go should not be added.
// Some grammars allowed by the C++ implementation are deliberately
// not implemented here because they are considered bugs by the protobuf team
// and should not be replicated.
//
// The Go implementation should implement a sufficient amount of the C++
// grammar such that the default text serialization by C++ can be parsed by Go.
// However, just because the C++ parser accepts some input does not mean that
// the Go implementation should as well.
//
// The text format is almost a superset of JSON except:
// * message keys are not quoted strings, but identifiers
// * the top-level value must be a message without the delimiters
29package text
30
31import (
32 "fmt"
33 "math"
34 "strings"
35
Joe Tsai01ab2962018-09-21 17:44:00 -070036 "github.com/golang/protobuf/v2/internal/flags"
37 "github.com/golang/protobuf/v2/reflect/protoreflect"
Joe Tsai27c2a762018-08-01 16:48:18 -070038)
39
// Type identifies the kind of a value expressible in the text format.
type Type uint8

// The valid Type values start at 1; the zero Type is deliberately left
// unnamed so that an uninitialized Type is reported as invalid.
const (
	// Bool is a boolean (e.g., "true" or "false").
	Bool Type = iota + 1
	// Int is a signed integer (e.g., "-1423").
	Int
	// Uint is an unsigned integer (e.g., "0xdeadbeef").
	Uint
	// Float32 is a 32-bit floating-point number (e.g., "1.234" or "1e38").
	// This allows encoding to differentiate the bitsize used for formatting.
	Float32
	// Float64 is a 64-bit floating-point number.
	Float64
	// String is a quoted string (e.g., `"the quick brown fox"`).
	String
	// Name is a protocol buffer identifier (e.g., `field_name`).
	Name
	// List is an ordered list of values (e.g., `[0, "one", true]`).
	List
	// Message is an ordered map of values (e.g., `{"key": null}`).
	Message
)

// String returns the lowercase name of t, or "<invalid>" if t is not one of
// the declared Type constants.
func (t Type) String() string {
	// Indexed by Type; slot 0 (the unnamed zero Type) stays empty.
	names := [...]string{
		Bool:    "bool",
		Int:     "int",
		Uint:    "uint",
		Float32: "float32",
		Float64: "float64",
		String:  "string",
		Name:    "name",
		List:    "list",
		Message: "message",
	}
	if int(t) < len(names) && names[t] != "" {
		return names[t]
	}
	return "<invalid>"
}
91
92// Value contains a value of a given Type.
93type Value struct {
94 typ Type
95 raw []byte // raw bytes of the serialized data
96 str string // only for String or Name
Herbie Ong84f09602019-01-17 19:31:47 -080097 num uint64 // only for Bool, Int, Uint, Float32, or Float64
Joe Tsai27c2a762018-08-01 16:48:18 -070098 arr []Value // only for List
99 obj [][2]Value // only for Message
100}
101
102// ValueOf returns a Value for a given Go value:
103// bool => Bool
104// int32, int64 => Int
105// uint32, uint64 => Uint
Herbie Ong84f09602019-01-17 19:31:47 -0800106// float32 => Float32
107// float64 => Float64
Joe Tsai27c2a762018-08-01 16:48:18 -0700108// string, []byte => String
109// protoreflect.Name => Name
110// []Value => List
111// [][2]Value => Message
112//
113// ValueOf panics if the Go type is not one of the above.
114func ValueOf(v interface{}) Value {
115 switch v := v.(type) {
116 case bool:
117 if v {
118 return Value{typ: Bool, num: 1}
119 } else {
120 return Value{typ: Bool, num: 0}
121 }
122 case int32:
123 return Value{typ: Int, num: uint64(v)}
124 case int64:
125 return Value{typ: Int, num: uint64(v)}
126 case uint32:
127 return Value{typ: Uint, num: uint64(v)}
128 case uint64:
129 return Value{typ: Uint, num: uint64(v)}
130 case float32:
Herbie Ong84f09602019-01-17 19:31:47 -0800131 // Store as float64 bits.
132 return Value{typ: Float32, num: math.Float64bits(float64(v))}
Joe Tsai27c2a762018-08-01 16:48:18 -0700133 case float64:
Herbie Ong84f09602019-01-17 19:31:47 -0800134 return Value{typ: Float64, num: math.Float64bits(float64(v))}
Joe Tsai27c2a762018-08-01 16:48:18 -0700135 case string:
136 return Value{typ: String, str: string(v)}
137 case []byte:
138 return Value{typ: String, str: string(v)}
139 case protoreflect.Name:
140 return Value{typ: Name, str: string(v)}
141 case []Value:
142 return Value{typ: List, arr: v}
143 case [][2]Value:
144 return Value{typ: Message, obj: v}
145 default:
146 panic(fmt.Sprintf("invalid type %T", v))
147 }
148}
149func rawValueOf(v interface{}, raw []byte) Value {
150 v2 := ValueOf(v)
151 v2.raw = raw
152 return v2
153}
154
155// Type is the type of the value. When parsing, this is a best-effort guess
156// at the resulting type. However, there are ambiguities as to the exact type
157// of the value (e.g., "false" is either a bool or a name).
158// Thus, some of the types are convertible with each other.
Herbie Ong84f09602019-01-17 19:31:47 -0800159// The Bool, Int, Uint, Float32, Float64, and Name methods return a boolean to
160// report whether the conversion was successful.
Joe Tsai27c2a762018-08-01 16:48:18 -0700161func (v Value) Type() Type {
162 return v.typ
163}
164
165// Bool returns v as a bool and reports whether the conversion succeeded.
166func (v Value) Bool() (x bool, ok bool) {
167 switch v.typ {
168 case Bool:
169 return v.num > 0, true
170 case Uint, Int:
171 // C++ allows a 1-bit unsigned integer (e.g., "0", "1", or "0x1").
172 if len(v.raw) > 0 && v.raw[0] != '-' && v.num < 2 {
173 return v.num > 0, true
174 }
175 }
176 return false, false
177}
178
179// Int returns v as an int64 of the specified precision and reports whether
180// the conversion succeeded.
181func (v Value) Int(b64 bool) (x int64, ok bool) {
182 switch v.typ {
183 case Int:
184 n := int64(v.num)
185 if b64 || (math.MinInt32 <= n && n <= math.MaxInt32) {
186 return int64(n), true
187 }
188 case Uint:
189 n := uint64(v.num)
190 if (!b64 && n <= math.MaxInt32) || (b64 && n <= math.MaxInt64) {
191 return int64(n), true
192 }
193 // C++ accepts large positive hex numbers as negative values.
194 // This feature is here for proto1 backwards compatibility purposes.
195 if flags.Proto1Legacy && len(v.raw) > 1 && v.raw[0] == '0' && v.raw[1] == 'x' {
196 if !b64 {
197 return int64(int32(n)), n <= math.MaxUint32
198 }
199 // if !b64 && n <= math.MaxUint32 {
200 // return int64(int32(n)), true
201 // }
202 return int64(n), true
203 }
204 }
205 return 0, false
206}
207
208// Uint returns v as an uint64 of the specified precision and reports whether
209// the conversion succeeded.
210func (v Value) Uint(b64 bool) (x uint64, ok bool) {
211 switch v.typ {
212 case Int:
213 n := int64(v.num)
214 if len(v.raw) > 0 && v.raw[0] != '-' && (b64 || n <= math.MaxUint32) {
215 return uint64(n), true
216 }
217 case Uint:
218 n := uint64(v.num)
219 if b64 || n <= math.MaxUint32 {
220 return uint64(n), true
221 }
222 }
223 return 0, false
224}
225
Herbie Ong250c6ea2019-03-12 20:55:10 -0700226// Float returns v as a float64 of the specified precision and reports whether
Joe Tsai27c2a762018-08-01 16:48:18 -0700227// the conversion succeeded.
Herbie Ong250c6ea2019-03-12 20:55:10 -0700228func (v Value) Float(b64 bool) (x float64, ok bool) {
Joe Tsai27c2a762018-08-01 16:48:18 -0700229 switch v.typ {
230 case Int:
231 return float64(int64(v.num)), true // possibly lossy, but allowed
232 case Uint:
233 return float64(uint64(v.num)), true // possibly lossy, but allowed
Herbie Ong250c6ea2019-03-12 20:55:10 -0700234 case Float32, Float64:
Joe Tsai27c2a762018-08-01 16:48:18 -0700235 n := math.Float64frombits(v.num)
Herbie Ong250c6ea2019-03-12 20:55:10 -0700236 if math.IsNaN(n) || math.IsInf(n, 0) {
237 return float64(n), true
238 }
239 if b64 || math.Abs(n) <= math.MaxFloat32 {
240 return float64(n), true
241 }
Joe Tsai27c2a762018-08-01 16:48:18 -0700242 }
243 return 0, false
244}
245
246// String returns v as a string if the Type is String.
247// Otherwise, this returns a formatted string of v for debugging purposes.
248//
249// Since String is used to represent both text and binary, it is not validated
250// to contain valid UTF-8. When using this value with the string type in proto,
251// it is the user's responsibility perform additional UTF-8 validation.
252func (v Value) String() string {
253 if v.typ != String {
254 return v.stringValue()
255 }
256 return v.str
257}
258func (v Value) stringValue() string {
259 switch v.typ {
Herbie Ong84f09602019-01-17 19:31:47 -0800260 case Bool, Int, Uint, Float32, Float64, Name:
Joe Tsai27c2a762018-08-01 16:48:18 -0700261 return string(v.Raw())
262 case List:
263 var ss []string
264 for _, v := range v.List() {
265 ss = append(ss, v.String())
266 }
267 return "[" + strings.Join(ss, ",") + "]"
268 case Message:
269 var ss []string
270 for _, v := range v.Message() {
271 k := v[0].String()
272 if v[0].Type() == String {
273 k = "[" + k + "]"
274 }
275 ss = append(ss, k+":"+v[1].String())
276 }
277 return "{" + strings.Join(ss, ",") + "}"
278 default:
279 return "<invalid>"
280 }
281}
282
283// Name returns the field name or enum value name and reports whether the value
284// can be treated as an identifier.
285func (v Value) Name() (protoreflect.Name, bool) {
286 switch v.typ {
Herbie Ong84f09602019-01-17 19:31:47 -0800287 case Bool, Float32, Float64:
Joe Tsai27c2a762018-08-01 16:48:18 -0700288 // Ambiguity arises in unmarshalValue since "nan" may interpreted as
Herbie Ong84f09602019-01-17 19:31:47 -0800289 // either a Name type (for enum values) or a Float32/Float64 type.
Joe Tsai27c2a762018-08-01 16:48:18 -0700290 // Similarly, "true" may be interpreted as either a Name or Bool type.
291 n := protoreflect.Name(v.raw)
292 if n.IsValid() {
293 return n, true
294 }
295 case Name:
296 return protoreflect.Name(v.str), true
297 }
298 return "", false
299}
300
301// List returns the elements of v and panics if the Type is not List.
302// Mutations on the return value may not be observable from the Raw method.
303func (v Value) List() []Value {
304 if v.typ != List {
305 panic("value is not a list")
306 }
307 return v.arr
308}
309
310// Message returns the items of v and panics if the Type is not Message.
311// The [2]Value represents a key and value pair, where the key is either
312// a Name (representing a field name), a String (representing extension field
313// names or the Any type URL), or an Uint for unknown fields.
314//
315// Mutations on the return value may not be observable from the Raw method.
316func (v Value) Message() [][2]Value {
317 if v.typ != Message {
318 panic("value is not a message")
319 }
320 return v.obj
321}
322
323// Raw returns the raw representation of the value.
324// The returned value may alias the input given to Unmarshal.
325func (v Value) Raw() []byte {
326 if len(v.raw) > 0 {
327 return v.raw
328 }
329 p := encoder{}
330 if err := p.marshalValue(v); !p.nerr.Merge(err) {
331 return []byte("<invalid>")
332 }
333 return p.out
334}