Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 1 | // Copyright 2018 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | // Package text implements the text format for protocol buffers. |
| 6 | // This package has no semantic understanding for protocol buffers and is only |
| 7 | // a parser and composer for the format. |
| 8 | // |
// There is no formal specification for the protobuf text format; as such, the
// C++ implementation (see google::protobuf::TextFormat) is the reference
// implementation of the text format.
| 12 | // |
| 13 | // This package is neither a superset nor a subset of the C++ implementation. |
| 14 | // This implementation permits a more liberal grammar in some cases to be |
| 15 | // backwards compatible with the historical Go implementation. |
| 16 | // Future parsings unique to Go should not be added. |
| 17 | // Some grammars allowed by the C++ implementation are deliberately |
| 18 | // not implemented here because they are considered a bug by the protobuf team |
| 19 | // and should not be replicated. |
| 20 | // |
| 21 | // The Go implementation should implement a sufficient amount of the C++ |
| 22 | // grammar such that the default text serialization by C++ can be parsed by Go. |
| 23 | // However, just because the C++ parser accepts some input does not mean that |
| 24 | // the Go implementation should as well. |
| 25 | // |
| 26 | // The text format is almost a superset of JSON except: |
| 27 | // * message keys are not quoted strings, but identifiers |
| 28 | // * the top-level value must be a message without the delimiters |
| 29 | package text |
| 30 | |
| 31 | import ( |
| 32 | "fmt" |
| 33 | "math" |
| 34 | "strings" |
| 35 | |
Joe Tsai | 01ab296 | 2018-09-21 17:44:00 -0700 | [diff] [blame] | 36 | "github.com/golang/protobuf/v2/internal/flags" |
| 37 | "github.com/golang/protobuf/v2/reflect/protoreflect" |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 38 | ) |
| 39 | |
// Type identifies the kind of a value expressible in the text format.
// The zero Type is not a valid kind; its String form is "<invalid>".
type Type uint8

const (
	// Bool is a boolean (e.g., "true" or "false").
	Bool Type = iota + 1
	// Int is a signed integer (e.g., "-1423").
	Int
	// Uint is an unsigned integer (e.g., "0xdeadbeef").
	Uint
	// Float32 is a 32-bit floating-point number (e.g., "1.234" or "1e38").
	// Keeping it distinct from Float64 lets the encoder pick the bit size
	// used for formatting.
	Float32
	// Float64 is a 64-bit floating-point number.
	Float64
	// String is a quoted string (e.g., `"the quick brown fox"`).
	String
	// Name is a protocol buffer identifier (e.g., `field_name`).
	Name
	// List is an ordered list of values (e.g., `[0, "one", true]`).
	List
	// Message is an ordered list of key/value pairs (e.g., `{key: "value"}`).
	Message
)

// String returns a lowercase, human-readable name for t,
// or "<invalid>" if t is not one of the declared types.
func (t Type) String() string {
	// Lookup table indexed by Type; index 0 is intentionally left empty.
	names := [...]string{
		Bool:    "bool",
		Int:     "int",
		Uint:    "uint",
		Float32: "float32",
		Float64: "float64",
		String:  "string",
		Name:    "name",
		List:    "list",
		Message: "message",
	}
	if Bool <= t && int(t) < len(names) {
		return names[t]
	}
	return "<invalid>"
}
| 91 | |
| 92 | // Value contains a value of a given Type. |
| 93 | type Value struct { |
| 94 | typ Type |
| 95 | raw []byte // raw bytes of the serialized data |
| 96 | str string // only for String or Name |
Herbie Ong | 84f0960 | 2019-01-17 19:31:47 -0800 | [diff] [blame] | 97 | num uint64 // only for Bool, Int, Uint, Float32, or Float64 |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 98 | arr []Value // only for List |
| 99 | obj [][2]Value // only for Message |
| 100 | } |
| 101 | |
| 102 | // ValueOf returns a Value for a given Go value: |
| 103 | // bool => Bool |
| 104 | // int32, int64 => Int |
| 105 | // uint32, uint64 => Uint |
Herbie Ong | 84f0960 | 2019-01-17 19:31:47 -0800 | [diff] [blame] | 106 | // float32 => Float32 |
| 107 | // float64 => Float64 |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 108 | // string, []byte => String |
| 109 | // protoreflect.Name => Name |
| 110 | // []Value => List |
| 111 | // [][2]Value => Message |
| 112 | // |
| 113 | // ValueOf panics if the Go type is not one of the above. |
| 114 | func ValueOf(v interface{}) Value { |
| 115 | switch v := v.(type) { |
| 116 | case bool: |
| 117 | if v { |
| 118 | return Value{typ: Bool, num: 1} |
| 119 | } else { |
| 120 | return Value{typ: Bool, num: 0} |
| 121 | } |
| 122 | case int32: |
| 123 | return Value{typ: Int, num: uint64(v)} |
| 124 | case int64: |
| 125 | return Value{typ: Int, num: uint64(v)} |
| 126 | case uint32: |
| 127 | return Value{typ: Uint, num: uint64(v)} |
| 128 | case uint64: |
| 129 | return Value{typ: Uint, num: uint64(v)} |
| 130 | case float32: |
Herbie Ong | 84f0960 | 2019-01-17 19:31:47 -0800 | [diff] [blame] | 131 | // Store as float64 bits. |
| 132 | return Value{typ: Float32, num: math.Float64bits(float64(v))} |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 133 | case float64: |
Herbie Ong | 84f0960 | 2019-01-17 19:31:47 -0800 | [diff] [blame] | 134 | return Value{typ: Float64, num: math.Float64bits(float64(v))} |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 135 | case string: |
| 136 | return Value{typ: String, str: string(v)} |
| 137 | case []byte: |
| 138 | return Value{typ: String, str: string(v)} |
| 139 | case protoreflect.Name: |
| 140 | return Value{typ: Name, str: string(v)} |
| 141 | case []Value: |
| 142 | return Value{typ: List, arr: v} |
| 143 | case [][2]Value: |
| 144 | return Value{typ: Message, obj: v} |
| 145 | default: |
| 146 | panic(fmt.Sprintf("invalid type %T", v)) |
| 147 | } |
| 148 | } |
| 149 | func rawValueOf(v interface{}, raw []byte) Value { |
| 150 | v2 := ValueOf(v) |
| 151 | v2.raw = raw |
| 152 | return v2 |
| 153 | } |
| 154 | |
| 155 | // Type is the type of the value. When parsing, this is a best-effort guess |
| 156 | // at the resulting type. However, there are ambiguities as to the exact type |
| 157 | // of the value (e.g., "false" is either a bool or a name). |
| 158 | // Thus, some of the types are convertible with each other. |
Herbie Ong | 84f0960 | 2019-01-17 19:31:47 -0800 | [diff] [blame] | 159 | // The Bool, Int, Uint, Float32, Float64, and Name methods return a boolean to |
| 160 | // report whether the conversion was successful. |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 161 | func (v Value) Type() Type { |
| 162 | return v.typ |
| 163 | } |
| 164 | |
| 165 | // Bool returns v as a bool and reports whether the conversion succeeded. |
| 166 | func (v Value) Bool() (x bool, ok bool) { |
| 167 | switch v.typ { |
| 168 | case Bool: |
| 169 | return v.num > 0, true |
| 170 | case Uint, Int: |
| 171 | // C++ allows a 1-bit unsigned integer (e.g., "0", "1", or "0x1"). |
| 172 | if len(v.raw) > 0 && v.raw[0] != '-' && v.num < 2 { |
| 173 | return v.num > 0, true |
| 174 | } |
| 175 | } |
| 176 | return false, false |
| 177 | } |
| 178 | |
| 179 | // Int returns v as an int64 of the specified precision and reports whether |
| 180 | // the conversion succeeded. |
| 181 | func (v Value) Int(b64 bool) (x int64, ok bool) { |
| 182 | switch v.typ { |
| 183 | case Int: |
| 184 | n := int64(v.num) |
| 185 | if b64 || (math.MinInt32 <= n && n <= math.MaxInt32) { |
| 186 | return int64(n), true |
| 187 | } |
| 188 | case Uint: |
| 189 | n := uint64(v.num) |
| 190 | if (!b64 && n <= math.MaxInt32) || (b64 && n <= math.MaxInt64) { |
| 191 | return int64(n), true |
| 192 | } |
| 193 | // C++ accepts large positive hex numbers as negative values. |
| 194 | // This feature is here for proto1 backwards compatibility purposes. |
| 195 | if flags.Proto1Legacy && len(v.raw) > 1 && v.raw[0] == '0' && v.raw[1] == 'x' { |
| 196 | if !b64 { |
| 197 | return int64(int32(n)), n <= math.MaxUint32 |
| 198 | } |
| 199 | // if !b64 && n <= math.MaxUint32 { |
| 200 | // return int64(int32(n)), true |
| 201 | // } |
| 202 | return int64(n), true |
| 203 | } |
| 204 | } |
| 205 | return 0, false |
| 206 | } |
| 207 | |
| 208 | // Uint returns v as an uint64 of the specified precision and reports whether |
| 209 | // the conversion succeeded. |
| 210 | func (v Value) Uint(b64 bool) (x uint64, ok bool) { |
| 211 | switch v.typ { |
| 212 | case Int: |
| 213 | n := int64(v.num) |
| 214 | if len(v.raw) > 0 && v.raw[0] != '-' && (b64 || n <= math.MaxUint32) { |
| 215 | return uint64(n), true |
| 216 | } |
| 217 | case Uint: |
| 218 | n := uint64(v.num) |
| 219 | if b64 || n <= math.MaxUint32 { |
| 220 | return uint64(n), true |
| 221 | } |
| 222 | } |
| 223 | return 0, false |
| 224 | } |
| 225 | |
Herbie Ong | 250c6ea | 2019-03-12 20:55:10 -0700 | [diff] [blame] | 226 | // Float returns v as a float64 of the specified precision and reports whether |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 227 | // the conversion succeeded. |
Herbie Ong | 250c6ea | 2019-03-12 20:55:10 -0700 | [diff] [blame] | 228 | func (v Value) Float(b64 bool) (x float64, ok bool) { |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 229 | switch v.typ { |
| 230 | case Int: |
| 231 | return float64(int64(v.num)), true // possibly lossy, but allowed |
| 232 | case Uint: |
| 233 | return float64(uint64(v.num)), true // possibly lossy, but allowed |
Herbie Ong | 250c6ea | 2019-03-12 20:55:10 -0700 | [diff] [blame] | 234 | case Float32, Float64: |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 235 | n := math.Float64frombits(v.num) |
Herbie Ong | 250c6ea | 2019-03-12 20:55:10 -0700 | [diff] [blame] | 236 | if math.IsNaN(n) || math.IsInf(n, 0) { |
| 237 | return float64(n), true |
| 238 | } |
| 239 | if b64 || math.Abs(n) <= math.MaxFloat32 { |
| 240 | return float64(n), true |
| 241 | } |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 242 | } |
| 243 | return 0, false |
| 244 | } |
| 245 | |
| 246 | // String returns v as a string if the Type is String. |
| 247 | // Otherwise, this returns a formatted string of v for debugging purposes. |
| 248 | // |
| 249 | // Since String is used to represent both text and binary, it is not validated |
| 250 | // to contain valid UTF-8. When using this value with the string type in proto, |
| 251 | // it is the user's responsibility perform additional UTF-8 validation. |
| 252 | func (v Value) String() string { |
| 253 | if v.typ != String { |
| 254 | return v.stringValue() |
| 255 | } |
| 256 | return v.str |
| 257 | } |
| 258 | func (v Value) stringValue() string { |
| 259 | switch v.typ { |
Herbie Ong | 84f0960 | 2019-01-17 19:31:47 -0800 | [diff] [blame] | 260 | case Bool, Int, Uint, Float32, Float64, Name: |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 261 | return string(v.Raw()) |
| 262 | case List: |
| 263 | var ss []string |
| 264 | for _, v := range v.List() { |
| 265 | ss = append(ss, v.String()) |
| 266 | } |
| 267 | return "[" + strings.Join(ss, ",") + "]" |
| 268 | case Message: |
| 269 | var ss []string |
| 270 | for _, v := range v.Message() { |
| 271 | k := v[0].String() |
| 272 | if v[0].Type() == String { |
| 273 | k = "[" + k + "]" |
| 274 | } |
| 275 | ss = append(ss, k+":"+v[1].String()) |
| 276 | } |
| 277 | return "{" + strings.Join(ss, ",") + "}" |
| 278 | default: |
| 279 | return "<invalid>" |
| 280 | } |
| 281 | } |
| 282 | |
| 283 | // Name returns the field name or enum value name and reports whether the value |
| 284 | // can be treated as an identifier. |
| 285 | func (v Value) Name() (protoreflect.Name, bool) { |
| 286 | switch v.typ { |
Herbie Ong | 84f0960 | 2019-01-17 19:31:47 -0800 | [diff] [blame] | 287 | case Bool, Float32, Float64: |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 288 | // Ambiguity arises in unmarshalValue since "nan" may interpreted as |
Herbie Ong | 84f0960 | 2019-01-17 19:31:47 -0800 | [diff] [blame] | 289 | // either a Name type (for enum values) or a Float32/Float64 type. |
Joe Tsai | 27c2a76 | 2018-08-01 16:48:18 -0700 | [diff] [blame] | 290 | // Similarly, "true" may be interpreted as either a Name or Bool type. |
| 291 | n := protoreflect.Name(v.raw) |
| 292 | if n.IsValid() { |
| 293 | return n, true |
| 294 | } |
| 295 | case Name: |
| 296 | return protoreflect.Name(v.str), true |
| 297 | } |
| 298 | return "", false |
| 299 | } |
| 300 | |
| 301 | // List returns the elements of v and panics if the Type is not List. |
| 302 | // Mutations on the return value may not be observable from the Raw method. |
| 303 | func (v Value) List() []Value { |
| 304 | if v.typ != List { |
| 305 | panic("value is not a list") |
| 306 | } |
| 307 | return v.arr |
| 308 | } |
| 309 | |
| 310 | // Message returns the items of v and panics if the Type is not Message. |
| 311 | // The [2]Value represents a key and value pair, where the key is either |
| 312 | // a Name (representing a field name), a String (representing extension field |
| 313 | // names or the Any type URL), or an Uint for unknown fields. |
| 314 | // |
| 315 | // Mutations on the return value may not be observable from the Raw method. |
| 316 | func (v Value) Message() [][2]Value { |
| 317 | if v.typ != Message { |
| 318 | panic("value is not a message") |
| 319 | } |
| 320 | return v.obj |
| 321 | } |
| 322 | |
| 323 | // Raw returns the raw representation of the value. |
| 324 | // The returned value may alias the input given to Unmarshal. |
| 325 | func (v Value) Raw() []byte { |
| 326 | if len(v.raw) > 0 { |
| 327 | return v.raw |
| 328 | } |
| 329 | p := encoder{} |
| 330 | if err := p.marshalValue(v); !p.nerr.Merge(err) { |
| 331 | return []byte("<invalid>") |
| 332 | } |
| 333 | return p.out |
| 334 | } |