blob: b6e5526c477f122f8a85108c61876952f358fb6c [file] [log] [blame]
Rob Pikeaaa3a622010-03-20 22:32:34 -07001// Go support for Protocol Buffers - Google's data interchange format
2//
3// Copyright 2010 Google Inc. All rights reserved.
4// http://code.google.com/p/goprotobuf/
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met:
9//
10// * Redistributions of source code must retain the above copyright
11// notice, this list of conditions and the following disclaimer.
12// * Redistributions in binary form must reproduce the above
13// copyright notice, this list of conditions and the following disclaimer
14// in the documentation and/or other materials provided with the
15// distribution.
16// * Neither the name of Google Inc. nor the names of its
17// contributors may be used to endorse or promote products derived from
18// this software without specific prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32package proto
33
34/*
35 * Routines for encoding data into the wire format for protocol buffers.
36 */
37
38import (
39 "fmt"
40 "os"
41 "reflect"
42 "runtime"
43 "strconv"
44 "strings"
45 "sync"
46 "unsafe"
47)
48
// debug, when true, makes GetProperties print each field's index, name,
// containing type and parsed properties while it builds them.
const debug bool = false

// Constants that identify the encoding of a value on the wire.
// The wire type is combined with the field number as Tag<<3 | WireType
// when a field's tag code is precomputed.
const (
	WireVarint     = 0 // selected by the "varint", "zigzag32" and "zigzag64" tags
	WireFixed64    = 1 // selected by the "fixed64" tag
	WireBytes      = 2 // selected by the "bytes" and "group" tags
	WireStartGroup = 3
	WireEndGroup   = 4
	WireFixed32    = 5 // selected by the "fixed32" tag
)

// startSize is the element count used when reserving scratch space for
// string and slice fields.
const startSize = 10 // initial slice/string sizes
62
// Encoders are defined in encoder.go
// An encoder outputs the full representation of a field, including its
// tag and encoder type.
// prop carries the field's Properties; base is presumably the address of
// the struct being encoded (TODO confirm against the encoder implementations).
type encoder func(p *Buffer, prop *Properties, base uintptr) os.Error

// A valueEncoder encodes a single integer in a particular encoding.
// Parse selects one according to the wire name in the struct tag
// (varint, fixed32, fixed64, zigzag32 or zigzag64).
type valueEncoder func(o *Buffer, x uint64) os.Error

// Decoders are defined in decode.go
// A decoder creates a value from its wire representation.
// Unrecognized subelements are saved in unrec.
// NOTE(review): base and sbase look like the struct address and the
// scratch-area address (see getsbase) — confirm against decode.go.
type decoder func(p *Buffer, prop *Properties, base uintptr, sbase uintptr) os.Error

// A valueDecoder decodes a single integer in a particular encoding.
// It is the counterpart of valueEncoder and is selected alongside it.
type valueDecoder func(o *Buffer) (x uint64, err os.Error)
78
// StructProperties represents properties for all the fields of a struct.
// Instances are built once per struct type by GetProperties and cached.
type StructProperties struct {
	Prop      []*Properties  // properties for each field, indexed by struct field number
	reqCount  int            // required count: number of fields marked "req"
	tags      map[int]int    // map from proto tag to struct field number
	origNames map[string]int // map from original name to struct field number
	nscratch  uintptr        // size of scratch space: sum of aligned per-field sizes
}
87
// Properties represents the protocol-specific behavior of a single struct field.
// The exported fields are filled in by Parse from the "PB(...)" struct tag;
// the unexported ones are derived by Init, setEncAndDec and GetProperties.
type Properties struct {
	Name       string // name of the field, for error messages
	OrigName   string // original name before protocol compiler (always set)
	Wire       string // wire encoding name from the tag, e.g. "varint" or "bytes"
	WireType   int    // one of the Wire* constants, chosen from Wire
	Tag        int    // field number parsed from the tag
	Required   bool   // tag contains "req"
	Optional   bool   // tag contains "opt"
	Repeated   bool   // tag contains "rep"
	Enum       string // set for enum types only
	Default    string // default value
	def_uint64 uint64 // NOTE(review): not assigned in this file; presumably a numeric form of Default — confirm

	enc     encoder
	valEnc  valueEncoder // set for bool and numeric types only
	offset  uintptr      // field offset as passed to Init
	tagcode []byte       // encoding of EncodeVarint((Tag<<3)|WireType)
	tagbuf  [8]byte      // backing storage that tagcode is sliced from
	stype   *reflect.PtrType // pointer-to-struct type, set for message and group fields

	dec     decoder
	valDec  valueDecoder // set for bool and numeric types only
	scratch uintptr      // this field's offset within the scratch area
	sizeof  int          // calculations of scratch space
	alignof int          // alignment applied to scratch before this field is placed
}
115
116// String formats the properties in the "PB(...)" struct tag style.
117func (p *Properties) String() string {
118 s := p.Wire
119 s = ","
120 s += strconv.Itoa(p.Tag)
121 if p.Required {
122 s += ",req"
123 }
124 if p.Optional {
125 s += ",opt"
126 }
127 if p.Repeated {
128 s += ",rep"
129 }
130 if p.OrigName != p.Name {
131 s += ",name=" + p.OrigName
132 }
133 if len(p.Enum) > 0 {
134 s += ",enum=" + p.Enum
135 }
136 if len(p.Default) > 0 {
137 s += ",def=" + p.Default
138 }
139 return s
140}
141
142// Parse populates p by parsing a string in the "PB(...)" struct tag style.
143func (p *Properties) Parse(s string) {
144 // "bytes,49,opt,def=hello!,name=foo"
Rob Pike53385442010-06-30 22:22:43 -0700145 fields := strings.Split(s, ",", -1) // breaks def=, but handled below.
Rob Pikeaaa3a622010-03-20 22:32:34 -0700146 if len(fields) < 2 {
147 fmt.Fprintf(os.Stderr, "proto: tag has too few fields: %q\n", s)
148 return
149 }
150
151 p.Wire = fields[0]
152 switch p.Wire {
153 case "varint":
154 p.WireType = WireVarint
155 p.valEnc = (*Buffer).EncodeVarint
156 p.valDec = (*Buffer).DecodeVarint
157 case "fixed32":
158 p.WireType = WireFixed32
159 p.valEnc = (*Buffer).EncodeFixed32
160 p.valDec = (*Buffer).DecodeFixed32
161 case "fixed64":
162 p.WireType = WireFixed64
163 p.valEnc = (*Buffer).EncodeFixed64
164 p.valDec = (*Buffer).DecodeFixed64
165 case "zigzag32":
166 p.WireType = WireVarint
167 p.valEnc = (*Buffer).EncodeZigzag32
168 p.valDec = (*Buffer).DecodeZigzag32
169 case "zigzag64":
170 p.WireType = WireVarint
171 p.valEnc = (*Buffer).EncodeZigzag64
172 p.valDec = (*Buffer).DecodeZigzag64
173 case "bytes", "group":
174 p.WireType = WireBytes
175 // no numeric converter for non-numeric types
176 default:
177 fmt.Fprintf(os.Stderr, "proto: tag has unknown wire type: %q\n", s)
178 return
179 }
180
181 var err os.Error
182 p.Tag, err = strconv.Atoi(fields[1])
183 if err != nil {
184 return
185 }
186
187 for i := 2; i < len(fields); i++ {
188 f := fields[i]
189 switch {
190 case f == "req":
191 p.Required = true
192 case f == "opt":
193 p.Optional = true
194 case f == "rep":
195 p.Repeated = true
196 case len(f) >= 5 && f[0:5] == "name=":
197 p.OrigName = f[5:len(f)]
198 case len(f) >= 5 && f[0:5] == "enum=":
199 p.Enum = f[5:len(f)]
200 case len(f) >= 4 && f[0:4] == "def=":
201 p.Default = f[4:len(f)] // rest of string
202 if i+1 < len(fields) {
203 // Commas aren't escaped, and def is always last.
204 p.Default += "," + strings.Join(fields[i+1:len(fields)], ",")
205 break
206 }
207 }
208 }
209}
210
// Initialize the fields for encoding and decoding.
//
// setEncAndDec chooses p.enc and p.dec from the Go type of the field, records
// the alignment and size used to lay the field out in the decode scratch
// area, and precomputes the varint-encoded tag code. It relies on p.Wire,
// p.Tag and p.WireType having been set (Init calls Parse first). When no
// coder exists for typ a message is written to standard error and p.enc and
// p.dec are left nil.
func (p *Properties) setEncAndDec(typ reflect.Type) {
	// These variables are never assigned; they exist only as operands for
	// unsafe.Alignof and unsafe.Sizeof below.
	var vbool bool
	var vbyte byte
	var vint32 int32
	var vint64 int64
	var vfloat32 float32
	var vfloat64 float64
	var vstring string
	var vslice []byte

	p.enc = nil
	p.dec = nil

	switch t1 := typ.(type) {
	default:
		fmt.Fprintf(os.Stderr, "proto: no coders for %T\n", t1)
		break

	// Pointer fields: a pointer to a scalar, string or struct.
	case *reflect.PtrType:
		switch t2 := t1.Elem().(type) {
		default:
		BadType: // also the target of the gotos for unsupported bit sizes
			fmt.Fprintf(os.Stderr, "proto: no encoder function for %T -> %T\n", t1, t2)
			break
		case *reflect.BoolType:
			p.enc = (*Buffer).enc_bool
			p.dec = (*Buffer).dec_bool
			p.alignof = unsafe.Alignof(vbool)
			p.sizeof = unsafe.Sizeof(vbool)
		case *reflect.IntType, *reflect.UintType:
			// Signed and unsigned integers share coders; only the width matters.
			switch t2.Bits() {
			case 32:
				p.enc = (*Buffer).enc_int32
				p.dec = (*Buffer).dec_int32
				p.alignof = unsafe.Alignof(vint32)
				p.sizeof = unsafe.Sizeof(vint32)
			case 64:
				p.enc = (*Buffer).enc_int64
				p.dec = (*Buffer).dec_int64
				p.alignof = unsafe.Alignof(vint64)
				p.sizeof = unsafe.Sizeof(vint64)
			default:
				goto BadType
			}
		case *reflect.FloatType:
			switch t2.Bits() {
			case 32:
				p.enc = (*Buffer).enc_int32 // can just treat them as bits
				p.dec = (*Buffer).dec_int32
				p.alignof = unsafe.Alignof(vfloat32)
				p.sizeof = unsafe.Sizeof(vfloat32)
			case 64:
				p.enc = (*Buffer).enc_int64 // can just treat them as bits
				p.dec = (*Buffer).dec_int64
				p.alignof = unsafe.Alignof(vfloat64)
				p.sizeof = unsafe.Sizeof(vfloat64)
			default:
				goto BadType
			}
		case *reflect.StringType:
			p.enc = (*Buffer).enc_string
			p.dec = (*Buffer).dec_string
			p.alignof = unsafe.Alignof(vstring)
			// Room for the string header plus startSize bytes of contents.
			p.sizeof = unsafe.Sizeof(vstring) + startSize*unsafe.Sizeof(vbyte)
		case *reflect.StructType:
			// Pointer to struct: encoded as a message when the wire name is
			// "bytes", otherwise as a group. alignof and sizeof are not
			// assigned in this case.
			p.stype = t1
			if p.Wire == "bytes" {
				p.enc = (*Buffer).enc_struct_message
				p.dec = (*Buffer).dec_struct_message
			} else {
				p.enc = (*Buffer).enc_struct_group
				p.dec = (*Buffer).dec_struct_group
			}
		}

	// Slice fields: sizes below reserve scratch room for startSize elements.
	case *reflect.SliceType:
		switch t2 := t1.Elem().(type) {
		default:
		BadSliceType: // also the target of the gotos for unsupported bit sizes
			fmt.Fprintf(os.Stderr, "proto: no slice oenc for %T = []%T\n", t1, t2)
			break
		case *reflect.BoolType:
			p.enc = (*Buffer).enc_slice_bool
			p.dec = (*Buffer).dec_slice_bool
			p.alignof = unsafe.Alignof(vbool)
			p.sizeof = startSize * unsafe.Sizeof(vbool)
		case *reflect.IntType, *reflect.UintType:
			switch t2.Bits() {
			case 32:
				p.enc = (*Buffer).enc_slice_int32
				p.dec = (*Buffer).dec_slice_int32
				p.alignof = unsafe.Alignof(vint32)
				p.sizeof = startSize * unsafe.Sizeof(vint32)
			case 64:
				p.enc = (*Buffer).enc_slice_int64
				p.dec = (*Buffer).dec_slice_int64
				p.alignof = unsafe.Alignof(vint64)
				p.sizeof = startSize * unsafe.Sizeof(vint64)
			case 8:
				// Only []uint8 (a byte slice) gets coders. Any other 8-bit
				// integer element leaves enc and dec nil, and nothing is
				// printed here.
				if t2.Kind() == reflect.Uint8 {
					p.enc = (*Buffer).enc_slice_byte
					p.dec = (*Buffer).dec_slice_byte
					p.alignof = unsafe.Alignof(vbyte)
					p.sizeof = startSize * unsafe.Sizeof(vbyte)
				}
			default:
				goto BadSliceType
			}
		case *reflect.FloatType:
			switch t2.Bits() {
			case 32:
				p.enc = (*Buffer).enc_slice_int32 // can just treat them as bits
				p.dec = (*Buffer).dec_slice_int32
				p.alignof = unsafe.Alignof(vfloat32)
				p.sizeof = startSize * unsafe.Sizeof(vfloat32)
			case 64:
				p.enc = (*Buffer).enc_slice_int64 // can just treat them as bits
				p.dec = (*Buffer).dec_slice_int64
				p.alignof = unsafe.Alignof(vfloat64)
				p.sizeof = startSize * unsafe.Sizeof(vfloat64)
			default:
				goto BadSliceType
			}
		case *reflect.StringType:
			p.enc = (*Buffer).enc_slice_string
			p.dec = (*Buffer).dec_slice_string
			p.alignof = unsafe.Alignof(vstring)
			p.sizeof = startSize * unsafe.Sizeof(vstring)
		case *reflect.PtrType:
			// Slice of pointers: only pointers to structs are supported.
			switch t3 := t2.Elem().(type) {
			default:
				fmt.Fprintf(os.Stderr, "proto: no ptr oenc for %T -> %T -> %T\n", t1, t2, t3)
				break
			case *reflect.StructType:
				p.stype = t2
				// Group coders are the default; "bytes" switches to message coders.
				p.enc = (*Buffer).enc_slice_struct_group
				p.dec = (*Buffer).dec_slice_struct_group
				if p.Wire == "bytes" {
					p.enc = (*Buffer).enc_slice_struct_message
					p.dec = (*Buffer).dec_slice_struct_message
				}
				p.alignof = unsafe.Alignof(vslice)
				p.sizeof = startSize * unsafe.Sizeof(vslice)
			}
		case *reflect.SliceType:
			// Slice of slices: only [][]byte is supported.
			switch t2.Elem().Kind() {
			default:
				fmt.Fprintf(os.Stderr, "proto: no slice elem oenc for %T -> %T -> %T\n", t1, t2, t2.Elem())
				break
			case reflect.Uint8:
				p.enc = (*Buffer).enc_slice_slice_byte
				p.dec = (*Buffer).dec_slice_slice_byte
				p.alignof = unsafe.Alignof(vslice)
				p.sizeof = startSize * unsafe.Sizeof(vslice)
			}
		}
	}

	// precalculate tag code
	// The key Tag<<3 | WireType is written as a varint: seven bits per byte,
	// low-order group first, with the high bit set on every byte but the last.
	x := p.Tag<<3 | p.WireType
	i := 0
	for i = 0; x > 127; i++ {
		p.tagbuf[i] = 0x80 | uint8(x&0x7F)
		x >>= 7
	}
	p.tagbuf[i] = uint8(x)
	p.tagcode = p.tagbuf[0 : i+1]
}
380
381// Init populates the properties from a protocol buffer struct field.
382func (p *Properties) Init(typ reflect.Type, name, tag string, offset uintptr) {
383 // "PB(bytes,49,opt,def=hello!)"
384 // TODO: should not assume the only thing is PB(...)
385 p.Name = name
386 p.OrigName = name
387 p.offset = offset
388
389 if len(tag) < 4 || tag[0:3] != "PB(" || tag[len(tag)-1] != ')' {
390 return
391 }
392 p.Parse(tag[3 : len(tag)-1])
393 p.setEncAndDec(typ)
394}
395
var (
	// mutex is held by GetProperties while it reads and fills propertiesMap.
	mutex sync.Mutex
	// propertiesMap caches the StructProperties built for each struct type.
	propertiesMap = make(map[*reflect.StructType]*StructProperties)
)
400
401// GetProperties returns the list of properties for the type represented by t.
402func GetProperties(t *reflect.StructType) *StructProperties {
403 mutex.Lock()
404 if prop, ok := propertiesMap[t]; ok {
405 mutex.Unlock()
406 stats.Chit++
407 return prop
408 }
409 stats.Cmiss++
410
411 prop := new(StructProperties)
412
413 // build properties
414 prop.Prop = make([]*Properties, t.NumField())
David Symonds79eae332010-10-16 11:33:20 +1100415 prop.origNames = make(map[string]int)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700416 for i := 0; i < t.NumField(); i++ {
417 f := t.Field(i)
418 p := new(Properties)
419 p.Init(f.Type, f.Name, f.Tag, f.Offset)
420 if f.Name == "XXX_extensions" { // special case
421 var vmap map[int32][]byte
422 p.enc = (*Buffer).enc_map
423 p.dec = nil // not needed
424 p.alignof = unsafe.Alignof(vmap)
425 p.sizeof = unsafe.Sizeof(vmap)
426 }
427 prop.Prop[i] = p
David Symonds79eae332010-10-16 11:33:20 +1100428 prop.origNames[p.OrigName] = i
Rob Pikeaaa3a622010-03-20 22:32:34 -0700429 if debug {
430 print(i, " ", f.Name, " ", t.String(), " ")
431 if p.Tag > 0 {
432 print(p.String())
433 }
434 print("\n")
435 }
436 if p.enc == nil && !strings.HasPrefix(f.Name, "XXX_") {
437 fmt.Fprintln(os.Stderr, "proto: no encoder for", f.Name, f.Type.String(), "[GetProperties]")
438 }
439 }
440
441 // build required counts
442 // build scratch offsets
443 // build tags
444 reqCount := 0
445 scratch := uintptr(0)
446 prop.tags = make(map[int]int)
447 for i, p := range prop.Prop {
448 if p.Required {
449 reqCount++
450 }
451 scratch = align(scratch, p.alignof)
452 p.scratch = scratch
453 scratch += uintptr(p.sizeof)
454 prop.tags[p.Tag] = i
455 }
456 prop.reqCount = reqCount
457 prop.nscratch = scratch
458
459 propertiesMap[t] = prop
460 mutex.Unlock()
461 return prop
462}
463
// align rounds the scratch offset o up to the alignment s. The alignment
// doesn't have to be exact, just conservative. It returns the smallest
// number >= o that is a multiple of s. An alignment of zero means "no
// constraint" and returns o unchanged; a negative alignment is treated the
// same way rather than being converted to a huge unsigned value.
func align(o uintptr, s int) uintptr {
	if s <= 0 {
		return o
	}
	a := uintptr(s)
	// Add whatever is missing to reach the next multiple of a.
	if rem := o % a; rem != 0 {
		o += a - rem
	}
	return o
}
474
475// Return the field index of the named field.
476// Returns nil if there is no such field.
477func fieldIndex(t *reflect.StructType, name string) []int {
478 if field, ok := t.FieldByName(name); ok {
479 return field.Index
480 }
481 return nil
482}
483
484// Return the Properties object for the x[0]'th field of the structure.
485func propByIndex(t *reflect.StructType, x []int) *Properties {
486 if len(x) != 1 {
487 fmt.Fprintf(os.Stderr, "proto: field index dimension %d (not 1) for type %s\n", len(x), t)
488 return nil
489 }
490 prop := GetProperties(t)
491 return prop.Prop[x[0]]
492}
493
// Get the address and type of a pointer to the structure from an interface.
// unsafe.Reflect can do this, but does multiple mallocs.
//
// On success t is the pointer type held in pb and b is the pointer value.
// If the pointer value is zero, err is ErrNil and t is left nil.
func getbase(pb interface{}) (t *reflect.PtrType, b uintptr, err os.Error) {
	// get pointer
	// The interface value is reinterpreted as two machine words and the
	// second is taken as the data pointer.
	// NOTE(review): this depends on the runtime's interface layout — confirm
	// whenever the runtime representation changes.
	x := *(*[2]uintptr)(unsafe.Pointer(&pb))
	b = x[1]
	if b == 0 {
		err = ErrNil
		return
	}

	// get the reflect type of the struct.
	// The type assertion panics if pb does not hold a pointer type.
	t1 := unsafe.Typeof(pb).(*runtime.PtrType)
	t = (*reflect.PtrType)(unsafe.Pointer(t1))
	return
}
510
511// Allocate the aux space containing all the decoded data. The structure
512// handed into Unmarshal is filled with pointers to this newly allocated
513// data.
514func getsbase(prop *StructProperties) uintptr {
515 var vbyteptr *byte
516 if prop.nscratch == 0 {
517 return 0
518 }
519
520 // allocate the decode space as pointers
521 // so that the GC will scan it for pointers
522 n := uintptr(unsafe.Sizeof(vbyteptr))
523 b := make([]*byte, (prop.nscratch+n-1)/n)
524 sbase := uintptr(unsafe.Pointer(&b[0]))
525 return sbase
526}
527
// A global registry of enum types.
// The generated code will register the generated maps by calling RegisterEnum.
var (
	// enumNameMaps holds, per enum type name, the map from value to name.
	enumNameMaps = make(map[string]map[int32]string)
	// enumValueMaps holds, per enum type name, the map from name to value.
	enumValueMaps = make(map[string]map[string]int32)
)

// RegisterEnum is called from the generated code to install the enum descriptor
// maps into the global table to aid parsing ASCII protocol buffers.
// It panics if typeName has already been registered.
func RegisterEnum(typeName string, nameMap map[int32]string, valueMap map[string]int32) {
	_, registered := enumNameMaps[typeName]
	if registered {
		panic("proto: duplicate enum registered: " + typeName)
	}
	enumValueMaps[typeName] = valueMap
	enumNameMaps[typeName] = nameMap
}