blob: 55f746f50fb1cbe163848441c0aaf235eff74480 [file] [log] [blame]
temporal40ee5512008-07-10 02:12:20 +00001# Protocol Buffers - Google's data interchange format
kenton@google.com24bf56f2008-09-24 20:31:01 +00002# Copyright 2008 Google Inc. All rights reserved.
temporal40ee5512008-07-10 02:12:20 +00003# http://code.google.com/p/protobuf/
4#
kenton@google.com24bf56f2008-09-24 20:31:01 +00005# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
temporal40ee5512008-07-10 02:12:20 +00008#
kenton@google.com24bf56f2008-09-24 20:31:01 +00009# * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11# * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15# * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
temporal40ee5512008-07-10 02:12:20 +000018#
kenton@google.com24bf56f2008-09-24 20:31:01 +000019# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
temporal40ee5512008-07-10 02:12:20 +000030
kenton@google.comfccb1462009-12-18 02:11:36 +000031"""Code for decoding protocol buffer primitives.
temporal40ee5512008-07-10 02:12:20 +000032
kenton@google.comfccb1462009-12-18 02:11:36 +000033This code is very similar to encoder.py -- read the docs for that module first.
34
35A "decoder" is a function with the signature:
36 Decode(buffer, pos, end, message, field_dict)
37The arguments are:
38 buffer: The string containing the encoded message.
39 pos: The current position in the string.
40 end: The position in the string where the current message ends. May be
41 less than len(buffer) if we're reading a sub-message.
42 message: The message object into which we're parsing.
43 field_dict: message._fields (avoids a hashtable lookup).
44The decoder reads the field and stores it into field_dict, returning the new
45buffer position. A decoder for a repeated field may proactively decode all of
46the elements of that field, if they appear consecutively.
47
48Note that decoders may throw any of the following:
49 IndexError: Indicates a truncated message.
50 struct.error: Unpacking of a fixed-width field failed.
51 message.DecodeError: Other errors.
52
53Decoders are expected to raise an exception if they are called with pos > end.
54This allows callers to be lax about bounds checking: it's fine to read past
55"end" as long as you are sure that someone else will notice and throw an
56exception later on.
57
58Something up the call stack is expected to catch IndexError and struct.error
59and convert them to message.DecodeError.
60
61Decoders are constructed using decoder constructors with the signature:
62 MakeDecoder(field_number, is_repeated, is_packed, key, new_default)
63The arguments are:
64 field_number: The field number of the field we want to decode.
65 is_repeated: Is the field a repeated field? (bool)
66 is_packed: Is the field a packed field? (bool)
67 key: The key to use when looking up the field within field_dict.
68 (This is actually the FieldDescriptor but nothing in this
69 file should depend on that.)
70 new_default: A function which takes a message object as a parameter and
71 returns a new instance of the default value for this field.
72 (This is called for repeated fields and sub-messages, when an
73 instance does not already exist.)
74
75As with encoders, we define a decoder constructor for every type of field.
76Then, for every field of every message class we construct an actual decoder.
77That decoder goes into a dict indexed by tag, so when we decode a message
78we repeatedly read a tag, look up the corresponding decoder, and invoke it.
temporal40ee5512008-07-10 02:12:20 +000079"""
80
kenton@google.comfccb1462009-12-18 02:11:36 +000081__author__ = 'kenton@google.com (Kenton Varda)'
temporal40ee5512008-07-10 02:12:20 +000082
83import struct
kenton@google.comfccb1462009-12-18 02:11:36 +000084from google.protobuf.internal import encoder
temporal40ee5512008-07-10 02:12:20 +000085from google.protobuf.internal import wire_format
kenton@google.comfccb1462009-12-18 02:11:36 +000086from google.protobuf import message
temporal40ee5512008-07-10 02:12:20 +000087
88
# Non-finite float constants used by the float/double decoders below.
#
# The literal 1e10000 overflows and thus becomes IEEE-754 "infinity".  We
# would use "float('inf')" but it doesn't work on Windows pre-Python-2.6.
_POS_INF = 1e10000
_NEG_INF = -_POS_INF
# Multiplying infinity by zero yields not-a-number.
_NAN = _POS_INF * 0
94
95
# This alias is not for optimization, but rather to avoid conflicts with local
# variables named "message" (the decoders below take a "message" parameter,
# which would otherwise hide the imported module).
_DecodeError = message.DecodeError
temporal40ee5512008-07-10 02:12:20 +000099
100
def _VarintDecoder(mask):
  """Return a decoder for a basic varint value (does not include tag).

  Decoded values will be bitwise-anded with the given mask before being
  returned, e.g. to limit them to 32 bits.  The returned decoder does not
  take the usual "end" parameter -- the caller is expected to do bounds
  checking after the fact (often the caller can defer such checking until
  later).

  Args:
    mask: Integer bit mask applied to the fully decoded value.

  Returns:
    A function DecodeVarint(buffer, pos) which returns a (value, new_pos)
    pair.  It raises IndexError if the buffer ends in the middle of the
    varint, and _DecodeError if the continuation bit is still set once 64
    bits' worth of payload have been consumed.
  """

  local_ord = ord  # Bound locally so the inner loop avoids a global lookup.

  def DecodeVarint(buffer, pos):
    result = 0
    shift = 0
    while 1:
      b = local_ord(buffer[pos])
      # The low seven bits are payload; groups arrive least-significant first.
      result |= ((b & 0x7f) << shift)
      pos += 1
      if not (b & 0x80):
        # Continuation bit clear: this was the final byte.
        result &= mask
        return (result, pos)
      shift += 7
      if shift >= 64:
        raise _DecodeError('Too many bytes when decoding varint.')
  return DecodeVarint
temporal40ee5512008-07-10 02:12:20 +0000126
kenton@google.comfccb1462009-12-18 02:11:36 +0000127
def _SignedVarintDecoder(mask):
  """Like _VarintDecoder() but decodes signed values.

  Args:
    mask: Integer bit mask selecting the value bits to keep (e.g. the low
      32 bits).

  Returns:
    A function DecodeVarint(buffer, pos) which returns a (value, new_pos)
    pair.  A raw value with bit 63 set is interpreted as a negative
    two's-complement 64-bit number; any other value is simply masked.  The
    function raises IndexError on a truncated buffer and _DecodeError if
    the varint is too long.
  """

  local_ord = ord  # Bound locally so the inner loop avoids a global lookup.

  def DecodeVarint(buffer, pos):
    result = 0
    shift = 0
    while 1:
      b = local_ord(buffer[pos])
      result |= ((b & 0x7f) << shift)
      pos += 1
      if not (b & 0x80):
        if result > 0x7fffffffffffffff:
          # Bit 63 is set: reinterpret as a negative 64-bit value, then force
          # every bit above the mask to one so the result stays negative.
          result -= (1 << 64)
          result |= ~mask
        else:
          result &= mask
        return (result, pos)
      shift += 7
      if shift >= 64:
        raise _DecodeError('Too many bytes when decoding varint.')
  return DecodeVarint
150
151
# Full-width (64-bit) varint decoders.
_DecodeVarint = _VarintDecoder((1 << 64) - 1)
_DecodeSignedVarint = _SignedVarintDecoder((1 << 64) - 1)

# Use these versions for values which must be limited to 32 bits.
_DecodeVarint32 = _VarintDecoder((1 << 32) - 1)
_DecodeSignedVarint32 = _SignedVarintDecoder((1 << 32) - 1)
158
159
def ReadTag(buffer, pos):
  """Read a tag from the buffer, and return a (tag_bytes, new_pos) tuple.

  We return the raw bytes of the tag rather than decoding them.  The raw
  bytes can then be used to look up the proper decoder.  This effectively
  allows us to trade some work that would be done in pure-python (decoding a
  varint) for work that is done in C (searching for a byte string in a hash
  table).  In a low-level language it would be much cheaper to decode the
  varint and use that, but not in Python.

  Args:
    buffer: The string containing the encoded message.
    pos: Position of the first byte of the tag.

  Returns:
    A (tag_bytes, new_pos) tuple, where tag_bytes is the slice of buffer
    holding the tag and new_pos is the position just past it.

  Raises:
    IndexError: The buffer ended before the tag's final byte.
  """

  start = pos
  # Every byte of a varint except the last has its high bit set.
  while ord(buffer[pos]) & 0x80:
    pos += 1
  pos += 1  # Step over the final byte as well.
  return (buffer[start:pos], pos)
176
177
178# --------------------------------------------------------------------
179
180
def _SimpleDecoder(wire_type, decode_value):
  """Return a constructor for a decoder for fields of a particular type.

  Args:
    wire_type: The field's wire type.
    decode_value: A function which decodes an individual value, e.g.
      _DecodeVarint().  It is called as decode_value(buffer, pos) and must
      return a (value, new_pos) pair.

  Returns:
    A decoder constructor with the signature
    SpecificDecoder(field_number, is_repeated, is_packed, key, new_default).
    The decoders it builds raise _DecodeError when a value runs past "end".
  """

  def SpecificDecoder(field_number, is_repeated, is_packed, key, new_default):
    if is_packed:
      local_DecodeVarint = _DecodeVarint

      def DecodePackedField(buffer, pos, end, message, field_dict):
        value = field_dict.get(key)
        if value is None:
          value = field_dict.setdefault(key, new_default(message))
        # A packed field is a length prefix followed by back-to-back values.
        (endpoint, pos) = local_DecodeVarint(buffer, pos)
        endpoint += pos
        if endpoint > end:
          raise _DecodeError('Truncated message.')
        while pos < endpoint:
          (element, pos) = decode_value(buffer, pos)
          value.append(element)
        if pos > endpoint:
          del value[-1]   # Discard corrupt value.
          raise _DecodeError('Packed element was truncated.')
        return pos
      return DecodePackedField
    elif is_repeated:
      tag_bytes = encoder.TagBytes(field_number, wire_type)
      tag_len = len(tag_bytes)

      def DecodeRepeatedField(buffer, pos, end, message, field_dict):
        value = field_dict.get(key)
        if value is None:
          value = field_dict.setdefault(key, new_default(message))
        while 1:
          (element, new_pos) = decode_value(buffer, pos)
          value.append(element)
          # Predict that the next tag is another copy of the same repeated
          # field.
          pos = new_pos + tag_len
          if buffer[new_pos:pos] != tag_bytes or new_pos >= end:
            # Prediction failed.  Return.
            if new_pos > end:
              raise _DecodeError('Truncated message.')
            return new_pos
      return DecodeRepeatedField
    else:
      def DecodeField(buffer, pos, end, message, field_dict):
        (field_dict[key], pos) = decode_value(buffer, pos)
        if pos > end:
          del field_dict[key]  # Discard corrupt value.
          raise _DecodeError('Truncated message.')
        return pos
      return DecodeField

  return SpecificDecoder
238
239
def _ModifiedDecoder(wire_type, decode_value, modify_value):
  """Like _SimpleDecoder but additionally invokes modify_value on every value
  before storing it.  Usually modify_value is ZigZagDecode.

  Args:
    wire_type: The field's wire type.
    decode_value: A function taking (buffer, pos) and returning a
      (value, new_pos) pair.
    modify_value: A one-argument function applied to each decoded value.

  Returns:
    A decoder constructor, as returned by _SimpleDecoder().
  """

  # Reusing _SimpleDecoder is slightly slower than copying a bunch of code, but
  # not enough to make a significant difference.

  def InnerDecode(buffer, pos):
    (result, new_pos) = decode_value(buffer, pos)
    return (modify_value(result), new_pos)
  return _SimpleDecoder(wire_type, InnerDecode)
252
253
def _StructPackDecoder(wire_type, format):
  """Return a constructor for a decoder for a fixed-width field.

  Args:
    wire_type: The field's wire type.
    format: The format string to pass to struct.unpack().

  Returns:
    A decoder constructor, as returned by _SimpleDecoder().
  """

  value_size = struct.calcsize(format)
  local_unpack = struct.unpack

  # Reusing _SimpleDecoder is slightly slower than copying a bunch of code, but
  # not enough to make a significant difference.

  # Note that we expect someone up-stack to catch struct.error and convert
  # it to _DecodeError -- this way we don't have to set up exception-
  # handling blocks every time we parse one value.

  def InnerDecode(buffer, pos):
    new_pos = pos + value_size
    # unpack() returns a tuple; the formats used here hold a single item.
    result = local_unpack(format, buffer[pos:new_pos])[0]
    return (result, new_pos)
  return _SimpleDecoder(wire_type, InnerDecode)
277
278
def _FloatDecoder():
  """Returns a decoder for a float field.

  This code works around a bug in struct.unpack for non-finite 32-bit
  floating-point values.

  Returns:
    A decoder constructor, as returned by _SimpleDecoder(), for the FIXED32
    wire type.
  """

  local_unpack = struct.unpack

  def InnerDecode(buffer, pos):
    # We expect a 32-bit value in little-endian byte order.  Bit 1 is the sign
    # bit, bits 2-9 represent the exponent, and bits 10-32 are the significand.
    new_pos = pos + 4
    float_bytes = buffer[pos:new_pos]

    # If this value has all its exponent bits set, then it's non-finite.
    # In Python 2.4, struct.unpack will convert it to a finite 64-bit value.
    # To avoid that, we parse it specially.
    #
    # The last byte carries the sign bit plus the top seven exponent bits; the
    # high bit of the byte before it is the lowest exponent bit.
    if ((float_bytes[3] in '\x7F\xFF')
        and (float_bytes[2] >= '\x80')):
      # If at least one significand bit is set...
      if float_bytes[0:3] != '\x00\x00\x80':
        return (_NAN, new_pos)
      # If sign bit is set...
      if float_bytes[3] == '\xFF':
        return (_NEG_INF, new_pos)
      return (_POS_INF, new_pos)

    # Note that we expect someone up-stack to catch struct.error and convert
    # it to _DecodeError -- this way we don't have to set up exception-
    # handling blocks every time we parse one value.
    result = local_unpack('<f', float_bytes)[0]
    return (result, new_pos)
  return _SimpleDecoder(wire_format.WIRETYPE_FIXED32, InnerDecode)
313
314
def _DoubleDecoder():
  """Returns a decoder for a double field.

  This code works around a bug in struct.unpack for not-a-number.

  Returns:
    A decoder constructor, as returned by _SimpleDecoder(), for the FIXED64
    wire type.
  """

  local_unpack = struct.unpack

  def InnerDecode(buffer, pos):
    # We expect a 64-bit value in little-endian byte order.  Bit 1 is the sign
    # bit, bits 2-12 represent the exponent, and bits 13-64 are the significand.
    new_pos = pos + 8
    double_bytes = buffer[pos:new_pos]

    # If this value has all its exponent bits set and at least one significand
    # bit set, it's not a number.  In Python 2.4, struct.unpack will treat it
    # as inf or -inf.  To avoid that, we treat it specially.
    #
    # The final comparison excludes the two infinities, whose significand is
    # all zero.
    if ((double_bytes[7] in '\x7F\xFF')
        and (double_bytes[6] >= '\xF0')
        and (double_bytes[0:7] != '\x00\x00\x00\x00\x00\x00\xF0')):
      return (_NAN, new_pos)

    # Note that we expect someone up-stack to catch struct.error and convert
    # it to _DecodeError -- this way we don't have to set up exception-
    # handling blocks every time we parse one value.
    result = local_unpack('<d', double_bytes)[0]
    return (result, new_pos)
  return _SimpleDecoder(wire_format.WIRETYPE_FIXED64, InnerDecode)
343
344
kenton@google.comfccb1462009-12-18 02:11:36 +0000345# --------------------------------------------------------------------
346
347
# Decoder constructors for the scalar field types.  Each name below is a
# constructor taking (field_number, is_repeated, is_packed, key, new_default).

Int32Decoder = EnumDecoder = _SimpleDecoder(
    wire_format.WIRETYPE_VARINT, _DecodeSignedVarint32)

Int64Decoder = _SimpleDecoder(
    wire_format.WIRETYPE_VARINT, _DecodeSignedVarint)

UInt32Decoder = _SimpleDecoder(wire_format.WIRETYPE_VARINT, _DecodeVarint32)
UInt64Decoder = _SimpleDecoder(wire_format.WIRETYPE_VARINT, _DecodeVarint)

SInt32Decoder = _ModifiedDecoder(
    wire_format.WIRETYPE_VARINT, _DecodeVarint32, wire_format.ZigZagDecode)
SInt64Decoder = _ModifiedDecoder(
    wire_format.WIRETYPE_VARINT, _DecodeVarint, wire_format.ZigZagDecode)

# Note that Python conveniently guarantees that when using the '<' prefix on
# formats, they will also have the same size across all platforms (as opposed
# to without the prefix, where their sizes depend on the C compiler's basic
# type sizes).
Fixed32Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED32, '<I')
Fixed64Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED64, '<Q')
SFixed32Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED32, '<i')
SFixed64Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED64, '<q')
FloatDecoder = _FloatDecoder()
DoubleDecoder = _DoubleDecoder()

# Booleans travel as varints; bool() maps any non-zero value to True.
BoolDecoder = _ModifiedDecoder(
    wire_format.WIRETYPE_VARINT, _DecodeVarint, bool)
375
376
def StringDecoder(field_number, is_repeated, is_packed, key, new_default):
  """Returns a decoder for a string field.

  Args:
    field_number: The field number of the field we want to decode.
    is_repeated: Is the field a repeated field? (bool)
    is_packed: Must be false; string fields cannot be packed.
    key: The key to use when looking up the field within field_dict.
    new_default: A function which takes a message object and returns a new
      container for a repeated field.

  Returns:
    A decoder function Decode(buffer, pos, end, message, field_dict) which
    stores the UTF-8-decoded value(s) and returns the new position.  It
    raises _DecodeError if the declared length runs past "end".
  """

  local_DecodeVarint = _DecodeVarint
  local_unicode = unicode

  assert not is_packed
  if is_repeated:
    tag_bytes = encoder.TagBytes(field_number,
                                 wire_format.WIRETYPE_LENGTH_DELIMITED)
    tag_len = len(tag_bytes)

    def DecodeRepeatedField(buffer, pos, end, message, field_dict):
      value = field_dict.get(key)
      if value is None:
        value = field_dict.setdefault(key, new_default(message))
      while 1:
        # Each element is a varint length followed by that many bytes.
        (size, pos) = local_DecodeVarint(buffer, pos)
        new_pos = pos + size
        if new_pos > end:
          raise _DecodeError('Truncated string.')
        value.append(local_unicode(buffer[pos:new_pos], 'utf-8'))
        # Predict that the next tag is another copy of the same repeated field.
        pos = new_pos + tag_len
        if buffer[new_pos:pos] != tag_bytes or new_pos == end:
          # Prediction failed.  Return.
          return new_pos
    return DecodeRepeatedField
  else:
    def DecodeField(buffer, pos, end, message, field_dict):
      (size, pos) = local_DecodeVarint(buffer, pos)
      new_pos = pos + size
      if new_pos > end:
        raise _DecodeError('Truncated string.')
      field_dict[key] = local_unicode(buffer[pos:new_pos], 'utf-8')
      return new_pos
    return DecodeField
413
414
def BytesDecoder(field_number, is_repeated, is_packed, key, new_default):
  """Returns a decoder for a bytes field.

  Args:
    field_number: The field number of the field we want to decode.
    is_repeated: Is the field a repeated field? (bool)
    is_packed: Must be false; bytes fields cannot be packed.
    key: The key to use when looking up the field within field_dict.
    new_default: A function which takes a message object and returns a new
      container for a repeated field.

  Returns:
    A decoder function Decode(buffer, pos, end, message, field_dict) which
    stores the raw slice(s) of the buffer and returns the new position.  It
    raises _DecodeError if the declared length runs past "end".
  """

  local_DecodeVarint = _DecodeVarint

  assert not is_packed
  if is_repeated:
    tag_bytes = encoder.TagBytes(field_number,
                                 wire_format.WIRETYPE_LENGTH_DELIMITED)
    tag_len = len(tag_bytes)

    def DecodeRepeatedField(buffer, pos, end, message, field_dict):
      value = field_dict.get(key)
      if value is None:
        value = field_dict.setdefault(key, new_default(message))
      while 1:
        # Each element is a varint length followed by that many bytes.
        (size, pos) = local_DecodeVarint(buffer, pos)
        new_pos = pos + size
        if new_pos > end:
          raise _DecodeError('Truncated string.')
        value.append(buffer[pos:new_pos])
        # Predict that the next tag is another copy of the same repeated field.
        pos = new_pos + tag_len
        if buffer[new_pos:pos] != tag_bytes or new_pos == end:
          # Prediction failed.  Return.
          return new_pos
    return DecodeRepeatedField
  else:
    def DecodeField(buffer, pos, end, message, field_dict):
      (size, pos) = local_DecodeVarint(buffer, pos)
      new_pos = pos + size
      if new_pos > end:
        raise _DecodeError('Truncated string.')
      field_dict[key] = buffer[pos:new_pos]
      return new_pos
    return DecodeField
450
451
def GroupDecoder(field_number, is_repeated, is_packed, key, new_default):
  """Returns a decoder for a group field.

  Args:
    field_number: The field number of the field we want to decode.
    is_repeated: Is the field a repeated field? (bool)
    is_packed: Must be false; group fields cannot be packed.
    key: The key to use when looking up the field within field_dict.
    new_default: A function which takes a message object and returns a new
      default value (sub-message or repeated container) for this field.

  Returns:
    A decoder function Decode(buffer, pos, end, message, field_dict) which
    parses the group body via the sub-message's _InternalParse() and returns
    the position just past the group's end tag.  It raises _DecodeError if
    the expected end tag is missing or lies beyond "end".
  """

  end_tag_bytes = encoder.TagBytes(field_number,
                                   wire_format.WIRETYPE_END_GROUP)
  end_tag_len = len(end_tag_bytes)

  assert not is_packed
  if is_repeated:
    tag_bytes = encoder.TagBytes(field_number,
                                 wire_format.WIRETYPE_START_GROUP)
    tag_len = len(tag_bytes)

    def DecodeRepeatedField(buffer, pos, end, message, field_dict):
      # Fetch (or create) the container once; it is reused for every element.
      value = field_dict.get(key)
      if value is None:
        value = field_dict.setdefault(key, new_default(message))
      while 1:
        # Read sub-message.
        pos = value.add()._InternalParse(buffer, pos, end)
        # Read end tag.
        new_pos = pos + end_tag_len
        if buffer[pos:new_pos] != end_tag_bytes or new_pos > end:
          raise _DecodeError('Missing group end tag.')
        # Predict that the next tag is another copy of the same repeated field.
        pos = new_pos + tag_len
        if buffer[new_pos:pos] != tag_bytes or new_pos == end:
          # Prediction failed.  Return.
          return new_pos
    return DecodeRepeatedField
  else:
    def DecodeField(buffer, pos, end, message, field_dict):
      value = field_dict.get(key)
      if value is None:
        value = field_dict.setdefault(key, new_default(message))
      # Read sub-message.
      pos = value._InternalParse(buffer, pos, end)
      # Read end tag.
      new_pos = pos + end_tag_len
      if buffer[pos:new_pos] != end_tag_bytes or new_pos > end:
        raise _DecodeError('Missing group end tag.')
      return new_pos
    return DecodeField
497
498
def MessageDecoder(field_number, is_repeated, is_packed, key, new_default):
  """Returns a decoder for a message field.

  Args:
    field_number: The field number of the field we want to decode.
    is_repeated: Is the field a repeated field? (bool)
    is_packed: Must be false; message fields cannot be packed.
    key: The key to use when looking up the field within field_dict.
    new_default: A function which takes a message object and returns a new
      default value (sub-message or repeated container) for this field.

  Returns:
    A decoder function Decode(buffer, pos, end, message, field_dict) which
    parses the length-delimited sub-message via its _InternalParse() and
    returns the position just past it.  It raises _DecodeError if the
    declared length runs past "end" or the sub-message parse stops early.
  """

  local_DecodeVarint = _DecodeVarint

  assert not is_packed
  if is_repeated:
    tag_bytes = encoder.TagBytes(field_number,
                                 wire_format.WIRETYPE_LENGTH_DELIMITED)
    tag_len = len(tag_bytes)

    def DecodeRepeatedField(buffer, pos, end, message, field_dict):
      # Fetch (or create) the container once; it is reused for every element.
      value = field_dict.get(key)
      if value is None:
        value = field_dict.setdefault(key, new_default(message))
      while 1:
        # Read length.
        (size, pos) = local_DecodeVarint(buffer, pos)
        new_pos = pos + size
        if new_pos > end:
          raise _DecodeError('Truncated message.')
        # Read sub-message.
        if value.add()._InternalParse(buffer, pos, new_pos) != new_pos:
          # The only reason _InternalParse would return early is if it
          # encountered an end-group tag.
          raise _DecodeError('Unexpected end-group tag.')
        # Predict that the next tag is another copy of the same repeated field.
        pos = new_pos + tag_len
        if buffer[new_pos:pos] != tag_bytes or new_pos == end:
          # Prediction failed.  Return.
          return new_pos
    return DecodeRepeatedField
  else:
    def DecodeField(buffer, pos, end, message, field_dict):
      value = field_dict.get(key)
      if value is None:
        value = field_dict.setdefault(key, new_default(message))
      # Read length.
      (size, pos) = local_DecodeVarint(buffer, pos)
      new_pos = pos + size
      if new_pos > end:
        raise _DecodeError('Truncated message.')
      # Read sub-message.
      if value._InternalParse(buffer, pos, new_pos) != new_pos:
        # The only reason _InternalParse would return early is if it encountered
        # an end-group tag.
        raise _DecodeError('Unexpected end-group tag.')
      return new_pos
    return DecodeField
550
551
552# --------------------------------------------------------------------
553
# Raw tag bytes that open a MessageSet item (field 1, start-group wire type).
MESSAGE_SET_ITEM_TAG = encoder.TagBytes(1, wire_format.WIRETYPE_START_GROUP)
555
def MessageSetItemDecoder(extensions_by_number):
  """Returns a decoder for a MessageSet item.

  The parameter is the _extensions_by_number map for the message class.

  The message set message looks like this:
    message MessageSet {
      repeated group Item = 1 {
        required int32 type_id = 2;
        required string message = 3;
      }
    }

  Args:
    extensions_by_number: Mapping from type_id to the extension handle used
      as the key into field_dict.

  Returns:
    A decoder function DecodeItem(buffer, pos, end, message, field_dict)
    which returns the position just past the item's end-group tag.  Items
    whose type_id is not in extensions_by_number are skipped.  It raises
    _DecodeError on a truncated or incomplete item.
  """

  type_id_tag_bytes = encoder.TagBytes(2, wire_format.WIRETYPE_VARINT)
  message_tag_bytes = encoder.TagBytes(3, wire_format.WIRETYPE_LENGTH_DELIMITED)
  item_end_tag_bytes = encoder.TagBytes(1, wire_format.WIRETYPE_END_GROUP)

  local_ReadTag = ReadTag
  local_DecodeVarint = _DecodeVarint
  local_SkipField = SkipField

  def DecodeItem(buffer, pos, end, message, field_dict):
    # -1 marks "not seen yet" for each of the two required parts.
    type_id = -1
    message_start = -1
    message_end = -1

    # Technically, type_id and message can appear in any order, so we need
    # a little loop here.
    while 1:
      (tag_bytes, pos) = local_ReadTag(buffer, pos)
      if tag_bytes == type_id_tag_bytes:
        (type_id, pos) = local_DecodeVarint(buffer, pos)
      elif tag_bytes == message_tag_bytes:
        # Remember where the payload lives; it is parsed once type_id is known.
        (size, message_start) = local_DecodeVarint(buffer, pos)
        pos = message_end = message_start + size
      elif tag_bytes == item_end_tag_bytes:
        break
      else:
        pos = local_SkipField(buffer, pos, end, tag_bytes)
        if pos == -1:
          raise _DecodeError('Missing group end tag.')

    if pos > end:
      raise _DecodeError('Truncated message.')

    if type_id == -1:
      raise _DecodeError('MessageSet item missing type_id.')
    if message_start == -1:
      raise _DecodeError('MessageSet item missing message.')

    extension = extensions_by_number.get(type_id)
    if extension is not None:
      value = field_dict.get(extension)
      if value is None:
        value = field_dict.setdefault(
            extension, extension.message_type._concrete_class())
      if value._InternalParse(buffer, message_start, message_end) != message_end:
        # The only reason _InternalParse would return early is if it encountered
        # an end-group tag.
        raise _DecodeError('Unexpected end-group tag.')

    return pos

  return DecodeItem
621
622# --------------------------------------------------------------------
623# Optimization is not as heavy here because calls to SkipField() are rare,
624# except for handling end-group tags.
625
def _SkipVarint(buffer, pos, end):
  """Skip a varint value.  Returns the new position.

  Raises:
    IndexError: The buffer ended before the varint's final byte.
    _DecodeError: The varint extends past "end".
  """

  # Every byte of a varint except the last has its high bit set.
  while ord(buffer[pos]) & 0x80:
    pos += 1
  pos += 1  # Step over the final byte as well.
  if pos > end:
    raise _DecodeError('Truncated message.')
  return pos
635
def _SkipFixed64(buffer, pos, end):
  """Skip a fixed64 value.  Returns the new position.

  Raises:
    _DecodeError: The eight-byte value extends past "end".
  """

  pos += 8
  if pos > end:
    raise _DecodeError('Truncated message.')
  return pos
643
def _SkipLengthDelimited(buffer, pos, end):
  """Skip a length-delimited value.  Returns the new position.

  Raises:
    _DecodeError: The declared length extends past "end".
  """

  # The payload is preceded by its size, encoded as a varint.
  (size, pos) = _DecodeVarint(buffer, pos)
  pos += size
  if pos > end:
    raise _DecodeError('Truncated message.')
  return pos
652
def _SkipGroup(buffer, pos, end):
  """Skip sub-group.  Returns the new position.

  Fields are skipped one at a time until SkipField() reports an end-group
  tag by returning -1; the position just past that tag is returned.
  """

  while 1:
    (tag_bytes, pos) = ReadTag(buffer, pos)
    new_pos = SkipField(buffer, pos, end, tag_bytes)
    if new_pos == -1:
      # End-group tag: pos already points just past it.
      return pos
    pos = new_pos
662
def _EndGroup(buffer, pos, end):
  """Skipping an END_GROUP tag returns -1 to tell the parent loop to break.

  The arguments are unused; they exist so that this function has the same
  signature as the other skippers.
  """

  return -1
667
def _SkipFixed32(buffer, pos, end):
  """Skip a fixed32 value.  Returns the new position.

  Raises:
    _DecodeError: The four-byte value extends past "end".
  """

  pos += 4
  if pos > end:
    raise _DecodeError('Truncated message.')
  return pos
675
def _RaiseInvalidWireType(buffer, pos, end):
  """Skip function for unknown wire types.  Raises an exception.

  Raises:
    _DecodeError: Always.
  """

  raise _DecodeError('Tag had invalid wire type.')
680
def _FieldSkipper():
  """Constructs the SkipField function.

  Returns:
    A function SkipField(buffer, pos, end, tag_bytes) which dispatches on the
    tag's wire type to the matching skipper.
  """

  # Indexed by wire type, i.e. by the tag bits selected by TAG_TYPE_MASK.
  WIRETYPE_TO_SKIPPER = [
      _SkipVarint,
      _SkipFixed64,
      _SkipLengthDelimited,
      _SkipGroup,
      _EndGroup,
      _SkipFixed32,
      _RaiseInvalidWireType,
      _RaiseInvalidWireType,
      ]

  wiretype_mask = wire_format.TAG_TYPE_MASK
  local_ord = ord

  def SkipField(buffer, pos, end, tag_bytes):
    """Skips a field with the specified tag.

    |pos| should point to the byte immediately after the tag.

    Returns:
        The new position (after the tag value), or -1 if the tag is an end-group
        tag (in which case the calling loop should break).
    """

    # The wire type is always in the first byte since varints are little-endian.
    wire_type = local_ord(tag_bytes[0]) & wiretype_mask
    return WIRETYPE_TO_SKIPPER[wire_type](buffer, pos, end)

  return SkipField
temporal40ee5512008-07-10 02:12:20 +0000713
# Module-level skipper, built once at import time.
SkipField = _FieldSkipper()