blob: a6f41ca882d12b0f63f63188b33ea0e3a40c9d1c [file] [log] [blame]
temporal40ee5512008-07-10 02:12:20 +00001# Protocol Buffers - Google's data interchange format
kenton@google.com24bf56f2008-09-24 20:31:01 +00002# Copyright 2008 Google Inc. All rights reserved.
Feng Xiaoe4288622014-10-01 16:26:23 -07003# https://developers.google.com/protocol-buffers/
temporal40ee5512008-07-10 02:12:20 +00004#
kenton@google.com24bf56f2008-09-24 20:31:01 +00005# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
temporal40ee5512008-07-10 02:12:20 +00008#
kenton@google.com24bf56f2008-09-24 20:31:01 +00009# * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11# * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15# * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
temporal40ee5512008-07-10 02:12:20 +000018#
kenton@google.com24bf56f2008-09-24 20:31:01 +000019# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
temporal40ee5512008-07-10 02:12:20 +000030
Jisi Liu46e8ff62015-10-05 11:59:43 -070031"""Contains routines for printing protocol messages in text format.
jieluo@google.combde4a322014-08-12 21:10:30 +000032
Jisi Liu46e8ff62015-10-05 11:59:43 -070033Simple usage example:
34
35 # Create a proto object and serialize it to a text proto string.
36 message = my_proto_pb2.MyMessage(foo='bar')
37 text_proto = text_format.MessageToString(message)
38
39 # Parse a text proto string.
40 message = text_format.Parse(text_proto, my_proto_pb2.MyMessage())
41"""
temporal40ee5512008-07-10 02:12:20 +000042
43__author__ = 'kenton@google.com (Kenton Varda)'
44
Tres Seaver47ee4d32015-01-13 15:04:41 -050045import io
kenton@google.com80b1d622009-07-29 01:13:20 +000046import re
temporal40ee5512008-07-10 02:12:20 +000047
Tres Seaverf336d4b2015-01-13 14:21:29 -050048import six
49
Dan O'Reillyfe7d9372015-08-14 15:26:33 -040050if six.PY3:
51 long = int
52
kenton@google.com80b1d622009-07-29 01:13:20 +000053from google.protobuf.internal import type_checkers
temporal40ee5512008-07-10 02:12:20 +000054from google.protobuf import descriptor
jieluo@google.combde4a322014-08-12 21:10:30 +000055from google.protobuf import text_encoding
temporal40ee5512008-07-10 02:12:20 +000056
# Names exported by a star-import of this module. The parsing entry points
# (Parse, ParseLines, MergeLines) are part of the public API alongside the
# printing helpers, so they are listed here as well.
__all__ = ['MessageToString', 'PrintMessage', 'PrintField',
           'PrintFieldValue', 'Merge', 'Parse', 'ParseLines', 'MergeLines']
kenton@google.com80b1d622009-07-29 01:13:20 +000059
60
# One value checker per integer width/signedness combination, in the order
# uint32, int32, uint64, int64.
_INTEGER_CHECKERS = (
    type_checkers.Uint32ValueChecker(),
    type_checkers.Int32ValueChecker(),
    type_checkers.Uint64ValueChecker(),
    type_checkers.Int64ValueChecker(),
)

# Case-insensitive patterns for the textual spellings of infinity and NaN
# (each with an optional trailing "f").
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE)
_FLOAT_NAN = re.compile('nanf?', re.IGNORECASE)

# C++ types that are subject to the optional float_format when printing.
_FLOAT_TYPES = frozenset((
    descriptor.FieldDescriptor.CPPTYPE_FLOAT,
    descriptor.FieldDescriptor.CPPTYPE_DOUBLE,
))

# The two quote characters (single and double).
_QUOTES = frozenset(("'", '"'))
kenton@google.comd0047c42009-12-23 02:01:01 +000070
71
class Error(Exception):
  """Base class for every exception defined by the text_format module."""
74
75
class ParseError(Error):
  """Raised when text-format input cannot be parsed."""
78
kenton@google.com80b1d622009-07-29 01:13:20 +000079
class TextWriter(object):
  """In-memory sink that accumulates text-format output.

  The backing buffer is an io.BytesIO on Python 2 and an io.StringIO on
  Python 3, so getvalue() yields the native string type in both cases.
  """

  def __init__(self, as_utf8):
    # as_utf8 is accepted for the callers' benefit but is not consulted here.
    self._writer = io.BytesIO() if six.PY2 else io.StringIO()

  def write(self, val):
    """Appends val to the buffer and returns the buffer's write() result."""
    # The Python 2 buffer only takes bytes, so unicode is UTF-8 encoded first.
    if six.PY2 and isinstance(val, six.text_type):
      val = val.encode('utf-8')
    return self._writer.write(val)

  def close(self):
    """Closes the underlying buffer."""
    return self._writer.close()

  def getvalue(self):
    """Returns everything written so far."""
    return self._writer.getvalue()
98
temporal40ee5512008-07-10 02:12:20 +000099
def MessageToString(message, as_utf8=False, as_one_line=False,
                    pointy_brackets=False, use_index_order=False,
                    float_format=None):
  """Renders a protobuf message as a text-format string.

  Floating point values can be formatted compactly with 15 digits of
  precision (which is the most that IEEE 754 "double" can guarantee)
  using float_format='.15g'. To ensure that converting to text and back to a
  proto will result in an identical value, float_format='.17g' should be used.

  Args:
    message: The protocol buffers message.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields of a proto message using the order
      defined in source code instead of the field number. By default, use the
      field number order.
    float_format: If set, use this to specify floating point number formatting
      (per the "Format Specification Mini-Language"); otherwise, str() is used.

  Returns:
    A string of the text formatted protocol buffer message.
  """
  writer = TextWriter(as_utf8)
  PrintMessage(message, writer,
               as_utf8=as_utf8,
               as_one_line=as_one_line,
               pointy_brackets=pointy_brackets,
               use_index_order=use_index_order,
               float_format=float_format)
  text = writer.getvalue()
  writer.close()
  # In one-line mode every field is followed by a space; trim the trailing
  # whitespace from the finished string.
  return text.rstrip() if as_one_line else text
135
Feng Xiaoe841bac2015-12-11 17:09:20 -0800136
def _IsMapEntry(field):
  """Tells whether field is a message field whose type is a map entry.

  Args:
    field: A field descriptor.

  Returns:
    A truthy value if the field is of message type and that message type
    carries options with map_entry set; a falsy value otherwise.
  """
  if field.type != descriptor.FieldDescriptor.TYPE_MESSAGE:
    return False
  entry_type = field.message_type
  return entry_type.has_options and entry_type.GetOptions().map_entry
kenton@google.com80b1d622009-07-29 01:13:20 +0000141
Feng Xiaoe841bac2015-12-11 17:09:20 -0800142
def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False,
                 pointy_brackets=False, use_index_order=False,
                 float_format=None):
  """Writes each field reported by message.ListFields() to out.

  Map fields are printed one entry at a time in sorted key order, repeated
  fields one element at a time, and all other fields once.

  Args:
    message: The protocol buffers message to print.
    out: Writable object that receives the text.
    indent: Number of spaces written before each field.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, order fields by their descriptor index instead
      of the order ListFields() returned them in.
    float_format: If set, format specification applied to float and double
      values.
  """
  listed = message.ListFields()
  if use_index_order:
    listed.sort(key=lambda pair: pair[0].index)

  for field, value in listed:
    if _IsMapEntry(field):
      # This is slow for maps with submessage entries because it copies the
      # entire tree. Unfortunately this would take significant refactoring
      # of this file to work around.
      #
      # TODO(haberman): refactor and optimize if this becomes an issue.
      # pylint: disable=protected-access
      entry_class = field.message_type._concrete_class
      # pylint: enable=protected-access
      elements = (entry_class(key=key, value=value[key])
                  for key in sorted(value))
    elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      elements = value
    else:
      elements = (value,)

    for element in elements:
      PrintField(field, element, out, indent, as_utf8, as_one_line,
                 pointy_brackets=pointy_brackets,
                 use_index_order=use_index_order,
                 float_format=float_format)
temporal40ee5512008-07-10 02:12:20 +0000173
Feng Xiaoe841bac2015-12-11 17:09:20 -0800174
def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False,
               pointy_brackets=False, use_index_order=False, float_format=None):
  """Writes one "name value" pair to out.

  For repeated fields, value should be a single element. Extensions are
  written as a bracketed full name, groups by their message type name, and
  every other field by its own name. The pair is terminated by a space in
  one-line mode and by a newline otherwise.

  Args:
    field: Descriptor of the field being printed.
    value: The field value (one element for repeated fields).
    out: Writable object that receives the text.
    indent: Number of spaces written before the field name.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: Passed through to nested message printing.
    float_format: If set, format specification applied to float and double
      values.
  """
  fd = descriptor.FieldDescriptor

  out.write(' ' * indent)
  if field.is_extension:
    out.write('[')
    # Optional message extensions of a message-set container are labelled
    # with the extension's message type rather than the extension itself.
    labelled_by_type = (
        field.containing_type.GetOptions().message_set_wire_format and
        field.type == fd.TYPE_MESSAGE and
        field.label == fd.LABEL_OPTIONAL)
    if labelled_by_type:
      out.write(field.message_type.full_name)
    else:
      out.write(field.full_name)
    out.write(']')
  elif field.type == fd.TYPE_GROUP:
    # For groups, use the capitalized name.
    out.write(field.message_type.name)
  else:
    out.write(field.name)

  if field.cpp_type != fd.CPPTYPE_MESSAGE:
    # The colon is optional in this case, but our cross-language golden files
    # don't include it.
    out.write(': ')

  PrintFieldValue(field, value, out, indent, as_utf8, as_one_line,
                  pointy_brackets=pointy_brackets,
                  use_index_order=use_index_order,
                  float_format=float_format)
  out.write(' ' if as_one_line else '\n')
temporal40ee5512008-07-10 02:12:20 +0000210
kenton@google.com80b1d622009-07-29 01:13:20 +0000211
def PrintFieldValue(field, value, out, indent=0, as_utf8=False,
                    as_one_line=False, pointy_brackets=False,
                    use_index_order=False,
                    float_format=None):
  """Writes the text form of a single field value (without its name) to out.

  For repeated fields, value should be a single element.

  Args:
    field: Descriptor of the field the value belongs to.
    value: The value to print.
    out: Writable object that receives the text.
    indent: Indentation of the enclosing field; a nested message body is
      printed two spaces deeper unless as_one_line is set.
    as_utf8: Passed to text_encoding.CEscape for string fields; bytes fields
      always pass False.
    as_one_line: Don't introduce newlines inside nested messages.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: Passed through to nested message printing.
    float_format: If set, format specification applied to float and double
      values; otherwise str() is used.
  """
  fd = descriptor.FieldDescriptor
  cpp_type = field.cpp_type

  if cpp_type == fd.CPPTYPE_MESSAGE:
    openb, closeb = ('<', '>') if pointy_brackets else ('{', '}')
    if as_one_line:
      out.write(' %s ' % openb)
      body_indent = indent
      closing = closeb
    else:
      out.write(' %s\n' % openb)
      body_indent = indent + 2
      closing = ' ' * indent + closeb
    PrintMessage(value, out, body_indent, as_utf8, as_one_line,
                 pointy_brackets=pointy_brackets,
                 use_index_order=use_index_order,
                 float_format=float_format)
    out.write(closing)
    return

  if cpp_type == fd.CPPTYPE_ENUM:
    # Fall back to the raw number when the enum type has no such value.
    enum_value = field.enum_type.values_by_number.get(value, None)
    out.write(str(value) if enum_value is None else enum_value.name)
    return

  if cpp_type == fd.CPPTYPE_STRING:
    out.write('\"')
    raw = value.encode('utf-8') if isinstance(value, six.text_type) else value
    # We need to escape non-UTF8 chars in TYPE_BYTES field.
    escape_as_utf8 = False if field.type == fd.TYPE_BYTES else as_utf8
    out.write(text_encoding.CEscape(raw, escape_as_utf8))
    out.write('\"')
    return

  if cpp_type == fd.CPPTYPE_BOOL:
    out.write('true' if value else 'false')
    return

  if cpp_type in _FLOAT_TYPES and float_format is not None:
    out.write('{1:{0}}'.format(float_format, value))
  else:
    out.write(str(value))
269
kenton@google.com80b1d622009-07-29 01:13:20 +0000270
def Parse(text, message, allow_unknown_extension=False):
  """Parses a text representation of a protocol message into a message.

  Args:
    text: Message text representation. Input that is not of the native str
      type (bytes on Python 3, unicode on Python 2) is converted to it using
      UTF-8 before parsing.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  if not isinstance(text, str):
    # Normalize to the native str type. On Python 3 the input is bytes and
    # has to be decoded. On Python 2 it is unicode and has to be *encoded*:
    # calling decode() on a unicode object there first performs an implicit
    # ASCII encode, which raises UnicodeEncodeError for non-ASCII text.
    if six.PY3:
      text = text.decode('utf-8')
    else:
      text = text.encode('utf-8')
  return ParseLines(text.split('\n'), message, allow_unknown_extension)
kenton@google.com80b1d622009-07-29 01:13:20 +0000289
290
def Merge(text, message, allow_unknown_extension=False):
  """Parses a text representation of a protocol message into a message.

  Like Parse(), but allows repeated values for a non-repeated field, and uses
  the last one.

  Args:
    text: Message text representation. On Python 3, bytes input is decoded
      as UTF-8 before parsing.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # bytes.split() rejects a str separator on Python 3, so bytes input is
  # decoded first, mirroring what Parse() accepts. Python 2 input is left
  # untouched.
  if six.PY3 and isinstance(text, bytes):
    text = text.decode('utf-8')
  return MergeLines(text.split('\n'), message, allow_unknown_extension)
jieluo@google.combde4a322014-08-12 21:10:30 +0000310
311
def ParseLines(lines, message, allow_unknown_extension=False):
  """Parses text-format lines into message, rejecting duplicate scalars.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # Third argument False: repeated values for a non-repeated field are not
  # permitted.
  _ParseOrMerge(lines, message, False,
                allow_unknown_extension=allow_unknown_extension)
  return message
329
330
def MergeLines(lines, message, allow_unknown_extension=False):
  """Parses text-format lines into message, permitting duplicate scalars.

  Unlike ParseLines(), repeated values for a non-repeated field are allowed.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # Third argument True: repeated values for a non-repeated field are
  # permitted.
  _ParseOrMerge(lines, message, True,
                allow_unknown_extension=allow_unknown_extension)
  return message
348
349
def _ParseOrMerge(lines,
                  message,
                  allow_multiple_scalars,
                  allow_unknown_extension=False):
  """Tokenizes lines and merges one field at a time into message.

  Args:
    lines: Lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing

  Raises:
    ParseError: On text parsing problems.
  """
  tokenizer = _Tokenizer(lines)
  while True:
    if tokenizer.AtEnd():
      return
    _MergeField(tokenizer, message, allow_multiple_scalars,
                allow_unknown_extension)
Feng Xiaof157a562014-11-14 11:50:31 -0800372
373
def _MergeField(tokenizer,
                message,
                allow_multiple_scalars,
                allow_unknown_extension=False):
  """Reads one field (name plus value) from tokenizer and merges it.

  Args:
    tokenizer: A tokenizer to parse the field name and values.
    message: A protocol message to record the data.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing.

  Raises:
    ParseError: In case of text parsing problems.
  """
  fd = descriptor.FieldDescriptor
  message_descriptor = message.DESCRIPTOR

  if (hasattr(message_descriptor, 'syntax') and
      message_descriptor.syntax == 'proto3'):
    # Proto3 doesn't represent presence so we can't test if multiple
    # scalars have occurred. We have to allow them.
    allow_multiple_scalars = True

  if tokenizer.TryConsume('['):
    # Bracketed, dot-separated extension name.
    parts = [tokenizer.ConsumeIdentifier()]
    while tokenizer.TryConsume('.'):
      parts.append(tokenizer.ConsumeIdentifier())
    name = '.'.join(parts)

    if not message_descriptor.is_extendable:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" does not have extensions.' %
          message_descriptor.full_name)
    # pylint: disable=protected-access
    field = message.Extensions._FindExtensionByName(name)
    # pylint: enable=protected-access
    if field:
      if message_descriptor != field.containing_type:
        raise tokenizer.ParseErrorPreviousToken(
            'Extension "%s" does not extend message type "%s".' % (
                name, message_descriptor.full_name))
    elif allow_unknown_extension:
      # Remember that the extension is unknown; its contents are skipped
      # further down.
      field = None
    else:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" not registered.' % name)

    tokenizer.Consume(']')
  else:
    name = tokenizer.ConsumeIdentifier()
    by_name = message_descriptor.fields_by_name
    field = by_name.get(name, None)

    # Group names are expected to be capitalized as they appear in the
    # .proto file, which actually matches their type names, not their field
    # names.
    if not field:
      field = by_name.get(name.lower(), None)
      if field and field.type != fd.TYPE_GROUP:
        field = None

    if (field and field.type == fd.TYPE_GROUP and
        field.message_type.name != name):
      field = None

    if not field:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" has no field named "%s".' % (
              message_descriptor.full_name, name))

  if not field:
    # Proto field is unknown; only an unregistered extension gets here.
    assert allow_unknown_extension
    _SkipFieldContents(tokenizer)
  else:
    if not allow_multiple_scalars and field.containing_oneof:
      # Check if there's a different field set in this oneof.
      # Note that we ignore the case if the same field was set before, and we
      # apply allow_multiple_scalars to non-scalar fields as well.
      oneof_name = field.containing_oneof.name
      which_oneof = message.WhichOneof(oneof_name)
      if which_oneof is not None and which_oneof != field.name:
        raise tokenizer.ParseErrorPreviousToken(
            'Field "%s" is specified along with field "%s", another member of '
            'oneof "%s" for message type "%s".' % (
                field.name, which_oneof, field.containing_oneof.name,
                message_descriptor.full_name))

    # The colon is optional before a message body and mandatory before a
    # scalar value.
    if field.cpp_type == fd.CPPTYPE_MESSAGE:
      tokenizer.TryConsume(':')
      merger = _MergeMessageField
    else:
      tokenizer.Consume(':')
      merger = _MergeScalarField

    if field.label == fd.LABEL_REPEATED and tokenizer.TryConsume('['):
      # Short repeated format, e.g. "foo: [1, 2, 3]"
      merger(tokenizer, message, field, allow_multiple_scalars,
             allow_unknown_extension)
      while not tokenizer.TryConsume(']'):
        tokenizer.Consume(',')
        merger(tokenizer, message, field, allow_multiple_scalars,
               allow_unknown_extension)
    else:
      merger(tokenizer, message, field, allow_multiple_scalars,
             allow_unknown_extension)

  # For historical reasons, fields may optionally be separated by commas or
  # semicolons.
  if not tokenizer.TryConsume(','):
    tokenizer.TryConsume(';')
kenton@google.com80b1d622009-07-29 01:13:20 +0000486
487
def _SkipFieldContents(tokenizer):
  """Skips the value or message body that follows a field name.

  Args:
    tokenizer: A tokenizer to parse the field name and values.
  """
  # Try to guess the type of this field.
  # If this field is not a message, there should be a ":" between the
  # field name and the field value and also the field value should not
  # start with "{" or "<" which indicates the beginning of a message body.
  # If there is no ":" or there is a "{" or "<" after ":", this field has
  # to be a message or the input is ill-formed.
  if not tokenizer.TryConsume(':'):
    _SkipFieldMessage(tokenizer)
    return

  if tokenizer.LookingAt('{') or tokenizer.LookingAt('<'):
    _SkipFieldMessage(tokenizer)
  else:
    _SkipFieldValue(tokenizer)
505
506
def _SkipField(tokenizer):
  """Skips one whole field: its name followed by its value or message body.

  Args:
    tokenizer: A tokenizer to parse the field name and values.
  """
  bracketed = tokenizer.TryConsume('[')
  # A plain field name is one identifier; an extension name is a bracketed,
  # dot-separated sequence of identifiers.
  tokenizer.ConsumeIdentifier()
  if bracketed:
    while tokenizer.TryConsume('.'):
      tokenizer.ConsumeIdentifier()
    tokenizer.Consume(']')

  _SkipFieldContents(tokenizer)

  # For historical reasons, fields may optionally be separated by commas or
  # semicolons.
  if not tokenizer.TryConsume(','):
    tokenizer.TryConsume(';')
528
529
530def _SkipFieldMessage(tokenizer):
531 """Skips over a field message.
532
533 Args:
534 tokenizer: A tokenizer to parse the field name and values.
535 """
536
537 if tokenizer.TryConsume('<'):
538 delimiter = '>'
539 else:
540 tokenizer.Consume('{')
541 delimiter = '}'
542
543 while not tokenizer.LookingAt('>') and not tokenizer.LookingAt('}'):
544 _SkipField(tokenizer)
545
546 tokenizer.Consume(delimiter)
547
548
549def _SkipFieldValue(tokenizer):
550 """Skips over a field value.
551
552 Args:
553 tokenizer: A tokenizer to parse the field name and values.
554
555 Raises:
556 ParseError: In case an invalid field value is found.
557 """
558 # String tokens can come in multiple adjacent string literals.
559 # If we can consume one, consume as many as we can.
560 if tokenizer.TryConsumeString():
561 while tokenizer.TryConsumeString():
562 pass
563 return
564
565 if (not tokenizer.TryConsumeIdentifier() and
566 not tokenizer.TryConsumeInt64() and
567 not tokenizer.TryConsumeUint64() and
568 not tokenizer.TryConsumeFloat()):
569 raise ParseError('Invalid field value: ' + tokenizer.token)
570
571
def _MergeMessageField(tokenizer, message, field, allow_multiple_scalars,
                       allow_unknown_extension):
  """Merges a single message-typed field (including map entries) into a message.

  The body is read between "<" and ">" or between "{" and "}". Map entries
  are parsed into a temporary entry message whose key/value pair is then
  stored in the map field of message.

  Args:
    tokenizer: A tokenizer to parse the field value.
    message: The message of which field is a member.
    field: The descriptor of the field to be merged.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing.

  Raises:
    ParseError: In case of text parsing problems.
  """
  fd = descriptor.FieldDescriptor
  is_map_entry = _IsMapEntry(field)

  if tokenizer.TryConsume('<'):
    end_token = '>'
  else:
    tokenizer.Consume('{')
    end_token = '}'

  # Pick the sub-message that the nested fields are merged into.
  if field.label != fd.LABEL_REPEATED:
    if field.is_extension:
      sub_message = message.Extensions[field]
    else:
      sub_message = getattr(message, field.name)
    sub_message.SetInParent()
  elif field.is_extension:
    sub_message = message.Extensions[field].add()
  elif is_map_entry:
    # pylint: disable=protected-access
    sub_message = field.message_type._concrete_class()
    # pylint: enable=protected-access
  else:
    sub_message = getattr(message, field.name).add()

  while not tokenizer.TryConsume(end_token):
    if tokenizer.AtEnd():
      raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token,))
    _MergeField(tokenizer, sub_message, allow_multiple_scalars,
                allow_unknown_extension)

  if not is_map_entry:
    return

  # Transfer the parsed entry into the map: message values are merged into
  # the slot for the key, other values are assigned.
  value_cpptype = field.message_type.fields_by_name['value'].cpp_type
  map_field = getattr(message, field.name)
  if value_cpptype == fd.CPPTYPE_MESSAGE:
    slot = map_field[sub_message.key]
    slot.MergeFrom(sub_message.value)
  else:
    map_field[sub_message.key] = sub_message.value
625
626
def _MergeScalarField(tokenizer, message, field, allow_multiple_scalars,
                      allow_unknown_extension):
  """Reads one scalar value from tokenizer and stores it in message.

  Args:
    tokenizer: A tokenizer to parse the field value.
    message: A protocol message to record the data.
    field: The descriptor of the field to be merged.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".
    allow_unknown_extension: Unused, just here for consistency with
      _MergeMessageField.

  Raises:
    ParseError: In case of text parsing problems.
    RuntimeError: On runtime errors.
  """
  _ = allow_unknown_extension
  fd = descriptor.FieldDescriptor
  value = None

  # Choose the tokenizer method that matches the declared field type.
  if field.type in (fd.TYPE_INT32, fd.TYPE_SINT32, fd.TYPE_SFIXED32):
    value = tokenizer.ConsumeInt32()
  elif field.type in (fd.TYPE_INT64, fd.TYPE_SINT64, fd.TYPE_SFIXED64):
    value = tokenizer.ConsumeInt64()
  elif field.type in (fd.TYPE_UINT32, fd.TYPE_FIXED32):
    value = tokenizer.ConsumeUint32()
  elif field.type in (fd.TYPE_UINT64, fd.TYPE_FIXED64):
    value = tokenizer.ConsumeUint64()
  elif field.type in (fd.TYPE_FLOAT, fd.TYPE_DOUBLE):
    value = tokenizer.ConsumeFloat()
  elif field.type == fd.TYPE_BOOL:
    value = tokenizer.ConsumeBool()
  elif field.type == fd.TYPE_STRING:
    value = tokenizer.ConsumeString()
  elif field.type == fd.TYPE_BYTES:
    value = tokenizer.ConsumeByteString()
  elif field.type == fd.TYPE_ENUM:
    value = tokenizer.ConsumeEnum(field)
  else:
    raise RuntimeError('Unknown field type %d' % field.type)

  if field.label == fd.LABEL_REPEATED:
    # Repeated fields simply grow by one element.
    if field.is_extension:
      message.Extensions[field].append(value)
    else:
      getattr(message, field.name).append(value)
    return

  if field.is_extension:
    if not allow_multiple_scalars and message.HasExtension(field):
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" should not have multiple "%s" extensions.' %
          (message.DESCRIPTOR.full_name, field.full_name))
    message.Extensions[field] = value
    return

  if not allow_multiple_scalars and message.HasField(field.name):
    raise tokenizer.ParseErrorPreviousToken(
        'Message type "%s" should not have multiple "%s" fields.' %
        (message.DESCRIPTOR.full_name, field.name))
  setattr(message, field.name, value)
kenton@google.com80b1d622009-07-29 01:13:20 +0000696
697
class _Tokenizer(object):
  """Splits protocol buffer text format input into tokens.

  The tokenizer walks an iterable of lines, skipping whitespace and '#'
  comments, and exposes the token it is currently positioned on through the
  `token` attribute. The Consume* methods validate and convert the current
  token and then advance to the next one; the TryConsume* variants report
  failure with a boolean instead of raising.

  It was directly ported from the Java protocol buffer API.
  """

  # A run of whitespace and/or '#' comments (a comment extends to end of line).
  _WHITESPACE = re.compile('(\\s|(#.*$))+', re.MULTILINE)
  _TOKEN = re.compile('|'.join(
      [
          r'[a-zA-Z_][0-9a-zA-Z_+-]*',  # an identifier
          r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*',  # a number
      ] + [
          # one quoted-string alternative per quote mark
          r'{qt}([^{qt}\n\\]|\\.)*({qt}|\\?$)'.format(qt=mark)
          for mark in _QUOTES
      ]))

  _IDENTIFIER = re.compile(r'\w+')

  def __init__(self, lines):
    """Positions the tokenizer on the first token of the input.

    Args:
      lines: An iterable yielding the lines of text to tokenize.
    """
    self._lines = iter(lines)
    self._more_lines = True
    self._current_line = ''
    # _line is advanced each time a line is fetched, so it starts before 0.
    self._line = -1
    self._column = 0
    self._previous_line = 0
    self._previous_column = 0
    self._position = 0
    self._token_start = None
    self.token = ''
    self._SkipWhitespace()
    self.NextToken()

  def LookingAt(self, token):
    """Reports whether the current token equals the given text.

    Args:
      token: Text to compare against the current token.

    Returns:
      True iff the current token equals `token`. Nothing is consumed.
    """
    return self.token == token

  def AtEnd(self):
    """Checks whether the end of the text was reached.

    Returns:
      True iff there is no current token left.
    """
    return not self.token

  def _PopLine(self):
    """Fetches lines until the column lies inside the current line.

    When the input is exhausted the current line becomes empty and the
    tokenizer remembers that no more lines are available.
    """
    while self._column >= len(self._current_line):
      try:
        self._current_line = next(self._lines)
      except StopIteration:
        self._current_line = ''
        self._more_lines = False
        return
      # A fresh line was fetched: restart at its first column.
      self._line += 1
      self._column = 0

  def _SkipWhitespace(self):
    """Moves the column past whitespace and comments, across lines if needed."""
    self._PopLine()
    blank = self._WHITESPACE.match(self._current_line, self._column)
    while blank:
      # The match is anchored at the current column, so its end is exactly
      # the column advanced by the length of the matched run.
      self._column = blank.end()
      self._PopLine()
      blank = self._WHITESPACE.match(self._current_line, self._column)

  def TryConsume(self, token):
    """Tries to consume a given piece of text.

    Args:
      token: Text to consume.

    Returns:
      True iff the current token matched and the tokenizer advanced.
    """
    if self.token != token:
      return False
    self.NextToken()
    return True

  def Consume(self, token):
    """Consumes a piece of text.

    Args:
      token: Text to consume.

    Raises:
      ParseError: If the current token is not the given text.
    """
    if self.TryConsume(token):
      return
    raise self._ParseError('Expected "%s".' % token)

  def TryConsumeIdentifier(self):
    """Consumes an identifier if possible.

    Returns:
      True iff ConsumeIdentifier succeeded; False if it raised ParseError.
    """
    try:
      self.ConsumeIdentifier()
    except ParseError:
      return False
    return True

  def ConsumeIdentifier(self):
    """Consumes protocol message field identifier.

    Returns:
      Identifier string.

    Raises:
      ParseError: If an identifier couldn't be consumed.
    """
    identifier = self.token
    if self._IDENTIFIER.match(identifier):
      self.NextToken()
      return identifier
    raise self._ParseError('Expected identifier.')

  def _ConsumeParsedToken(self, parse, **parse_options):
    """Converts the current token with `parse` and advances on success.

    Args:
      parse: Callable taking the token text (plus keyword options) and
        returning the converted value, raising ValueError on bad input.
      **parse_options: Keyword arguments forwarded to `parse`.

    Returns:
      The value produced by `parse`.

    Raises:
      ParseError: If `parse` raised ValueError; the tokenizer does not advance.
    """
    try:
      parsed = parse(self.token, **parse_options)
    except ValueError as e:
      raise self._ParseError(str(e))
    self.NextToken()
    return parsed

  def ConsumeInt32(self):
    """Consumes a signed 32bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If a signed 32bit integer couldn't be consumed.
    """
    return self._ConsumeParsedToken(
        ParseInteger, is_signed=True, is_long=False)

  def ConsumeUint32(self):
    """Consumes an unsigned 32bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an unsigned 32bit integer couldn't be consumed.
    """
    return self._ConsumeParsedToken(
        ParseInteger, is_signed=False, is_long=False)

  def TryConsumeInt64(self):
    """Consumes a signed 64bit integer if possible.

    Returns:
      True iff ConsumeInt64 succeeded; False if it raised ParseError.
    """
    try:
      self.ConsumeInt64()
    except ParseError:
      return False
    return True

  def ConsumeInt64(self):
    """Consumes a signed 64bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If a signed 64bit integer couldn't be consumed.
    """
    return self._ConsumeParsedToken(
        ParseInteger, is_signed=True, is_long=True)

  def TryConsumeUint64(self):
    """Consumes an unsigned 64bit integer if possible.

    Returns:
      True iff ConsumeUint64 succeeded; False if it raised ParseError.
    """
    try:
      self.ConsumeUint64()
    except ParseError:
      return False
    return True

  def ConsumeUint64(self):
    """Consumes an unsigned 64bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an unsigned 64bit integer couldn't be consumed.
    """
    return self._ConsumeParsedToken(
        ParseInteger, is_signed=False, is_long=True)

  def TryConsumeFloat(self):
    """Consumes a floating point number if possible.

    Returns:
      True iff ConsumeFloat succeeded; False if it raised ParseError.
    """
    try:
      self.ConsumeFloat()
    except ParseError:
      return False
    return True

  def ConsumeFloat(self):
    """Consumes a floating point number.

    Returns:
      The number parsed.

    Raises:
      ParseError: If a floating point number couldn't be consumed.
    """
    return self._ConsumeParsedToken(ParseFloat)

  def ConsumeBool(self):
    """Consumes a boolean value.

    Returns:
      The bool parsed.

    Raises:
      ParseError: If a boolean value couldn't be consumed.
    """
    return self._ConsumeParsedToken(ParseBool)

  def TryConsumeString(self):
    """Consumes a string value if possible.

    Returns:
      True iff ConsumeString succeeded; False if it raised ParseError.
    """
    try:
      self.ConsumeString()
    except ParseError:
      return False
    return True

  def ConsumeString(self):
    """Consumes a string value.

    The literal is first read as bytes and then decoded as UTF-8.

    Returns:
      The string parsed.

    Raises:
      ParseError: If a string value couldn't be consumed or decoded.
    """
    raw = self.ConsumeByteString()
    try:
      return six.text_type(raw, 'utf-8')
    except UnicodeDecodeError as e:
      raise self._StringParseError(e)

  def ConsumeByteString(self):
    """Consumes a byte array value.

    Adjacent quoted literals are consumed together and concatenated.

    Returns:
      The array parsed (as a string).

    Raises:
      ParseError: If a byte array value couldn't be consumed.
    """
    pieces = [self._ConsumeSingleByteString()]
    # Keep going while the next token starts with a quote mark.
    while self.token and self.token[0] in _QUOTES:
      pieces.append(self._ConsumeSingleByteString())
    return b''.join(pieces)

  def _ConsumeSingleByteString(self):
    """Consume one token of a string literal.

    String literals (whether bytes or text) can come in multiple adjacent
    tokens which are automatically concatenated, like in C or Python. This
    method only consumes one token.

    Returns:
      The token contents between the quotes, passed through
      text_encoding.CUnescape.

    Raises:
      ParseError: When the wrong format data is found.
    """
    literal = self.token
    if not (len(literal) >= 1 and literal[0] in _QUOTES):
      raise self._ParseError('Expected string but found: %r' % (literal,))

    # The closing quote must be present and be the same mark as the opening.
    if not (len(literal) >= 2 and literal[-1] == literal[0]):
      raise self._ParseError('String missing ending quote: %r' % (literal,))

    try:
      unescaped = text_encoding.CUnescape(literal[1:-1])
    except ValueError as e:
      raise self._ParseError(str(e))
    self.NextToken()
    return unescaped

  def ConsumeEnum(self, field):
    """Consumes an enum value for the given field.

    Args:
      field: Enum field descriptor handed to ParseEnum.

    Returns:
      The value returned by ParseEnum for the current token.

    Raises:
      ParseError: If ParseEnum rejected the current token.
    """
    try:
      number = ParseEnum(field, self.token)
    except ValueError as e:
      raise self._ParseError(str(e))
    self.NextToken()
    return number

  def ParseErrorPreviousToken(self, message):
    """Creates and *returns* a ParseError for the previously read token.

    Args:
      message: A message to set for the exception.

    Returns:
      A ParseError instance.
    """
    # Positions are stored 0-based and reported 1-based.
    line = self._previous_line + 1
    column = self._previous_column + 1
    return ParseError('%d:%d : %s' % (line, column, message))

  def _ParseError(self, message):
    """Creates and *returns* a ParseError for the current token."""
    # Positions are stored 0-based and reported 1-based.
    line = self._line + 1
    column = self._column + 1
    return ParseError('%d:%d : %s' % (line, column, message))

  def _StringParseError(self, e):
    """Creates and *returns* a ParseError describing a string decoding failure."""
    return self._ParseError('Couldn\'t parse string: ' + str(e))

  def NextToken(self):
    """Reads the next meaningful token."""
    # Remember where the token being left behind started.
    self._previous_line, self._previous_column = self._line, self._column

    # Step over the current token, then over any whitespace and comments.
    self._column += len(self.token)
    self._SkipWhitespace()

    if not self._more_lines:
      self.token = ''
      return

    found = self._TOKEN.match(self._current_line, self._column)
    # Anything the token pattern does not cover becomes a one-character token.
    self.token = (
        found.group(0) if found else self._current_line[self._column])
kenton@google.com80b1d622009-07-29 01:13:20 +00001036
1037
def ParseInteger(text, is_signed=False, is_long=False):
  """Converts the text form of an integer into a range-checked number.

  The text is converted with base 0, so the prefix of the literal selects the
  radix. The result is then validated by the checker selected from
  `_INTEGER_CHECKERS` for the requested width and signedness.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: If the text cannot be converted to an integer. Anything raised
      by the range checker propagates unchanged to the caller.
  """
  # 32-bit values are forced to int and 64-bit values to long so that
  # alternate implementations where the distinction is more significant
  # (e.g. the C++ implementation) stay simpler.
  try:
    number = long(text, 0) if is_long else int(text, 0)
  except ValueError:
    raise ValueError('Couldn\'t parse integer: %s' % text)

  # Checkers are indexed by width (x2) plus signedness; errors are left to
  # the caller.
  _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)].CheckValue(number)
  return number
1068
1069
def ParseFloat(text):
  """Parse a floating point number.

  The built-in float conversion is tried first. If it rejects the text, the
  module's infinity and NaN patterns are consulted, and finally the text is
  retried with trailing 'f' characters removed.

  Args:
    text: Text to parse.

  Returns:
    The number parsed.

  Raises:
    ValueError: If a floating point number couldn't be parsed.
  """
  try:
    # Anything Python itself understands is accepted as is.
    return float(text)
  except ValueError:
    # Alternative spellings of infinity; a leading '-' selects the sign.
    if _FLOAT_INFINITY.match(text):
      return float('-inf') if text[0] == '-' else float('inf')
    if _FLOAT_NAN.match(text):
      return float('nan')
    # Last resort: assume the '1.0f' format.
    try:
      return float(text.rstrip('f'))
    except ValueError:
      raise ValueError('Couldn\'t parse float: %s' % text)
1100
1101
def ParseBool(text):
  """Parse a boolean value.

  Args:
    text: Text to parse.

  Returns:
    True for 'true', 't' or '1'; False for 'false', 'f' or '0'.

  Raises:
    ValueError: If text is not one of the accepted spellings.
  """
  true_spellings = ('true', 't', '1')
  false_spellings = ('false', 'f', '0')

  if text in true_spellings:
    return True
  if text in false_spellings:
    return False
  raise ValueError('Expected "true" or "false".')
1120
1121
def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name). Text that converts to an integer with
  base 0 is looked up by number; anything else is looked up by name.

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum type has no value with that name or number.
  """
  enum_type = field.enum_type
  try:
    number = int(value, 0)
  except ValueError:
    # Not numeric, so the text must name one of the enum's values.
    named = enum_type.values_by_name.get(value, None)
    if named is None:
      raise ValueError(
          'Enum type "%s" has no value named %s.' % (
              enum_type.full_name, value))
    return named.number

  # Numeric text: the number itself must be declared by the enum.
  numbered = enum_type.values_by_number.get(number, None)
  if numbered is None:
    raise ValueError(
        'Enum type "%s" has no value with number %d.' % (
            enum_type.full_name, number))
  return numbered.number
1155 return enum_value.number