blob: 8d256076c28ad4c3245aaccbbbd8284aeb4bc961 [file] [log] [blame]
temporal40ee5512008-07-10 02:12:20 +00001# Protocol Buffers - Google's data interchange format
kenton@google.com24bf56f2008-09-24 20:31:01 +00002# Copyright 2008 Google Inc. All rights reserved.
Feng Xiaoe4288622014-10-01 16:26:23 -07003# https://developers.google.com/protocol-buffers/
temporal40ee5512008-07-10 02:12:20 +00004#
kenton@google.com24bf56f2008-09-24 20:31:01 +00005# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
temporal40ee5512008-07-10 02:12:20 +00008#
kenton@google.com24bf56f2008-09-24 20:31:01 +00009# * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11# * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15# * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
temporal40ee5512008-07-10 02:12:20 +000018#
kenton@google.com24bf56f2008-09-24 20:31:01 +000019# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
temporal40ee5512008-07-10 02:12:20 +000030
Jisi Liu46e8ff62015-10-05 11:59:43 -070031"""Contains routines for printing protocol messages in text format.
jieluo@google.combde4a322014-08-12 21:10:30 +000032
Jisi Liu46e8ff62015-10-05 11:59:43 -070033Simple usage example:
34
35 # Create a proto object and serialize it to a text proto string.
36 message = my_proto_pb2.MyMessage(foo='bar')
37 text_proto = text_format.MessageToString(message)
38
39 # Parse a text proto string.
40 message = text_format.Parse(text_proto, my_proto_pb2.MyMessage())
41"""
temporal40ee5512008-07-10 02:12:20 +000042
43__author__ = 'kenton@google.com (Kenton Varda)'
44
Tres Seaver47ee4d32015-01-13 15:04:41 -050045import io
kenton@google.com80b1d622009-07-29 01:13:20 +000046import re
temporal40ee5512008-07-10 02:12:20 +000047
Tres Seaverf336d4b2015-01-13 14:21:29 -050048import six
49
if six.PY3:
  # Python 3 has no builtin `long`; bind the name to int so that references
  # to `long` in this module resolve on both major versions.
  long = int
52
kenton@google.com80b1d622009-07-29 01:13:20 +000053from google.protobuf.internal import type_checkers
temporal40ee5512008-07-10 02:12:20 +000054from google.protobuf import descriptor
jieluo@google.combde4a322014-08-12 21:10:30 +000055from google.protobuf import text_encoding
temporal40ee5512008-07-10 02:12:20 +000056
# Public names exported by `from google.protobuf.text_format import *`.
# Parse is listed because it is the parsing entry point advertised in the
# module docstring.
__all__ = ['MessageToString', 'Parse', 'PrintMessage', 'PrintField',
           'PrintFieldValue', 'Merge']
kenton@google.com80b1d622009-07-29 01:13:20 +000059
60
# One value checker per integer flavour, in the order
# uint32, int32, uint64, int64.
_INTEGER_CHECKERS = (
    type_checkers.Uint32ValueChecker(),
    type_checkers.Int32ValueChecker(),
    type_checkers.Uint64ValueChecker(),
    type_checkers.Int64ValueChecker(),
)
# Case-insensitive spellings of infinity: optional sign, "inf" or "infinity",
# optional trailing "f".
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE)
# Case-insensitive "nan" with an optional trailing "f".
_FLOAT_NAN = re.compile('nanf?', re.IGNORECASE)
# C++ types that are printed with the caller-supplied float_format.
_FLOAT_TYPES = frozenset((descriptor.FieldDescriptor.CPPTYPE_FLOAT,
                          descriptor.FieldDescriptor.CPPTYPE_DOUBLE))
# Characters that may delimit a string literal.
_QUOTES = frozenset(["'", '"'])
kenton@google.comd0047c42009-12-23 02:01:01 +000070
71
class Error(Exception):
  """Base class for the exceptions defined by the text_format module."""
74
75
class ParseError(Error):
  """Raised when a text representation of a message cannot be parsed."""
78
kenton@google.com80b1d622009-07-29 01:13:20 +000079
class TextWriter(object):
  """In-memory sink that the printing routines write text format into.

  The backing buffer is io.BytesIO on Python 2 and io.StringIO on Python 3.
  """

  def __init__(self, as_utf8):
    # as_utf8 is accepted for the caller's convenience but is not consulted
    # when choosing the buffer type.
    self._writer = io.BytesIO() if six.PY2 else io.StringIO()

  def write(self, val):
    """Appends val to the buffer, returning the buffer's write() result."""
    if six.PY2 and isinstance(val, six.text_type):
      # The Python 2 buffer holds bytes, so unicode text is encoded first.
      val = val.encode('utf-8')
    return self._writer.write(val)

  def close(self):
    """Closes the underlying buffer."""
    return self._writer.close()

  def getvalue(self):
    """Returns everything written to the buffer so far."""
    return self._writer.getvalue()
98
temporal40ee5512008-07-10 02:12:20 +000099
def MessageToString(message, as_utf8=False, as_one_line=False,
                    pointy_brackets=False, use_index_order=False,
                    float_format=None):
  """Renders a protocol buffer message as a text-format string.

  Passing float_format='.15g' prints floating point values compactly with
  15 significant digits (the most an IEEE 754 "double" can guarantee).  Use
  float_format='.17g' when the text must convert back to an identical value.

  Args:
    message: The protocol buffers message.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields of a proto message using the order
      defined in source code instead of the field number. By default, use the
      field number order.
    float_format: If set, use this to specify floating point number formatting
      (per the "Format Specification Mini-Language"); otherwise, str() is used.

  Returns:
    A string of the text formatted protocol buffer message.
  """
  writer = TextWriter(as_utf8)
  PrintMessage(message, writer,
               as_utf8=as_utf8,
               as_one_line=as_one_line,
               pointy_brackets=pointy_brackets,
               use_index_order=use_index_order,
               float_format=float_format)
  text = writer.getvalue()
  writer.close()
  # In one-line mode every field is followed by a space; drop the last one.
  return text.rstrip() if as_one_line else text
135
Feng Xiaoe841bac2015-12-11 17:09:20 -0800136
def _IsMapEntry(field):
  """Tells whether field's message type carries the map_entry option.

  Args:
    field: A field descriptor.

  Returns:
    A truthy value when field is a TYPE_MESSAGE field whose message type has
    options and those options set map_entry; a falsy value otherwise.
  """
  if field.type != descriptor.FieldDescriptor.TYPE_MESSAGE:
    return False
  return (field.message_type.has_options and
          field.message_type.GetOptions().map_entry)
kenton@google.com80b1d622009-07-29 01:13:20 +0000141
Feng Xiaoe841bac2015-12-11 17:09:20 -0800142
def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False,
                 pointy_brackets=False, use_index_order=False,
                 float_format=None):
  """Writes every set field of message to out in text format.

  Args:
    message: The protocol buffers message to print.
    out: Writer object exposing write(); receives the text.
    indent: Number of spaces placed before each field.
    as_utf8: Forwarded to PrintField.
    as_one_line: Forwarded to PrintField.
    pointy_brackets: Forwarded to PrintField.
    use_index_order: If True, fields are sorted by descriptor index before
      printing; otherwise the order returned by ListFields() is used.
    float_format: Forwarded to PrintField.
  """

  def _Emit(field_descriptor, item):
    # Single place that forwards all the formatting options.
    PrintField(field_descriptor, item, out, indent, as_utf8, as_one_line,
               pointy_brackets=pointy_brackets,
               use_index_order=use_index_order,
               float_format=float_format)

  present_fields = message.ListFields()
  if use_index_order:
    present_fields.sort(key=lambda pair: pair[0].index)

  for field, value in present_fields:
    if _IsMapEntry(field):
      for key in sorted(value):
        # This is slow for maps with submessage entries because it copies the
        # entire tree.  Unfortunately this would take significant refactoring
        # of this file to work around.
        #
        # TODO(haberman): refactor and optimize if this becomes an issue.
        _Emit(field,
              field.message_type._concrete_class(key=key, value=value[key]))
    elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      for element in value:
        _Emit(field, element)
    else:
      _Emit(field, value)
temporal40ee5512008-07-10 02:12:20 +0000173
Feng Xiaoe841bac2015-12-11 17:09:20 -0800174
def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False,
               pointy_brackets=False, use_index_order=False, float_format=None):
  """Writes one "name: value" pair to out.

  For a repeated field, value must be a single element of it.

  Args:
    field: Descriptor of the field being printed.
    value: The value (or single repeated element) to print.
    out: Writer object exposing write().
    indent: Number of spaces written before the field name.
    as_utf8: Forwarded to PrintFieldValue.
    as_one_line: If True the pair is terminated by a space, else a newline.
    pointy_brackets: Forwarded to PrintFieldValue.
    use_index_order: Forwarded to PrintFieldValue.
    float_format: Forwarded to PrintFieldValue.
  """
  field_kinds = descriptor.FieldDescriptor

  out.write(' ' * indent)
  if not field.is_extension:
    if field.type == field_kinds.TYPE_GROUP:
      # For groups, use the capitalized name.
      out.write(field.message_type.name)
    else:
      out.write(field.name)
  else:
    out.write('[')
    # An optional message extension of a message_set_wire_format container is
    # labelled with the extension's message type name, not its field name.
    if (field.containing_type.GetOptions().message_set_wire_format and
        field.type == field_kinds.TYPE_MESSAGE and
        field.label == field_kinds.LABEL_OPTIONAL):
      out.write(field.message_type.full_name)
    else:
      out.write(field.full_name)
    out.write(']')

  if field.cpp_type != field_kinds.CPPTYPE_MESSAGE:
    # The colon is optional in this case, but our cross-language golden files
    # don't include it.
    out.write(': ')

  PrintFieldValue(field, value, out, indent, as_utf8, as_one_line,
                  pointy_brackets=pointy_brackets,
                  use_index_order=use_index_order,
                  float_format=float_format)
  out.write(' ' if as_one_line else '\n')
temporal40ee5512008-07-10 02:12:20 +0000210
kenton@google.com80b1d622009-07-29 01:13:20 +0000211
def PrintFieldValue(field, value, out, indent=0, as_utf8=False,
                    as_one_line=False, pointy_brackets=False,
                    use_index_order=False,
                    float_format=None):
  """Writes a single field value, without its name, to out.

  For a repeated field, value must be a single element of it.

  Args:
    field: Descriptor of the field the value belongs to.
    value: The value to print.
    out: Writer object exposing write().
    indent: Current indentation; nested messages are printed two deeper.
    as_utf8: Passed to the string escaper for non-bytes string fields.
    as_one_line: If True nested messages are kept on the current line.
    pointy_brackets: If True nested messages use <> instead of {}.
    use_index_order: Forwarded to PrintMessage for nested messages.
    float_format: Format spec applied to float/double values when not None.
  """
  field_kinds = descriptor.FieldDescriptor
  openb, closeb = ('<', '>') if pointy_brackets else ('{', '}')
  cpp_type = field.cpp_type

  if cpp_type == field_kinds.CPPTYPE_MESSAGE:
    if as_one_line:
      prefix = ' %s ' % openb
      nested_indent = indent
      suffix = closeb
    else:
      prefix = ' %s\n' % openb
      nested_indent = indent + 2
      suffix = ' ' * indent + closeb
    out.write(prefix)
    PrintMessage(value, out, nested_indent, as_utf8, as_one_line,
                 pointy_brackets=pointy_brackets,
                 use_index_order=use_index_order,
                 float_format=float_format)
    out.write(suffix)
  elif cpp_type == field_kinds.CPPTYPE_ENUM:
    # Numbers with no matching enum value are printed as plain numbers.
    known_value = field.enum_type.values_by_number.get(value, None)
    out.write(str(value) if known_value is None else known_value.name)
  elif cpp_type == field_kinds.CPPTYPE_STRING:
    out.write('\"')
    raw = value.encode('utf-8') if isinstance(value, six.text_type) else value
    # We need to escape non-UTF8 chars in TYPE_BYTES field.
    escape_as_utf8 = (
        False if field.type == field_kinds.TYPE_BYTES else as_utf8)
    out.write(text_encoding.CEscape(raw, escape_as_utf8))
    out.write('\"')
  elif cpp_type == field_kinds.CPPTYPE_BOOL:
    out.write('true' if value else 'false')
  elif cpp_type in _FLOAT_TYPES and float_format is not None:
    out.write('{1:{0}}'.format(float_format, value))
  else:
    out.write(str(value))
269
kenton@google.com80b1d622009-07-29 01:13:20 +0000270
def Parse(text, message, allow_unknown_extension=False):
  """Parses a text representation of a protocol message into a message.

  Args:
    text: Message text representation.  May be a native str, or the other
      string type of the running interpreter (unicode on Python 2, bytes on
      Python 3); the latter is converted to native str using UTF-8.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  if not isinstance(text, str):
    if isinstance(text, six.text_type):
      # Python 2 unicode: calling .decode() on it would first encode with the
      # ASCII codec and fail on any non-ASCII character, so encode instead.
      text = text.encode('utf-8')
    else:
      # Byte string on Python 3.
      text = text.decode('utf-8')
  return ParseLines(text.split('\n'), message, allow_unknown_extension)
kenton@google.com80b1d622009-07-29 01:13:20 +0000289
290
def Merge(text, message, allow_unknown_extension=False):
  """Parses a text representation of a protocol message into a message.

  Like Parse(), but allows repeated values for a non-repeated field, and uses
  the last one.

  Args:
    text: Message text representation.  May be a native str, or the other
      string type of the running interpreter (unicode on Python 2, bytes on
      Python 3); the latter is converted to native str using UTF-8.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # Normalise to native str first: splitting Python 3 bytes on the str '\n'
  # would raise TypeError before any parsing happened.
  if not isinstance(text, str):
    if isinstance(text, six.text_type):
      # Python 2 unicode.
      text = text.encode('utf-8')
    else:
      # Byte string on Python 3.
      text = text.decode('utf-8')
  return MergeLines(text.split('\n'), message, allow_unknown_extension)
jieluo@google.combde4a322014-08-12 21:10:30 +0000310
311
def ParseLines(lines, message, allow_unknown_extension=False):
  """Parses a text representation, given as lines, into a message.

  A non-repeated field that occurs more than once in the input is not
  tolerated by this variant; see MergeLines() for the lenient one.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  _ParseOrMerge(lines,
                message,
                allow_multiple_scalars=False,
                allow_unknown_extension=allow_unknown_extension)
  return message
329
330
def MergeLines(lines, message, allow_unknown_extension=False):
  """Merges a text representation, given as lines, into a message.

  Unlike ParseLines(), repeated occurrences of a non-repeated field are
  permitted.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  _ParseOrMerge(lines,
                message,
                allow_multiple_scalars=True,
                allow_unknown_extension=allow_unknown_extension)
  return message
348
349
def _ParseOrMerge(lines,
                  message,
                  allow_multiple_scalars,
                  allow_unknown_extension=False):
  """Reads fields from lines into message until the input is exhausted.

  Args:
    lines: Lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing

  Raises:
    ParseError: On text parsing problems.
  """
  token_stream = _Tokenizer(lines)
  while True:
    if token_stream.AtEnd():
      break
    # Each call consumes exactly one top-level field.
    _MergeField(token_stream, message, allow_multiple_scalars,
                allow_unknown_extension)
Feng Xiaof157a562014-11-14 11:50:31 -0800372
373
def _MergeField(tokenizer,
                message,
                allow_multiple_scalars,
                allow_unknown_extension=False):
  """Merges a single protocol message field into a message.

  The field name is read first: either a bracketed, dot-separated extension
  name or a plain identifier.  The value that follows is then parsed as a
  nested message, as one scalar, or as a bracketed list of scalars (which may
  be empty), and stored on message.  An extension that is not registered is
  skipped when allow_unknown_extension is True.

  Args:
    tokenizer: A tokenizer to parse the field name and values.
    message: A protocol message to record the data.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing

  Raises:
    ParseError: In case of text parsing problems.
  """
  message_descriptor = message.DESCRIPTOR
  if (hasattr(message_descriptor, 'syntax') and
      message_descriptor.syntax == 'proto3'):
    # Proto3 doesn't represent presence so we can't test if multiple
    # scalars have occurred.  We have to allow them.
    allow_multiple_scalars = True
  if tokenizer.TryConsume('['):
    # Extension: "[" identifier ("." identifier)* "]".
    name = [tokenizer.ConsumeIdentifier()]
    while tokenizer.TryConsume('.'):
      name.append(tokenizer.ConsumeIdentifier())
    name = '.'.join(name)

    if not message_descriptor.is_extendable:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" does not have extensions.' %
          message_descriptor.full_name)
    # pylint: disable=protected-access
    field = message.Extensions._FindExtensionByName(name)
    # pylint: enable=protected-access
    if not field:
      if allow_unknown_extension:
        # Normalise any falsy lookup result; the value is skipped below.
        field = None
      else:
        raise tokenizer.ParseErrorPreviousToken(
            'Extension "%s" not registered.' % name)
    elif message_descriptor != field.containing_type:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" does not extend message type "%s".' % (
              name, message_descriptor.full_name))

    tokenizer.Consume(']')

  else:
    name = tokenizer.ConsumeIdentifier()
    field = message_descriptor.fields_by_name.get(name, None)

    # Group names are expected to be capitalized as they appear in the
    # .proto file, which actually matches their type names, not their field
    # names.
    if not field:
      field = message_descriptor.fields_by_name.get(name.lower(), None)
      if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP:
        field = None

    if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and
        field.message_type.name != name):
      field = None

    if not field:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" has no field named "%s".' % (
              message_descriptor.full_name, name))

  if field and field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    is_map_entry = _IsMapEntry(field)
    # The colon before a message body is optional.
    tokenizer.TryConsume(':')

    if tokenizer.TryConsume('<'):
      end_token = '>'
    else:
      tokenizer.Consume('{')
      end_token = '}'

    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      if field.is_extension:
        sub_message = message.Extensions[field].add()
      elif is_map_entry:
        # Parse the entry into a detached message; it is copied into the map
        # once its key and value are known (see below).
        sub_message = field.message_type._concrete_class()
      else:
        sub_message = getattr(message, field.name).add()
    else:
      if field.is_extension:
        sub_message = message.Extensions[field]
      else:
        sub_message = getattr(message, field.name)
      sub_message.SetInParent()

    while not tokenizer.TryConsume(end_token):
      if tokenizer.AtEnd():
        raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token))
      _MergeField(tokenizer, sub_message, allow_multiple_scalars,
                  allow_unknown_extension)

    if is_map_entry:
      value_cpptype = field.message_type.fields_by_name['value'].cpp_type
      if value_cpptype == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
        value = getattr(message, field.name)[sub_message.key]
        value.MergeFrom(sub_message.value)
      else:
        getattr(message, field.name)[sub_message.key] = sub_message.value
  elif field:
    tokenizer.Consume(':')
    if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED and
        tokenizer.TryConsume('[')):
      # Short repeated format, e.g. "foo: [1, 2, 3]".  Test for the closing
      # bracket before reading a value so the empty list "foo: []" is
      # accepted instead of failing to parse "]" as a scalar.
      if not tokenizer.TryConsume(']'):
        while True:
          _MergeScalarField(tokenizer, message, field, allow_multiple_scalars)
          if tokenizer.TryConsume(']'):
            break
          tokenizer.Consume(',')
    else:
      _MergeScalarField(tokenizer, message, field, allow_multiple_scalars)
  else:  # Proto field is unknown.
    # Only reachable for an unregistered extension that the caller asked to
    # tolerate; every other lookup failure has already raised above.
    assert allow_unknown_extension
    _SkipFieldContents(tokenizer)

  # For historical reasons, fields may optionally be separated by commas or
  # semicolons.
  if not tokenizer.TryConsume(','):
    tokenizer.TryConsume(';')
kenton@google.com80b1d622009-07-29 01:13:20 +0000502
503
def _SkipFieldContents(tokenizer):
  """Skips the value or message body that follows a field name.

  Args:
    tokenizer: A tokenizer to parse the field name and values.
  """
  # The field's type is unknown, so it is guessed from the syntax.  A scalar
  # must be introduced by ":" and must not begin with "{" or "<".  Anything
  # else (no ":", or ":" followed by an opening bracket) has to be a message
  # body or the input is ill-formed.
  if not tokenizer.TryConsume(':'):
    _SkipFieldMessage(tokenizer)
  elif tokenizer.LookingAt('{') or tokenizer.LookingAt('<'):
    _SkipFieldMessage(tokenizer)
  else:
    _SkipFieldValue(tokenizer)
521
522
def _SkipField(tokenizer):
  """Skips one whole field: its name followed by its value or message.

  Args:
    tokenizer: A tokenizer to parse the field name and values.
  """
  is_extension_name = tokenizer.TryConsume('[')
  tokenizer.ConsumeIdentifier()
  if is_extension_name:
    # Extension names are dot-separated identifiers closed by "]".
    while tokenizer.TryConsume('.'):
      tokenizer.ConsumeIdentifier()
    tokenizer.Consume(']')

  _SkipFieldContents(tokenizer)

  # For historical reasons, fields may optionally be separated by commas or
  # semicolons.
  if not tokenizer.TryConsume(','):
    tokenizer.TryConsume(';')
544
545
def _SkipFieldMessage(tokenizer):
  """Skips a bracketed message body, including both of its brackets.

  Args:
    tokenizer: A tokenizer to parse the field name and values.
  """
  if not tokenizer.TryConsume('<'):
    tokenizer.Consume('{')
    delimiter = '}'
  else:
    delimiter = '>'

  # Skip nested fields until either kind of closing bracket shows up; the
  # Consume() below then insists that it is the matching one.
  while not (tokenizer.LookingAt('>') or tokenizer.LookingAt('}')):
    _SkipField(tokenizer)

  tokenizer.Consume(delimiter)
563
564
565def _SkipFieldValue(tokenizer):
566 """Skips over a field value.
567
568 Args:
569 tokenizer: A tokenizer to parse the field name and values.
570
571 Raises:
572 ParseError: In case an invalid field value is found.
573 """
574 # String tokens can come in multiple adjacent string literals.
575 # If we can consume one, consume as many as we can.
576 if tokenizer.TryConsumeString():
577 while tokenizer.TryConsumeString():
578 pass
579 return
580
581 if (not tokenizer.TryConsumeIdentifier() and
582 not tokenizer.TryConsumeInt64() and
583 not tokenizer.TryConsumeUint64() and
584 not tokenizer.TryConsumeFloat()):
585 raise ParseError('Invalid field value: ' + tokenizer.token)
586
587
def _MergeScalarField(tokenizer, message, field, allow_multiple_scalars):
  """Reads one scalar value from the tokenizer and stores it on message.

  Args:
    tokenizer: A tokenizer to parse the field value.
    message: A protocol message to record the data.
    field: The descriptor of the field to be merged.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: In case of text parsing problems.
    RuntimeError: On runtime errors.
  """
  field_kinds = descriptor.FieldDescriptor

  if field.type == field_kinds.TYPE_ENUM:
    # Enum parsing needs the field descriptor to resolve value names.
    value = tokenizer.ConsumeEnum(field)
  else:
    # Field types grouped by the tokenizer method that parses them.
    consumer_by_types = (
        ((field_kinds.TYPE_INT32,
          field_kinds.TYPE_SINT32,
          field_kinds.TYPE_SFIXED32), 'ConsumeInt32'),
        ((field_kinds.TYPE_INT64,
          field_kinds.TYPE_SINT64,
          field_kinds.TYPE_SFIXED64), 'ConsumeInt64'),
        ((field_kinds.TYPE_UINT32,
          field_kinds.TYPE_FIXED32), 'ConsumeUint32'),
        ((field_kinds.TYPE_UINT64,
          field_kinds.TYPE_FIXED64), 'ConsumeUint64'),
        ((field_kinds.TYPE_FLOAT,
          field_kinds.TYPE_DOUBLE), 'ConsumeFloat'),
        ((field_kinds.TYPE_BOOL,), 'ConsumeBool'),
        ((field_kinds.TYPE_STRING,), 'ConsumeString'),
        ((field_kinds.TYPE_BYTES,), 'ConsumeByteString'),
    )
    for accepted_types, consumer_name in consumer_by_types:
      if field.type in accepted_types:
        value = getattr(tokenizer, consumer_name)()
        break
    else:
      raise RuntimeError('Unknown field type %d' % field.type)

  if field.label == field_kinds.LABEL_REPEATED:
    if field.is_extension:
      container = message.Extensions[field]
    else:
      container = getattr(message, field.name)
    container.append(value)
    return

  if field.is_extension:
    if not allow_multiple_scalars and message.HasExtension(field):
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" should not have multiple "%s" extensions.' %
          (message.DESCRIPTOR.full_name, field.full_name))
    message.Extensions[field] = value
    return

  if not allow_multiple_scalars and message.HasField(field.name):
    raise tokenizer.ParseErrorPreviousToken(
        'Message type "%s" should not have multiple "%s" fields.' %
        (message.DESCRIPTOR.full_name, field.name))
  setattr(message, field.name, value)
kenton@google.com80b1d622009-07-29 01:13:20 +0000653
654
class _Tokenizer(object):
  """Tokenizer for the protocol buffer text representation.

  The input is consumed one line at a time. The token currently under the
  cursor is exposed through the `token` attribute (an empty string once the
  input is exhausted); the Consume* methods validate/convert that token and
  then advance to the following one.

  It was directly ported from the Java protocol buffer API.
  """

  _WHITESPACE = re.compile('(\\s|(#.*$))+', re.MULTILINE)
  _TOKEN = re.compile('|'.join([
      r'[a-zA-Z_][0-9a-zA-Z_+-]*',             # an identifier
      r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*',  # a number
  ] + [                                        # quoted str for each quote mark
      r'{qt}([^{qt}\n\\]|\\.)*({qt}|\\?$)'.format(qt=mark) for mark in _QUOTES
  ]))

  _IDENTIFIER = re.compile(r'\w+')

  def __init__(self, lines):
    """Initializes the tokenizer and positions it on the first token.

    Args:
      lines: An iterable yielding the lines of text to tokenize.
    """
    # Input source and the line currently being scanned.
    self._lines = iter(lines)
    self._more_lines = True
    self._current_line = ''

    # Cursor (0-based) of the current token; _line becomes 0 once the first
    # line has been read.
    self._line = -1
    self._column = 0
    self._position = 0
    self._token_start = None

    # Location of the token that was current before the last NextToken().
    self._previous_line = 0
    self._previous_column = 0

    self.token = ''
    self._SkipWhitespace()
    self.NextToken()

  def LookingAt(self, token):
    """Returns whether the current token equals `token`, without consuming."""
    return self.token == token

  def AtEnd(self):
    """Checks the end of the text was reached.

    Returns:
      True iff the end was reached.
    """
    return not self.token

  def _PopLine(self):
    """Loads following lines while the cursor is at/past the end of the line.

    When the line iterator is exhausted, the current line is reset to an empty
    string and `_more_lines` is cleared.
    """
    while self._column >= len(self._current_line):
      try:
        upcoming = next(self._lines)
      except StopIteration:
        self._current_line = ''
        self._more_lines = False
        return
      self._current_line = upcoming
      self._line += 1
      self._column = 0

  def _SkipWhitespace(self):
    """Moves the cursor past whitespace and '#' comments, across lines."""
    self._PopLine()
    blank = self._WHITESPACE.match(self._current_line, self._column)
    while blank:
      self._column += len(blank.group(0))
      self._PopLine()
      blank = self._WHITESPACE.match(self._current_line, self._column)

  def _Succeeds(self, consume):
    """Runs `consume()` and reports whether it finished without ParseError."""
    try:
      consume()
    except ParseError:
      return False
    return True

  def _ConsumeConverted(self, convert, *args, **kwargs):
    """Converts via `convert(*args, **kwargs)`, then advances one token.

    A ValueError raised by `convert` is re-raised as a ParseError located at
    the current token; in that case the tokenizer does not advance.
    """
    try:
      converted = convert(*args, **kwargs)
    except ValueError as e:
      raise self._ParseError(str(e))
    self.NextToken()
    return converted

  def TryConsume(self, token):
    """Tries to consume a given piece of text.

    Args:
      token: Text to consume.

    Returns:
      True iff the text was consumed.
    """
    if self.token != token:
      return False
    self.NextToken()
    return True

  def Consume(self, token):
    """Consumes a piece of text.

    Args:
      token: Text to consume.

    Raises:
      ParseError: If the text couldn't be consumed.
    """
    if self.TryConsume(token):
      return
    raise self._ParseError('Expected "%s".' % token)

  def TryConsumeIdentifier(self):
    """Consumes an identifier if possible; returns True iff it was consumed."""
    return self._Succeeds(self.ConsumeIdentifier)

  def ConsumeIdentifier(self):
    """Consumes protocol message field identifier.

    Returns:
      Identifier string.

    Raises:
      ParseError: If an identifier couldn't be consumed.
    """
    identifier = self.token
    if self._IDENTIFIER.match(identifier) is None:
      raise self._ParseError('Expected identifier.')
    self.NextToken()
    return identifier

  def ConsumeInt32(self):
    """Consumes a signed 32bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If a signed 32bit integer couldn't be consumed.
    """
    return self._ConsumeConverted(
        ParseInteger, self.token, is_signed=True, is_long=False)

  def ConsumeUint32(self):
    """Consumes an unsigned 32bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an unsigned 32bit integer couldn't be consumed.
    """
    return self._ConsumeConverted(
        ParseInteger, self.token, is_signed=False, is_long=False)

  def TryConsumeInt64(self):
    """Consumes a signed 64bit integer if possible; True iff consumed."""
    return self._Succeeds(self.ConsumeInt64)

  def ConsumeInt64(self):
    """Consumes a signed 64bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If a signed 64bit integer couldn't be consumed.
    """
    return self._ConsumeConverted(
        ParseInteger, self.token, is_signed=True, is_long=True)

  def TryConsumeUint64(self):
    """Consumes an unsigned 64bit integer if possible; True iff consumed."""
    return self._Succeeds(self.ConsumeUint64)

  def ConsumeUint64(self):
    """Consumes an unsigned 64bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an unsigned 64bit integer couldn't be consumed.
    """
    return self._ConsumeConverted(
        ParseInteger, self.token, is_signed=False, is_long=True)

  def TryConsumeFloat(self):
    """Consumes a floating point number if possible; True iff consumed."""
    return self._Succeeds(self.ConsumeFloat)

  def ConsumeFloat(self):
    """Consumes a floating point number.

    Returns:
      The number parsed.

    Raises:
      ParseError: If a floating point number couldn't be consumed.
    """
    return self._ConsumeConverted(ParseFloat, self.token)

  def ConsumeBool(self):
    """Consumes a boolean value.

    Returns:
      The bool parsed.

    Raises:
      ParseError: If a boolean value couldn't be consumed.
    """
    return self._ConsumeConverted(ParseBool, self.token)

  def TryConsumeString(self):
    """Consumes a string value if possible; True iff it was consumed."""
    return self._Succeeds(self.ConsumeString)

  def ConsumeString(self):
    """Consumes a string value.

    Returns:
      The string parsed.

    Raises:
      ParseError: If a string value couldn't be consumed.
    """
    raw = self.ConsumeByteString()
    try:
      decoded = six.text_type(raw, 'utf-8')
    except UnicodeDecodeError as e:
      raise self._StringParseError(e)
    else:
      return decoded

  def ConsumeByteString(self):
    """Consumes a byte array value.

    Adjacent string literal tokens are concatenated into a single value.

    Returns:
      The array parsed (as a string).

    Raises:
      ParseError: If a byte array value couldn't be consumed.
    """
    pieces = []
    while True:
      pieces.append(self._ConsumeSingleByteString())
      # Keep going only while the next token starts with a quote mark.
      if not self.token or self.token[0] not in _QUOTES:
        break
    return b''.join(pieces)

  def _ConsumeSingleByteString(self):
    """Consume one token of a string literal.

    String literals (whether bytes or text) can come in multiple adjacent
    tokens which are automatically concatenated, like in C or Python.  This
    method only consumes one token.

    Returns:
      The token parsed.
    Raises:
      ParseError: When the wrong format data is found.
    """
    literal = self.token
    if not literal or literal[0] not in _QUOTES:
      raise self._ParseError('Expected string but found: %r' % (literal,))

    opening_quote = literal[0]
    if len(literal) < 2 or literal[-1] != opening_quote:
      raise self._ParseError('String missing ending quote: %r' % (literal,))

    # Strip the surrounding quotes before resolving escape sequences.
    return self._ConsumeConverted(text_encoding.CUnescape, literal[1:-1])

  def ConsumeEnum(self, field):
    """Consumes an enum value (name or number) for the given enum field.

    Args:
      field: Enum field descriptor, forwarded to ParseEnum.

    Returns:
      The value returned by ParseEnum for the current token.

    Raises:
      ParseError: If ParseEnum rejects the current token.
    """
    return self._ConsumeConverted(ParseEnum, field, self.token)

  def ParseErrorPreviousToken(self, message):
    """Creates and *returns* a ParseError for the previously read token.

    Args:
      message: A message to set for the exception.

    Returns:
      A ParseError instance.
    """
    line_number = self._previous_line + 1
    column_number = self._previous_column + 1
    return ParseError('%d:%d : %s' % (line_number, column_number, message))

  def _ParseError(self, message):
    """Creates and *returns* a ParseError for the current token."""
    line_number = self._line + 1
    column_number = self._column + 1
    return ParseError('%d:%d : %s' % (line_number, column_number, message))

  def _StringParseError(self, e):
    """Creates and *returns* a ParseError describing a string decode failure."""
    return self._ParseError('Couldn\'t parse string: ' + str(e))

  def NextToken(self):
    """Reads the next meaningful token."""
    # Remember where the outgoing token started, for ParseErrorPreviousToken.
    self._previous_line, self._previous_column = self._line, self._column

    # Step over the outgoing token and anything ignorable that follows it.
    self._column += len(self.token)
    self._SkipWhitespace()

    if self._more_lines:
      found = self._TOKEN.match(self._current_line, self._column)
      if found:
        self.token = found.group(0)
      else:
        # Anything the token pattern does not recognize is a one-char token.
        self.token = self._current_line[self._column]
    else:
      # Input exhausted: an empty token marks the end (see AtEnd).
      self.token = ''
994
def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer.

  The literal is interpreted with base auto-detection (decimal, hexadecimal
  with a "0x" prefix, octal). C-style octal literals such as "0755" are
  accepted on both Python 2 and Python 3.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: If the text is not a valid integer, or if the range checker
      selected by is_signed/is_long rejects the value.
  """
  # Python 3 rejects C-style octal literals ("0755") when parsing with base 0
  # and insists on the "0o" prefix, while Python 2 accepts both spellings.
  # Normalize to the "0o" form so the same input parses on either version.
  literal = text
  c_octal = re.match(r'([-+]?)0(\d+)$', text)
  if c_octal:
    literal = c_octal.group(1) + '0o' + c_octal.group(2)

  # Do the actual parsing. Exception handling is propagated to caller.
  try:
    # We force 32-bit values to int and 64-bit values to long to make
    # alternate implementations where the distinction is more significant
    # (e.g. the C++ implementation) simpler.
    if is_long:
      result = long(literal, 0)
    else:
      result = int(literal, 0)
  except ValueError:
    # Report the text as the user wrote it, not the normalized literal.
    raise ValueError('Couldn\'t parse integer: %s' % text)

  # Check if the integer is sane. Exceptions handled by callers.
  checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
  checker.CheckValue(result)
  return result
1025
1026
def ParseFloat(text):
  """Parse a floating point number.

  Python's own float syntax is tried first. Failing that, the alternative
  infinity/NaN spellings recognized by the module's patterns are accepted,
  and finally a literal with trailing 'f' characters (e.g. '1.0f').

  Args:
    text: Text to parse.

  Returns:
    The number parsed.

  Raises:
    ValueError: If a floating point number couldn't be parsed.
  """
  # Assume Python compatible syntax.
  try:
    return float(text)
  except ValueError:
    pass

  # Check alternative spellings.
  if _FLOAT_INFINITY.match(text):
    return float('-inf') if text[0] == '-' else float('inf')
  if _FLOAT_NAN.match(text):
    return float('nan')

  # assume '1.0f' format
  without_suffix = text.rstrip('f')
  try:
    return float(without_suffix)
  except ValueError:
    raise ValueError('Couldn\'t parse float: %s' % text)
1057
1058
def ParseBool(text):
  """Parse a boolean value.

  Accepted spellings are 'true', 't', '1' for True and 'false', 'f', '0'
  for False; matching is case-sensitive.

  Args:
    text: Text to parse.

  Returns:
    Boolean values parsed

  Raises:
    ValueError: If text is not a valid boolean.
  """
  is_true = text in ('true', 't', '1')
  if not is_true and text not in ('false', 'f', '0'):
    raise ValueError('Expected "true" or "false".')
  return is_true
1077
1078
def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name).

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  enum_type = field.enum_type
  try:
    numeric = int(value, 0)
  except ValueError:
    # Not an integer literal, so treat the text as an enum value name.
    by_name = enum_type.values_by_name.get(value, None)
    if by_name is None:
      raise ValueError(
          'Enum type "%s" has no value named %s.' % (
              enum_type.full_name, value))
    return by_name.number

  # Integer literal: the number must belong to a declared enum value.
  by_number = enum_type.values_by_number.get(numeric, None)
  if by_number is None:
    raise ValueError(
        'Enum type "%s" has no value with number %d.' % (
            enum_type.full_name, numeric))
  return by_number.number