Blame - python/google/protobuf/text_format.py - platform/external/protobuf-javalite

blob: 8d256076c28ad4c3245aaccbbbd8284aeb4bc961 [file] [log] [blame]

temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	1	# Protocol Buffers - Google's data interchange format
kenton@google.com	24bf56f	2008-09-24 20:31:01 +0000	[diff] [blame]	2	# Copyright 2008 Google Inc. All rights reserved.
Feng Xiao	e428862	2014-10-01 16:26:23 -0700	[diff] [blame]	3	# https://developers.google.com/protocol-buffers/
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	4	#
kenton@google.com	24bf56f	2008-09-24 20:31:01 +0000	[diff] [blame]	5	# Redistribution and use in source and binary forms, with or without
				6	# modification, are permitted provided that the following conditions are
				7	# met:
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	8	#
kenton@google.com	24bf56f	2008-09-24 20:31:01 +0000	[diff] [blame]	9	# * Redistributions of source code must retain the above copyright
				10	# notice, this list of conditions and the following disclaimer.
				11	# * Redistributions in binary form must reproduce the above
				12	# copyright notice, this list of conditions and the following disclaimer
				13	# in the documentation and/or other materials provided with the
				14	# distribution.
				15	# * Neither the name of Google Inc. nor the names of its
				16	# contributors may be used to endorse or promote products derived from
				17	# this software without specific prior written permission.
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	18	#
kenton@google.com	24bf56f	2008-09-24 20:31:01 +0000	[diff] [blame]	19	# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				20	# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				21	# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				22	# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				23	# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				24	# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				25	# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				26	# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				27	# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				28	# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				29	# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	30
Jisi Liu	46e8ff6	2015-10-05 11:59:43 -0700	[diff] [blame]	31	"""Contains routines for printing protocol messages in text format.
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	32
Jisi Liu	46e8ff6	2015-10-05 11:59:43 -0700	[diff] [blame]	33	Simple usage example:
				34
				35	# Create a proto object and serialize it to a text proto string.
				36	message = my_proto_pb2.MyMessage(foo='bar')
				37	text_proto = text_format.MessageToString(message)
				38
				39	# Parse a text proto string.
				40	message = text_format.Parse(text_proto, my_proto_pb2.MyMessage())
				41	"""
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	42
				43	__author__ = 'kenton@google.com (Kenton Varda)'
				44
Tres Seaver	47ee4d3	2015-01-13 15:04:41 -0500	[diff] [blame]	45	import io
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	46	import re
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	47
Tres Seaver	f336d4b	2015-01-13 14:21:29 -0500	[diff] [blame]	48	import six
				49
Dan O'Reilly	fe7d937	2015-08-14 15:26:33 -0400	[diff] [blame]	50	if six.PY3:
				51	long = int
				52
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	53	from google.protobuf.internal import type_checkers
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	54	from google.protobuf import descriptor
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	55	from google.protobuf import text_encoding
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	56
# Names exported by a star-import of this module.
__all__ = [
    'MessageToString',
    'PrintMessage',
    'PrintField',
    'PrintFieldValue',
    'Merge',
]


# One checker instance per integer width/signedness.
_INTEGER_CHECKERS = (
    type_checkers.Uint32ValueChecker(),
    type_checkers.Int32ValueChecker(),
    type_checkers.Uint64ValueChecker(),
    type_checkers.Int64ValueChecker(),
)
# Case-insensitive "inf"/"infinity" with optional leading "-" and trailing "f".
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE)
# Case-insensitive "nan" with optional trailing "f".
_FLOAT_NAN = re.compile('nanf?', re.IGNORECASE)
# C++ field types that are printed using the optional float_format.
_FLOAT_TYPES = frozenset([
    descriptor.FieldDescriptor.CPPTYPE_FLOAT,
    descriptor.FieldDescriptor.CPPTYPE_DOUBLE,
])
# The two quote characters: single and double.
_QUOTES = frozenset(("'", '"'))
kenton@google.com	d0047c4	2009-12-23 02:01:01 +0000	[diff] [blame]	70
				71
class Error(Exception):
    """Base exception type for the text_format module."""
				74
				75
				76	class ParseError(Error):
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	77	"""Thrown in case of text parsing error."""
				78
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	79
class TextWriter(object):
    """In-memory sink used to accumulate text-format output.

    On Python 2 the output is collected in an io.BytesIO (text values are
    encoded as UTF-8 before being written); on other versions it is collected
    in an io.StringIO.
    """

    def __init__(self, as_utf8):
        # NOTE: as_utf8 is accepted but not consulted by this class.
        buffer_factory = io.BytesIO if six.PY2 else io.StringIO
        self._writer = buffer_factory()

    def write(self, val):
        """Append val to the buffer and return the underlying write() result."""
        if six.PY2 and isinstance(val, six.text_type):
            val = val.encode('utf-8')
        return self._writer.write(val)

    def close(self):
        """Close the underlying buffer."""
        return self._writer.close()

    def getvalue(self):
        """Return everything written so far."""
        return self._writer.getvalue()
				98
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	99
def MessageToString(message, as_utf8=False, as_one_line=False,
                    pointy_brackets=False, use_index_order=False,
                    float_format=None):
    """Render a protobuf message in text format and return the result.

    Floating point values can be formatted compactly with 15 digits of
    precision (which is the most that IEEE 754 "double" can guarantee)
    using float_format='.15g'. To ensure that converting to text and back to a
    proto will result in an identical value, float_format='.17g' should be
    used.

    Args:
      message: The protocol buffers message.
      as_utf8: Produce text output in UTF8 format.
      as_one_line: Don't introduce newlines between fields.
      pointy_brackets: If True, use angle brackets instead of curly braces for
        nesting.
      use_index_order: If True, print fields of a proto message using the
        order defined in source code instead of the field number. By default,
        use the field number order.
      float_format: If set, use this to specify floating point number
        formatting (per the "Format Specification Mini-Language"); otherwise,
        str() is used.

    Returns:
      A string of the text formatted protocol buffer message.
    """
    writer = TextWriter(as_utf8)
    PrintMessage(message, writer,
                 as_utf8=as_utf8,
                 as_one_line=as_one_line,
                 pointy_brackets=pointy_brackets,
                 use_index_order=use_index_order,
                 float_format=float_format)
    text = writer.getvalue()
    writer.close()
    # In one-line mode every field is followed by a separator space, so the
    # trailing whitespace is trimmed from the final result.
    return text.rstrip() if as_one_line else text
				135
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	136
def _IsMapEntry(field):
    """Tell whether field is a message field whose type is a map entry.

    Args:
      field: A field descriptor.

    Returns:
      A truthy value when the field is of TYPE_MESSAGE and its message type
      has options with map_entry set; a falsy value otherwise.
    """
    if field.type != descriptor.FieldDescriptor.TYPE_MESSAGE:
        return False
    entry_type = field.message_type
    return entry_type.has_options and entry_type.GetOptions().map_entry
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	141
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	142
def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False,
                 pointy_brackets=False, use_index_order=False,
                 float_format=None):
    """Write every field listed by message.ListFields() to out.

    Map fields are written one entry at a time in sorted key order, repeated
    fields one element at a time, and all other fields once.

    Args:
      message: The protocol buffers message to print.
      out: Object with a write() method that receives the text.
      indent: Number of spaces written before each field.
      as_utf8: Passed through to PrintField.
      as_one_line: Passed through to PrintField.
      pointy_brackets: Passed through to PrintField.
      use_index_order: If True, sort the listed fields by descriptor index
        before printing.
      float_format: Passed through to PrintField.
    """
    listed_fields = message.ListFields()
    if use_index_order:
        listed_fields.sort(key=lambda item: item[0].index)

    def emit(field, single_value):
        PrintField(field, single_value, out, indent, as_utf8, as_one_line,
                   pointy_brackets=pointy_brackets,
                   use_index_order=use_index_order,
                   float_format=float_format)

    for field, value in listed_fields:
        if _IsMapEntry(field):
            for key in sorted(value):
                # This is slow for maps with submessage entries because it
                # copies the entire tree. Unfortunately this would take
                # significant refactoring of this file to work around.
                #
                # TODO(haberman): refactor and optimize if this becomes an
                # issue.
                entry_submsg = field.message_type._concrete_class(
                    key=key, value=value[key])
                emit(field, entry_submsg)
        elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
            for element in value:
                emit(field, element)
        else:
            emit(field, value)
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	173
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	174
def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False,
               pointy_brackets=False, use_index_order=False, float_format=None):
    """Write one field name/value pair to out.

    For repeated fields, value should be a single element. The pair is
    followed by a space when as_one_line is set and by a newline otherwise.

    Args:
      field: Descriptor of the field being printed.
      value: The value (or single repeated element) to print.
      out: Object with a write() method that receives the text.
      indent: Number of spaces written before the field name.
      as_utf8: Passed through to PrintFieldValue.
      as_one_line: Selects the separator written after the value.
      pointy_brackets: Passed through to PrintFieldValue.
      use_index_order: Passed through to PrintFieldValue.
      float_format: Passed through to PrintFieldValue.
    """
    fd = descriptor.FieldDescriptor

    out.write(' ' * indent)
    if field.is_extension:
        out.write('[')
        # Optional message extensions of a message-set container are written
        # under the name of their message type; all others use the full name
        # of the extension field.
        uses_message_type_name = (
            field.containing_type.GetOptions().message_set_wire_format and
            field.type == fd.TYPE_MESSAGE and
            field.label == fd.LABEL_OPTIONAL)
        if uses_message_type_name:
            out.write(field.message_type.full_name)
        else:
            out.write(field.full_name)
        out.write(']')
    elif field.type == fd.TYPE_GROUP:
        # For groups, use the capitalized name.
        out.write(field.message_type.name)
    else:
        out.write(field.name)

    # For message-typed fields the colon is optional, and it is left out
    # because the cross-language golden files don't include it.
    if field.cpp_type != fd.CPPTYPE_MESSAGE:
        out.write(': ')

    PrintFieldValue(field, value, out, indent, as_utf8, as_one_line,
                    pointy_brackets=pointy_brackets,
                    use_index_order=use_index_order,
                    float_format=float_format)
    out.write(' ' if as_one_line else '\n')
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	210
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	211
def PrintFieldValue(field, value, out, indent=0, as_utf8=False,
                    as_one_line=False, pointy_brackets=False,
                    use_index_order=False,
                    float_format=None):
    """Write a single field value (without its name) to out.

    For repeated fields, value should be a single element.

    Args:
      field: Descriptor of the field the value belongs to.
      value: The value to print.
      out: Object with a write() method that receives the text.
      indent: Current indentation; nested message bodies are printed two
        spaces deeper unless as_one_line is set.
      as_utf8: Passed to text_encoding.CEscape for string fields; bytes
        fields are always escaped with as_utf8=False.
      as_one_line: If True, nested messages are kept on the same line.
      pointy_brackets: If True, nest with '<' and '>' instead of '{' and '}'.
      use_index_order: Passed through to PrintMessage for nested messages.
      float_format: If not None, format spec applied to float/double values;
        otherwise str() is used.
    """
    fd = descriptor.FieldDescriptor
    openb, closeb = ('<', '>') if pointy_brackets else ('{', '}')

    if field.cpp_type == fd.CPPTYPE_MESSAGE:
        if as_one_line:
            opening = ' %s ' % openb
            nested_indent = indent
            closing = closeb
        else:
            opening = ' %s\n' % openb
            nested_indent = indent + 2
            closing = ' ' * indent + closeb
        out.write(opening)
        PrintMessage(value, out, nested_indent, as_utf8, as_one_line,
                     pointy_brackets=pointy_brackets,
                     use_index_order=use_index_order,
                     float_format=float_format)
        out.write(closing)
    elif field.cpp_type == fd.CPPTYPE_ENUM:
        # Numbers without a matching enum value are printed as the number.
        enum_value = field.enum_type.values_by_number.get(value, None)
        out.write(str(value) if enum_value is None else enum_value.name)
    elif field.cpp_type == fd.CPPTYPE_STRING:
        out.write('"')
        if isinstance(value, six.text_type):
            out_value = value.encode('utf-8')
        else:
            out_value = value
        # We need to escape non-UTF8 chars in TYPE_BYTES field.
        out_as_utf8 = False if field.type == fd.TYPE_BYTES else as_utf8
        out.write(text_encoding.CEscape(out_value, out_as_utf8))
        out.write('"')
    elif field.cpp_type == fd.CPPTYPE_BOOL:
        out.write('true' if value else 'false')
    elif field.cpp_type in _FLOAT_TYPES and float_format is not None:
        out.write('{1:{0}}'.format(float_format, value))
    else:
        out.write(str(value))
				269
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	270
def Parse(text, message, allow_unknown_extension=False):
    """Parses a text representation of a protocol message into a message.

    Repeated values for a non-repeated field are not permitted here (see
    Merge() for the variant that allows them).

    Args:
      text: Message text representation. Anything that is not already a
        native str is converted to one using UTF-8 before parsing.
      message: A protocol buffer message to merge into.
      allow_unknown_extension: if True, skip over missing extensions and keep
        parsing

    Returns:
      The same message passed as argument.

    Raises:
      ParseError: On text parsing problems.
    """
    if not isinstance(text, str):
        if isinstance(text, six.text_type):
            # Only reachable on Python 2 (unicode input). Calling .decode()
            # on a unicode object first encodes it implicitly with ASCII and
            # fails on non-ASCII text, so encode it to a UTF-8 str instead.
            text = text.encode('utf-8')
        else:
            text = text.decode('utf-8')
    return ParseLines(text.split('\n'), message, allow_unknown_extension)
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	289
				290
def Merge(text, message, allow_unknown_extension=False):
    """Parses a text representation of a protocol message into a message.

    Like Parse(), but allows repeated values for a non-repeated field, and
    uses the last one.

    Args:
      text: Message text representation. On Python 3, bytes input is decoded
        as UTF-8 before parsing.
      message: A protocol buffer message to merge into.
      allow_unknown_extension: if True, skip over missing extensions and keep
        parsing

    Returns:
      The same message passed as argument.

    Raises:
      ParseError: On text parsing problems.
    """
    # On Python 3, bytes.split('\n') raises TypeError; decode first, as
    # Parse() does. On Python 2 bytes is str, so this branch is never taken
    # and the input is passed through unchanged.
    if isinstance(text, bytes) and not isinstance(text, str):
        text = text.decode('utf-8')
    return MergeLines(text.split('\n'), message, allow_unknown_extension)
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	310
				311
def ParseLines(lines, message, allow_unknown_extension=False):
    """Parses the lines of a text-format message into message.

    Repeated values for a non-repeated field are not permitted.

    Args:
      lines: An iterable of lines of a message's text representation.
      message: A protocol buffer message to merge into.
      allow_unknown_extension: if True, skip over missing extensions and keep
        parsing

    Returns:
      The same message passed as argument.

    Raises:
      ParseError: On text parsing problems.
    """
    _ParseOrMerge(lines, message,
                  allow_multiple_scalars=False,
                  allow_unknown_extension=allow_unknown_extension)
    return message
				329
				330
def MergeLines(lines, message, allow_unknown_extension=False):
    """Merges the lines of a text-format message into message.

    Unlike ParseLines(), repeated values for a non-repeated field are
    permitted.

    Args:
      lines: An iterable of lines of a message's text representation.
      message: A protocol buffer message to merge into.
      allow_unknown_extension: if True, skip over missing extensions and keep
        parsing

    Returns:
      The same message passed as argument.

    Raises:
      ParseError: On text parsing problems.
    """
    _ParseOrMerge(lines, message,
                  allow_multiple_scalars=True,
                  allow_unknown_extension=allow_unknown_extension)
    return message
				348
				349
def _ParseOrMerge(lines,
                  message,
                  allow_multiple_scalars,
                  allow_unknown_extension=False):
    """Reads fields from lines into message until the input is exhausted.

    Args:
      lines: Lines of a message's text representation.
      message: A protocol buffer message to merge into.
      allow_multiple_scalars: Determines if repeated values for a non-repeated
        field are permitted, e.g., the string "foo: 1 foo: 2" for a
        required/optional field named "foo".
      allow_unknown_extension: if True, skip over missing extensions and keep
        parsing

    Raises:
      ParseError: On text parsing problems.
    """
    tokenizer = _Tokenizer(lines)
    while True:
        if tokenizer.AtEnd():
            return
        _MergeField(tokenizer, message, allow_multiple_scalars,
                    allow_unknown_extension)
Feng Xiao	f157a56	2014-11-14 11:50:31 -0800	[diff] [blame]	372
				373
def _MergeField(tokenizer,
                message,
                allow_multiple_scalars,
                allow_unknown_extension=False):
    """Reads one field from the tokenizer and merges it into message.

    Args:
      tokenizer: A tokenizer to parse the field name and values.
      message: A protocol message to record the data.
      allow_multiple_scalars: Determines if repeated values for a non-repeated
        field are permitted, e.g., the string "foo: 1 foo: 2" for a
        required/optional field named "foo".
      allow_unknown_extension: if True, skip over missing extensions and keep
        parsing

    Raises:
      ParseError: In case of text parsing problems.
    """
    fd = descriptor.FieldDescriptor
    message_descriptor = message.DESCRIPTOR
    if (hasattr(message_descriptor, 'syntax') and
            message_descriptor.syntax == 'proto3'):
        # Proto3 doesn't represent presence so we can't test if multiple
        # scalars have occurred. We have to allow them.
        allow_multiple_scalars = True

    # Step 1: resolve the field name to a field descriptor.
    if tokenizer.TryConsume('['):
        # Bracketed, dot-separated extension name.
        name_parts = [tokenizer.ConsumeIdentifier()]
        while tokenizer.TryConsume('.'):
            name_parts.append(tokenizer.ConsumeIdentifier())
        name = '.'.join(name_parts)

        if not message_descriptor.is_extendable:
            raise tokenizer.ParseErrorPreviousToken(
                'Message type "%s" does not have extensions.' %
                message_descriptor.full_name)
        # pylint: disable=protected-access
        field = message.Extensions._FindExtensionByName(name)
        # pylint: enable=protected-access
        if not field:
            if not allow_unknown_extension:
                raise tokenizer.ParseErrorPreviousToken(
                    'Extension "%s" not registered.' % name)
            # Unknown extension: its contents are skipped in step 2.
            field = None
        elif message_descriptor != field.containing_type:
            raise tokenizer.ParseErrorPreviousToken(
                'Extension "%s" does not extend message type "%s".' % (
                    name, message_descriptor.full_name))

        tokenizer.Consume(']')
    else:
        name = tokenizer.ConsumeIdentifier()
        known_fields = message_descriptor.fields_by_name
        field = known_fields.get(name, None)

        # Group names are expected to be capitalized as they appear in the
        # .proto file, which actually matches their type names, not their
        # field names.
        if not field:
            field = known_fields.get(name.lower(), None)
            if field and field.type != fd.TYPE_GROUP:
                field = None

        if (field and field.type == fd.TYPE_GROUP and
                field.message_type.name != name):
            field = None

        if not field:
            raise tokenizer.ParseErrorPreviousToken(
                'Message type "%s" has no field named "%s".' % (
                    message_descriptor.full_name, name))

    # Step 2: consume the value according to the kind of field.
    if not field:
        # Proto field is unknown.
        assert allow_unknown_extension
        _SkipFieldContents(tokenizer)
    elif field.cpp_type == fd.CPPTYPE_MESSAGE:
        is_map_entry = _IsMapEntry(field)
        # The colon before a message body is optional.
        tokenizer.TryConsume(':')

        if tokenizer.TryConsume('<'):
            end_token = '>'
        else:
            tokenizer.Consume('{')
            end_token = '}'

        # Pick the sub-message that the nested fields are merged into.
        if field.label == fd.LABEL_REPEATED:
            if field.is_extension:
                sub_message = message.Extensions[field].add()
            elif is_map_entry:
                # Map entries are parsed into a standalone entry message and
                # stored into the map once the entry is complete (below).
                sub_message = field.message_type._concrete_class()
            else:
                sub_message = getattr(message, field.name).add()
        else:
            if field.is_extension:
                sub_message = message.Extensions[field]
            else:
                sub_message = getattr(message, field.name)
            sub_message.SetInParent()

        while not tokenizer.TryConsume(end_token):
            if tokenizer.AtEnd():
                raise tokenizer.ParseErrorPreviousToken(
                    'Expected "%s".' % (end_token))
            _MergeField(tokenizer, sub_message, allow_multiple_scalars,
                        allow_unknown_extension)

        if is_map_entry:
            value_cpptype = field.message_type.fields_by_name['value'].cpp_type
            if value_cpptype == fd.CPPTYPE_MESSAGE:
                value = getattr(message, field.name)[sub_message.key]
                value.MergeFrom(sub_message.value)
            else:
                getattr(message, field.name)[sub_message.key] = sub_message.value
    else:
        tokenizer.Consume(':')
        if (field.label == fd.LABEL_REPEATED and
                tokenizer.TryConsume('[')):
            # Short repeated format, e.g. "foo: [1, 2, 3]"
            while True:
                _MergeScalarField(tokenizer, message, field,
                                  allow_multiple_scalars)
                if tokenizer.TryConsume(']'):
                    break
                tokenizer.Consume(',')
        else:
            _MergeScalarField(tokenizer, message, field,
                              allow_multiple_scalars)

    # For historical reasons, fields may optionally be separated by commas or
    # semicolons.
    if not tokenizer.TryConsume(','):
        tokenizer.TryConsume(';')
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	502
				503
def _SkipFieldContents(tokenizer):
    """Skips over the contents (scalar value or message body) of a field.

    Args:
      tokenizer: A tokenizer to parse the field name and values.
    """
    # Try to guess the type of this field.
    # A non-message field must have a ":" between its name and its value, and
    # the value must not start with "{" or "<", which open a message body.
    # Without a ":" - or with "{" or "<" right after it - the field has to be
    # a message or the input is ill-formed.
    saw_colon = tokenizer.TryConsume(':')
    if saw_colon and not (tokenizer.LookingAt('{') or
                          tokenizer.LookingAt('<')):
        _SkipFieldValue(tokenizer)
    else:
        _SkipFieldMessage(tokenizer)
				521
				522
				523	def _SkipField(tokenizer):
				524	"""Skips over a complete field (name and value/message).
				525
				526	Args:
				527	tokenizer: A tokenizer to parse the field name and values.
				528	"""
				529	if tokenizer.TryConsume('['):
				530	# Consume extension name.
				531	tokenizer.ConsumeIdentifier()
				532	while tokenizer.TryConsume('.'):
				533	tokenizer.ConsumeIdentifier()
				534	tokenizer.Consume(']')
				535	else:
				536	tokenizer.ConsumeIdentifier()
				537
				538	_SkipFieldContents(tokenizer)
				539
				540	# For historical reasons, fields may optionally be separated by commas or
				541	# semicolons.
				542	if not tokenizer.TryConsume(','):
				543	tokenizer.TryConsume(';')
				544
				545
				546	def _SkipFieldMessage(tokenizer):
				547	"""Skips over a field message.
				548
				549	Args:
				550	tokenizer: A tokenizer to parse the field name and values.
				551	"""
				552
				553	if tokenizer.TryConsume('<'):
				554	delimiter = '>'
				555	else:
				556	tokenizer.Consume('{')
				557	delimiter = '}'
				558
				559	while not tokenizer.LookingAt('>') and not tokenizer.LookingAt('}'):
				560	_SkipField(tokenizer)
				561
				562	tokenizer.Consume(delimiter)
				563
				564
				565	def _SkipFieldValue(tokenizer):
				566	"""Skips over a field value.
				567
				568	Args:
				569	tokenizer: A tokenizer to parse the field name and values.
				570
				571	Raises:
				572	ParseError: In case an invalid field value is found.
				573	"""
				574	# String tokens can come in multiple adjacent string literals.
				575	# If we can consume one, consume as many as we can.
				576	if tokenizer.TryConsumeString():
				577	while tokenizer.TryConsumeString():
				578	pass
				579	return
				580
				581	if (not tokenizer.TryConsumeIdentifier() and
				582	not tokenizer.TryConsumeInt64() and
				583	not tokenizer.TryConsumeUint64() and
				584	not tokenizer.TryConsumeFloat()):
				585	raise ParseError('Invalid field value: ' + tokenizer.token)
				586
				587
def _MergeScalarField(tokenizer, message, field, allow_multiple_scalars):
  """Parses one scalar value and stores it in the given message field.

  Args:
    tokenizer: A tokenizer positioned at the field value.
    message: The protocol message that receives the parsed value.
    field: The descriptor of the field being merged.
    allow_multiple_scalars: If true, a non-repeated field may be assigned more
      than once (e.g. the text "foo: 1 foo: 2" for a required/optional field
      named "foo"); the later value overwrites the earlier one.

  Raises:
    ParseError: In case of text parsing problems, or when a non-repeated field
      is set a second time while allow_multiple_scalars is false.
    RuntimeError: If the field type is not one of the scalar types handled
      here.
  """
  fd = descriptor.FieldDescriptor
  field_type = field.type

  # Choose the tokenizer routine that matches the declared wire type.
  if field_type in (fd.TYPE_INT32, fd.TYPE_SINT32, fd.TYPE_SFIXED32):
    parsed = tokenizer.ConsumeInt32()
  elif field_type in (fd.TYPE_INT64, fd.TYPE_SINT64, fd.TYPE_SFIXED64):
    parsed = tokenizer.ConsumeInt64()
  elif field_type in (fd.TYPE_UINT32, fd.TYPE_FIXED32):
    parsed = tokenizer.ConsumeUint32()
  elif field_type in (fd.TYPE_UINT64, fd.TYPE_FIXED64):
    parsed = tokenizer.ConsumeUint64()
  elif field_type in (fd.TYPE_FLOAT, fd.TYPE_DOUBLE):
    parsed = tokenizer.ConsumeFloat()
  elif field_type == fd.TYPE_BOOL:
    parsed = tokenizer.ConsumeBool()
  elif field_type == fd.TYPE_STRING:
    parsed = tokenizer.ConsumeString()
  elif field_type == fd.TYPE_BYTES:
    parsed = tokenizer.ConsumeByteString()
  elif field_type == fd.TYPE_ENUM:
    parsed = tokenizer.ConsumeEnum(field)
  else:
    raise RuntimeError('Unknown field type %d' % field_type)

  # Repeated fields simply accumulate values.
  if field.label == fd.LABEL_REPEATED:
    if field.is_extension:
      container = message.Extensions[field]
    else:
      container = getattr(message, field.name)
    container.append(parsed)
    return

  # Singular extension: reject a duplicate unless the caller allows it.
  if field.is_extension:
    if not allow_multiple_scalars and message.HasExtension(field):
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" should not have multiple "%s" extensions.' %
          (message.DESCRIPTOR.full_name, field.full_name))
    message.Extensions[field] = parsed
    return

  # Singular regular field: same duplicate rule as above.
  if not allow_multiple_scalars and message.HasField(field.name):
    raise tokenizer.ParseErrorPreviousToken(
        'Message type "%s" should not have multiple "%s" fields.' %
        (message.DESCRIPTOR.full_name, field.name))
  setattr(message, field.name, parsed)
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	653
				654
				655	class _Tokenizer(object):
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	656	"""Protocol buffer text representation tokenizer.
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	657
				658	This class handles the lower level string parsing by splitting it into
				659	meaningful tokens.
				660
				661	It was directly ported from the Java protocol buffer API.
				662	"""
				663
				664	_WHITESPACE = re.compile('(\\s\|(#.*$))+', re.MULTILINE)
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	665	_TOKEN = re.compile('\|'.join([
				666	r'[a-zA-Z_][0-9a-zA-Z_+-]*', # an identifier
				667	r'([0-9+-]\|(\.[0-9]))[0-9a-zA-Z_.+-]*', # a number
				668	] + [ # quoted str for each quote mark
				669	r'{qt}([^{qt}\n\\]\|\\.)*({qt}\|\\?$)'.format(qt=mark) for mark in _QUOTES
				670	]))
				671
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	672	_IDENTIFIER = re.compile(r'\w+')
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	673
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	674	def __init__(self, lines):
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	675	self._position = 0
				676	self._line = -1
				677	self._column = 0
				678	self._token_start = None
				679	self.token = ''
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	680	self._lines = iter(lines)
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	681	self._current_line = ''
				682	self._previous_line = 0
				683	self._previous_column = 0
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	684	self._more_lines = True
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	685	self._SkipWhitespace()
				686	self.NextToken()
				687
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	688	def LookingAt(self, token):
				689	return self.token == token
				690
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	691	def AtEnd(self):
				692	"""Checks the end of the text was reached.
				693
				694	Returns:
				695	True iff the end was reached.
				696	"""
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	697	return not self.token
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	698
				699	def _PopLine(self):
liujisi@google.com	33165fe	2010-11-02 13:14:58 +0000	[diff] [blame]	700	while len(self._current_line) <= self._column:
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	701	try:
Tres Seaver	f336d4b	2015-01-13 14:21:29 -0500	[diff] [blame]	702	self._current_line = next(self._lines)
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	703	except StopIteration:
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	704	self._current_line = ''
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	705	self._more_lines = False
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	706	return
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	707	else:
				708	self._line += 1
				709	self._column = 0
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	710
				711	def _SkipWhitespace(self):
				712	while True:
				713	self._PopLine()
liujisi@google.com	33165fe	2010-11-02 13:14:58 +0000	[diff] [blame]	714	match = self._WHITESPACE.match(self._current_line, self._column)
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	715	if not match:
				716	break
				717	length = len(match.group(0))
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	718	self._column += length
				719
				720	def TryConsume(self, token):
				721	"""Tries to consume a given piece of text.
				722
				723	Args:
				724	token: Text to consume.
				725
				726	Returns:
				727	True iff the text was consumed.
				728	"""
				729	if self.token == token:
				730	self.NextToken()
				731	return True
				732	return False
				733
				734	def Consume(self, token):
				735	"""Consumes a piece of text.
				736
				737	Args:
				738	token: Text to consume.
				739
				740	Raises:
				741	ParseError: If the text couldn't be consumed.
				742	"""
				743	if not self.TryConsume(token):
				744	raise self._ParseError('Expected "%s".' % token)
				745
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	746	def TryConsumeIdentifier(self):
				747	try:
				748	self.ConsumeIdentifier()
				749	return True
				750	except ParseError:
				751	return False
				752
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	753	def ConsumeIdentifier(self):
				754	"""Consumes protocol message field identifier.
				755
				756	Returns:
				757	Identifier string.
				758
				759	Raises:
				760	ParseError: If an identifier couldn't be consumed.
				761	"""
				762	result = self.token
liujisi@google.com	33165fe	2010-11-02 13:14:58 +0000	[diff] [blame]	763	if not self._IDENTIFIER.match(result):
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	764	raise self._ParseError('Expected identifier.')
				765	self.NextToken()
				766	return result
				767
				768	def ConsumeInt32(self):
				769	"""Consumes a signed 32bit integer number.
				770
				771	Returns:
				772	The integer parsed.
				773
				774	Raises:
				775	ParseError: If a signed 32bit integer couldn't be consumed.
				776	"""
				777	try:
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	778	result = ParseInteger(self.token, is_signed=True, is_long=False)
Tres Seaver	f336d4b	2015-01-13 14:21:29 -0500	[diff] [blame]	779	except ValueError as e:
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	780	raise self._ParseError(str(e))
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	781	self.NextToken()
				782	return result
				783
				784	def ConsumeUint32(self):
				785	"""Consumes an unsigned 32bit integer number.
				786
				787	Returns:
				788	The integer parsed.
				789
				790	Raises:
				791	ParseError: If an unsigned 32bit integer couldn't be consumed.
				792	"""
				793	try:
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	794	result = ParseInteger(self.token, is_signed=False, is_long=False)
Tres Seaver	f336d4b	2015-01-13 14:21:29 -0500	[diff] [blame]	795	except ValueError as e:
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	796	raise self._ParseError(str(e))
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	797	self.NextToken()
				798	return result
				799
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	800	def TryConsumeInt64(self):
				801	try:
				802	self.ConsumeInt64()
				803	return True
				804	except ParseError:
				805	return False
				806
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	807	def ConsumeInt64(self):
				808	"""Consumes a signed 64bit integer number.
				809
				810	Returns:
				811	The integer parsed.
				812
				813	Raises:
				814	ParseError: If a signed 64bit integer couldn't be consumed.
				815	"""
				816	try:
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	817	result = ParseInteger(self.token, is_signed=True, is_long=True)
Tres Seaver	f336d4b	2015-01-13 14:21:29 -0500	[diff] [blame]	818	except ValueError as e:
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	819	raise self._ParseError(str(e))
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	820	self.NextToken()
				821	return result
				822
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	823	def TryConsumeUint64(self):
				824	try:
				825	self.ConsumeUint64()
				826	return True
				827	except ParseError:
				828	return False
				829
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	830	def ConsumeUint64(self):
				831	"""Consumes an unsigned 64bit integer number.
				832
				833	Returns:
				834	The integer parsed.
				835
				836	Raises:
				837	ParseError: If an unsigned 64bit integer couldn't be consumed.
				838	"""
				839	try:
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	840	result = ParseInteger(self.token, is_signed=False, is_long=True)
Tres Seaver	f336d4b	2015-01-13 14:21:29 -0500	[diff] [blame]	841	except ValueError as e:
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	842	raise self._ParseError(str(e))
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	843	self.NextToken()
				844	return result
				845
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	846	def TryConsumeFloat(self):
				847	try:
				848	self.ConsumeFloat()
				849	return True
				850	except ParseError:
				851	return False
				852
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	853	def ConsumeFloat(self):
				854	"""Consumes an floating point number.
				855
				856	Returns:
				857	The number parsed.
				858
				859	Raises:
				860	ParseError: If a floating point number couldn't be consumed.
				861	"""
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	862	try:
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	863	result = ParseFloat(self.token)
Tres Seaver	f336d4b	2015-01-13 14:21:29 -0500	[diff] [blame]	864	except ValueError as e:
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	865	raise self._ParseError(str(e))
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	866	self.NextToken()
				867	return result
				868
				869	def ConsumeBool(self):
				870	"""Consumes a boolean value.
				871
				872	Returns:
				873	The bool parsed.
				874
				875	Raises:
				876	ParseError: If a boolean value couldn't be consumed.
				877	"""
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	878	try:
				879	result = ParseBool(self.token)
Tres Seaver	f336d4b	2015-01-13 14:21:29 -0500	[diff] [blame]	880	except ValueError as e:
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	881	raise self._ParseError(str(e))
				882	self.NextToken()
				883	return result
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	884
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	885	def TryConsumeString(self):
				886	try:
				887	self.ConsumeString()
				888	return True
				889	except ParseError:
				890	return False
				891
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	892	def ConsumeString(self):
				893	"""Consumes a string value.
				894
				895	Returns:
				896	The string parsed.
				897
				898	Raises:
				899	ParseError: If a string value couldn't be consumed.
				900	"""
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	901	the_bytes = self.ConsumeByteString()
liujisi@google.com	33165fe	2010-11-02 13:14:58 +0000	[diff] [blame]	902	try:
Tres Seaver	f336d4b	2015-01-13 14:21:29 -0500	[diff] [blame]	903	return six.text_type(the_bytes, 'utf-8')
				904	except UnicodeDecodeError as e:
liujisi@google.com	33165fe	2010-11-02 13:14:58 +0000	[diff] [blame]	905	raise self._StringParseError(e)
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	906
				907	def ConsumeByteString(self):
				908	"""Consumes a byte array value.
				909
				910	Returns:
				911	The array parsed (as a string).
				912
				913	Raises:
				914	ParseError: If a byte array value couldn't be consumed.
				915	"""
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	916	the_list = [self._ConsumeSingleByteString()]
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	917	while self.token and self.token[0] in _QUOTES:
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	918	the_list.append(self._ConsumeSingleByteString())
Tres Seaver	f336d4b	2015-01-13 14:21:29 -0500	[diff] [blame]	919	return b''.join(the_list)
kenton@google.com	eef5f83	2009-12-23 01:32:45 +0000	[diff] [blame]	920
kenton@google.com	5353018	2010-01-07 02:08:03 +0000	[diff] [blame]	921	def _ConsumeSingleByteString(self):
				922	"""Consume one token of a string literal.
				923
				924	String literals (whether bytes or text) can come in multiple adjacent
				925	tokens which are automatically concatenated, like in C or Python. This
				926	method only consumes one token.
Bo Yang	5db2173	2015-05-21 14:28:59 -0700	[diff] [blame]	927
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	928	Returns:
				929	The token parsed.
Bo Yang	5db2173	2015-05-21 14:28:59 -0700	[diff] [blame]	930	Raises:
				931	ParseError: When the wrong format data is found.
kenton@google.com	5353018	2010-01-07 02:08:03 +0000	[diff] [blame]	932	"""
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	933	text = self.token
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	934	if len(text) < 1 or text[0] not in _QUOTES:
Bo Yang	5db2173	2015-05-21 14:28:59 -0700	[diff] [blame]	935	raise self._ParseError('Expected string but found: %r' % (text,))
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	936
				937	if len(text) < 2 or text[-1] != text[0]:
Bo Yang	5db2173	2015-05-21 14:28:59 -0700	[diff] [blame]	938	raise self._ParseError('String missing ending quote: %r' % (text,))
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	939
				940	try:
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	941	result = text_encoding.CUnescape(text[1:-1])
Tres Seaver	f336d4b	2015-01-13 14:21:29 -0500	[diff] [blame]	942	except ValueError as e:
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	943	raise self._ParseError(str(e))
				944	self.NextToken()
				945	return result
				946
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	947	def ConsumeEnum(self, field):
				948	try:
				949	result = ParseEnum(field, self.token)
Tres Seaver	f336d4b	2015-01-13 14:21:29 -0500	[diff] [blame]	950	except ValueError as e:
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	951	raise self._ParseError(str(e))
				952	self.NextToken()
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	953	return result
				954
				955	def ParseErrorPreviousToken(self, message):
				956	"""Creates and returns a ParseError for the previously read token.
				957
				958	Args:
				959	message: A message to set for the exception.
				960
				961	Returns:
				962	A ParseError instance.
				963	"""
				964	return ParseError('%d:%d : %s' % (
				965	self._previous_line + 1, self._previous_column + 1, message))
				966
				967	def _ParseError(self, message):
				968	"""Creates and returns a ParseError for the current token."""
				969	return ParseError('%d:%d : %s' % (
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	970	self._line + 1, self._column + 1, message))
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	971
liujisi@google.com	33165fe	2010-11-02 13:14:58 +0000	[diff] [blame]	972	def _StringParseError(self, e):
				973	return self._ParseError('Couldn\'t parse string: ' + str(e))
				974
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	975	def NextToken(self):
				976	"""Reads the next meaningful token."""
				977	self._previous_line = self._line
				978	self._previous_column = self._column
liujisi@google.com	33165fe	2010-11-02 13:14:58 +0000	[diff] [blame]	979
				980	self._column += len(self.token)
				981	self._SkipWhitespace()
				982
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	983	if not self._more_lines:
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	984	self.token = ''
				985	return
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	986
liujisi@google.com	33165fe	2010-11-02 13:14:58 +0000	[diff] [blame]	987	match = self._TOKEN.match(self._current_line, self._column)
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	988	if match:
				989	token = match.group(0)
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	990	self.token = token
				991	else:
liujisi@google.com	33165fe	2010-11-02 13:14:58 +0000	[diff] [blame]	992	self.token = self._current_line[self._column]
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	993
				994
def ParseInteger(text, is_signed=False, is_long=False):
  """Converts text into an integer and range-checks it.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: If the text is not a valid integer. Errors raised by the
      selected checker's CheckValue also propagate to the caller.
  """
  # 32-bit values are forced to int and 64-bit values to long so that
  # alternate implementations where the distinction is more significant
  # (e.g. the C++ implementation) stay simpler. Base 0 lets the literal's own
  # prefix select the radix.
  try:
    result = long(text, 0) if is_long else int(text, 0)
  except ValueError:
    raise ValueError('Couldn\'t parse integer: %s' % text)

  # The checker table is indexed by the (is_long, is_signed) flag pair.
  checker_index = 2 * int(is_long) + int(is_signed)
  _INTEGER_CHECKERS[checker_index].CheckValue(result)
  return result
				1025
				1026
				1027	def ParseFloat(text):
				1028	"""Parse a floating point number.
				1029
				1030	Args:
				1031	text: Text to parse.
				1032
				1033	Returns:
				1034	The number parsed.
				1035
				1036	Raises:
				1037	ValueError: If a floating point number couldn't be parsed.
				1038	"""
				1039	try:
				1040	# Assume Python compatible syntax.
				1041	return float(text)
				1042	except ValueError:
				1043	# Check alternative spellings.
				1044	if _FLOAT_INFINITY.match(text):
				1045	if text[0] == '-':
				1046	return float('-inf')
				1047	else:
				1048	return float('inf')
				1049	elif _FLOAT_NAN.match(text):
				1050	return float('nan')
				1051	else:
				1052	# assume '1.0f' format
				1053	try:
				1054	return float(text.rstrip('f'))
				1055	except ValueError:
				1056	raise ValueError('Couldn\'t parse float: %s' % text)
				1057
				1058
				1059	def ParseBool(text):
				1060	"""Parse a boolean value.
				1061
				1062	Args:
				1063	text: Text to parse.
				1064
				1065	Returns:
				1066	Boolean values parsed
				1067
				1068	Raises:
				1069	ValueError: If text is not a valid boolean.
				1070	"""
				1071	if text in ('true', 't', '1'):
				1072	return True
				1073	elif text in ('false', 'f', '0'):
				1074	return False
				1075	else:
				1076	raise ValueError('Expected "true" or "false".')
				1077
				1078
				1079	def ParseEnum(field, value):
				1080	"""Parse an enum value.
				1081
				1082	The value can be specified by a number (the enum value), or by
				1083	a string literal (the enum name).
				1084
				1085	Args:
				1086	field: Enum field descriptor.
				1087	value: String value.
				1088
				1089	Returns:
				1090	Enum value number.
				1091
				1092	Raises:
				1093	ValueError: If the enum value could not be parsed.
				1094	"""
				1095	enum_descriptor = field.enum_type
				1096	try:
				1097	number = int(value, 0)
				1098	except ValueError:
				1099	# Identifier.
				1100	enum_value = enum_descriptor.values_by_name.get(value, None)
				1101	if enum_value is None:
				1102	raise ValueError(
				1103	'Enum type "%s" has no value named %s.' % (
				1104	enum_descriptor.full_name, value))
				1105	else:
				1106	# Numeric value.
				1107	enum_value = enum_descriptor.values_by_number.get(number, None)
				1108	if enum_value is None:
				1109	raise ValueError(
				1110	'Enum type "%s" has no value with number %d.' % (
				1111	enum_descriptor.full_name, number))
				1112	return enum_value.number