Blame - python/google/protobuf/text_format.py - platform/external/protobuf-javalite

blob: a6f41ca882d12b0f63f63188b33ea0e3a40c9d1c [file] [log] [blame]

temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	1	# Protocol Buffers - Google's data interchange format
kenton@google.com	24bf56f	2008-09-24 20:31:01 +0000	[diff] [blame]	2	# Copyright 2008 Google Inc. All rights reserved.
Feng Xiao	e428862	2014-10-01 16:26:23 -0700	[diff] [blame]	3	# https://developers.google.com/protocol-buffers/
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	4	#
kenton@google.com	24bf56f	2008-09-24 20:31:01 +0000	[diff] [blame]	5	# Redistribution and use in source and binary forms, with or without
				6	# modification, are permitted provided that the following conditions are
				7	# met:
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	8	#
kenton@google.com	24bf56f	2008-09-24 20:31:01 +0000	[diff] [blame]	9	# * Redistributions of source code must retain the above copyright
				10	# notice, this list of conditions and the following disclaimer.
				11	# * Redistributions in binary form must reproduce the above
				12	# copyright notice, this list of conditions and the following disclaimer
				13	# in the documentation and/or other materials provided with the
				14	# distribution.
				15	# * Neither the name of Google Inc. nor the names of its
				16	# contributors may be used to endorse or promote products derived from
				17	# this software without specific prior written permission.
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	18	#
kenton@google.com	24bf56f	2008-09-24 20:31:01 +0000	[diff] [blame]	19	# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				20	# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				21	# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				22	# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				23	# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				24	# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				25	# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				26	# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				27	# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				28	# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				29	# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	30
Jisi Liu	46e8ff6	2015-10-05 11:59:43 -0700	[diff] [blame]	31	"""Contains routines for printing protocol messages in text format.
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	32
Jisi Liu	46e8ff6	2015-10-05 11:59:43 -0700	[diff] [blame]	33	Simple usage example:
				34
				35	# Create a proto object and serialize it to a text proto string.
				36	message = my_proto_pb2.MyMessage(foo='bar')
				37	text_proto = text_format.MessageToString(message)
				38
				39	# Parse a text proto string.
				40	message = text_format.Parse(text_proto, my_proto_pb2.MyMessage())
				41	"""
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	42
				43	__author__ = 'kenton@google.com (Kenton Varda)'
				44
Tres Seaver	47ee4d3	2015-01-13 15:04:41 -0500	[diff] [blame]	45	import io
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	46	import re
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	47
Tres Seaver	f336d4b	2015-01-13 14:21:29 -0500	[diff] [blame]	48	import six
				49
Dan O'Reilly	fe7d937	2015-08-14 15:26:33 -0400	[diff] [blame]	50	if six.PY3:
				51	long = int
				52
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	53	from google.protobuf.internal import type_checkers
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	54	from google.protobuf import descriptor
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	55	from google.protobuf import text_encoding
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	56
# Names exported by ``from google.protobuf.text_format import *``.
# 'Parse' is exported next to 'Merge' because the module docstring presents it
# as the primary entry point for reading text-format input.
__all__ = ['MessageToString', 'Parse', 'PrintMessage', 'PrintField',
           'PrintFieldValue', 'Merge']
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	59
				60
# One value checker per integer flavour, in the fixed order
# (uint32, int32, uint64, int64).
_INTEGER_CHECKERS = (
    type_checkers.Uint32ValueChecker(),
    type_checkers.Int32ValueChecker(),
    type_checkers.Uint64ValueChecker(),
    type_checkers.Int64ValueChecker(),
)
# Case-insensitive spellings of infinity ("inf", "-infinity", "inff", ...).
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', flags=re.IGNORECASE)
# Case-insensitive spellings of not-a-number ("nan", "nanf").
_FLOAT_NAN = re.compile('nanf?', flags=re.IGNORECASE)
# C++ types that are printed through float_format when one is supplied.
_FLOAT_TYPES = frozenset([
    descriptor.FieldDescriptor.CPPTYPE_FLOAT,
    descriptor.FieldDescriptor.CPPTYPE_DOUBLE,
])
# The two quote characters.
_QUOTES = frozenset(["'", '"'])
kenton@google.com	d0047c4	2009-12-23 02:01:01 +0000	[diff] [blame]	70
				71
class Error(Exception):
  """Base class for every exception defined by the text_format module."""
				74
				75
				76	class ParseError(Error):
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	77	"""Thrown in case of text parsing error."""
				78
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	79
class TextWriter(object):
  """In-memory sink that collects text-format output.

  The backing buffer is an io.BytesIO on Python 2 and an io.StringIO on
  Python 3.
  """

  def __init__(self, as_utf8):
    # NOTE: as_utf8 is accepted for the caller's convenience but is not read
    # anywhere in this class.
    buffer_type = io.BytesIO if six.PY2 else io.StringIO
    self._writer = buffer_type()

  def write(self, val):
    """Appends val to the buffer; Python 2 unicode is stored UTF-8 encoded."""
    if six.PY2 and isinstance(val, six.text_type):
      return self._writer.write(val.encode('utf-8'))
    return self._writer.write(val)

  def close(self):
    """Closes the underlying buffer."""
    return self._writer.close()

  def getvalue(self):
    """Returns everything written so far."""
    return self._writer.getvalue()
				98
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	99
def MessageToString(message, as_utf8=False, as_one_line=False,
                    pointy_brackets=False, use_index_order=False,
                    float_format=None):
  """Renders a protobuf message as a text-format string.

  For compact floating point output use float_format='.15g' (15 digits is the
  most an IEEE 754 "double" can guarantee). For output that converts back to
  an identical value, use float_format='.17g'.

  Args:
    message: The protocol buffers message.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields in the order they are defined in
      the source code rather than by field number (the default).
    float_format: If set, a "Format Specification Mini-Language" spec applied
      to floating point values; otherwise str() is used.

  Returns:
    A string of the text formatted protocol buffer message.
  """
  writer = TextWriter(as_utf8)
  PrintMessage(message, writer,
               as_utf8=as_utf8,
               as_one_line=as_one_line,
               pointy_brackets=pointy_brackets,
               use_index_order=use_index_order,
               float_format=float_format)
  text = writer.getvalue()
  writer.close()
  # One-line output ends with the separator written after the last field.
  return text.rstrip() if as_one_line else text
				135
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	136
def _IsMapEntry(field):
  """Tells whether field is a message field whose type is a map entry.

  Args:
    field: The field descriptor to inspect.

  Returns:
    A truthy value when the field's message type carries the map_entry
    option, a falsy value otherwise.
  """
  if field.type != descriptor.FieldDescriptor.TYPE_MESSAGE:
    return False
  entry_type = field.message_type
  return entry_type.has_options and entry_type.GetOptions().map_entry
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	141
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	142
def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False,
                 pointy_brackets=False, use_index_order=False,
                 float_format=None):
  """Writes every set field of message to out in text format.

  Fields are taken from message.ListFields(); when use_index_order is true
  they are first sorted by descriptor index. Map fields are printed one entry
  per key in sorted key order, repeated fields one element at a time, and
  everything else as a single field.

  Args:
    message: The protocol buffers message to print.
    out: Writer object exposing write().
    indent: Number of spaces to put in front of each field.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, order fields by their descriptor index.
    float_format: Optional format spec for floating point values.
  """

  def emit(field, item):
    # Every field kind funnels through PrintField with the same options.
    PrintField(field, item, out, indent, as_utf8, as_one_line,
               pointy_brackets=pointy_brackets,
               use_index_order=use_index_order,
               float_format=float_format)

  set_fields = message.ListFields()
  if use_index_order:
    set_fields.sort(key=lambda pair: pair[0].index)

  for field, value in set_fields:
    if _IsMapEntry(field):
      for key in sorted(value):
        # This is slow for maps with submessage entries because it copies the
        # entire tree. Unfortunately this would take significant refactoring
        # of this file to work around.
        #
        # TODO(haberman): refactor and optimize if this becomes an issue.
        emit(field,
             field.message_type._concrete_class(key=key, value=value[key]))
    elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      for element in value:
        emit(field, element)
    else:
      emit(field, value)
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	173
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	174
def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False,
               pointy_brackets=False, use_index_order=False,
               float_format=None):
  """Writes one "name: value" pair to out.

  For repeated fields, value should be a single element.

  Args:
    field: Descriptor of the field being printed.
    value: The value (or single repeated element) to print.
    out: Writer object exposing write().
    indent: Number of spaces written before the field name.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Terminate the field with a space instead of a newline.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: Passed through to nested message printing.
    float_format: Optional format spec for floating point values.
  """
  field_types = descriptor.FieldDescriptor

  out.write(' ' * indent)
  if field.is_extension:
    out.write('[')
    # An optional message extension of a message-set container is printed
    # under its message type name instead of the extension name.
    use_type_name = (
        field.containing_type.GetOptions().message_set_wire_format and
        field.type == field_types.TYPE_MESSAGE and
        field.label == field_types.LABEL_OPTIONAL)
    out.write(field.message_type.full_name if use_type_name
              else field.full_name)
    out.write(']')
  elif field.type == field_types.TYPE_GROUP:
    # For groups, use the capitalized name.
    out.write(field.message_type.name)
  else:
    out.write(field.name)

  if field.cpp_type != field_types.CPPTYPE_MESSAGE:
    # The colon is optional for message fields, and our cross-language golden
    # files don't include it there, so only non-message fields get one.
    out.write(': ')

  PrintFieldValue(field, value, out, indent, as_utf8, as_one_line,
                  pointy_brackets=pointy_brackets,
                  use_index_order=use_index_order,
                  float_format=float_format)
  out.write(' ' if as_one_line else '\n')
temporal	40ee551	2008-07-10 02:12:20 +0000	[diff] [blame]	210
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	211
def PrintFieldValue(field, value, out, indent=0, as_utf8=False,
                    as_one_line=False, pointy_brackets=False,
                    use_index_order=False,
                    float_format=None):
  """Writes a single field value (without its name) to out.

  For repeated fields, value should be a single element.

  Args:
    field: Descriptor of the field the value belongs to.
    value: The value to print.
    out: Writer object exposing write().
    indent: Current indentation; nested messages are indented two further
      spaces unless as_one_line is set.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Keep nested messages on the same line.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: Passed through to nested message printing.
    float_format: Optional format spec for float/double values; str() is used
      when it is None.
  """
  field_types = descriptor.FieldDescriptor
  cpp_type = field.cpp_type
  open_bracket, close_bracket = ('<', '>') if pointy_brackets else ('{', '}')

  if cpp_type == field_types.CPPTYPE_MESSAGE:
    if as_one_line:
      out.write(' %s ' % open_bracket)
      nested_indent = indent
      closing = close_bracket
    else:
      out.write(' %s\n' % open_bracket)
      nested_indent = indent + 2
      closing = ' ' * indent + close_bracket
    PrintMessage(value, out, nested_indent, as_utf8, as_one_line,
                 pointy_brackets=pointy_brackets,
                 use_index_order=use_index_order,
                 float_format=float_format)
    out.write(closing)
    return

  if cpp_type == field_types.CPPTYPE_ENUM:
    # Numbers without a declared enum value are printed numerically.
    enum_value = field.enum_type.values_by_number.get(value, None)
    out.write(str(value) if enum_value is None else enum_value.name)
    return

  if cpp_type == field_types.CPPTYPE_STRING:
    out.write('"')
    raw = value.encode('utf-8') if isinstance(value, six.text_type) else value
    # We need to escape non-UTF8 chars in TYPE_BYTES fields, so as_utf8 is
    # never honoured for them.
    escape_as_utf8 = (
        False if field.type == field_types.TYPE_BYTES else as_utf8)
    out.write(text_encoding.CEscape(raw, escape_as_utf8))
    out.write('"')
    return

  if cpp_type == field_types.CPPTYPE_BOOL:
    out.write('true' if value else 'false')
    return

  if cpp_type in _FLOAT_TYPES and float_format is not None:
    out.write('{1:{0}}'.format(float_format, value))
    return

  out.write(str(value))
				269
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	270
def Parse(text, message, allow_unknown_extension=False):
  """Parses a text representation of a protocol message into a message.

  Args:
    text: Message text representation. May be the interpreter's native str,
      or bytes (Python 3) / unicode (Python 2); the latter two are converted
      to native str using UTF-8.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  if not isinstance(text, str):
    # Normalise to native str. On Python 2 the non-str case is unicode, which
    # must be *encoded*: calling decode() on it would implicitly ASCII-encode
    # first and fail on any non-ASCII character.
    if six.PY3:
      text = text.decode('utf-8')
    else:
      text = text.encode('utf-8')
  return ParseLines(text.split('\n'), message, allow_unknown_extension)
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	289
				290
def Merge(text, message, allow_unknown_extension=False):
  """Parses a text representation of a protocol message into a message.

  Like Parse(), but allows repeated values for a non-repeated field, and uses
  the last one.

  Args:
    text: Message text representation. May be the interpreter's native str,
      or bytes (Python 3) / unicode (Python 2); the latter two are converted
      to native str using UTF-8, exactly as Parse() does.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  if not isinstance(text, str):
    # Without this, bytes input on Python 3 fails in split('\n') with a
    # TypeError even though Parse() accepts the same input.
    if six.PY3:
      text = text.decode('utf-8')
    else:
      text = text.encode('utf-8')
  return MergeLines(text.split('\n'), message, allow_unknown_extension)
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	310
				311
def ParseLines(lines, message, allow_unknown_extension=False):
  """Parses text-format lines into message, rejecting duplicate scalars.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  _ParseOrMerge(lines, message,
                allow_multiple_scalars=False,
                allow_unknown_extension=allow_unknown_extension)
  return message
				329
				330
def MergeLines(lines, message, allow_unknown_extension=False):
  """Parses text-format lines into message, allowing duplicate scalars.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  _ParseOrMerge(lines, message,
                allow_multiple_scalars=True,
                allow_unknown_extension=allow_unknown_extension)
  return message
				348
				349
def _ParseOrMerge(lines,
                  message,
                  allow_multiple_scalars,
                  allow_unknown_extension=False):
  """Reads fields from text-format lines into message until input runs out.

  Args:
    lines: Lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing

  Raises:
    ParseError: On text parsing problems.
  """
  token_stream = _Tokenizer(lines)
  # Each _MergeField call consumes exactly one top-level field.
  while not token_stream.AtEnd():
    _MergeField(token_stream, message, allow_multiple_scalars,
                allow_unknown_extension)
Feng Xiao	f157a56	2014-11-14 11:50:31 -0800	[diff] [blame]	372
				373
def _MergeField(tokenizer,
                message,
                allow_multiple_scalars,
                allow_unknown_extension=False):
  """Merges a single protocol message field into a message.

  Reads the field name (plain, group, or bracketed extension name), resolves
  it against the message descriptor, then hands the value to the scalar or
  message merger. Repeated fields may use the short list form
  "foo: [1, 2, 3]", including the empty list "foo: []".

  Args:
    tokenizer: A tokenizer to parse the field name and values.
    message: A protocol message to record the data.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing.

  Raises:
    ParseError: In case of text parsing problems.
  """
  message_descriptor = message.DESCRIPTOR
  if (hasattr(message_descriptor, 'syntax') and
      message_descriptor.syntax == 'proto3'):
    # Proto3 doesn't represent presence so we can't test if multiple
    # scalars have occurred.  We have to allow them.
    allow_multiple_scalars = True
  if tokenizer.TryConsume('['):
    # Extension: "[fully.qualified.name]".
    name = [tokenizer.ConsumeIdentifier()]
    while tokenizer.TryConsume('.'):
      name.append(tokenizer.ConsumeIdentifier())
    name = '.'.join(name)

    if not message_descriptor.is_extendable:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" does not have extensions.' %
          message_descriptor.full_name)
    # pylint: disable=protected-access
    field = message.Extensions._FindExtensionByName(name)
    # pylint: enable=protected-access
    if not field:
      if allow_unknown_extension:
        # Remember that the extension is unknown; its contents are skipped
        # further down.
        field = None
      else:
        raise tokenizer.ParseErrorPreviousToken(
            'Extension "%s" not registered.' % name)
    elif message_descriptor != field.containing_type:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" does not extend message type "%s".' % (
              name, message_descriptor.full_name))

    tokenizer.Consume(']')

  else:
    name = tokenizer.ConsumeIdentifier()
    field = message_descriptor.fields_by_name.get(name, None)

    # Group names are expected to be capitalized as they appear in the
    # .proto file, which actually matches their type names, not their field
    # names.
    if not field:
      field = message_descriptor.fields_by_name.get(name.lower(), None)
      if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP:
        field = None

    if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and
        field.message_type.name != name):
      field = None

    if not field:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" has no field named "%s".' % (
              message_descriptor.full_name, name))

  if field:
    if not allow_multiple_scalars and field.containing_oneof:
      # Check if there's a different field set in this oneof.
      # Note that we ignore the case if the same field was set before, and we
      # apply allow_multiple_scalars to non-scalar fields as well.
      which_oneof = message.WhichOneof(field.containing_oneof.name)
      if which_oneof is not None and which_oneof != field.name:
        raise tokenizer.ParseErrorPreviousToken(
            'Field "%s" is specified along with field "%s", another member of '
            'oneof "%s" for message type "%s".' % (
                field.name, which_oneof, field.containing_oneof.name,
                message_descriptor.full_name))

    if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
      # The colon before a message value is optional.
      tokenizer.TryConsume(':')
      merger = _MergeMessageField
    else:
      tokenizer.Consume(':')
      merger = _MergeScalarField

    if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED
        and tokenizer.TryConsume('[')):
      # Short repeated format, e.g. "foo: [1, 2, 3]".
      # Check for the closing bracket first so that the empty list "foo: []"
      # is accepted instead of being fed to the merger as a value.
      if not tokenizer.TryConsume(']'):
        while True:
          merger(tokenizer, message, field, allow_multiple_scalars,
                 allow_unknown_extension)
          if tokenizer.TryConsume(']'):
            break
          tokenizer.Consume(',')

    else:
      merger(tokenizer, message, field, allow_multiple_scalars,
             allow_unknown_extension)

  else:  # Proto field is unknown.
    # Only reachable through the unknown-extension branch above.
    assert allow_unknown_extension
    _SkipFieldContents(tokenizer)

  # For historical reasons, fields may optionally be separated by commas or
  # semicolons.
  if not tokenizer.TryConsume(','):
    tokenizer.TryConsume(';')
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	486
				487
def _SkipFieldContents(tokenizer):
  """Skips over the contents (scalar value or message body) of a field.

  The field's type is guessed from the input: a scalar must be introduced by
  ":" and must not start with "{" or "<". If the ":" is missing, or is
  followed by "{" or "<", the field has to be a message (or the input is
  ill-formed).

  Args:
    tokenizer: A tokenizer to parse the field name and values.
  """
  if not tokenizer.TryConsume(':'):
    _SkipFieldMessage(tokenizer)
  elif tokenizer.LookingAt('{') or tokenizer.LookingAt('<'):
    _SkipFieldMessage(tokenizer)
  else:
    _SkipFieldValue(tokenizer)
				505
				506
				507	def _SkipField(tokenizer):
				508	"""Skips over a complete field (name and value/message).
				509
				510	Args:
				511	tokenizer: A tokenizer to parse the field name and values.
				512	"""
				513	if tokenizer.TryConsume('['):
				514	# Consume extension name.
				515	tokenizer.ConsumeIdentifier()
				516	while tokenizer.TryConsume('.'):
				517	tokenizer.ConsumeIdentifier()
				518	tokenizer.Consume(']')
				519	else:
				520	tokenizer.ConsumeIdentifier()
				521
				522	_SkipFieldContents(tokenizer)
				523
				524	# For historical reasons, fields may optionally be separated by commas or
				525	# semicolons.
				526	if not tokenizer.TryConsume(','):
				527	tokenizer.TryConsume(';')
				528
				529
				530	def _SkipFieldMessage(tokenizer):
				531	"""Skips over a field message.
				532
				533	Args:
				534	tokenizer: A tokenizer to parse the field name and values.
				535	"""
				536
				537	if tokenizer.TryConsume('<'):
				538	delimiter = '>'
				539	else:
				540	tokenizer.Consume('{')
				541	delimiter = '}'
				542
				543	while not tokenizer.LookingAt('>') and not tokenizer.LookingAt('}'):
				544	_SkipField(tokenizer)
				545
				546	tokenizer.Consume(delimiter)
				547
				548
				549	def _SkipFieldValue(tokenizer):
				550	"""Skips over a field value.
				551
				552	Args:
				553	tokenizer: A tokenizer to parse the field name and values.
				554
				555	Raises:
				556	ParseError: In case an invalid field value is found.
				557	"""
				558	# String tokens can come in multiple adjacent string literals.
				559	# If we can consume one, consume as many as we can.
				560	if tokenizer.TryConsumeString():
				561	while tokenizer.TryConsumeString():
				562	pass
				563	return
				564
				565	if (not tokenizer.TryConsumeIdentifier() and
				566	not tokenizer.TryConsumeInt64() and
				567	not tokenizer.TryConsumeUint64() and
				568	not tokenizer.TryConsumeFloat()):
				569	raise ParseError('Invalid field value: ' + tokenizer.token)
				570
				571
def _MergeMessageField(tokenizer, message, field, allow_multiple_scalars,
                       allow_unknown_extension):
  """Merges a single message-typed field into a message.

  Parses a sub-message body delimited by "<" ... ">" or "{" ... "}" and
  merges every field found inside it into the appropriate sub-message of
  `message`.

  Args:
    tokenizer: A tokenizer to parse the field value.
    message: The message of which field is a member.
    field: The descriptor of the field to be merged.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing.

  Raises:
    ParseError: In case of text parsing problems.
  """
  is_map_entry = _IsMapEntry(field)

  # The closing delimiter has to match whichever opener was used.
  opened_with_angle = tokenizer.TryConsume('<')
  if not opened_with_angle:
    tokenizer.Consume('{')
  end_token = '>' if opened_with_angle else '}'

  repeated = field.label == descriptor.FieldDescriptor.LABEL_REPEATED
  if repeated and field.is_extension:
    sub_message = message.Extensions[field].add()
  elif repeated and is_map_entry:
    # Map entries are parsed into a detached entry message and copied into
    # the map once the body has been read (see the end of this function).
    # pylint: disable=protected-access
    sub_message = field.message_type._concrete_class()
  elif repeated:
    sub_message = getattr(message, field.name).add()
  else:
    if field.is_extension:
      sub_message = message.Extensions[field]
    else:
      sub_message = getattr(message, field.name)
    sub_message.SetInParent()

  # Merge nested fields until the closing delimiter is consumed.
  while True:
    if tokenizer.TryConsume(end_token):
      break
    if tokenizer.AtEnd():
      raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token,))
    _MergeField(tokenizer, sub_message, allow_multiple_scalars,
                allow_unknown_extension)

  if not is_map_entry:
    return

  # Transfer the parsed key/value pair into the map field itself.
  container = getattr(message, field.name)
  value_cpptype = field.message_type.fields_by_name['value'].cpp_type
  if value_cpptype == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    container[sub_message.key].MergeFrom(sub_message.value)
  else:
    container[sub_message.key] = sub_message.value
				625
				626
				627	def _MergeScalarField(tokenizer, message, field, allow_multiple_scalars,
				628	allow_unknown_extension):
				629	"""Merges a single scalar field into a message.
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	630
				631	Args:
				632	tokenizer: A tokenizer to parse the field value.
				633	message: A protocol message to record the data.
				634	field: The descriptor of the field to be merged.
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	635	allow_multiple_scalars: Determines if repeated values for a non-repeated
				636	field are permitted, e.g., the string "foo: 1 foo: 2" for a
				637	required/optional field named "foo".
Jisi Liu	3b3c8ab	2016-03-30 11:39:59 -0700	[diff] [blame]	638	allow_unknown_extension: Unused, just here for consistency with
				639	_MergeMessageField.
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	640
				641	Raises:
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	642	ParseError: In case of text parsing problems.
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	643	RuntimeError: On runtime errors.
				644	"""
Jisi Liu	3b3c8ab	2016-03-30 11:39:59 -0700	[diff] [blame]	645	_ = allow_unknown_extension
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	646	value = None
				647
				648	if field.type in (descriptor.FieldDescriptor.TYPE_INT32,
				649	descriptor.FieldDescriptor.TYPE_SINT32,
				650	descriptor.FieldDescriptor.TYPE_SFIXED32):
				651	value = tokenizer.ConsumeInt32()
				652	elif field.type in (descriptor.FieldDescriptor.TYPE_INT64,
				653	descriptor.FieldDescriptor.TYPE_SINT64,
				654	descriptor.FieldDescriptor.TYPE_SFIXED64):
				655	value = tokenizer.ConsumeInt64()
				656	elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32,
				657	descriptor.FieldDescriptor.TYPE_FIXED32):
				658	value = tokenizer.ConsumeUint32()
				659	elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64,
				660	descriptor.FieldDescriptor.TYPE_FIXED64):
				661	value = tokenizer.ConsumeUint64()
				662	elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT,
				663	descriptor.FieldDescriptor.TYPE_DOUBLE):
				664	value = tokenizer.ConsumeFloat()
				665	elif field.type == descriptor.FieldDescriptor.TYPE_BOOL:
				666	value = tokenizer.ConsumeBool()
				667	elif field.type == descriptor.FieldDescriptor.TYPE_STRING:
				668	value = tokenizer.ConsumeString()
				669	elif field.type == descriptor.FieldDescriptor.TYPE_BYTES:
				670	value = tokenizer.ConsumeByteString()
				671	elif field.type == descriptor.FieldDescriptor.TYPE_ENUM:
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	672	value = tokenizer.ConsumeEnum(field)
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	673	else:
				674	raise RuntimeError('Unknown field type %d' % field.type)
				675
				676	if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
				677	if field.is_extension:
				678	message.Extensions[field].append(value)
				679	else:
				680	getattr(message, field.name).append(value)
				681	else:
				682	if field.is_extension:
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	683	if not allow_multiple_scalars and message.HasExtension(field):
				684	raise tokenizer.ParseErrorPreviousToken(
				685	'Message type "%s" should not have multiple "%s" extensions.' %
				686	(message.DESCRIPTOR.full_name, field.full_name))
				687	else:
				688	message.Extensions[field] = value
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	689	else:
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	690	if not allow_multiple_scalars and message.HasField(field.name):
				691	raise tokenizer.ParseErrorPreviousToken(
				692	'Message type "%s" should not have multiple "%s" fields.' %
				693	(message.DESCRIPTOR.full_name, field.name))
				694	else:
				695	setattr(message, field.name, value)
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	696
				697
				698	class _Tokenizer(object):
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	699	"""Protocol buffer text representation tokenizer.
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	700
				701	This class handles the lower level string parsing by splitting it into
				702	meaningful tokens.
				703
				704	It was directly ported from the Java protocol buffer API.
				705	"""
				706
				707	_WHITESPACE = re.compile('(\\s\|(#.*$))+', re.MULTILINE)
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	708	_TOKEN = re.compile('\|'.join([
				709	r'[a-zA-Z_][0-9a-zA-Z_+-]*', # an identifier
				710	r'([0-9+-]\|(\.[0-9]))[0-9a-zA-Z_.+-]*', # a number
				711	] + [ # quoted str for each quote mark
				712	r'{qt}([^{qt}\n\\]\|\\.)*({qt}\|\\?$)'.format(qt=mark) for mark in _QUOTES
				713	]))
				714
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	715	_IDENTIFIER = re.compile(r'\w+')
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	716
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	717	def __init__(self, lines):
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	718	self._position = 0
				719	self._line = -1
				720	self._column = 0
				721	self._token_start = None
				722	self.token = ''
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	723	self._lines = iter(lines)
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	724	self._current_line = ''
				725	self._previous_line = 0
				726	self._previous_column = 0
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	727	self._more_lines = True
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	728	self._SkipWhitespace()
				729	self.NextToken()
				730
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	731	def LookingAt(self, token):
				732	return self.token == token
				733
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	734	def AtEnd(self):
				735	"""Checks the end of the text was reached.
				736
				737	Returns:
				738	True iff the end was reached.
				739	"""
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	740	return not self.token
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	741
				742	def _PopLine(self):
liujisi@google.com	33165fe	2010-11-02 13:14:58 +0000	[diff] [blame]	743	while len(self._current_line) <= self._column:
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	744	try:
Tres Seaver	f336d4b	2015-01-13 14:21:29 -0500	[diff] [blame]	745	self._current_line = next(self._lines)
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	746	except StopIteration:
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	747	self._current_line = ''
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	748	self._more_lines = False
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	749	return
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	750	else:
				751	self._line += 1
				752	self._column = 0
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	753
				754	def _SkipWhitespace(self):
				755	while True:
				756	self._PopLine()
liujisi@google.com	33165fe	2010-11-02 13:14:58 +0000	[diff] [blame]	757	match = self._WHITESPACE.match(self._current_line, self._column)
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	758	if not match:
				759	break
				760	length = len(match.group(0))
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	761	self._column += length
				762
				763	def TryConsume(self, token):
				764	"""Tries to consume a given piece of text.
				765
				766	Args:
				767	token: Text to consume.
				768
				769	Returns:
				770	True iff the text was consumed.
				771	"""
				772	if self.token == token:
				773	self.NextToken()
				774	return True
				775	return False
				776
				777	def Consume(self, token):
				778	"""Consumes a piece of text.
				779
				780	Args:
				781	token: Text to consume.
				782
				783	Raises:
				784	ParseError: If the text couldn't be consumed.
				785	"""
				786	if not self.TryConsume(token):
				787	raise self._ParseError('Expected "%s".' % token)
				788
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	789	def TryConsumeIdentifier(self):
				790	try:
				791	self.ConsumeIdentifier()
				792	return True
				793	except ParseError:
				794	return False
				795
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	796	def ConsumeIdentifier(self):
				797	"""Consumes protocol message field identifier.
				798
				799	Returns:
				800	Identifier string.
				801
				802	Raises:
				803	ParseError: If an identifier couldn't be consumed.
				804	"""
				805	result = self.token
liujisi@google.com	33165fe	2010-11-02 13:14:58 +0000	[diff] [blame]	806	if not self._IDENTIFIER.match(result):
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	807	raise self._ParseError('Expected identifier.')
				808	self.NextToken()
				809	return result
				810
				811	def ConsumeInt32(self):
				812	"""Consumes a signed 32bit integer number.
				813
				814	Returns:
				815	The integer parsed.
				816
				817	Raises:
				818	ParseError: If a signed 32bit integer couldn't be consumed.
				819	"""
				820	try:
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	821	result = ParseInteger(self.token, is_signed=True, is_long=False)
Tres Seaver	f336d4b	2015-01-13 14:21:29 -0500	[diff] [blame]	822	except ValueError as e:
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	823	raise self._ParseError(str(e))
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	824	self.NextToken()
				825	return result
				826
				827	def ConsumeUint32(self):
				828	"""Consumes an unsigned 32bit integer number.
				829
				830	Returns:
				831	The integer parsed.
				832
				833	Raises:
				834	ParseError: If an unsigned 32bit integer couldn't be consumed.
				835	"""
				836	try:
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	837	result = ParseInteger(self.token, is_signed=False, is_long=False)
Tres Seaver	f336d4b	2015-01-13 14:21:29 -0500	[diff] [blame]	838	except ValueError as e:
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	839	raise self._ParseError(str(e))
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	840	self.NextToken()
				841	return result
				842
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	843	def TryConsumeInt64(self):
				844	try:
				845	self.ConsumeInt64()
				846	return True
				847	except ParseError:
				848	return False
				849
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	850	def ConsumeInt64(self):
				851	"""Consumes a signed 64bit integer number.
				852
				853	Returns:
				854	The integer parsed.
				855
				856	Raises:
				857	ParseError: If a signed 64bit integer couldn't be consumed.
				858	"""
				859	try:
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	860	result = ParseInteger(self.token, is_signed=True, is_long=True)
Tres Seaver	f336d4b	2015-01-13 14:21:29 -0500	[diff] [blame]	861	except ValueError as e:
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	862	raise self._ParseError(str(e))
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	863	self.NextToken()
				864	return result
				865
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	866	def TryConsumeUint64(self):
				867	try:
				868	self.ConsumeUint64()
				869	return True
				870	except ParseError:
				871	return False
				872
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	873	def ConsumeUint64(self):
				874	"""Consumes an unsigned 64bit integer number.
				875
				876	Returns:
				877	The integer parsed.
				878
				879	Raises:
				880	ParseError: If an unsigned 64bit integer couldn't be consumed.
				881	"""
				882	try:
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	883	result = ParseInteger(self.token, is_signed=False, is_long=True)
Tres Seaver	f336d4b	2015-01-13 14:21:29 -0500	[diff] [blame]	884	except ValueError as e:
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	885	raise self._ParseError(str(e))
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	886	self.NextToken()
				887	return result
				888
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	889	def TryConsumeFloat(self):
				890	try:
				891	self.ConsumeFloat()
				892	return True
				893	except ParseError:
				894	return False
				895
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	896	def ConsumeFloat(self):
				897	"""Consumes an floating point number.
				898
				899	Returns:
				900	The number parsed.
				901
				902	Raises:
				903	ParseError: If a floating point number couldn't be consumed.
				904	"""
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	905	try:
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	906	result = ParseFloat(self.token)
Tres Seaver	f336d4b	2015-01-13 14:21:29 -0500	[diff] [blame]	907	except ValueError as e:
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	908	raise self._ParseError(str(e))
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	909	self.NextToken()
				910	return result
				911
				912	def ConsumeBool(self):
				913	"""Consumes a boolean value.
				914
				915	Returns:
				916	The bool parsed.
				917
				918	Raises:
				919	ParseError: If a boolean value couldn't be consumed.
				920	"""
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	921	try:
				922	result = ParseBool(self.token)
Tres Seaver	f336d4b	2015-01-13 14:21:29 -0500	[diff] [blame]	923	except ValueError as e:
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	924	raise self._ParseError(str(e))
				925	self.NextToken()
				926	return result
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	927
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	928	def TryConsumeString(self):
				929	try:
				930	self.ConsumeString()
				931	return True
				932	except ParseError:
				933	return False
				934
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	935	def ConsumeString(self):
				936	"""Consumes a string value.
				937
				938	Returns:
				939	The string parsed.
				940
				941	Raises:
				942	ParseError: If a string value couldn't be consumed.
				943	"""
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	944	the_bytes = self.ConsumeByteString()
liujisi@google.com	33165fe	2010-11-02 13:14:58 +0000	[diff] [blame]	945	try:
Tres Seaver	f336d4b	2015-01-13 14:21:29 -0500	[diff] [blame]	946	return six.text_type(the_bytes, 'utf-8')
				947	except UnicodeDecodeError as e:
liujisi@google.com	33165fe	2010-11-02 13:14:58 +0000	[diff] [blame]	948	raise self._StringParseError(e)
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	949
				950	def ConsumeByteString(self):
				951	"""Consumes a byte array value.
				952
				953	Returns:
				954	The array parsed (as a string).
				955
				956	Raises:
				957	ParseError: If a byte array value couldn't be consumed.
				958	"""
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	959	the_list = [self._ConsumeSingleByteString()]
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	960	while self.token and self.token[0] in _QUOTES:
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	961	the_list.append(self._ConsumeSingleByteString())
Tres Seaver	f336d4b	2015-01-13 14:21:29 -0500	[diff] [blame]	962	return b''.join(the_list)
kenton@google.com	eef5f83	2009-12-23 01:32:45 +0000	[diff] [blame]	963
kenton@google.com	5353018	2010-01-07 02:08:03 +0000	[diff] [blame]	964	def _ConsumeSingleByteString(self):
				965	"""Consume one token of a string literal.
				966
				967	String literals (whether bytes or text) can come in multiple adjacent
				968	tokens which are automatically concatenated, like in C or Python. This
				969	method only consumes one token.
Bo Yang	5db2173	2015-05-21 14:28:59 -0700	[diff] [blame]	970
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	971	Returns:
				972	The token parsed.
Bo Yang	5db2173	2015-05-21 14:28:59 -0700	[diff] [blame]	973	Raises:
				974	ParseError: When the wrong format data is found.
kenton@google.com	5353018	2010-01-07 02:08:03 +0000	[diff] [blame]	975	"""
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	976	text = self.token
Feng Xiao	e841bac	2015-12-11 17:09:20 -0800	[diff] [blame]	977	if len(text) < 1 or text[0] not in _QUOTES:
Bo Yang	5db2173	2015-05-21 14:28:59 -0700	[diff] [blame]	978	raise self._ParseError('Expected string but found: %r' % (text,))
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	979
				980	if len(text) < 2 or text[-1] != text[0]:
Bo Yang	5db2173	2015-05-21 14:28:59 -0700	[diff] [blame]	981	raise self._ParseError('String missing ending quote: %r' % (text,))
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	982
				983	try:
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	984	result = text_encoding.CUnescape(text[1:-1])
Tres Seaver	f336d4b	2015-01-13 14:21:29 -0500	[diff] [blame]	985	except ValueError as e:
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	986	raise self._ParseError(str(e))
				987	self.NextToken()
				988	return result
				989
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	990	def ConsumeEnum(self, field):
				991	try:
				992	result = ParseEnum(field, self.token)
Tres Seaver	f336d4b	2015-01-13 14:21:29 -0500	[diff] [blame]	993	except ValueError as e:
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	994	raise self._ParseError(str(e))
				995	self.NextToken()
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	996	return result
				997
				998	def ParseErrorPreviousToken(self, message):
				999	"""Creates and returns a ParseError for the previously read token.
				1000
				1001	Args:
				1002	message: A message to set for the exception.
				1003
				1004	Returns:
				1005	A ParseError instance.
				1006	"""
				1007	return ParseError('%d:%d : %s' % (
				1008	self._previous_line + 1, self._previous_column + 1, message))
				1009
				1010	def _ParseError(self, message):
				1011	"""Creates and returns a ParseError for the current token."""
				1012	return ParseError('%d:%d : %s' % (
xiaofeng@google.com	b55a20f	2012-09-22 02:40:50 +0000	[diff] [blame]	1013	self._line + 1, self._column + 1, message))
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	1014
liujisi@google.com	33165fe	2010-11-02 13:14:58 +0000	[diff] [blame]	1015	def _StringParseError(self, e):
				1016	return self._ParseError('Couldn\'t parse string: ' + str(e))
				1017
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	1018	def NextToken(self):
				1019	"""Reads the next meaningful token."""
				1020	self._previous_line = self._line
				1021	self._previous_column = self._column
liujisi@google.com	33165fe	2010-11-02 13:14:58 +0000	[diff] [blame]	1022
				1023	self._column += len(self.token)
				1024	self._SkipWhitespace()
				1025
jieluo@google.com	bde4a32	2014-08-12 21:10:30 +0000	[diff] [blame]	1026	if not self._more_lines:
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	1027	self.token = ''
				1028	return
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	1029
liujisi@google.com	33165fe	2010-11-02 13:14:58 +0000	[diff] [blame]	1030	match = self._TOKEN.match(self._current_line, self._column)
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	1031	if match:
				1032	token = match.group(0)
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	1033	self.token = token
				1034	else:
liujisi@google.com	33165fe	2010-11-02 13:14:58 +0000	[diff] [blame]	1035	self.token = self._current_line[self._column]
kenton@google.com	80b1d62	2009-07-29 01:13:20 +0000	[diff] [blame]	1036
				1037
def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer and validates its range.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown Iff the text is not a valid integer.
  """
  try:
    # We force 32-bit values to int and 64-bit values to long to make
    # alternate implementations where the distinction is more significant
    # (e.g. the C++ implementation) simpler. Base 0 lets the literal's own
    # prefix select the radix.
    convert = long if is_long else int
    result = convert(text, 0)
  except ValueError:
    raise ValueError('Couldn\'t parse integer: %s' % text)

  # Pick the range checker for this width/signedness combination; any error
  # it raises is left for the caller to handle.
  checker_index = 2 * int(is_long) + int(is_signed)
  _INTEGER_CHECKERS[checker_index].CheckValue(result)
  return result
				1068
				1069
				1070	def ParseFloat(text):
				1071	"""Parse a floating point number.
				1072
				1073	Args:
				1074	text: Text to parse.
				1075
				1076	Returns:
				1077	The number parsed.
				1078
				1079	Raises:
				1080	ValueError: If a floating point number couldn't be parsed.
				1081	"""
				1082	try:
				1083	# Assume Python compatible syntax.
				1084	return float(text)
				1085	except ValueError:
				1086	# Check alternative spellings.
				1087	if _FLOAT_INFINITY.match(text):
				1088	if text[0] == '-':
				1089	return float('-inf')
				1090	else:
				1091	return float('inf')
				1092	elif _FLOAT_NAN.match(text):
				1093	return float('nan')
				1094	else:
				1095	# assume '1.0f' format
				1096	try:
				1097	return float(text.rstrip('f'))
				1098	except ValueError:
				1099	raise ValueError('Couldn\'t parse float: %s' % text)
				1100
				1101
				1102	def ParseBool(text):
				1103	"""Parse a boolean value.
				1104
				1105	Args:
				1106	text: Text to parse.
				1107
				1108	Returns:
				1109	Boolean values parsed
				1110
				1111	Raises:
				1112	ValueError: If text is not a valid boolean.
				1113	"""
				1114	if text in ('true', 't', '1'):
				1115	return True
				1116	elif text in ('false', 'f', '0'):
				1117	return False
				1118	else:
				1119	raise ValueError('Expected "true" or "false".')
				1120
				1121
				1122	def ParseEnum(field, value):
				1123	"""Parse an enum value.
				1124
				1125	The value can be specified by a number (the enum value), or by
				1126	a string literal (the enum name).
				1127
				1128	Args:
				1129	field: Enum field descriptor.
				1130	value: String value.
				1131
				1132	Returns:
				1133	Enum value number.
				1134
				1135	Raises:
				1136	ValueError: If the enum value could not be parsed.
				1137	"""
				1138	enum_descriptor = field.enum_type
				1139	try:
				1140	number = int(value, 0)
				1141	except ValueError:
				1142	# Identifier.
				1143	enum_value = enum_descriptor.values_by_name.get(value, None)
				1144	if enum_value is None:
				1145	raise ValueError(
				1146	'Enum type "%s" has no value named %s.' % (
				1147	enum_descriptor.full_name, value))
				1148	else:
				1149	# Numeric value.
				1150	enum_value = enum_descriptor.values_by_number.get(number, None)
				1151	if enum_value is None:
				1152	raise ValueError(
				1153	'Enum type "%s" has no value with number %d.' % (
				1154	enum_descriptor.full_name, number))
				1155	return enum_value.number