blob: 6d77b5432fe3d2426c9a1665609707d7b8300cc8 [file] [log] [blame]
temporal40ee5512008-07-10 02:12:20 +00001# Protocol Buffers - Google's data interchange format
kenton@google.com24bf56f2008-09-24 20:31:01 +00002# Copyright 2008 Google Inc. All rights reserved.
temporal40ee5512008-07-10 02:12:20 +00003# http://code.google.com/p/protobuf/
4#
kenton@google.com24bf56f2008-09-24 20:31:01 +00005# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
temporal40ee5512008-07-10 02:12:20 +00008#
kenton@google.com24bf56f2008-09-24 20:31:01 +00009# * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11# * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15# * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
temporal40ee5512008-07-10 02:12:20 +000018#
kenton@google.com24bf56f2008-09-24 20:31:01 +000019# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
temporal40ee5512008-07-10 02:12:20 +000030
31"""Contains routines for printing protocol messages in text format."""
32
33__author__ = 'kenton@google.com (Kenton Varda)'
34
35import cStringIO
kenton@google.com80b1d622009-07-29 01:13:20 +000036import re
temporal40ee5512008-07-10 02:12:20 +000037
kenton@google.com80b1d622009-07-29 01:13:20 +000038from collections import deque
39from google.protobuf.internal import type_checkers
temporal40ee5512008-07-10 02:12:20 +000040from google.protobuf import descriptor
41
# Names exported by this module; everything else is an implementation detail.
__all__ = [ 'MessageToString', 'PrintMessage', 'PrintField',
            'PrintFieldValue', 'Merge' ]
44
45
# Infinity and NaN are not explicitly supported by Python pre-2.6, and
# float('inf') does not work on Windows (pre-2.6).
_INFINITY = 1e10000    # overflows, thus will actually be infinity.
_NAN = _INFINITY * 0   # inf * 0 is IEEE NaN.
50
51
class ParseError(Exception):
  """Thrown in case of ASCII parsing error.

  The message embeds a "line:column : reason" location produced by the
  tokenizer (see _Tokenizer._ParseError / ParseErrorPreviousToken).
  """
54
temporal40ee5512008-07-10 02:12:20 +000055
def MessageToString(message, as_utf8=False, as_one_line=False):
  """Renders a protocol message in the standard text format.

  Args:
    message: The protocol message to render.
    as_utf8: If True, non-ASCII bytes in strings are emitted raw rather
      than octal-escaped.
    as_one_line: If True, the whole message is printed on a single line
      (trailing whitespace stripped).

  Returns:
    The text-format representation as a string.
  """
  buf = cStringIO.StringIO()
  PrintMessage(message, buf, as_utf8=as_utf8, as_one_line=as_one_line)
  text = buf.getvalue()
  buf.close()
  if not as_one_line:
    return text
  return text.rstrip()
64
kenton@google.com80b1d622009-07-29 01:13:20 +000065
def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False):
  """Writes every set field of message to the stream out in text format.

  Repeated fields are emitted once per element; all other fields once.
  """
  repeated = descriptor.FieldDescriptor.LABEL_REPEATED
  for field, value in message.ListFields():
    if field.label == repeated:
      for item in value:
        PrintField(field, item, out, indent, as_utf8, as_one_line)
    else:
      PrintField(field, value, out, indent, as_utf8, as_one_line)
kenton@google.com80b1d622009-07-29 01:13:20 +000074
def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False):
  """Writes a single field name/value pair to out.

  For repeated fields, value should be a single element, not the whole
  container.
  """
  fd = descriptor.FieldDescriptor
  out.write(' ' * indent)
  if field.is_extension:
    # Extensions print their full name in brackets.  A MessageSet item
    # (optional message extension whose type is its own extension scope)
    # prints the message type name instead of the field path.
    is_message_set_item = (
        field.containing_type.GetOptions().message_set_wire_format and
        field.type == fd.TYPE_MESSAGE and
        field.message_type == field.extension_scope and
        field.label == fd.LABEL_OPTIONAL)
    if is_message_set_item:
      name = field.message_type.full_name
    else:
      name = field.full_name
    out.write('[' + name + ']')
  elif field.type == fd.TYPE_GROUP:
    # For groups, use the capitalized (type) name rather than the field name.
    out.write(field.message_type.name)
  else:
    out.write(field.name)

  if field.cpp_type != fd.CPPTYPE_MESSAGE:
    # The colon is optional in this case, but our cross-language golden files
    # don't include it.
    out.write(': ')

  PrintFieldValue(field, value, out, indent, as_utf8, as_one_line)
  if as_one_line:
    out.write(' ')
  else:
    out.write('\n')
temporal40ee5512008-07-10 02:12:20 +0000106
kenton@google.com80b1d622009-07-29 01:13:20 +0000107
def PrintFieldValue(field, value, out, indent=0,
                    as_utf8=False, as_one_line=False):
  """Writes a single field value (without the field name) to out.

  For repeated fields, value should be a single element.
  """
  fd = descriptor.FieldDescriptor
  cpp_type = field.cpp_type
  if cpp_type == fd.CPPTYPE_MESSAGE:
    # Sub-messages are wrapped in braces; multi-line mode indents the body.
    if as_one_line:
      out.write(' { ')
      PrintMessage(value, out, indent, as_utf8, as_one_line)
      out.write('}')
    else:
      out.write(' {\n')
      PrintMessage(value, out, indent + 2, as_utf8, as_one_line)
      out.write(' ' * indent + '}')
  elif cpp_type == fd.CPPTYPE_ENUM:
    out.write(field.enum_type.values_by_number[value].name)
  elif cpp_type == fd.CPPTYPE_STRING:
    # Unicode values are re-encoded to UTF-8 bytes before escaping.
    if type(value) is unicode:
      escaped = _CEscape(value.encode('utf-8'), as_utf8)
    else:
      escaped = _CEscape(value, as_utf8)
    out.write('"' + escaped + '"')
  elif cpp_type == fd.CPPTYPE_BOOL:
    if value:
      out.write("true")
    else:
      out.write("false")
  else:
    out.write(str(value))
138
kenton@google.com80b1d622009-07-29 01:13:20 +0000139
def Merge(text, message):
  """Merges an ASCII representation of a protocol message into a message.

  Args:
    text: Message ASCII representation.
    message: A protocol buffer message to merge into.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  tok = _Tokenizer(text)
  while not tok.AtEnd():
    _MergeField(tok, message)
153
154
def _MergeField(tokenizer, message):
  """Merges a single protocol message field into a message.

  Consumes one "name: value" or "Name { ... }" entry from the tokenizer and
  records it into message, recursing for sub-messages.

  Args:
    tokenizer: A tokenizer to parse the field name and values.
    message: A protocol message to record the data.

  Raises:
    ParseError: In case of ASCII parsing problems.
  """
  message_descriptor = message.DESCRIPTOR
  if tokenizer.TryConsume('['):
    # Extension field: a dotted full name inside brackets, e.g. [pkg.ext].
    name = [tokenizer.ConsumeIdentifier()]
    while tokenizer.TryConsume('.'):
      name.append(tokenizer.ConsumeIdentifier())
    name = '.'.join(name)

    if not message_descriptor.is_extendable:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" does not have extensions.' %
          message_descriptor.full_name)
    field = message.Extensions._FindExtensionByName(name)
    if not field:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" not registered.' % name)
    elif message_descriptor != field.containing_type:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" does not extend message type "%s".' % (
          name, message_descriptor.full_name))
    tokenizer.Consume(']')
  else:
    name = tokenizer.ConsumeIdentifier()
    field = message_descriptor.fields_by_name.get(name, None)

    # Group names are expected to be capitalized as they appear in the
    # .proto file, which actually matches their type names, not their field
    # names.
    if not field:
      # Retry with the lowercased name, but only accept it for groups.
      field = message_descriptor.fields_by_name.get(name.lower(), None)
      if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP:
        field = None

    # A group must be spelled exactly as its capitalized type name.
    if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and
        field.message_type.name != name):
      field = None

    if not field:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" has no field named "%s".' % (
          message_descriptor.full_name, name))

  if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    # The colon before a sub-message is optional in text format.
    tokenizer.TryConsume(':')

    # Sub-messages may be delimited by either <...> or {...}.
    if tokenizer.TryConsume('<'):
      end_token = '>'
    else:
      tokenizer.Consume('{')
      end_token = '}'

    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      if field.is_extension:
        sub_message = message.Extensions[field].add()
      else:
        sub_message = getattr(message, field.name).add()
    else:
      if field.is_extension:
        sub_message = message.Extensions[field]
      else:
        sub_message = getattr(message, field.name)
      # Mark the singular sub-message as present even if it stays empty.
      sub_message.SetInParent()

    while not tokenizer.TryConsume(end_token):
      if tokenizer.AtEnd():
        raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token))
      _MergeField(tokenizer, sub_message)
  else:
    _MergeScalarField(tokenizer, message, field)
233
234
def _MergeScalarField(tokenizer, message, field):
  """Merges a single scalar field value into a message.

  Consumes the ':' and the value token(s) for field and stores the parsed
  value (appending for repeated fields).

  Args:
    tokenizer: A tokenizer to parse the field value.
    message: A protocol message to record the data.
    field: The descriptor of the field to be merged.

  Raises:
    ParseError: In case of ASCII parsing problems.
    RuntimeError: On runtime errors.
  """
  tokenizer.Consume(':')
  fd = descriptor.FieldDescriptor
  ftype = field.type

  if ftype in (fd.TYPE_INT32, fd.TYPE_SINT32, fd.TYPE_SFIXED32):
    value = tokenizer.ConsumeInt32()
  elif ftype in (fd.TYPE_INT64, fd.TYPE_SINT64, fd.TYPE_SFIXED64):
    value = tokenizer.ConsumeInt64()
  elif ftype in (fd.TYPE_UINT32, fd.TYPE_FIXED32):
    value = tokenizer.ConsumeUint32()
  elif ftype in (fd.TYPE_UINT64, fd.TYPE_FIXED64):
    value = tokenizer.ConsumeUint64()
  elif ftype in (fd.TYPE_FLOAT, fd.TYPE_DOUBLE):
    value = tokenizer.ConsumeFloat()
  elif ftype == fd.TYPE_BOOL:
    value = tokenizer.ConsumeBool()
  elif ftype == fd.TYPE_STRING:
    value = tokenizer.ConsumeString()
  elif ftype == fd.TYPE_BYTES:
    value = tokenizer.ConsumeByteString()
  elif ftype == fd.TYPE_ENUM:
    # Enum can be specified by a number (the enum value), or by
    # a string literal (the enum name).
    enum_descriptor = field.enum_type
    if tokenizer.LookingAtInteger():
      number = tokenizer.ConsumeInt32()
      enum_value = enum_descriptor.values_by_number.get(number, None)
      if enum_value is None:
        raise tokenizer.ParseErrorPreviousToken(
            'Enum type "%s" has no value with number %d.' % (
            enum_descriptor.full_name, number))
    else:
      identifier = tokenizer.ConsumeIdentifier()
      enum_value = enum_descriptor.values_by_name.get(identifier, None)
      if enum_value is None:
        raise tokenizer.ParseErrorPreviousToken(
            'Enum type "%s" has no value named %s.' % (
            enum_descriptor.full_name, identifier))
    value = enum_value.number
  else:
    raise RuntimeError('Unknown field type %d' % field.type)

  if field.label == fd.LABEL_REPEATED:
    if field.is_extension:
      message.Extensions[field].append(value)
    else:
      getattr(message, field.name).append(value)
  else:
    if field.is_extension:
      message.Extensions[field] = value
    else:
      setattr(message, field.name, value)
305
306
307class _Tokenizer(object):
308 """Protocol buffer ASCII representation tokenizer.
309
310 This class handles the lower level string parsing by splitting it into
311 meaningful tokens.
312
313 It was directly ported from the Java protocol buffer API.
314 """
315
316 _WHITESPACE = re.compile('(\\s|(#.*$))+', re.MULTILINE)
317 _TOKEN = re.compile(
318 '[a-zA-Z_][0-9a-zA-Z_+-]*|' # an identifier
319 '[0-9+-][0-9a-zA-Z_.+-]*|' # a number
320 '\"([^\"\n\\\\]|\\\\.)*(\"|\\\\?$)|' # a double-quoted string
kenton@google.comeef5f832009-12-23 01:32:45 +0000321 '\'([^\'\n\\\\]|\\\\.)*(\'|\\\\?$)') # a single-quoted string
kenton@google.com80b1d622009-07-29 01:13:20 +0000322 _IDENTIFIER = re.compile('\w+')
323 _INTEGER_CHECKERS = [type_checkers.Uint32ValueChecker(),
324 type_checkers.Int32ValueChecker(),
325 type_checkers.Uint64ValueChecker(),
326 type_checkers.Int64ValueChecker()]
327 _FLOAT_INFINITY = re.compile('-?inf(inity)?f?', re.IGNORECASE)
328 _FLOAT_NAN = re.compile("nanf?", re.IGNORECASE)
329
330 def __init__(self, text_message):
331 self._text_message = text_message
332
333 self._position = 0
334 self._line = -1
335 self._column = 0
336 self._token_start = None
337 self.token = ''
338 self._lines = deque(text_message.split('\n'))
339 self._current_line = ''
340 self._previous_line = 0
341 self._previous_column = 0
342 self._SkipWhitespace()
343 self.NextToken()
344
345 def AtEnd(self):
346 """Checks the end of the text was reached.
347
348 Returns:
349 True iff the end was reached.
350 """
liujisi@google.com33165fe2010-11-02 13:14:58 +0000351 return self.token == ''
kenton@google.com80b1d622009-07-29 01:13:20 +0000352
353 def _PopLine(self):
liujisi@google.com33165fe2010-11-02 13:14:58 +0000354 while len(self._current_line) <= self._column:
kenton@google.com80b1d622009-07-29 01:13:20 +0000355 if not self._lines:
356 self._current_line = ''
357 return
358 self._line += 1
359 self._column = 0
360 self._current_line = self._lines.popleft()
361
362 def _SkipWhitespace(self):
363 while True:
364 self._PopLine()
liujisi@google.com33165fe2010-11-02 13:14:58 +0000365 match = self._WHITESPACE.match(self._current_line, self._column)
kenton@google.com80b1d622009-07-29 01:13:20 +0000366 if not match:
367 break
368 length = len(match.group(0))
kenton@google.com80b1d622009-07-29 01:13:20 +0000369 self._column += length
370
371 def TryConsume(self, token):
372 """Tries to consume a given piece of text.
373
374 Args:
375 token: Text to consume.
376
377 Returns:
378 True iff the text was consumed.
379 """
380 if self.token == token:
381 self.NextToken()
382 return True
383 return False
384
385 def Consume(self, token):
386 """Consumes a piece of text.
387
388 Args:
389 token: Text to consume.
390
391 Raises:
392 ParseError: If the text couldn't be consumed.
393 """
394 if not self.TryConsume(token):
395 raise self._ParseError('Expected "%s".' % token)
396
397 def LookingAtInteger(self):
398 """Checks if the current token is an integer.
399
400 Returns:
401 True iff the current token is an integer.
402 """
403 if not self.token:
404 return False
405 c = self.token[0]
406 return (c >= '0' and c <= '9') or c == '-' or c == '+'
407
408 def ConsumeIdentifier(self):
409 """Consumes protocol message field identifier.
410
411 Returns:
412 Identifier string.
413
414 Raises:
415 ParseError: If an identifier couldn't be consumed.
416 """
417 result = self.token
liujisi@google.com33165fe2010-11-02 13:14:58 +0000418 if not self._IDENTIFIER.match(result):
kenton@google.com80b1d622009-07-29 01:13:20 +0000419 raise self._ParseError('Expected identifier.')
420 self.NextToken()
421 return result
422
423 def ConsumeInt32(self):
424 """Consumes a signed 32bit integer number.
425
426 Returns:
427 The integer parsed.
428
429 Raises:
430 ParseError: If a signed 32bit integer couldn't be consumed.
431 """
432 try:
433 result = self._ParseInteger(self.token, is_signed=True, is_long=False)
434 except ValueError, e:
435 raise self._IntegerParseError(e)
436 self.NextToken()
437 return result
438
439 def ConsumeUint32(self):
440 """Consumes an unsigned 32bit integer number.
441
442 Returns:
443 The integer parsed.
444
445 Raises:
446 ParseError: If an unsigned 32bit integer couldn't be consumed.
447 """
448 try:
449 result = self._ParseInteger(self.token, is_signed=False, is_long=False)
450 except ValueError, e:
451 raise self._IntegerParseError(e)
452 self.NextToken()
453 return result
454
455 def ConsumeInt64(self):
456 """Consumes a signed 64bit integer number.
457
458 Returns:
459 The integer parsed.
460
461 Raises:
462 ParseError: If a signed 64bit integer couldn't be consumed.
463 """
464 try:
465 result = self._ParseInteger(self.token, is_signed=True, is_long=True)
466 except ValueError, e:
467 raise self._IntegerParseError(e)
468 self.NextToken()
469 return result
470
471 def ConsumeUint64(self):
472 """Consumes an unsigned 64bit integer number.
473
474 Returns:
475 The integer parsed.
476
477 Raises:
478 ParseError: If an unsigned 64bit integer couldn't be consumed.
479 """
480 try:
481 result = self._ParseInteger(self.token, is_signed=False, is_long=True)
482 except ValueError, e:
483 raise self._IntegerParseError(e)
484 self.NextToken()
485 return result
486
487 def ConsumeFloat(self):
488 """Consumes an floating point number.
489
490 Returns:
491 The number parsed.
492
493 Raises:
494 ParseError: If a floating point number couldn't be consumed.
495 """
496 text = self.token
liujisi@google.com33165fe2010-11-02 13:14:58 +0000497 if self._FLOAT_INFINITY.match(text):
kenton@google.com80b1d622009-07-29 01:13:20 +0000498 self.NextToken()
499 if text.startswith('-'):
kenton@google.comd0047c42009-12-23 02:01:01 +0000500 return -_INFINITY
501 return _INFINITY
kenton@google.com80b1d622009-07-29 01:13:20 +0000502
liujisi@google.com33165fe2010-11-02 13:14:58 +0000503 if self._FLOAT_NAN.match(text):
kenton@google.com80b1d622009-07-29 01:13:20 +0000504 self.NextToken()
kenton@google.comd0047c42009-12-23 02:01:01 +0000505 return _NAN
kenton@google.com80b1d622009-07-29 01:13:20 +0000506
507 try:
508 result = float(text)
509 except ValueError, e:
510 raise self._FloatParseError(e)
511 self.NextToken()
512 return result
513
514 def ConsumeBool(self):
515 """Consumes a boolean value.
516
517 Returns:
518 The bool parsed.
519
520 Raises:
521 ParseError: If a boolean value couldn't be consumed.
522 """
liujisi@google.com33165fe2010-11-02 13:14:58 +0000523 if self.token in ('true', 't', '1'):
kenton@google.com80b1d622009-07-29 01:13:20 +0000524 self.NextToken()
525 return True
liujisi@google.com33165fe2010-11-02 13:14:58 +0000526 elif self.token in ('false', 'f', '0'):
kenton@google.com80b1d622009-07-29 01:13:20 +0000527 self.NextToken()
528 return False
529 else:
530 raise self._ParseError('Expected "true" or "false".')
531
532 def ConsumeString(self):
533 """Consumes a string value.
534
535 Returns:
536 The string parsed.
537
538 Raises:
539 ParseError: If a string value couldn't be consumed.
540 """
liujisi@google.com33165fe2010-11-02 13:14:58 +0000541 bytes = self.ConsumeByteString()
542 try:
543 return unicode(bytes, 'utf-8')
544 except UnicodeDecodeError, e:
545 raise self._StringParseError(e)
kenton@google.com80b1d622009-07-29 01:13:20 +0000546
547 def ConsumeByteString(self):
548 """Consumes a byte array value.
549
550 Returns:
551 The array parsed (as a string).
552
553 Raises:
554 ParseError: If a byte array value couldn't be consumed.
555 """
kenton@google.com53530182010-01-07 02:08:03 +0000556 list = [self._ConsumeSingleByteString()]
kenton@google.comeef5f832009-12-23 01:32:45 +0000557 while len(self.token) > 0 and self.token[0] in ('\'', '"'):
kenton@google.com53530182010-01-07 02:08:03 +0000558 list.append(self._ConsumeSingleByteString())
kenton@google.comeef5f832009-12-23 01:32:45 +0000559 return "".join(list)
560
kenton@google.com53530182010-01-07 02:08:03 +0000561 def _ConsumeSingleByteString(self):
562 """Consume one token of a string literal.
563
564 String literals (whether bytes or text) can come in multiple adjacent
565 tokens which are automatically concatenated, like in C or Python. This
566 method only consumes one token.
567 """
kenton@google.com80b1d622009-07-29 01:13:20 +0000568 text = self.token
569 if len(text) < 1 or text[0] not in ('\'', '"'):
570 raise self._ParseError('Exptected string.')
571
572 if len(text) < 2 or text[-1] != text[0]:
573 raise self._ParseError('String missing ending quote.')
574
575 try:
576 result = _CUnescape(text[1:-1])
577 except ValueError, e:
578 raise self._ParseError(str(e))
579 self.NextToken()
580 return result
581
582 def _ParseInteger(self, text, is_signed=False, is_long=False):
583 """Parses an integer.
584
585 Args:
586 text: The text to parse.
587 is_signed: True if a signed integer must be parsed.
588 is_long: True if a long integer must be parsed.
589
590 Returns:
591 The integer value.
592
593 Raises:
594 ValueError: Thrown Iff the text is not a valid integer.
595 """
596 pos = 0
597 if text.startswith('-'):
598 pos += 1
599
600 base = 10
601 if text.startswith('0x', pos) or text.startswith('0X', pos):
602 base = 16
603 elif text.startswith('0', pos):
604 base = 8
605
606 # Do the actual parsing. Exception handling is propagated to caller.
607 result = int(text, base)
608
609 # Check if the integer is sane. Exceptions handled by callers.
610 checker = self._INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
611 checker.CheckValue(result)
612 return result
613
614 def ParseErrorPreviousToken(self, message):
615 """Creates and *returns* a ParseError for the previously read token.
616
617 Args:
618 message: A message to set for the exception.
619
620 Returns:
621 A ParseError instance.
622 """
623 return ParseError('%d:%d : %s' % (
624 self._previous_line + 1, self._previous_column + 1, message))
625
626 def _ParseError(self, message):
627 """Creates and *returns* a ParseError for the current token."""
628 return ParseError('%d:%d : %s' % (
liujisi@google.com33165fe2010-11-02 13:14:58 +0000629 self._line + 1, self._column - len(self.token) + 1, message))
kenton@google.com80b1d622009-07-29 01:13:20 +0000630
631 def _IntegerParseError(self, e):
632 return self._ParseError('Couldn\'t parse integer: ' + str(e))
633
634 def _FloatParseError(self, e):
635 return self._ParseError('Couldn\'t parse number: ' + str(e))
636
liujisi@google.com33165fe2010-11-02 13:14:58 +0000637 def _StringParseError(self, e):
638 return self._ParseError('Couldn\'t parse string: ' + str(e))
639
kenton@google.com80b1d622009-07-29 01:13:20 +0000640 def NextToken(self):
641 """Reads the next meaningful token."""
642 self._previous_line = self._line
643 self._previous_column = self._column
liujisi@google.com33165fe2010-11-02 13:14:58 +0000644
645 self._column += len(self.token)
646 self._SkipWhitespace()
647
648 if not self._lines and len(self._current_line) <= self._column:
kenton@google.com80b1d622009-07-29 01:13:20 +0000649 self.token = ''
650 return
kenton@google.com80b1d622009-07-29 01:13:20 +0000651
liujisi@google.com33165fe2010-11-02 13:14:58 +0000652 match = self._TOKEN.match(self._current_line, self._column)
kenton@google.com80b1d622009-07-29 01:13:20 +0000653 if match:
654 token = match.group(0)
kenton@google.com80b1d622009-07-29 01:13:20 +0000655 self.token = token
656 else:
liujisi@google.com33165fe2010-11-02 13:14:58 +0000657 self.token = self._current_line[self._column]
kenton@google.com80b1d622009-07-29 01:13:20 +0000658
659
temporal40ee5512008-07-10 02:12:20 +0000660# text.encode('string_escape') does not seem to satisfy our needs as it
661# encodes unprintable characters using two-digit hex escapes whereas our
662# C++ unescaping function allows hex escapes to be any length. So,
663# "\0011".encode('string_escape') ends up being "\\x011", which will be
664# decoded in C++ as a single-character string with char code 0x11.
liujisi@google.com33165fe2010-11-02 13:14:58 +0000665def _CEscape(text, as_utf8):
temporal40ee5512008-07-10 02:12:20 +0000666 def escape(c):
667 o = ord(c)
668 if o == 10: return r"\n" # optional escape
669 if o == 13: return r"\r" # optional escape
670 if o == 9: return r"\t" # optional escape
671 if o == 39: return r"\'" # optional escape
672
673 if o == 34: return r'\"' # necessary escape
674 if o == 92: return r"\\" # necessary escape
675
liujisi@google.com33165fe2010-11-02 13:14:58 +0000676 # necessary escapes
677 if not as_utf8 and (o >= 127 or o < 32): return "\\%03o" % o
temporal40ee5512008-07-10 02:12:20 +0000678 return c
679 return "".join([escape(c) for c in text])
kenton@google.com80b1d622009-07-29 01:13:20 +0000680
681
# Matches a C-style hex escape: backslash-x followed by one or two hex digits
# (two-digit alternative listed first so the longest match wins).
_CUNESCAPE_HEX = re.compile('\\\\x([0-9a-fA-F]{2}|[0-9a-fA-F])')
kenton@google.com80b1d622009-07-29 01:13:20 +0000683
684
def _CUnescape(text):
  """Reverses _CEscape: decodes C-style escape sequences in text."""
  def _HexToChar(match):
    # group(1) holds the hex digits captured after the '\x'.
    return chr(int(match.group(1), 16))
  # Expand hex escapes by hand first because the 'string_escape' codec
  # doesn't allow single-digit hex escapes (like '\xf').
  expanded = _CUNESCAPE_HEX.sub(_HexToChar, text)
  return expanded.decode('string_escape')