Blame - python/helpers/docutils/utils.py - platform/tools/idea

blob: e5443bd7461776e9ed97e64705e8a0e27fa4c377 [file] [log] [blame]

Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame^]	1	# $Id: utils.py 6394 2010-08-20 11:26:58Z milde $
				2	# Author: David Goodger <goodger@python.org>
				3	# Copyright: This module has been placed in the public domain.
				4
				5	"""
				6	Miscellaneous utilities for the documentation utilities.
				7	"""
				8
				9	__docformat__ = 'reStructuredText'
				10
				11	import sys
				12	import os
				13	import os.path
				14	import warnings
				15	import unicodedata
				16	from docutils import ApplicationError, DataError
				17	from docutils import nodes
				18	from docutils._compat import bytes
				19
				20
				21	class SystemMessage(ApplicationError):
				22
				23	def __init__(self, system_message, level):
				24	Exception.__init__(self, system_message.astext())
				25	self.level = level
				26
				27
				28	class SystemMessagePropagation(ApplicationError): pass
				29
				30
				31	class Reporter:
				32
				33	"""
				34	Info/warning/error reporter and ``system_message`` element generator.
				35
				36	Five levels of system messages are defined, along with corresponding
				37	methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.
				38
				39	There is typically one Reporter object per process. A Reporter object is
				40	instantiated with thresholds for reporting (generating warnings) and
				41	halting processing (raising exceptions), a switch to turn debug output on
				42	or off, and an I/O stream for warnings. These are stored as instance
				43	attributes.
				44
				45	When a system message is generated, its level is compared to the stored
				46	thresholds, and a warning or error is generated as appropriate. Debug
				47	messages are produced iff the stored debug switch is on, independently of
				48	other thresholds. Message output is sent to the stored warning stream if
				49	not set to ''.
				50
				51	The Reporter class also employs a modified form of the "Observer" pattern
				52	[GoF95]_ to track system messages generated. The `attach_observer` method
				53	should be called before parsing, with a bound method or function which
				54	accepts system messages. The observer can be removed with
				55	`detach_observer`, and another added in its place.
				56
				57	.. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
				58	Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
				59	1995.
				60	"""
				61
				62	levels = 'DEBUG INFO WARNING ERROR SEVERE'.split()
				63	"""List of names for system message levels, indexed by level."""
				64
				65	# system message level constants:
				66	(DEBUG_LEVEL,
				67	INFO_LEVEL,
				68	WARNING_LEVEL,
				69	ERROR_LEVEL,
				70	SEVERE_LEVEL) = range(5)
				71
				72	def __init__(self, source, report_level, halt_level, stream=None,
				73	debug=0, encoding=None, error_handler='backslashreplace'):
				74	"""
				75	:Parameters:
				76	- `source`: The path to or description of the source data.
				77	- `report_level`: The level at or above which warning output will
				78	be sent to `stream`.
				79	- `halt_level`: The level at or above which `SystemMessage`
				80	exceptions will be raised, halting execution.
				81	- `debug`: Show debug (level=0) system messages?
				82	- `stream`: Where warning output is sent. Can be file-like (has a
				83	``.write`` method), a string (file name, opened for writing),
				84	'' (empty string, for discarding all stream messages) or
				85	`None` (implies `sys.stderr`; default).
				86	- `encoding`: The output encoding.
				87	- `error_handler`: The error handler for stderr output encoding.
				88	"""
				89
				90	self.source = source
				91	"""The path to or description of the source data."""
				92
				93	self.error_handler = error_handler
				94	"""The character encoding error handler."""
				95
				96	self.debug_flag = debug
				97	"""Show debug (level=0) system messages?"""
				98
				99	self.report_level = report_level
				100	"""The level at or above which warning output will be sent
				101	to `self.stream`."""
				102
				103	self.halt_level = halt_level
				104	"""The level at or above which `SystemMessage` exceptions
				105	will be raised, halting execution."""
				106
				107	if stream is None:
				108	stream = sys.stderr
				109	elif stream and type(stream) in (unicode, bytes):
				110	# if `stream` is a file name, open it
				111	if type(stream) is bytes:
				112	stream = open(stream, 'w')
				113	else:
				114	stream = open(stream.encode(), 'w')
				115
				116	self.stream = stream
				117	"""Where warning output is sent."""
				118
				119	if encoding is None:
				120	try:
				121	encoding = stream.encoding
				122	except AttributeError:
				123	pass
				124
				125	self.encoding = encoding or 'ascii'
				126	"""The output character encoding."""
				127
				128	self.observers = []
				129	"""List of bound methods or functions to call with each system_message
				130	created."""
				131
				132	self.max_level = -1
				133	"""The highest level system message generated so far."""
				134
				135	def set_conditions(self, category, report_level, halt_level,
				136	stream=None, debug=0):
				137	warnings.warn('docutils.utils.Reporter.set_conditions deprecated; '
				138	'set attributes via configuration settings or directly',
				139	DeprecationWarning, stacklevel=2)
				140	self.report_level = report_level
				141	self.halt_level = halt_level
				142	if stream is None:
				143	stream = sys.stderr
				144	self.stream = stream
				145	self.debug_flag = debug
				146
				147	def attach_observer(self, observer):
				148	"""
				149	The `observer` parameter is a function or bound method which takes one
				150	argument, a `nodes.system_message` instance.
				151	"""
				152	self.observers.append(observer)
				153
				154	def detach_observer(self, observer):
				155	self.observers.remove(observer)
				156
				157	def notify_observers(self, message):
				158	for observer in self.observers:
				159	observer(message)
				160
				161	def system_message(self, level, message, children, *kwargs):
				162	"""
				163	Return a system_message object.
				164
				165	Raise an exception or generate a warning if appropriate.
				166	"""
				167	attributes = kwargs.copy()
				168	if 'base_node' in kwargs:
				169	source, line = get_source_line(kwargs['base_node'])
				170	del attributes['base_node']
				171	if source is not None:
				172	attributes.setdefault('source', source)
				173	if line is not None:
				174	attributes.setdefault('line', line)
				175	# assert source is not None, "node has line- but no source-argument"
				176	if not 'source' in attributes: # 'line' is absolute line number
				177	try: # look up (source, line-in-source)
				178	source, line = self.locator(attributes.get('line'))
				179	# print "locator lookup", kwargs.get('line'), "->", source, line
				180	except AttributeError:
				181	source, line = None, None
				182	if source is not None:
				183	attributes['source'] = source
				184	if line is not None:
				185	attributes['line'] = line
				186	# assert attributes['line'] is not None, (message, kwargs)
				187	# assert attributes['source'] is not None, (message, kwargs)
				188	attributes.setdefault('source', self.source)
				189
				190	msg = nodes.system_message(message, level=level,
				191	type=self.levels[level],
				192	children, *attributes)
				193	if self.stream and (level >= self.report_level
				194	or self.debug_flag and level == self.DEBUG_LEVEL
				195	or level >= self.halt_level):
				196	msgtext = msg.astext() + '\n'
				197	try:
				198	self.stream.write(msgtext)
				199	except UnicodeEncodeError:
				200	self.stream.write(msgtext.encode(self.encoding,
				201	self.error_handler))
				202	if level >= self.halt_level:
				203	raise SystemMessage(msg, level)
				204	if level > self.DEBUG_LEVEL or self.debug_flag:
				205	self.notify_observers(msg)
				206	self.max_level = max(level, self.max_level)
				207	return msg
				208
				209	def debug(self, args, *kwargs):
				210	"""
				211	Level-0, "DEBUG": an internal reporting issue. Typically, there is no
				212	effect on the processing. Level-0 system messages are handled
				213	separately from the others.
				214	"""
				215	if self.debug_flag:
				216	return self.system_message(self.DEBUG_LEVEL, args, *kwargs)
				217
				218	def info(self, args, *kwargs):
				219	"""
				220	Level-1, "INFO": a minor issue that can be ignored. Typically there is
				221	no effect on processing, and level-1 system messages are not reported.
				222	"""
				223	return self.system_message(self.INFO_LEVEL, args, *kwargs)
				224
				225	def warning(self, args, *kwargs):
				226	"""
				227	Level-2, "WARNING": an issue that should be addressed. If ignored,
				228	there may be unpredictable problems with the output.
				229	"""
				230	return self.system_message(self.WARNING_LEVEL, args, *kwargs)
				231
				232	def error(self, args, *kwargs):
				233	"""
				234	Level-3, "ERROR": an error that should be addressed. If ignored, the
				235	output will contain errors.
				236	"""
				237	return self.system_message(self.ERROR_LEVEL, args, *kwargs)
				238
				239	def severe(self, args, *kwargs):
				240	"""
				241	Level-4, "SEVERE": a severe error that must be addressed. If ignored,
				242	the output will contain severe errors. Typically level-4 system
				243	messages are turned into exceptions which halt processing.
				244	"""
				245	return self.system_message(self.SEVERE_LEVEL, args, *kwargs)
				246
				247
				248	class ExtensionOptionError(DataError): pass
				249	class BadOptionError(ExtensionOptionError): pass
				250	class BadOptionDataError(ExtensionOptionError): pass
				251	class DuplicateOptionError(ExtensionOptionError): pass
				252
				253
				254	def extract_extension_options(field_list, options_spec):
				255	"""
				256	Return a dictionary mapping extension option names to converted values.
				257
				258	:Parameters:
				259	- `field_list`: A flat field list without field arguments, where each
				260	field body consists of a single paragraph only.
				261	- `options_spec`: Dictionary mapping known option names to a
				262	conversion function such as `int` or `float`.
				263
				264	:Exceptions:
				265	- `KeyError` for unknown option names.
				266	- `ValueError` for invalid option values (raised by the conversion
				267	function).
				268	- `TypeError` for invalid option value types (raised by conversion
				269	function).
				270	- `DuplicateOptionError` for duplicate options.
				271	- `BadOptionError` for invalid fields.
				272	- `BadOptionDataError` for invalid option data (missing name,
				273	missing data, bad quotes, etc.).
				274	"""
				275	option_list = extract_options(field_list)
				276	option_dict = assemble_option_dict(option_list, options_spec)
				277	return option_dict
				278
				279	def extract_options(field_list):
				280	"""
				281	Return a list of option (name, value) pairs from field names & bodies.
				282
				283	:Parameter:
				284	`field_list`: A flat field list, where each field name is a single
				285	word and each field body consists of a single paragraph only.
				286
				287	:Exceptions:
				288	- `BadOptionError` for invalid fields.
				289	- `BadOptionDataError` for invalid option data (missing name,
				290	missing data, bad quotes, etc.).
				291	"""
				292	option_list = []
				293	for field in field_list:
				294	if len(field[0].astext().split()) != 1:
				295	raise BadOptionError(
				296	'extension option field name may not contain multiple words')
				297	name = str(field[0].astext().lower())
				298	body = field[1]
				299	if len(body) == 0:
				300	data = None
				301	elif len(body) > 1 or not isinstance(body[0], nodes.paragraph) \
				302	or len(body[0]) != 1 or not isinstance(body[0][0], nodes.Text):
				303	raise BadOptionDataError(
				304	'extension option field body may contain\n'
				305	'a single paragraph only (option "%s")' % name)
				306	else:
				307	data = body[0][0].astext()
				308	option_list.append((name, data))
				309	return option_list
				310
				311	def assemble_option_dict(option_list, options_spec):
				312	"""
				313	Return a mapping of option names to values.
				314
				315	:Parameters:
				316	- `option_list`: A list of (name, value) pairs (the output of
				317	`extract_options()`).
				318	- `options_spec`: Dictionary mapping known option names to a
				319	conversion function such as `int` or `float`.
				320
				321	:Exceptions:
				322	- `KeyError` for unknown option names.
				323	- `DuplicateOptionError` for duplicate options.
				324	- `ValueError` for invalid option values (raised by conversion
				325	function).
				326	- `TypeError` for invalid option value types (raised by conversion
				327	function).
				328	"""
				329	options = {}
				330	for name, value in option_list:
				331	convertor = options_spec[name] # raises KeyError if unknown
				332	if convertor is None:
				333	raise KeyError(name) # or if explicitly disabled
				334	if name in options:
				335	raise DuplicateOptionError('duplicate option "%s"' % name)
				336	try:
				337	options[name] = convertor(value)
				338	except (ValueError, TypeError), detail:
				339	raise detail.__class__('(option: "%s"; value: %r)\n%s'
				340	% (name, value, ' '.join(detail.args)))
				341	return options
				342
				343
				344	class NameValueError(DataError): pass
				345
				346
				347	def decode_path(path):
				348	"""
				349	Decode file/path string. Return `nodes.reprunicode` object.
				350
				351	Convert to Unicode without the UnicodeDecode error of the
				352	implicit 'ascii:strict' decoding.
				353	"""
				354	# see also http://article.gmane.org/gmane.text.docutils.user/2905
				355	try:
				356	path = path.decode(sys.getfilesystemencoding(), 'strict')
				357	except AttributeError: # default value None has no decode method
				358	return nodes.reprunicode(path)
				359	except UnicodeDecodeError:
				360	try:
				361	path = path.decode('utf-8', 'strict')
				362	except UnicodeDecodeError:
				363	path = path.decode('ascii', 'replace')
				364	return nodes.reprunicode(path)
				365
				366
				367	def extract_name_value(line):
				368	"""
				369	Return a list of (name, value) from a line of the form "name=value ...".
				370
				371	:Exception:
				372	`NameValueError` for invalid input (missing name, missing data, bad
				373	quotes, etc.).
				374	"""
				375	attlist = []
				376	while line:
				377	equals = line.find('=')
				378	if equals == -1:
				379	raise NameValueError('missing "="')
				380	attname = line[:equals].strip()
				381	if equals == 0 or not attname:
				382	raise NameValueError(
				383	'missing attribute name before "="')
				384	line = line[equals+1:].lstrip()
				385	if not line:
				386	raise NameValueError(
				387	'missing value after "%s="' % attname)
				388	if line[0] in '\'"':
				389	endquote = line.find(line[0], 1)
				390	if endquote == -1:
				391	raise NameValueError(
				392	'attribute "%s" missing end quote (%s)'
				393	% (attname, line[0]))
				394	if len(line) > endquote + 1 and line[endquote + 1].strip():
				395	raise NameValueError(
				396	'attribute "%s" end quote (%s) not followed by '
				397	'whitespace' % (attname, line[0]))
				398	data = line[1:endquote]
				399	line = line[endquote+1:].lstrip()
				400	else:
				401	space = line.find(' ')
				402	if space == -1:
				403	data = line
				404	line = ''
				405	else:
				406	data = line[:space]
				407	line = line[space+1:].lstrip()
				408	attlist.append((attname.lower(), data))
				409	return attlist
				410
				411	def new_reporter(source_path, settings):
				412	"""
				413	Return a new Reporter object.
				414
				415	:Parameters:
				416	`source` : string
				417	The path to or description of the source text of the document.
				418	`settings` : optparse.Values object
				419	Runtime settings.
				420	"""
				421	reporter = Reporter(
				422	source_path, settings.report_level, settings.halt_level,
				423	stream=settings.warning_stream, debug=settings.debug,
				424	encoding=settings.error_encoding,
				425	error_handler=settings.error_encoding_error_handler)
				426	return reporter
				427
				428	def new_document(source_path, settings=None):
				429	"""
				430	Return a new empty document object.
				431
				432	:Parameters:
				433	`source_path` : string
				434	The path to or description of the source text of the document.
				435	`settings` : optparse.Values object
				436	Runtime settings. If none are provided, a default core set will
				437	be used. If you will use the document object with any Docutils
				438	components, you must provide their default settings as well. For
				439	example, if parsing, at least provide the parser settings,
				440	obtainable as follows::
				441
				442	settings = docutils.frontend.OptionParser(
				443	components=(docutils.parsers.rst.Parser,)
				444	).get_default_values()
				445	"""
				446	from docutils import frontend
				447	if settings is None:
				448	settings = frontend.OptionParser().get_default_values()
				449	source_path = decode_path(source_path)
				450	reporter = new_reporter(source_path, settings)
				451	document = nodes.document(settings, reporter, source=source_path)
				452	document.note_source(source_path, -1)
				453	return document
				454
				455	def clean_rcs_keywords(paragraph, keyword_substitutions):
				456	if len(paragraph) == 1 and isinstance(paragraph[0], nodes.Text):
				457	textnode = paragraph[0]
				458	for pattern, substitution in keyword_substitutions:
				459	match = pattern.search(textnode)
				460	if match:
				461	paragraph[0] = nodes.Text(pattern.sub(substitution, textnode))
				462	return
				463
				464	def relative_path(source, target):
				465	"""
				466	Build and return a path to `target`, relative to `source` (both files).
				467
				468	If there is no common prefix, return the absolute path to `target`.
				469	"""
				470	source_parts = os.path.abspath(source or 'dummy_file').split(os.sep)
				471	target_parts = os.path.abspath(target).split(os.sep)
				472	# Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
				473	if source_parts[:2] != target_parts[:2]:
				474	# Nothing in common between paths.
				475	# Return absolute path, using '/' for URLs:
				476	return '/'.join(target_parts)
				477	source_parts.reverse()
				478	target_parts.reverse()
				479	while (source_parts and target_parts
				480	and source_parts[-1] == target_parts[-1]):
				481	# Remove path components in common:
				482	source_parts.pop()
				483	target_parts.pop()
				484	target_parts.reverse()
				485	parts = ['..'] * (len(source_parts) - 1) + target_parts
				486	return '/'.join(parts)
				487
				488	def get_stylesheet_reference(settings, relative_to=None):
				489	"""
				490	Retrieve a stylesheet reference from the settings object.
				491
				492	Deprecated. Use get_stylesheet_reference_list() instead to
				493	enable specification of multiple stylesheets as a comma-separated
				494	list.
				495	"""
				496	if settings.stylesheet_path:
				497	assert not settings.stylesheet, (
				498	'stylesheet and stylesheet_path are mutually exclusive.')
				499	if relative_to == None:
				500	relative_to = settings._destination
				501	return relative_path(relative_to, settings.stylesheet_path)
				502	else:
				503	return settings.stylesheet
				504
				505	# Return 'stylesheet' or 'stylesheet_path' arguments as list.
				506	#
				507	# The original settings arguments are kept unchanged: you can test
				508	# with e.g. ``if settings.stylesheet_path:``
				509	#
				510	# Differences to ``get_stylesheet_reference``:
				511	# * return value is a list
				512	# * no re-writing of the path (and therefore no optional argument)
				513	# (if required, use ``utils.relative_path(source, target)``
				514	# in the calling script)
				515	def get_stylesheet_list(settings):
				516	"""
				517	Retrieve list of stylesheet references from the settings object.
				518	"""
				519	assert not (settings.stylesheet and settings.stylesheet_path), (
				520	'stylesheet and stylesheet_path are mutually exclusive.')
				521	if settings.stylesheet_path:
				522	sheets = settings.stylesheet_path.split(",")
				523	elif settings.stylesheet:
				524	sheets = settings.stylesheet.split(",")
				525	else:
				526	sheets = []
				527	# strip whitespace (frequently occuring in config files)
				528	return [sheet.strip(u' \t\n\r') for sheet in sheets]
				529
				530	def get_trim_footnote_ref_space(settings):
				531	"""
				532	Return whether or not to trim footnote space.
				533
				534	If trim_footnote_reference_space is not None, return it.
				535
				536	If trim_footnote_reference_space is None, return False unless the
				537	footnote reference style is 'superscript'.
				538	"""
				539	if settings.trim_footnote_reference_space is None:
				540	return hasattr(settings, 'footnote_references') and \
				541	settings.footnote_references == 'superscript'
				542	else:
				543	return settings.trim_footnote_reference_space
				544
				545	def get_source_line(node):
				546	"""
				547	Return the "source" and "line" attributes from the `node` given or from
				548	its closest ancestor.
				549	"""
				550	while node:
				551	if node.source or node.line:
				552	return node.source, node.line
				553	node = node.parent
				554	return None, None
				555
				556	def escape2null(text):
				557	"""Return a string with escape-backslashes converted to nulls."""
				558	parts = []
				559	start = 0
				560	while 1:
				561	found = text.find('\\', start)
				562	if found == -1:
				563	parts.append(text[start:])
				564	return ''.join(parts)
				565	parts.append(text[start:found])
				566	parts.append('\x00' + text[found+1:found+2])
				567	start = found + 2 # skip character after escape
				568
				569	def unescape(text, restore_backslashes=0):
				570	"""
				571	Return a string with nulls removed or restored to backslashes.
				572	Backslash-escaped spaces are also removed.
				573	"""
				574	if restore_backslashes:
				575	return text.replace('\x00', '\\')
				576	else:
				577	for sep in ['\x00 ', '\x00\n', '\x00']:
				578	text = ''.join(text.split(sep))
				579	return text
				580
				581	east_asian_widths = {'W': 2, # Wide
				582	'F': 2, # Full-width (wide)
				583	'Na': 1, # Narrow
				584	'H': 1, # Half-width (narrow)
				585	'N': 1, # Neutral (not East Asian, treated as narrow)
				586	'A': 1} # Ambiguous (s/b wide in East Asian context,
				587	# narrow otherwise, but that doesn't work)
				588	"""Mapping of result codes from `unicodedata.east_asian_width()` to character
				589	column widths."""
				590
				591	def east_asian_column_width(text):
				592	if isinstance(text, unicode):
				593	total = 0
				594	for c in text:
				595	total += east_asian_widths[unicodedata.east_asian_width(c)]
				596	return total
				597	else:
				598	return len(text)
				599
				600	if hasattr(unicodedata, 'east_asian_width'):
				601	column_width = east_asian_column_width
				602	else:
				603	column_width = len
				604
				605	def uniq(L):
				606	r = []
				607	for item in L:
				608	if not item in r:
				609	r.append(item)
				610	return r
				611
				612
				613	class DependencyList:
				614
				615	"""
				616	List of dependencies, with file recording support.
				617
				618	Note that the output file is not automatically closed. You have
				619	to explicitly call the close() method.
				620	"""
				621
				622	def __init__(self, output_file=None, dependencies=[]):
				623	"""
				624	Initialize the dependency list, automatically setting the
				625	output file to `output_file` (see `set_output()`) and adding
				626	all supplied dependencies.
				627	"""
				628	self.set_output(output_file)
				629	for i in dependencies:
				630	self.add(i)
				631
				632	def set_output(self, output_file):
				633	"""
				634	Set the output file and clear the list of already added
				635	dependencies.
				636
				637	`output_file` must be a string. The specified file is
				638	immediately overwritten.
				639
				640	If output_file is '-', the output will be written to stdout.
				641	If it is None, no file output is done when calling add().
				642	"""
				643	self.list = []
				644	if output_file == '-':
				645	self.file = sys.stdout
				646	elif output_file:
				647	self.file = open(output_file, 'w')
				648	else:
				649	self.file = None
				650
				651	def add(self, *filenames):
				652	"""
				653	If the dependency `filename` has not already been added,
				654	append it to self.list and print it to self.file if self.file
				655	is not None.
				656	"""
				657	for filename in filenames:
				658	if not filename in self.list:
				659	self.list.append(filename)
				660	if self.file is not None:
				661	print >>self.file, filename
				662
				663	def close(self):
				664	"""
				665	Close the output file.
				666	"""
				667	self.file.close()
				668	self.file = None
				669
				670	def __repr__(self):
				671	if self.file:
				672	output_file = self.file.name
				673	else:
				674	output_file = None
				675	return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list)