blob: e5443bd7461776e9ed97e64705e8a0e27fa4c377 [file] [log] [blame]
# $Id: utils.py 6394 2010-08-20 11:26:58Z milde $
# Author: David Goodger <goodger@python.org>
# Copyright: This module has been placed in the public domain.

"""
Miscellaneous utilities for the documentation utilities.
"""
8
9__docformat__ = 'reStructuredText'
10
11import sys
12import os
13import os.path
14import warnings
15import unicodedata
16from docutils import ApplicationError, DataError
17from docutils import nodes
18from docutils._compat import bytes
19
20
class SystemMessage(ApplicationError):

    """
    Exception carrying the text and the level of a system message.

    `Reporter.system_message()` raises it for messages whose level is at or
    above the reporter's halt level.
    """

    def __init__(self, system_message, level):
        # The exception text is the plain-text rendering of the message node.
        text = system_message.astext()
        Exception.__init__(self, text)
        self.level = level
26
27
class SystemMessagePropagation(ApplicationError):
    """
    Marker exception; defines nothing beyond `ApplicationError`.

    NOTE(review): raisers and handlers live outside this module.
    """
29
30
class Reporter:

    """
    Info/warning/error reporter and ``system_message`` element generator.

    Five levels of system messages are defined, along with corresponding
    methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.

    There is typically one Reporter object per process.  A Reporter object is
    instantiated with thresholds for reporting (generating warnings) and
    halting processing (raising exceptions), a switch to turn debug output on
    or off, and an I/O stream for warnings.  These are stored as instance
    attributes.

    When a system message is generated, its level is compared to the stored
    thresholds, and a warning or error is generated as appropriate.  Debug
    messages are produced iff the stored debug switch is on, independently of
    other thresholds.  Message output is sent to the stored warning stream if
    not set to ''.

    The Reporter class also employs a modified form of the "Observer" pattern
    [GoF95]_ to track system messages generated.  The `attach_observer` method
    should be called before parsing, with a bound method or function which
    accepts system messages.  The observer can be removed with
    `detach_observer`, and another added in its place.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    levels = 'DEBUG INFO WARNING ERROR SEVERE'.split()
    """List of names for system message levels, indexed by level."""

    # system message level constants:
    (DEBUG_LEVEL,
     INFO_LEVEL,
     WARNING_LEVEL,
     ERROR_LEVEL,
     SEVERE_LEVEL) = range(5)

    def __init__(self, source, report_level, halt_level, stream=None,
                 debug=0, encoding=None, error_handler='backslashreplace'):
        """
        :Parameters:
            - `source`: The path to or description of the source data.
            - `report_level`: The level at or above which warning output will
              be sent to `stream`.
            - `halt_level`: The level at or above which `SystemMessage`
              exceptions will be raised, halting execution.
            - `debug`: Show debug (level=0) system messages?
            - `stream`: Where warning output is sent.  Can be file-like (has a
              ``.write`` method), a string (file name, opened for writing),
              '' (empty string, for discarding all stream messages) or
              `None` (implies `sys.stderr`; default).
            - `encoding`: The output encoding.
            - `error_handler`: The error handler for stderr output encoding.
        """

        self.source = source
        """The path to or description of the source data."""

        self.error_handler = error_handler
        """The character encoding error handler."""

        self.debug_flag = debug
        """Show debug (level=0) system messages?"""

        self.report_level = report_level
        """The level at or above which warning output will be sent
        to `self.stream`."""

        self.halt_level = halt_level
        """The level at or above which `SystemMessage` exceptions
        will be raised, halting execution."""

        if stream is None:
            stream = sys.stderr
        elif stream and isinstance(stream, (type(u''), bytes)):
            # `stream` is a non-empty file name: open it.  `isinstance` (not
            # an exact `type()` comparison) so that string subclasses are
            # recognised as file names, too.
            if isinstance(stream, bytes):
                stream = open(stream, 'w')
            else:
                stream = open(stream.encode(), 'w')

        self.stream = stream
        """Where warning output is sent."""

        if encoding is None:
            # Fall back to the stream's own encoding, if it declares one.
            encoding = getattr(stream, 'encoding', None)

        self.encoding = encoding or 'ascii'
        """The output character encoding."""

        self.observers = []
        """List of bound methods or functions to call with each system_message
        created."""

        self.max_level = -1
        """The highest level system message generated so far."""

    def set_conditions(self, category, report_level, halt_level,
                       stream=None, debug=0):
        """
        Deprecated: reset thresholds, stream and debug switch in one call.

        Emits a `DeprecationWarning`.  The `category` argument is accepted
        but not used.  A `stream` of `None` selects `sys.stderr`.
        """
        warnings.warn('docutils.utils.Reporter.set_conditions deprecated; '
                      'set attributes via configuration settings or directly',
                      DeprecationWarning, stacklevel=2)
        self.report_level = report_level
        self.halt_level = halt_level
        if stream is None:
            stream = sys.stderr
        self.stream = stream
        self.debug_flag = debug

    def attach_observer(self, observer):
        """
        The `observer` parameter is a function or bound method which takes one
        argument, a `nodes.system_message` instance.
        """
        self.observers.append(observer)

    def detach_observer(self, observer):
        """Remove `observer`; raise `ValueError` if it is not attached."""
        self.observers.remove(observer)

    def notify_observers(self, message):
        """Call every attached observer with `message`, in attach order."""
        for observer in self.observers:
            observer(message)

    def system_message(self, level, message, *children, **kwargs):
        """
        Return a system_message object.

        Raise an exception or generate a warning if appropriate.

        The keyword argument `base_node`, if given, is consumed here: the
        "source" and "line" values found via `get_source_line()` are used as
        defaults.  Remaining keyword arguments become attributes of the
        ``system_message`` node.

        Raise `SystemMessage` if `level` is at or above `self.halt_level`.
        """
        attributes = kwargs.copy()
        if 'base_node' in kwargs:
            source, line = get_source_line(kwargs['base_node'])
            del attributes['base_node']
            if source is not None:
                attributes.setdefault('source', source)
            if line is not None:
                attributes.setdefault('line', line)
        if 'source' not in attributes:  # 'line' is absolute line number
            try:  # look up (source, line-in-source)
                source, line = self.locator(attributes.get('line'))
            except AttributeError:
                # No `locator` attribute has been set on this reporter.
                source, line = None, None
            if source is not None:
                attributes['source'] = source
            if line is not None:
                attributes['line'] = line
        attributes.setdefault('source', self.source)

        msg = nodes.system_message(message, level=level,
                                   type=self.levels[level],
                                   *children, **attributes)
        if self.stream and (level >= self.report_level
                            or self.debug_flag and level == self.DEBUG_LEVEL
                            or level >= self.halt_level):
            msgtext = msg.astext() + '\n'
            try:
                self.stream.write(msgtext)
            except UnicodeEncodeError:
                # Retry with an explicitly encoded message.
                self.stream.write(msgtext.encode(self.encoding,
                                                 self.error_handler))
        if level >= self.halt_level:
            raise SystemMessage(msg, level)
        if level > self.DEBUG_LEVEL or self.debug_flag:
            self.notify_observers(msg)
        self.max_level = max(level, self.max_level)
        return msg

    def debug(self, *args, **kwargs):
        """
        Level-0, "DEBUG": an internal reporting issue. Typically, there is no
        effect on the processing. Level-0 system messages are handled
        separately from the others.

        Return `None` without creating a message if the debug switch is off.
        """
        if self.debug_flag:
            return self.system_message(self.DEBUG_LEVEL, *args, **kwargs)

    def info(self, *args, **kwargs):
        """
        Level-1, "INFO": a minor issue that can be ignored. Typically there is
        no effect on processing, and level-1 system messages are not reported.
        """
        return self.system_message(self.INFO_LEVEL, *args, **kwargs)

    def warning(self, *args, **kwargs):
        """
        Level-2, "WARNING": an issue that should be addressed. If ignored,
        there may be unpredictable problems with the output.
        """
        return self.system_message(self.WARNING_LEVEL, *args, **kwargs)

    def error(self, *args, **kwargs):
        """
        Level-3, "ERROR": an error that should be addressed. If ignored, the
        output will contain errors.
        """
        return self.system_message(self.ERROR_LEVEL, *args, **kwargs)

    def severe(self, *args, **kwargs):
        """
        Level-4, "SEVERE": a severe error that must be addressed. If ignored,
        the output will contain severe errors. Typically level-4 system
        messages are turned into exceptions which halt processing.
        """
        return self.system_message(self.SEVERE_LEVEL, *args, **kwargs)
246
247
class ExtensionOptionError(DataError):
    """Base class of the extension option errors defined in this module."""
class BadOptionError(ExtensionOptionError):
    """Raised by `extract_options()` for invalid option fields."""
class BadOptionDataError(ExtensionOptionError):
    """Raised by `extract_options()` for invalid option data."""
class DuplicateOptionError(ExtensionOptionError):
    """Raised by `assemble_option_dict()` for duplicate options."""
252
253
def extract_extension_options(field_list, options_spec):
    """
    Return a dictionary mapping extension option names to converted values.

    Combines `extract_options()` and `assemble_option_dict()`.

    :Parameters:
        - `field_list`: A flat field list without field arguments, where each
          field body consists of a single paragraph only.
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `ValueError` for invalid option values (raised by the conversion
           function).
        - `TypeError` for invalid option value types (raised by conversion
           function).
        - `DuplicateOptionError` for duplicate options.
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    return assemble_option_dict(extract_options(field_list), options_spec)
278
def extract_options(field_list):
    """
    Return a list of option (name, value) pairs from field names & bodies.

    The name is the lower-cased field name; the value is the text of the
    field body, or `None` for an empty body.

    :Parameter:
        `field_list`: A flat field list, where each field name is a single
        word and each field body consists of a single paragraph only.

    :Exceptions:
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    pairs = []
    for field in field_list:
        field_name = field[0].astext()
        if len(field_name.split()) != 1:
            raise BadOptionError(
                'extension option field name may not contain multiple words')
        name = str(field_name.lower())
        body = field[1]
        # Test the length explicitly rather than the truth value of `body`.
        if len(body) == 0:
            data = None
        elif (len(body) == 1 and isinstance(body[0], nodes.paragraph)
              and len(body[0]) == 1 and isinstance(body[0][0], nodes.Text)):
            data = body[0][0].astext()
        else:
            raise BadOptionDataError(
                'extension option field body may contain\n'
                'a single paragraph only (option "%s")' % name)
        pairs.append((name, data))
    return pairs
310
def assemble_option_dict(option_list, options_spec):
    """
    Return a mapping of option names to values.

    :Parameters:
        - `option_list`: A list of (name, value) pairs (the output of
          `extract_options()`).
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `DuplicateOptionError` for duplicate options.
        - `ValueError` for invalid option values (raised by conversion
           function).
        - `TypeError` for invalid option value types (raised by conversion
           function).
    """
    options = {}
    for name, value in option_list:
        convertor = options_spec[name]  # raises KeyError if unknown
        if convertor is None:
            raise KeyError(name)        # or if explicitly disabled
        if name in options:
            raise DuplicateOptionError('duplicate option "%s"' % name)
        try:
            options[name] = convertor(value)
        except (ValueError, TypeError) as detail:
            # Re-raise the same exception class with the option name and
            # value prepended.  The arguments are formatted one by one so
            # that non-string arguments cannot make the join itself fail.
            original = ' '.join(['%s' % (arg,) for arg in detail.args])
            raise detail.__class__('(option: "%s"; value: %r)\n%s'
                                   % (name, value, original))
    return options
342
343
class NameValueError(DataError):
    """Raised by `extract_name_value()` for invalid input."""
345
346
def decode_path(path):
    """
    Decode file/path string. Return `nodes.reprunicode` object.

    Convert to Unicode without the UnicodeDecode error of the
    implicit 'ascii:strict' decoding.  The file system encoding is tried
    first, then UTF-8; as a last resort the path is decoded as ASCII with
    undecodable bytes replaced.
    """
    # see also http://article.gmane.org/gmane.text.docutils.user/2905
    try:
        decoded = path.decode(sys.getfilesystemencoding(), 'strict')
    except AttributeError:  # default value None has no decode method
        decoded = path
    except UnicodeDecodeError:
        try:
            decoded = path.decode('utf-8', 'strict')
        except UnicodeDecodeError:
            decoded = path.decode('ascii', 'replace')
    return nodes.reprunicode(decoded)
365
366
def extract_name_value(line):
    """
    Return a list of (name, value) from a line of the form "name=value ...".

    Names are lower-cased.  A value may be enclosed in single or double
    quotes; an unquoted value ends at the next space character.

    :Exception:
        `NameValueError` for invalid input (missing name, missing data, bad
        quotes, etc.).
    """
    pairs = []
    remainder = line
    while remainder:
        name_part, separator, remainder = remainder.partition('=')
        if not separator:
            raise NameValueError('missing "="')
        attname = name_part.strip()
        if not attname:
            raise NameValueError(
                'missing attribute name before "="')
        remainder = remainder.lstrip()
        if not remainder:
            raise NameValueError(
                'missing value after "%s="' % attname)
        quote = remainder[0]
        if quote in '\'"':
            endquote = remainder.find(quote, 1)
            if endquote == -1:
                raise NameValueError(
                    'attribute "%s" missing end quote (%s)'
                    % (attname, quote))
            # The character after the closing quote (if any) must be
            # whitespace.
            following = remainder[endquote + 1:endquote + 2]
            if following.strip():
                raise NameValueError(
                    'attribute "%s" end quote (%s) not followed by '
                    'whitespace' % (attname, quote))
            data = remainder[1:endquote]
            remainder = remainder[endquote + 1:].lstrip()
        else:
            # Unquoted value: everything up to the first space.
            data, _, remainder = remainder.partition(' ')
            remainder = remainder.lstrip()
        pairs.append((attname.lower(), data))
    return pairs
410
def new_reporter(source_path, settings):
    """
    Return a new Reporter object.

    Thresholds, warning stream, debug switch, error encoding and error
    handler are taken from `settings`.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.
    """
    return Reporter(source_path, settings.report_level, settings.halt_level,
                    stream=settings.warning_stream, debug=settings.debug,
                    encoding=settings.error_encoding,
                    error_handler=settings.error_encoding_error_handler)
427
def new_document(source_path, settings=None):
    """
    Return a new empty document object.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  If none are provided, a default core set will
            be used.  If you will use the document object with any Docutils
            components, you must provide their default settings as well.  For
            example, if parsing, at least provide the parser settings,
            obtainable as follows::

                settings = docutils.frontend.OptionParser(
                    components=(docutils.parsers.rst.Parser,)
                    ).get_default_values()
    """
    from docutils import frontend
    if settings is None:
        settings = frontend.OptionParser().get_default_values()
    decoded_path = decode_path(source_path)
    doc = nodes.document(settings, new_reporter(decoded_path, settings),
                         source=decoded_path)
    doc.note_source(decoded_path, -1)
    return doc
454
def clean_rcs_keywords(paragraph, keyword_substitutions):
    """
    Apply the first matching keyword substitution to `paragraph`, in place.

    Nothing happens unless `paragraph` has exactly one child and that child
    is a `nodes.Text`.  `keyword_substitutions` is a sequence of
    (compiled pattern, substitution) pairs; the text node is replaced using
    the first pattern that matches, later pairs are ignored.
    """
    if len(paragraph) != 1 or not isinstance(paragraph[0], nodes.Text):
        return
    textnode = paragraph[0]
    for pattern, substitution in keyword_substitutions:
        if pattern.search(textnode):
            paragraph[0] = nodes.Text(pattern.sub(substitution, textnode))
            return
463
def relative_path(source, target):
    """
    Build and return a path to `target`, relative to `source` (both files).

    If there is no common prefix, return the absolute path to `target`.
    The result always uses '/' as separator.  A false `source` is treated
    as a file named 'dummy_file' in the current working directory.
    """
    source_parts = os.path.abspath(source or 'dummy_file').split(os.sep)
    target_parts = os.path.abspath(target).split(os.sep)
    # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
    if source_parts[:2] != target_parts[:2]:
        # Nothing in common between paths.
        # Return absolute path, using '/' for URLs:
        return '/'.join(target_parts)
    # Count the leading path components shared by both paths.
    shared = 0
    limit = min(len(source_parts), len(target_parts))
    while shared < limit and source_parts[shared] == target_parts[shared]:
        shared += 1
    # One '..' per remaining source directory (the last part is the file).
    ups = ['..'] * (len(source_parts) - shared - 1)
    return '/'.join(ups + target_parts[shared:])
487
def get_stylesheet_reference(settings, relative_to=None):
    """
    Retrieve a stylesheet reference from the settings object.

    If `settings.stylesheet_path` is set, return it as a path relative to
    `relative_to` (default: `settings._destination`); otherwise return
    `settings.stylesheet` unchanged.

    Deprecated. Use get_stylesheet_list() instead to
    enable specification of multiple stylesheets as a comma-separated
    list.
    """
    if settings.stylesheet_path:
        assert not settings.stylesheet, (
            'stylesheet and stylesheet_path are mutually exclusive.')
        if relative_to is None:
            relative_to = settings._destination
        return relative_path(relative_to, settings.stylesheet_path)
    else:
        return settings.stylesheet
504
# Return 'stylesheet' or 'stylesheet_path' arguments as list.
#
# The original settings arguments are kept unchanged: you can test
# with e.g. ``if settings.stylesheet_path:``
#
# Differences to ``get_stylesheet_reference``:
# * return value is a list
# * no re-writing of the path (and therefore no optional argument)
#   (if required, use ``utils.relative_path(source, target)``
#   in the calling script)
def get_stylesheet_list(settings):
    """
    Retrieve list of stylesheet references from the settings object.

    The comma-separated value of `settings.stylesheet_path` (or, if that is
    not set, `settings.stylesheet`) is split and each entry is stripped of
    surrounding whitespace.  Return an empty list if neither is set.
    """
    assert not (settings.stylesheet and settings.stylesheet_path), (
        'stylesheet and stylesheet_path are mutually exclusive.')
    spec = settings.stylesheet_path or settings.stylesheet
    if not spec:
        return []
    # strip whitespace (frequently occurring in config files)
    return [sheet.strip(u' \t\n\r') for sheet in spec.split(",")]
529
def get_trim_footnote_ref_space(settings):
    """
    Return whether or not to trim footnote space.

    If trim_footnote_reference_space is not None, return it.

    If trim_footnote_reference_space is None, return False unless the
    footnote reference style is 'superscript'.
    """
    explicit = settings.trim_footnote_reference_space
    if explicit is not None:
        return explicit
    # `settings` may lack a `footnote_references` attribute.
    return (hasattr(settings, 'footnote_references')
            and settings.footnote_references == 'superscript')
544
def get_source_line(node):
    """
    Return the "source" and "line" attributes from the `node` given or from
    its closest ancestor.

    The first node on the way up that has a true `source` or `line` supplies
    both values; return ``(None, None)`` if there is no such node.
    """
    current = node
    while current:
        source, line = current.source, current.line
        if source or line:
            return source, line
        current = current.parent
    return None, None
555
def escape2null(text):
    """
    Return a string with escape-backslashes converted to nulls.

    The character following an escape-backslash is copied unchanged and is
    not itself examined as an escape.
    """
    pieces = []
    position = 0
    while True:
        backslash = text.find('\\', position)
        if backslash == -1:
            pieces.append(text[position:])
            break
        pieces.append(text[position:backslash])
        pieces.append('\x00' + text[backslash + 1:backslash + 2])
        position = backslash + 2    # skip character after escape
    return ''.join(pieces)
568
def unescape(text, restore_backslashes=0):
    """
    Return a string with nulls removed or restored to backslashes.
    Backslash-escaped spaces are also removed.

    With a true `restore_backslashes`, every null becomes a backslash and
    nothing is removed.  Otherwise a null is removed together with a
    directly following space or newline.
    """
    if restore_backslashes:
        return text.replace('\x00', '\\')
    # The two-character sequences go first, so that the bare null does not
    # leave their space/newline behind.
    for sep in ('\x00 ', '\x00\n', '\x00'):
        text = text.replace(sep, '')
    return text
580
east_asian_widths = {
    'W': 2,   # Wide
    'F': 2,   # Full-width (wide)
    'Na': 1,  # Narrow
    'H': 1,   # Half-width (narrow)
    'N': 1,   # Neutral (not East Asian, treated as narrow)
    'A': 1,   # Ambiguous (s/b wide in East Asian context,
              # narrow otherwise, but that doesn't work)
}
"""Mapping of result codes from `unicodedata.east_asian_width()` to character
column widths."""
590
def east_asian_column_width(text):
    """
    Return the column width of `text`.

    For `unicode` objects, the widths that `east_asian_widths` assigns to
    the characters are summed up; for everything else, return ``len(text)``.
    """
    if not isinstance(text, unicode):
        return len(text)
    return sum([east_asian_widths[unicodedata.east_asian_width(char)]
                for char in text])
599
# Use the East-Asian-aware width function only where `unicodedata` provides
# `east_asian_width()`; otherwise fall back to the plain length.
column_width = (east_asian_column_width
                if hasattr(unicodedata, 'east_asian_width') else len)
604
def uniq(L):
    """
    Return a list of the items of `L` without duplicates.

    The order of first occurrence is kept.  Items are compared with ``in``
    on a list, so they do not need to be hashable.
    """
    unique = []
    for element in L:
        if element in unique:
            continue
        unique.append(element)
    return unique
611
612
class DependencyList:

    """
    List of dependencies, with file recording support.

    Note that the output file is not automatically closed.  You have
    to explicitly call the close() method.
    """

    def __init__(self, output_file=None, dependencies=()):
        """
        Initialize the dependency list, automatically setting the
        output file to `output_file` (see `set_output()`) and adding
        all supplied dependencies.
        """
        self.set_output(output_file)
        for i in dependencies:
            self.add(i)

    def set_output(self, output_file):
        """
        Set the output file and clear the list of already added
        dependencies.

        `output_file` must be a string.  The specified file is
        immediately overwritten.

        If output_file is '-', the output will be written to stdout.
        If it is None, no file output is done when calling add().
        """
        self.list = []
        if output_file == '-':
            self.file = sys.stdout
        elif output_file:
            self.file = open(output_file, 'w')
        else:
            self.file = None

    def add(self, *filenames):
        """
        If the dependency `filename` has not already been added,
        append it to self.list and print it to self.file if self.file
        is not None.
        """
        for filename in filenames:
            if filename not in self.list:
                self.list.append(filename)
                if self.file is not None:
                    # One dependency per line.
                    self.file.write('%s\n' % (filename,))

    def close(self):
        """
        Close the output file.

        Safe to call when no output file is set.  `sys.stdout` (output
        file '-') is released but never closed.
        """
        if self.file is not None and self.file is not sys.stdout:
            self.file.close()
        self.file = None

    def __repr__(self):
        # File-like objects without a `name` (and `None`) are shown as None.
        output_file = getattr(self.file, 'name', None)
        return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list)
675 return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list)