blob: c43fa3de8649a01f2029f5f4daa9019af93892ca [file] [log] [blame]
Tor Norbye3a2425a2013-11-04 10:16:08 -08001# $Id: nodes.py 6351 2010-07-03 14:19:09Z gbrandl $
2# Author: David Goodger <goodger@python.org>
3# Copyright: This module has been placed in the public domain.
4
5"""
6Docutils document tree element class library.
7
8Classes in CamelCase are abstract base classes or auxiliary classes. The one
9exception is `Text`, for a text (PCDATA) node; uppercase is used to
10differentiate from element classes. Classes in lower_case_with_underscores
11are element classes, matching the XML element generic identifiers in the DTD_.
12
13The position of each node (the level at which it can occur) is significant and
14is represented by abstract base classes (`Root`, `Structural`, `Body`,
15`Inline`, etc.). Certain transformations will be easier because we can use
16``isinstance(node, base_class)`` to determine the position of the node in the
17hierarchy.
18
19.. _DTD: http://docutils.sourceforge.net/docs/ref/docutils.dtd
20"""
21
22__docformat__ = 'reStructuredText'
23
24import sys
25import os
26import re
27import warnings
28import types
29import unicodedata
30
31# ==============================
32# Functional Node Base Classes
33# ==============================
34
class Node(object):

    """Abstract base class of nodes in a document tree."""

    parent = None
    """The Node that directly contains this Node (back-reference)."""

    document = None
    """The `document` node at the root of this Node's tree."""

    source = None
    """Path or description of the input source that produced this Node."""

    line = None
    """1-based line number in `source` at which this Node begins."""

    def __nonzero__(self):
        """
        Report every Node as true, whether or not it has subnodes.

        A node is more than a container, so its truth value is independent
        of its length.  Use `len()` to test for children and `None` to
        represent a false value.
        """
        return True

    if sys.version_info < (3,):
        # Python 2 only: str(node) has to be a byte string, so Unicode
        # characters > 255 are escaped.  Not needed on Python 3.
        def __str__(self):
            return unicode(self).encode('raw_unicode_escape')

    def asdom(self, dom=None):
        """Return a DOM **fragment** representation of this Node."""
        if dom is None:
            import xml.dom.minidom as dom
        return self._dom_node(dom.Document())

    def pformat(self, indent='    ', level=0):
        """
        Return an indented pseudo-XML representation, for test purposes.

        Abstract here; subclasses must override.
        """
        raise NotImplementedError

    def copy(self):
        """Return a copy of self.  Abstract; subclasses must override."""
        raise NotImplementedError

    def deepcopy(self):
        """Return a deep copy of self, children included.  Abstract."""
        raise NotImplementedError

    def setup_child(self, child):
        """
        Attach `child` to this node: set its `parent`, and, when this node
        belongs to a document, its `document` plus any still-unset
        `source`/`line` (taken from the document's current position).
        """
        child.parent = self
        owner = self.document
        if owner:
            child.document = owner
            if child.source is None:
                child.source = owner.current_source
            if child.line is None:
                child.line = owner.current_line

    def walk(self, visitor):
        """
        Traverse a tree of `Node` objects, calling the
        `dispatch_visit()` method of `visitor` when entering each
        node.  (`walkabout()` is similar, but additionally calls
        `dispatch_departure()` before leaving each node.)

        Limited in-place tree modification is supported during the
        traversal: a node may be replaced by one or more nodes, or
        removed.  If the removed or replaced node comes after the
        current node, though, the old node is still traversed and the
        new nodes are not.

        ``visit`` methods may raise `TreePruningException` subclasses
        (`SkipChildren`, `SkipSiblings`, `SkipNode`, `SkipDeparture`).

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` implementation for each `Node` subclass encountered.

        Return true if we should stop the traversal.
        """
        stop = 0
        reporter = visitor.document.reporter
        reporter.debug(
            'docutils.nodes.Node.walk calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except (SkipChildren, SkipNode):
                return stop
            except SkipDeparture:
                # walk() never calls departure methods; nothing to skip.
                pass
            try:
                # Iterate over a copy so visitors may edit the child list.
                for subnode in self.children[:]:
                    if subnode.walk(visitor):
                        stop = 1
                        break
            except SkipSiblings:
                pass
        except StopTraversal:
            stop = 1
        return stop

    def walkabout(self, visitor):
        """
        Traverse the tree like `Node.walk()` (which see), and in
        addition call the `dispatch_departure()` method of `visitor`
        before leaving each node.

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` and ``depart`` implementation for each `Node`
        subclass encountered.

        Return true if we should stop the traversal.
        """
        stop = 0
        call_depart = 1
        reporter = visitor.document.reporter
        reporter.debug(
            'docutils.nodes.Node.walkabout calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except SkipNode:
                return stop
            except SkipDeparture:
                call_depart = 0
            try:
                # Iterate over a copy so visitors may edit the child list.
                for subnode in self.children[:]:
                    if subnode.walkabout(visitor):
                        stop = 1
                        break
            except SkipSiblings:
                pass
        except SkipChildren:
            pass
        except StopTraversal:
            stop = 1
        if call_depart:
            reporter.debug(
                'docutils.nodes.Node.walkabout calling dispatch_departure '
                'for %s' % self.__class__.__name__)
            visitor.dispatch_departure(self)
        return stop

    def _fast_traverse(self, cls):
        """Specialized traverse() that only supports instance checks."""
        matches = []
        if isinstance(self, cls):
            matches.append(self)
        for subnode in self.children:
            matches += subnode._fast_traverse(cls)
        return matches

    def _all_traverse(self):
        """Specialized traverse() that doesn't check for a condition."""
        nodes = [self]
        for subnode in self.children:
            nodes += subnode._all_traverse()
        return nodes

    def traverse(self, condition=None,
                 include_self=1, descend=1, siblings=0, ascend=0):
        """
        Return an iterable containing

        * self (if include_self is true)
        * all descendants in tree traversal order (if descend is true)
        * all siblings (if siblings is true) and their descendants (if
          also descend is true)
        * the siblings of the parent (if ascend is true) and their
          descendants (if also descend is true), and so on

        If `condition` is not None, the iterable contains only nodes
        for which ``condition(node)`` is true.  If `condition` is a
        node class ``cls``, it is equivalent to a function consisting
        of ``return isinstance(node, cls)``.

        If ascend is true, assume siblings to be true as well.

        For example, given the following tree::

            <paragraph>
                <emphasis>      <--- emphasis.traverse() and
                    <strong>    <--- strong.traverse() are called.
                        Foo
                    Bar
                <reference name="Baz" refid="baz">
                    Baz

        Then list(emphasis.traverse()) equals ::

            [<emphasis>, <strong>, <#text: Foo>, <#text: Bar>]

        and list(strong.traverse(ascend=1)) equals ::

            [<strong>, <#text: Foo>, <#text: Bar>, <reference>, <#text: Baz>]
        """
        if ascend:
            siblings = 1
        # Plain "self plus all descendants" requests are served by the
        # specialized helpers above.
        if include_self and descend and not siblings:
            if condition is None:
                return self._all_traverse()
            if isinstance(condition, (types.ClassType, type)):
                return self._fast_traverse(condition)
        # A class given as `condition` is turned into an isinstance()
        # predicate (`type` covers implementations that only have
        # new-style classes, like PyPy).
        if isinstance(condition, (types.ClassType, type)):
            node_class = condition
            def condition(node, node_class=node_class):
                return isinstance(node, node_class)
        found = []
        if include_self and (condition is None or condition(self)):
            found.append(self)
        if descend and len(self.children):
            for subnode in self:
                found.extend(subnode.traverse(
                    condition=condition, include_self=1, descend=1,
                    siblings=0, ascend=0))
        if siblings or ascend:
            current = self
            while current.parent:
                container = current.parent
                position = container.index(current)
                for sibling in container[position+1:]:
                    found.extend(sibling.traverse(
                        condition=condition, include_self=1,
                        descend=descend, siblings=0, ascend=0))
                if not ascend:
                    break
                current = container
        return found

    def next_node(self, condition=None,
                  include_self=0, descend=1, siblings=0, ascend=0):
        """
        Return the first node in the iterable returned by traverse(),
        or None if the iterable is empty.

        Takes the same parameters as traverse(), except that
        include_self defaults to 0 here.
        """
        candidates = self.traverse(condition=condition,
                                   include_self=include_self,
                                   descend=descend,
                                   siblings=siblings, ascend=ascend)
        try:
            first = candidates[0]
        except IndexError:
            first = None
        return first
296
if sys.version_info >= (3,):
    # No ``u`` prefix to strip on Python 3: use the string type itself.
    reprunicode = unicode
else:
    class reprunicode(unicode):
        """
        A `unicode` subclass whose repr omits the leading ``u``.
        """

        def __repr__(self):
            prefixed = unicode.__repr__(self)
            return prefixed[1:]
307
308
class Text(Node, reprunicode):

    """
    Terminal node (leaf) that holds text only: it has no child nodes and no
    attributes.  Create an instance by passing a string to the constructor;
    read the text back with the `astext` method.
    """

    tagname = '#text'

    children = ()
    """Always empty: Text nodes have no children, and cannot have any."""

    if sys.version_info > (3,):
        def __new__(cls, data, rawsource=None):
            """Prevent the rawsource argument from propagating to str."""
            if isinstance(data, bytes):
                raise TypeError('expecting str data, not bytes')
            return reprunicode.__new__(cls, data)
    else:
        def __new__(cls, data, rawsource=None):
            """Prevent the rawsource argument from propagating to str."""
            return reprunicode.__new__(cls, data)

    def __init__(self, data, rawsource=''):
        # `data` was already consumed by __new__; only `rawsource` is kept.
        self.rawsource = rawsource
        """The raw text from which this element was constructed."""

    def shortrepr(self, maxlen=18):
        """Return ``<#text: '...'>``, shortening text longer than `maxlen`."""
        shown = self
        if len(self) > maxlen:
            shown = self[:maxlen-4] + ' ...'
        return '<%s: %s>' % (self.tagname, repr(reprunicode(shown)))

    def __repr__(self):
        return self.shortrepr(maxlen=68)

    def _dom_node(self, domroot):
        """Return a DOM text node created by `domroot` from this text."""
        return domroot.createTextNode(unicode(self))

    def astext(self):
        """Return the text as a plain `reprunicode` string."""
        return reprunicode(self)

    # Note about __unicode__: there is deliberately no __unicode__ here
    # (nor one raising NotImplemented in the superclass Node).  Both were
    # removed when Text became a subclass of unicode instead of
    # UserString, because the call cannot be delegated to the superclass:
    # unicode itself has no __unicode__ method, and calling unicode(self)
    # or unicode.__new__ directly creates an infinite loop.

    def copy(self):
        """Return a new node of the same class, text and rawsource."""
        return self.__class__(reprunicode(self), rawsource=self.rawsource)

    def deepcopy(self):
        """Same as `copy()`; there are no children to copy."""
        return self.copy()

    def pformat(self, indent='    ', level=0):
        """Return the text with each line indented `level` times."""
        prefix = indent * level
        return ''.join([prefix + line + '\n' for line in self.splitlines()])

    # Substitution definitions call rstrip and lstrip and expect a Text
    # instance back (UserString used to take care of that).  As before,
    # the rawsource member is lost in the result.

    def rstrip(self, chars=None):
        stripped = reprunicode.rstrip(self, chars)
        return self.__class__(stripped)

    def lstrip(self, chars=None):
        stripped = reprunicode.lstrip(self, chars)
        return self.__class__(stripped)
384
class Element(Node):

    """
    `Element` is the superclass to all specific elements.

    Elements contain attributes and child nodes.  Elements emulate
    dictionaries for attributes, indexing by attribute name (a string).  To
    set the attribute 'att' to 'value', do::

        element['att'] = 'value'

    There are two special attributes: 'ids' and 'names'.  Both are
    lists of unique identifiers, and names serve as human interfaces
    to IDs.  Names are case- and whitespace-normalized (see the
    fully_normalize_name() function), and IDs conform to the regular
    expression ``[a-z](-?[a-z0-9]+)*`` (see the make_id() function).

    Elements also emulate lists for child nodes (element nodes and/or text
    nodes), indexing by integer.  To get the first child node, use::

        element[0]

    Elements may be constructed using the ``+=`` operator.  To add one new
    child node to element, do::

        element += node

    This is equivalent to ``element.append(node)``.

    To add a list of multiple child nodes at once, use the same ``+=``
    operator::

        element += [node1, node2]

    This is equivalent to ``element.extend([node1, node2])``.

    The ``in`` operator works for both roles: a string is looked up among
    the attribute names, anything else among the child nodes.
    """

    list_attributes = ('ids', 'classes', 'names', 'dupnames', 'backrefs')
    """List attributes, automatically initialized to empty lists for
    all nodes."""

    tagname = None
    """The element generic identifier. If None, it is set as an instance
    attribute to the name of the class."""

    child_text_separator = '\n\n'
    """Separator for child nodes, used by `astext()` method."""

    def __init__(self, rawsource='', *children, **attributes):
        """
        Parameters:

        - `rawsource`: the raw text this element was constructed from.
        - `children`: child nodes, appended in order.
        - `attributes`: attribute values; names are lower-cased, and
          values of list attributes are copied.
        """
        self.rawsource = rawsource
        """The raw text from which this element was constructed."""

        self.children = []
        """List of child nodes (elements and/or `Text`)."""

        self.extend(children)           # maintain parent info

        self.attributes = {}
        """Dictionary of attribute {name: value}."""

        # Initialize list attributes.
        for att in self.list_attributes:
            self.attributes[att] = []

        for att, value in attributes.items():
            att = att.lower()
            if att in self.list_attributes:
                # mutable list; make a copy for this node
                self.attributes[att] = value[:]
            else:
                self.attributes[att] = value

        if self.tagname is None:
            self.tagname = self.__class__.__name__

    def _dom_node(self, domroot):
        """Return a DOM element (created by `domroot`) for this subtree."""
        element = domroot.createElement(self.tagname)
        for attribute, value in self.attlist():
            if isinstance(value, list):
                value = ' '.join([serial_escape('%s' % v) for v in value])
            element.setAttribute(attribute, '%s' % value)
        for child in self.children:
            element.appendChild(child._dom_node(domroot))
        return element

    def __repr__(self):
        data = ''
        for c in self.children:
            data += c.shortrepr()
            if len(data) > 60:
                # Enough for a summary; cut it off and stop collecting.
                data = data[:56] + ' ...'
                break
        if self['names']:
            return '<%s "%s": %s>' % (self.__class__.__name__,
                                      '; '.join(self['names']), data)
        else:
            return '<%s: %s>' % (self.__class__.__name__, data)

    def shortrepr(self):
        """Return a one-tag summary of this element, without children."""
        if self['names']:
            return '<%s "%s"...>' % (self.__class__.__name__,
                                     '; '.join(self['names']))
        else:
            return '<%s...>' % self.tagname

    def __unicode__(self):
        if self.children:
            return u'%s%s%s' % (self.starttag(),
                                ''.join([unicode(c) for c in self.children]),
                                self.endtag())
        else:
            return self.emptytag()

    if sys.version_info > (3,):
        # 2to3 doesn't convert __unicode__ to __str__
        __str__ = __unicode__

    def starttag(self):
        """Return the start tag, with all non-default attributes."""
        parts = [self.tagname]
        for name, value in self.attlist():
            if value is None:           # boolean attribute
                parts.append(name)
            elif isinstance(value, list):
                values = [serial_escape('%s' % v) for v in value]
                parts.append('%s="%s"' % (name, ' '.join(values)))
            else:
                parts.append('%s="%s"' % (name, value))
        return '<%s>' % ' '.join(parts)

    def endtag(self):
        """Return the end tag."""
        return '</%s>' % self.tagname

    def emptytag(self):
        """Return an empty-element tag, with all non-default attributes."""
        return u'<%s/>' % ' '.join([self.tagname] +
                                    ['%s="%s"' % (n, v)
                                     for n, v in self.attlist()])

    def __len__(self):
        return len(self.children)

    def __contains__(self, key):
        # support both membership test for children and attributes
        # (has_key is translated to "in" by 2to3)
        if isinstance(key, basestring):
            return key in self.attributes
        return key in self.children

    def __getitem__(self, key):
        """
        Return an attribute value (string key), a child (integer key) or
        a list of children (slice without stride).

        Raises TypeError for any other kind of key.
        """
        if isinstance(key, basestring):
            return self.attributes[key]
        elif isinstance(key, int):
            return self.children[key]
        elif isinstance(key, slice):
            assert key.step in (None, 1), 'cannot handle slice with stride'
            return self.children[key.start:key.stop]
        else:
            raise TypeError('element index must be an integer, a slice, or '
                            'an attribute name string')

    def __setitem__(self, key, item):
        """
        Set an attribute (string key), replace one child (integer key) or
        replace a run of children (slice without stride).  New children
        are passed through `setup_child()`.

        Raises TypeError for any other kind of key.
        """
        if isinstance(key, basestring):
            self.attributes[str(key)] = item
        elif isinstance(key, int):
            self.setup_child(item)
            self.children[key] = item
        elif isinstance(key, slice):
            assert key.step in (None, 1), 'cannot handle slice with stride'
            for node in item:
                self.setup_child(node)
            self.children[key.start:key.stop] = item
        else:
            raise TypeError('element index must be an integer, a slice, or '
                            'an attribute name string')

    def __delitem__(self, key):
        """
        Delete an attribute (string key), one child (integer key) or a
        run of children (slice without stride).

        Raises TypeError for any other kind of key.
        """
        if isinstance(key, basestring):
            del self.attributes[key]
        elif isinstance(key, int):
            del self.children[key]
        elif isinstance(key, slice):
            assert key.step in (None, 1), 'cannot handle slice with stride'
            del self.children[key.start:key.stop]
        else:
            raise TypeError('element index must be an integer, a simple '
                            'slice, or an attribute name string')

    def __add__(self, other):
        return self.children + other

    def __radd__(self, other):
        return other + self.children

    def __iadd__(self, other):
        """Append a node or a list of nodes to `self.children`."""
        if isinstance(other, Node):
            self.append(other)
        elif other is not None:
            self.extend(other)
        return self

    def astext(self):
        """Return the children's text joined by `child_text_separator`."""
        return self.child_text_separator.join(
              [child.astext() for child in self.children])

    def non_default_attributes(self):
        """Return a dict of the attributes that `is_not_default()`."""
        atts = {}
        for key, value in self.attributes.items():
            if self.is_not_default(key):
                atts[key] = value
        return atts

    def attlist(self):
        """Return the non-default attributes as a sorted (name, value) list."""
        # Copy into a real list first: sorting must not depend on
        # ``items()`` itself returning a list.
        attlist = list(self.non_default_attributes().items())
        attlist.sort()
        return attlist

    def get(self, key, failobj=None):
        """Return attribute `key`, or `failobj` if it is not set."""
        return self.attributes.get(key, failobj)

    def hasattr(self, attr):
        """Return true if attribute `attr` is set."""
        return attr in self.attributes

    def delattr(self, attr):
        """Delete attribute `attr`; do nothing if it is not set."""
        if attr in self.attributes:
            del self.attributes[attr]

    def setdefault(self, key, failobj=None):
        """Return attribute `key`, first setting it to `failobj` if unset."""
        return self.attributes.setdefault(key, failobj)

    has_key = hasattr

    # Do not alias ``__contains__`` to `hasattr` here: that would replace
    # the `__contains__` method defined above and make ``node in element``
    # ignore the child nodes.

    def append(self, item):
        """Add `item` as the last child."""
        self.setup_child(item)
        self.children.append(item)

    def extend(self, item):
        """Append every node of the iterable `item`."""
        for node in item:
            self.append(node)

    def insert(self, index, item):
        """Insert a node, or a list of nodes, before position `index`."""
        if isinstance(item, Node):
            self.setup_child(item)
            self.children.insert(index, item)
        elif item is not None:
            self[index:index] = item

    def pop(self, i=-1):
        """Remove and return the child at index `i` (default: the last)."""
        return self.children.pop(i)

    def remove(self, item):
        """Remove the child `item`."""
        self.children.remove(item)

    def index(self, item):
        """Return the index of the child `item`."""
        return self.children.index(item)

    def is_not_default(self, key):
        """Return 0 if `key` is a list attribute holding ``[]``, else 1."""
        if self[key] == [] and key in self.list_attributes:
            return 0
        else:
            return 1

    def update_basic_atts(self, dict):
        """
        Update basic attributes ('ids', 'names', 'classes',
        'dupnames', but not 'source') from node or dictionary `dict`.

        Values already present in this element's list are not added again.
        """
        if isinstance(dict, Node):
            dict = dict.attributes
        for att in ('ids', 'classes', 'names', 'dupnames'):
            for value in dict.get(att, []):
                if value not in self[att]:
                    self[att].append(value)

    def clear(self):
        """Drop all children (attributes are kept)."""
        self.children = []

    def replace(self, old, new):
        """Replace one child `Node` with another child or children."""
        index = self.index(old)
        if isinstance(new, Node):
            # Item assignment already runs setup_child() on `new`.
            self[index] = new
        elif new is not None:
            self[index:index+1] = new

    def replace_self(self, new):
        """
        Replace `self` node with `new`, where `new` is a node or a
        list of nodes.

        The basic attributes of `self` are merged into `new` (or into its
        first node).  If that target is not an `Element`, `self` must not
        carry any 'ids', 'names', 'classes' or 'dupnames' (asserted).
        """
        update = new
        if not isinstance(new, Node):
            # `new` is a list; update first child.
            try:
                update = new[0]
            except IndexError:
                update = None
        if isinstance(update, Element):
            update.update_basic_atts(self)
        else:
            # `update` is a Text node or `new` is an empty list.
            # Assert that we aren't losing any attributes.
            for att in ('ids', 'names', 'classes', 'dupnames'):
                assert not self[att], \
                       'Losing "%s" attribute: %s' % (att, self[att])
        self.parent.replace(self, new)

    def first_child_matching_class(self, childclass, start=0, end=sys.maxint):
        """
        Return the index of the first child that is an instance of
        `childclass`, or None if there is no such child.

        Parameters:

        - `childclass`: A `Node` subclass to search for, or a tuple of `Node`
          classes. If a tuple, any of the classes may match.
        - `start`: Initial index to check.
        - `end`: Index at which to stop checking (exclusive).
        """
        if not isinstance(childclass, tuple):
            childclass = (childclass,)
        for index in range(start, min(len(self), end)):
            if isinstance(self[index], childclass):
                return index
        return None

    def first_child_not_matching_class(self, childclass, start=0,
                                       end=sys.maxint):
        """
        Return the index of the first child that is *not* an instance of
        `childclass`, or None if every checked child matches.

        Parameters:

        - `childclass`: A `Node` subclass to skip, or a tuple of `Node`
          classes. If a tuple, none of the classes may match.
        - `start`: Initial index to check.
        - `end`: Index at which to stop checking (exclusive).
        """
        if not isinstance(childclass, tuple):
            childclass = (childclass,)
        for index in range(start, min(len(self), end)):
            if not isinstance(self.children[index], childclass):
                return index
        return None

    def pformat(self, indent='    ', level=0):
        """Return an indented pseudo-XML rendering of this subtree."""
        return ''.join(['%s%s\n' % (indent * level, self.starttag())] +
                       [child.pformat(indent, level+1)
                        for child in self.children])

    def copy(self):
        """Return a copy with the same rawsource and attributes, no children."""
        return self.__class__(rawsource=self.rawsource, **self.attributes)

    def deepcopy(self):
        """Return a copy of this element with deep copies of its children."""
        copy = self.copy()
        copy.extend([child.deepcopy() for child in self.children])
        return copy

    def set_class(self, name):
        """Add a new class to the "classes" attribute."""
        warnings.warn('docutils.nodes.Element.set_class deprecated; '
                      "append to Element['classes'] list attribute directly",
                      DeprecationWarning, stacklevel=2)
        assert ' ' not in name
        self['classes'].append(name.lower())

    def note_referenced_by(self, name=None, id=None):
        """Note that this Element has been referenced by its name
        `name` or id `id`."""
        self.referenced = 1
        # Element.expect_referenced_by_* dictionaries map names or ids
        # to nodes whose ``referenced`` attribute is set to true as
        # soon as this node is referenced by the given name or id.
        # Needed for target propagation.
        by_name = getattr(self, 'expect_referenced_by_name', {}).get(name)
        by_id = getattr(self, 'expect_referenced_by_id', {}).get(id)
        if by_name:
            assert name is not None
            by_name.referenced = 1
        if by_id:
            assert id is not None
            by_id.referenced = 1
773
774
class TextElement(Element):

    """
    An element which directly contains text.

    All children are `Text` or `Inline` subclass nodes.  To find out
    whether an element appears in an inline context, test whether its
    immediate parent is a `TextElement` instance (subclasses included).
    That is useful for nodes such as `image`, which can occur both inline
    and as standalone body elements.

    When passing children to `__init__()`, remember to pass ``''`` (or
    another suitable value) for `text`.
    """

    child_text_separator = ''
    """Separator for child nodes, used by `astext()` method."""

    def __init__(self, rawsource='', text='', *children, **attributes):
        # Non-empty `text` becomes a leading Text child, ahead of `children`.
        if text != '':
            children = (Text(text),) + children
        Element.__init__(self, rawsource, *children, **attributes)
800
801
class FixedTextElement(TextElement):

    """A text element whose content is preformatted."""

    def __init__(self, rawsource='', text='', *children, **attributes):
        TextElement.__init__(self, rawsource, text, *children, **attributes)
        # Set after construction, so it wins over any 'xml:space' value
        # passed in `attributes`.
        self.attributes['xml:space'] = 'preserve'
809
810
811# ========
812# Mixins
813# ========
814
# Mixin supplying a class-level default for the `resolved` flag (0).
# NOTE(review): presumably set to a true value once a reference has been
# resolved -- the code that does so is not in this part of the file.
class Resolvable:

    resolved = 0
818
819
# Mixin for classes whose instances support ``self['backrefs']`` as a list
# (`Element` initializes 'backrefs' as one of its list attributes).
class BackLinkable:

    def add_backref(self, refid):
        # Record `refid` in this node's 'backrefs' list attribute.
        self['backrefs'].append(refid)
824
825
826# ====================
827# Element Categories
828# ====================
829
# The classes in this section define no behavior of their own.  As the
# module docstring explains, they encode the position at which a node may
# occur, so that code can test ``isinstance(node, category_class)``.

# Mixed into the `document` class.
class Root: pass

# NOTE(review): presumably for title-like elements; no user in this chunk.
class Titular: pass

class PreBibliographic:
    """Category of Node which may occur before Bibliographic Nodes."""

# NOTE(review): presumably for bibliographic elements; confirm with users.
class Bibliographic: pass

# Sub-category of `PreBibliographic`.
class Decorative(PreBibliographic): pass

# Mixed into the `document` class.
class Structural: pass

# Base category for `General`, `Sequential`, `Admonition` and `Special`.
class Body: pass

# Sub-category of `Body`.
class General(Body): pass

class Sequential(Body):
    """List-like elements."""

# Sub-category of `Body`.
class Admonition(Body): pass

class Special(Body):
    """Special internal body elements."""

class Invisible(PreBibliographic):
    """Internal elements that don't appear in output."""

# NOTE(review): meaning not evident from this chunk; confirm with users.
class Part: pass

# `TextElement` documents its children as `Text` or `Inline` nodes.
class Inline: pass

# Category that also inherits the `resolved` flag from `Resolvable`.
class Referential(Resolvable): pass
863
864
# Category that inherits the `resolved` flag from `Resolvable` and adds
# class-level defaults for `referenced` (0) and `indirect_reference_name`
# (None).  `Element.note_referenced_by()` sets `referenced` to 1.
class Targetable(Resolvable):

    referenced = 0

    indirect_reference_name = None
    """Holds the whitespace_normalized_name (contains mixed case) of a target.
    Required for MoinMoin/reST compatibility."""
872
873
# Marker class only: it defines no attributes or methods.
class Labeled:
    """Contains a `label` as its first element."""
876
877
878# ==============
879# Root Element
880# ==============
881
882class document(Root, Structural, Element):
883
884 """
885 The document root element.
886
887 Do not instantiate this class directly; use
888 `docutils.utils.new_document()` instead.
889 """
890
891 def __init__(self, settings, reporter, *args, **kwargs):
892 Element.__init__(self, *args, **kwargs)
893
894 self.current_source = None
895 """Path to or description of the input source being processed."""
896
897 self.current_line = None
898 """Line number (1-based) of `current_source`."""
899
900 self.settings = settings
901 """Runtime settings data record."""
902
903 self.reporter = reporter
904 """System message generator."""
905
906 self.indirect_targets = []
907 """List of indirect target nodes."""
908
909 self.substitution_defs = {}
910 """Mapping of substitution names to substitution_definition nodes."""
911
912 self.substitution_names = {}
913 """Mapping of case-normalized substitution names to case-sensitive
914 names."""
915
916 self.refnames = {}
917 """Mapping of names to lists of referencing nodes."""
918
919 self.refids = {}
920 """Mapping of ids to lists of referencing nodes."""
921
922 self.nameids = {}
923 """Mapping of names to unique id's."""
924
925 self.nametypes = {}
926 """Mapping of names to hyperlink type (boolean: True => explicit,
927 False => implicit."""
928
929 self.ids = {}
930 """Mapping of ids to nodes."""
931
932 self.footnote_refs = {}
933 """Mapping of footnote labels to lists of footnote_reference nodes."""
934
935 self.citation_refs = {}
936 """Mapping of citation labels to lists of citation_reference nodes."""
937
938 self.autofootnotes = []
939 """List of auto-numbered footnote nodes."""
940
941 self.autofootnote_refs = []
942 """List of auto-numbered footnote_reference nodes."""
943
944 self.symbol_footnotes = []
945 """List of symbol footnote nodes."""
946
947 self.symbol_footnote_refs = []
948 """List of symbol footnote_reference nodes."""
949
950 self.footnotes = []
951 """List of manually-numbered footnote nodes."""
952
953 self.citations = []
954 """List of citation nodes."""
955
956 self.autofootnote_start = 1
957 """Initial auto-numbered footnote number."""
958
959 self.symbol_footnote_start = 0
960 """Initial symbol footnote symbol index."""
961
962 self.id_start = 1
963 """Initial ID number."""
964
965 self.parse_messages = []
966 """System messages generated while parsing."""
967
968 self.transform_messages = []
969 """System messages generated while applying transforms."""
970
971 import docutils.transforms
972 self.transformer = docutils.transforms.Transformer(self)
973 """Storage for transforms to be applied to this document."""
974
975 self.decoration = None
976 """Document's `decoration` node."""
977
978 self.document = self
979
980 def __getstate__(self):
981 """
982 Return dict with unpicklable references removed.
983 """
984 state = self.__dict__.copy()
985 state['reporter'] = None
986 state['transformer'] = None
987 return state
988
989 def asdom(self, dom=None):
990 """Return a DOM representation of this document."""
991 if dom is None:
992 import xml.dom.minidom as dom
993 domroot = dom.Document()
994 domroot.appendChild(self._dom_node(domroot))
995 return domroot
996
997 def set_id(self, node, msgnode=None):
998 for id in node['ids']:
999 if id in self.ids and self.ids[id] is not node:
1000 msg = self.reporter.severe('Duplicate ID: "%s".' % id)
1001 if msgnode != None:
1002 msgnode += msg
1003 if not node['ids']:
1004 for name in node['names']:
1005 id = self.settings.id_prefix + make_id(name)
1006 if id and id not in self.ids:
1007 break
1008 else:
1009 id = ''
1010 while not id or id in self.ids:
1011 id = (self.settings.id_prefix +
1012 self.settings.auto_id_prefix + str(self.id_start))
1013 self.id_start += 1
1014 node['ids'].append(id)
1015 self.ids[id] = node
1016 return id
1017
def set_name_id_map(self, node, id, msgnode=None, explicit=None):
    """
    Register every name of `node` as referring to `id`.

    `self.nameids` maps names to IDs, while `self.nametypes` maps names to
    booleans representing hyperlink type (True==explicit,
    False==implicit).  This method updates the mappings.

    A name that is not yet registered is simply recorded.  A name that is
    already a key of `self.nameids` is handed to
    `set_duplicate_name_id()`, together with `msgnode` and `explicit`.

    The following state transition table shows how `self.nameids` ("ids")
    and `self.nametypes` ("types") change with new input (a call to this
    method), and what actions are performed ("implicit"-type system
    messages are INFO/1, and "explicit"-type system messages are ERROR/3):

    ====  =====  ========  ========  =======  ====  =====  =====
     Old State    Input          Action        New State   Notes
    -----------  --------  -----------------  -----------  -----
    ids   types  new type  sys.msg.  dupname  ids   types
    ====  =====  ========  ========  =======  ====  =====  =====
    -     -      explicit  -         -        new   True
    -     -      implicit  -         -        new   False
    None  False  explicit  -         -        new   True
    old   False  explicit  implicit  old      new   True
    None  True   explicit  explicit  new      None  True
    old   True   explicit  explicit  new,old  None  True   [#]_
    None  False  implicit  implicit  new      None  False
    old   False  implicit  implicit  new,old  None  False
    None  True   implicit  implicit  new      None  True
    old   True   implicit  implicit  new      old   True
    ====  =====  ========  ========  =======  ====  =====  =====

    .. [#] Do not clear the name-to-id map or invalidate the old target if
       both old and new targets are external and refer to identical URIs.
       The new target is invalidated regardless.

    NOTE(review): `set_duplicate_name_id()` passes ``level=2`` (or 1 for
    identical URIs) for "explicit"-type messages, not 3 as stated above;
    confirm which one is intended.
    """
    # Iterate over a snapshot of the names: `set_duplicate_name_id()` may
    # pass `node` to `dupname()`, which removes the name from
    # ``node['names']``.  Looping over the live list would then silently
    # skip the name that follows each duplicate.
    for name in tuple(node['names']):
        if name in self.nameids:
            self.set_duplicate_name_id(node, id, name, msgnode, explicit)
        else:
            self.nameids[name] = id
            self.nametypes[name] = explicit
1056
def set_duplicate_name_id(self, node, id, name, msgnode, explicit):
    """
    Resolve a clash for `name`, which is already a key of `self.nameids`.

    Depending on whether the previously registered target and the new
    one (`node`, with ID `id`) are explicit or implicit, this updates
    ``self.nameids[name]`` and ``self.nametypes[name]``, passes the
    affected node(s) to `dupname()`, and creates a system message via
    `self.reporter`.  The message is added to `msgnode` unless `msgnode`
    is None.  The docstring of `set_name_id_map()` tabulates the
    transitions.
    """
    old_id = self.nameids[name]
    old_explicit = self.nametypes[name]
    # Once a name has been used explicitly it stays "explicit".
    self.nametypes[name] = old_explicit or explicit
    if explicit:
        if old_explicit:
            # Two explicit targets share the name.
            level = 2
            if old_id is not None:
                old_node = self.ids[old_id]
                if 'refuri' in node:
                    refuri = node['refuri']
                    if old_node['names'] \
                           and 'refuri' in old_node \
                           and old_node['refuri'] == refuri:
                        level = 1   # just inform if refuri's identical
                if level > 1:
                    # Invalidate the old target and clear the mapping.
                    dupname(old_node, name)
                    self.nameids[name] = None
            msg = self.reporter.system_message(
                level, 'Duplicate explicit target name: "%s".' % name,
                backrefs=[id], base_node=node)
            # Identity test: must not depend on how nodes compare.
            if msgnode is not None:
                msgnode += msg
            # The new target loses the name in every case.
            dupname(node, name)
        else:
            # An explicit target takes the name over from an implicit one.
            self.nameids[name] = id
            if old_id is not None:
                old_node = self.ids[old_id]
                dupname(old_node, name)
    else:
        # A new implicit target never keeps the name; an older implicit
        # target that still owns it is invalidated as well.
        if old_id is not None and not old_explicit:
            self.nameids[name] = None
            old_node = self.ids[old_id]
            dupname(old_node, name)
        dupname(node, name)
    if not explicit or (not old_explicit and old_id is not None):
        msg = self.reporter.info(
            'Duplicate implicit target name: "%s".' % name,
            backrefs=[id], base_node=node)
        if msgnode is not None:
            msgnode += msg
1098
def has_name(self, name):
    """Return whether `name` is a key of `self.nameids`."""
    registry = self.nameids
    return name in registry
1101
# In the "note_*" method names, "note" is an imperative verb meaning
# "take note of".
def note_implicit_target(self, target, msgnode=None):
    """Assign an ID to `target` and register its names as implicit."""
    target_id = self.set_id(target, msgnode)
    self.set_name_id_map(target, target_id, msgnode, explicit=None)
1106
def note_explicit_target(self, target, msgnode=None):
    """Assign an ID to `target` and register its names as explicit."""
    target_id = self.set_id(target, msgnode)
    self.set_name_id_map(target, target_id, msgnode, explicit=1)
1110
def note_refname(self, node):
    """Add `node` to the `self.refnames` list kept for its 'refname'."""
    referrers = self.refnames.setdefault(node['refname'], [])
    referrers.append(node)
1113
def note_refid(self, node):
    """Add `node` to the `self.refids` list kept for its 'refid'."""
    referrers = self.refids.setdefault(node['refid'], [])
    referrers.append(node)
1116
def note_indirect_target(self, target):
    """
    Append `target` to `self.indirect_targets`.

    A target with a non-empty 'names' list is also passed to
    `note_refname()`.
    """
    self.indirect_targets.append(target)
    if not target['names']:
        return
    self.note_refname(target)
1121
def note_anonymous_target(self, target):
    """Assign an ID to `target`; nothing else is recorded."""
    assign_id = self.set_id
    assign_id(target)
1124
def note_autofootnote(self, footnote):
    """Assign an ID to `footnote`, then add it to `self.autofootnotes`."""
    self.set_id(footnote)
    collected = self.autofootnotes
    collected.append(footnote)
1128
def note_autofootnote_ref(self, ref):
    """Assign an ID to `ref`, then add it to `self.autofootnote_refs`."""
    self.set_id(ref)
    collected = self.autofootnote_refs
    collected.append(ref)
1132
def note_symbol_footnote(self, footnote):
    """Assign an ID to `footnote`, then add it to `self.symbol_footnotes`."""
    self.set_id(footnote)
    collected = self.symbol_footnotes
    collected.append(footnote)
1136
def note_symbol_footnote_ref(self, ref):
    """Assign an ID to `ref`, then add it to `self.symbol_footnote_refs`."""
    self.set_id(ref)
    collected = self.symbol_footnote_refs
    collected.append(ref)
1140
def note_footnote(self, footnote):
    """Assign an ID to `footnote`, then add it to `self.footnotes`."""
    self.set_id(footnote)
    collected = self.footnotes
    collected.append(footnote)
1144
def note_footnote_ref(self, ref):
    """
    Assign an ID to `ref` and index it by its 'refname'.

    The reference is added to the `self.footnote_refs` list for that
    name and then passed to `note_refname()`.
    """
    self.set_id(ref)
    same_name = self.footnote_refs.setdefault(ref['refname'], [])
    same_name.append(ref)
    self.note_refname(ref)
1149
def note_citation(self, citation):
    """Add `citation` to `self.citations` (no ID is assigned here)."""
    collected = self.citations
    collected.append(citation)
1152
def note_citation_ref(self, ref):
    """
    Assign an ID to `ref` and index it by its 'refname'.

    The reference is added to the `self.citation_refs` list for that
    name and then passed to `note_refname()`.
    """
    self.set_id(ref)
    same_name = self.citation_refs.setdefault(ref['refname'], [])
    same_name.append(ref)
    self.note_refname(ref)
1157
def note_substitution_def(self, subdef, def_name, msgnode=None):
    """
    Register the substitution definition `subdef` under `def_name`.

    `def_name` is whitespace-normalized before use.  If a definition is
    already stored under the normalized name, an error is reported
    through `self.reporter` (and added to `msgnode` unless that is
    None), the older definition is passed to `dupname()`, and `subdef`
    replaces it in `self.substitution_defs`.  In all cases
    `self.substitution_names` maps the fully normalized (lower-cased)
    name to the whitespace-normalized one.
    """
    name = whitespace_normalize_name(def_name)
    if name in self.substitution_defs:
        msg = self.reporter.error(
              'Duplicate substitution definition name: "%s".' % name,
              base_node=subdef)
        # Identity test: must not depend on how nodes compare.
        if msgnode is not None:
            msgnode += msg
        oldnode = self.substitution_defs[name]
        dupname(oldnode, name)
    # keep only the last definition:
    self.substitution_defs[name] = subdef
    # case-insensitive mapping:
    self.substitution_names[fully_normalize_name(name)] = name
1172
def note_substitution_ref(self, subref, refname):
    """Store the whitespace-normalized `refname` as ``subref['refname']``."""
    normalized = whitespace_normalize_name(refname)
    subref['refname'] = normalized
1175
def note_pending(self, pending, priority=None):
    """Hand `pending` and `priority` to ``self.transformer.add_pending()``."""
    transformer = self.transformer
    transformer.add_pending(pending, priority)
1178
def note_parse_message(self, message):
    """Add `message` to `self.parse_messages`."""
    collected = self.parse_messages
    collected.append(message)
1181
def note_transform_message(self, message):
    """Add `message` to `self.transform_messages`."""
    collected = self.transform_messages
    collected.append(message)
1184
def note_source(self, source, offset):
    """
    Record the current input position.

    `source` is stored as `self.current_source`.  `self.current_line`
    becomes ``offset + 1``, or None when `offset` is None.
    """
    self.current_source = source
    if offset is not None:
        self.current_line = offset + 1
        return
    self.current_line = offset
1191
def copy(self):
    """
    Return a new instance of this object's class.

    The new instance is constructed from the same `settings` and
    `reporter`, with `self.attributes` passed as keyword arguments.
    """
    cls = self.__class__
    return cls(self.settings, self.reporter, **self.attributes)
1195
def get_decoration(self):
    """
    Return `self.decoration`, creating it on first use.

    A newly created `decoration` node is inserted at the index returned
    by ``first_child_not_matching_class(Titular)``, or appended when that
    call returns None.
    """
    if self.decoration:
        return self.decoration
    self.decoration = decoration()
    position = self.first_child_not_matching_class(Titular)
    if position is not None:
        self.insert(position, self.decoration)
    else:
        self.append(self.decoration)
    return self.decoration
1205
1206
1207# ================
1208# Title Elements
1209# ================
1210
# Title element classes.  None of them adds behaviour of its own; each is
# fully defined by its base classes.

class title(Titular, PreBibliographic, TextElement):
    pass


class subtitle(Titular, PreBibliographic, TextElement):
    pass


class rubric(Titular, TextElement):
    pass
1214
1215
1216# ========================
1217# Bibliographic Elements
1218# ========================
1219
# Bibliographic element classes.  None of them adds behaviour of its own;
# each is fully defined by its base classes.

class docinfo(Bibliographic, Element):
    pass


class author(Bibliographic, TextElement):
    pass


class authors(Bibliographic, Element):
    pass


class organization(Bibliographic, TextElement):
    pass


class address(Bibliographic, FixedTextElement):
    pass


class contact(Bibliographic, TextElement):
    pass


class version(Bibliographic, TextElement):
    pass


class revision(Bibliographic, TextElement):
    pass


class status(Bibliographic, TextElement):
    pass


class date(Bibliographic, TextElement):
    pass


class copyright(Bibliographic, TextElement):
    pass
1231
1232
1233# =====================
1234# Decorative Elements
1235# =====================
1236
class decoration(Decorative, Element):

    """
    Decorative element that supplies its `header` (first child) and
    `footer` (last child) on demand.
    """

    def get_header(self):
        """
        Return the first child, which is guaranteed to be a `header`.

        A new `header` is inserted at index 0 when there are no children
        or the first child is not a `header`.
        """
        kids = self.children
        if not (len(kids) and isinstance(kids[0], header)):
            self.insert(0, header())
        return self.children[0]

    def get_footer(self):
        """
        Return the last child, which is guaranteed to be a `footer`.

        A new `footer` is appended when there are no children or the last
        child is not a `footer`.
        """
        kids = self.children
        if not (len(kids) and isinstance(kids[-1], footer)):
            self.append(footer())
        return self.children[-1]
1248
1249
# The two element classes created by `decoration.get_header()` and
# `decoration.get_footer()`.

class header(Decorative, Element):
    pass


class footer(Decorative, Element):
    pass
1252
1253
1254# =====================
1255# Structural Elements
1256# =====================
1257
class section(Structural, Element):
    # No behaviour beyond the base classes.
    pass
1259
1260
class topic(Structural, Element):

    """
    A terminal ("leaf") mini-section, comparable to a block quote with a
    title or to a textual figure.  It behaves like a section, except that
    it cannot contain subsections and is exempt from the section placement
    rules.

    A topic may appear wherever body elements (list, table, etc.) may
    appear, but only at the top level of a section or document.  Topics
    do not nest inside topics, sidebars, or body elements: there is no
    such thing as a topic inside a table, list, block quote, etc.
    """
1273
1274
class sidebar(Structural, Element):

    """
    A miniature, parallel document placed inside another document to
    provide related or reference material.  A sidebar is typically set
    off by a border and "floats" to the side of the page, with the
    document's main text possibly flowing around it.  It can also be seen
    as a super-footnote: its content lies outside the flow of the
    document's main text.

    A sidebar may appear wherever body elements (list, table, etc.) may
    appear, but only at the top level of a section or document.  Sidebars
    do not nest inside sidebars, topics, or body elements: there is no
    such thing as a sidebar inside a table, list, block quote, etc.
    """
1290
1291
class transition(Structural, Element):
    # No behaviour beyond the base classes.
    pass
1293
1294
1295# ===============
1296# Body Elements
1297# ===============
1298
# Body element classes (first group).  None of them adds behaviour of its
# own; each is fully defined by its base classes.

class paragraph(General, TextElement):
    pass


class compound(General, Element):
    pass


class container(General, Element):
    pass


class bullet_list(Sequential, Element):
    pass


class enumerated_list(Sequential, Element):
    pass


class list_item(Part, Element):
    pass


class definition_list(Sequential, Element):
    pass


class definition_list_item(Part, Element):
    pass


class term(Part, TextElement):
    pass


class classifier(Part, TextElement):
    pass


class definition(Part, Element):
    pass


class field_list(Sequential, Element):
    pass


class field(Part, Element):
    pass


class field_name(Part, TextElement):
    pass


class field_body(Part, Element):
    pass
1314
1315
class option(Part, Element):

    """Option element; overrides `child_text_separator` with ''."""

    child_text_separator = ''
1319
1320
class option_argument(Part, TextElement):

    """Option argument whose text is prefixed by its delimiter."""

    def astext(self):
        """
        Return the 'delimiter' attribute (a single space when the
        attribute is absent) followed by the element's own text.
        """
        delimiter = self.get('delimiter', ' ')
        return delimiter + TextElement.astext(self)
1325
1326
class option_group(Part, Element):

    """Option group element; overrides `child_text_separator` with ', '."""

    child_text_separator = ', '
1330
1331
class option_list(Sequential, Element):
    # No behaviour beyond the base classes.
    pass
1333
1334
class option_list_item(Part, Element):

    """Option list item element; overrides `child_text_separator`."""

    # NOTE(review): this chunk reached review with runs of whitespace
    # collapsed; confirm the width of this literal against the file on disk.
    child_text_separator = ' '
1338
1339
# Body element classes (second group).  None of them adds behaviour of its
# own; each is fully defined by its base classes.

class option_string(Part, TextElement):
    pass


class description(Part, Element):
    pass


class literal_block(General, FixedTextElement):
    pass


class doctest_block(General, FixedTextElement):
    pass


class line_block(General, Element):
    pass
1345
1346
class line(Part, TextElement):

    """Line element with a class-level `indent` attribute."""

    # Class-level default; no indent recorded.
    indent = None
1350
1351
# Body element classes (third group).  None of them adds behaviour of its
# own; each is fully defined by its base classes.

class block_quote(General, Element):
    pass


class attribution(Part, TextElement):
    pass


class attention(Admonition, Element):
    pass


class caution(Admonition, Element):
    pass


class danger(Admonition, Element):
    pass


class error(Admonition, Element):
    pass


class important(Admonition, Element):
    pass


class note(Admonition, Element):
    pass


class tip(Admonition, Element):
    pass


class hint(Admonition, Element):
    pass


class warning(Admonition, Element):
    pass


class admonition(Admonition, Element):
    pass


class comment(Special, Invisible, FixedTextElement):
    pass


class substitution_definition(Special, Invisible, TextElement):
    pass


class target(Special, Invisible, Inline, TextElement, Targetable):
    pass


class footnote(General, BackLinkable, Element, Labeled, Targetable):
    pass


class citation(General, BackLinkable, Element, Labeled, Targetable):
    pass


class label(Part, TextElement):
    pass


class figure(General, Element):
    pass


class caption(Part, TextElement):
    pass


class legend(Part, Element):
    pass


class table(General, Element):
    pass


class tgroup(Part, Element):
    pass


class colspec(Part, Element):
    pass


class thead(Part, Element):
    pass


class tbody(Part, Element):
    pass


class row(Part, Element):
    pass


class entry(Part, Element):
    pass
1380
1381
class system_message(Special, BackLinkable, PreBibliographic, Element):

    """
    System message element.

    Do not instantiate this class directly; use
    ``document.reporter.info/warning/error/severe()`` instead.
    """

    def __init__(self, message=None, *children, **attributes):
        """
        Build the element from `message`, `children` and `attributes`.

        A non-empty `message` is wrapped in a `paragraph` that becomes the
        first child.  If the base-class constructor raises, the children
        are dumped for debugging and the exception propagates unchanged.
        """
        if message:
            p = paragraph('', message)
            children = (p,) + children
        try:
            Element.__init__(self, '', *children, **attributes)
        except:
            # Debugging aid only; the exception is always re-raised.  The
            # dump goes to stderr so that it cannot end up in a document
            # being written to stdout, and `write()` is used because the
            # `print` statement is Python-2-only syntax.
            sys.stderr.write('system_message: children=%r\n' % (children,))
            raise

    def astext(self):
        """
        Return ``source:line: (type/level) text`` as a unicode string.

        'line' may be absent (rendered as an empty string); 'source',
        'type' and 'level' are read with item access and must be present.
        """
        line = self.get('line', '')
        return u'%s:%s: (%s/%s) %s' % (self['source'], line, self['type'],
                                       self['level'], Element.astext(self))
1405
1406
class pending(Special, Invisible, Element):

    """
    Placeholder for an operation that has to be carried out later.

    A "pending" element encapsulates a pending operation: the operation
    itself (a transform), the point at which to apply it, and any data it
    requires.  The public document tree only stores *where* the operation
    applies (the position of the "pending" object); the operation and its
    data live in the object's internal instance attributes.

    For example, say you want a table of contents in your reStructuredText
    document.  The easiest way to specify where to put it is from within
    the document, with a directive::

        .. contents::

    The "contents" directive cannot do its work before the entire document
    has been parsed and possibly transformed to some extent, so the
    directive code leaves a placeholder behind that will trigger the second
    phase of its processing, something like this::

        <pending ...public attributes...> + internal attributes

    Use `document.note_pending()` so that the
    `docutils.transforms.Transformer` stage of processing can run all
    pending transforms.
    """

    def __init__(self, transform, details=None,
                 rawsource='', *children, **attributes):
        Element.__init__(self, rawsource, *children, **attributes)

        # The `docutils.transforms.Transform` class implementing the
        # pending operation.
        self.transform = transform

        # Detail data (dictionary) required by the pending operation; a
        # falsy `details` argument is replaced by a new empty dictionary.
        self.details = details or {}

    def pformat(self, indent=' ', level=0):
        """
        Return the element's `Element.pformat()` text followed by a dump
        of the internal attributes (`transform` and the sorted `details`).
        """
        # NOTE(review): this chunk reached review with runs of whitespace
        # collapsed; confirm the widths of the string literals in this
        # method (including the `indent` default) against the file on disk.
        internals = ['.. internal attributes:']
        internals.append(' .transform: %s.%s' % (self.transform.__module__,
                                                 self.transform.__name__))
        internals.append(' .details:')
        details = list(self.details.items())
        details.sort()
        for key, value in details:
            if isinstance(value, Node):
                # A single node: dump it below a heading for the key.
                nested = [value]
            elif value and isinstance(value, list) \
                     and isinstance(value[0], Node):
                # A non-empty list starting with a node: dump every item.
                nested = value
            else:
                internals.append('%7s%s: %r' % ('', key, value))
                continue
            internals.append('%7s%s:' % ('', key))
            for item in nested:
                for text in item.pformat().splitlines():
                    internals.append('%9s%s' % ('', text))
        prefix = indent * level
        dump = ''.join([(' %s%s\n' % (prefix, text)) for text in internals])
        return Element.pformat(self, indent, level) + dump

    def copy(self):
        """
        Return a new instance built from the same transform, details,
        rawsource and attributes (the `details` object is passed as is).
        """
        cls = self.__class__
        return cls(self.transform, self.details, self.rawsource,
                   **self.attributes)
1474
1475
class raw(Special, Inline, PreBibliographic, FixedTextElement):

    """
    Container for raw data, which is handed to the Writer untouched.
    """
1483
1484
1485# =================
1486# Inline Elements
1487# =================
1488
# Inline element classes (first group).  None of them adds behaviour of
# its own; each is fully defined by its base classes.

class emphasis(Inline, TextElement):
    pass


class strong(Inline, TextElement):
    pass


class literal(Inline, TextElement):
    pass


class reference(General, Inline, Referential, TextElement):
    pass


class footnote_reference(Inline, Referential, TextElement):
    pass


class citation_reference(Inline, Referential, TextElement):
    pass


class substitution_reference(Inline, TextElement):
    pass


class title_reference(Inline, TextElement):
    pass


class abbreviation(Inline, TextElement):
    pass


class acronym(Inline, TextElement):
    pass


class superscript(Inline, TextElement):
    pass


class subscript(Inline, TextElement):
    pass
1501
1502
class image(General, Inline, Element):

    """Image element whose text form is its 'alt' attribute."""

    def astext(self):
        """Return the 'alt' attribute, or '' when it is absent."""
        alternative = self.get('alt', '')
        return alternative
1507
1508
# Inline element classes (second group).  None of them adds behaviour of
# its own; each is fully defined by its base classes.

class inline(Inline, TextElement):
    pass


class problematic(Inline, TextElement):
    pass


class generated(Inline, TextElement):
    pass
1512
1513
1514# ========================================
1515# Auxiliary Classes, Functions, and Data
1516# ========================================
1517
# Whitespace-separated table of class names, turned into a list of strings
# by `split()` when the module is imported.  The list is passed to
# `_add_node_class_names()` further down, which generates the per-class
# ``visit_...``/``depart_...`` methods of the generic and sparse visitors.
node_class_names = """
    Text
    abbreviation acronym address admonition attention attribution author
        authors
    block_quote bullet_list
    caption caution citation citation_reference classifier colspec comment
        compound contact container copyright
    danger date decoration definition definition_list definition_list_item
        description docinfo doctest_block document
    emphasis entry enumerated_list error
    field field_body field_list field_name figure footer
        footnote footnote_reference
    generated
    header hint
    image important inline
    label legend line line_block list_item literal literal_block
    note
    option option_argument option_group option_list option_list_item
        option_string organization
    paragraph pending problematic
    raw reference revision row rubric
    section sidebar status strong subscript substitution_definition
        substitution_reference subtitle superscript system_message
    table target tbody term tgroup thead tip title title_reference topic
        transition
    version
    warning""".split()
"""A list of names of all concrete Node subclasses."""
1546
1547
class NodeVisitor:

    """
    Abstract superclass implementing the "Visitor" pattern [GoF95]_ for
    document tree traversals.

    There is one pair of methods per node class; they do nothing by
    default and are meant to be overridden individually for specific and
    useful behaviour.  `Node.walk()` calls `dispatch_visit()` upon
    entering a node; `Node.walkabout()` additionally calls
    `dispatch_departure()` before exiting a node.

    The dispatch methods call "``visit_`` + node class name" or
    "``depart_`` + node class name", respectively.

    Use this class as the base for visitors whose ``visit_...`` &
    ``depart_...`` methods should be implemented for *all* node types
    encountered (such as `docutils.writers.Writer` subclasses).
    Unimplemented methods will raise exceptions.

    Subclass `SparseNodeVisitor` instead for sparse traversals, where only
    certain node types are of interest.  Subclass `GenericNodeVisitor`
    when (mostly or entirely) uniform processing is desired.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    # Tuple containing node class names (as strings).
    #
    # No exception will be raised if writers do not implement visit or
    # departure functions for these node classes.
    #
    # Used to ensure transitional compatibility with existing 3rd-party
    # writers.
    optional = ()

    def __init__(self, document):
        self.document = document

    def dispatch_visit(self, node):
        """
        Look up "``visit_`` + node class name" on this visitor and call it
        with `node`; fall back to `unknown_visit` if there is no such
        method.  The call is logged through the reporter's `debug()`.
        """
        class_name = node.__class__.__name__
        handler = getattr(self, 'visit_' + class_name, self.unknown_visit)
        report = ('docutils.nodes.NodeVisitor.dispatch_visit calling %s for %s'
                  % (handler.__name__, class_name))
        self.document.reporter.debug(report)
        return handler(node)

    def dispatch_departure(self, node):
        """
        Look up "``depart_`` + node class name" on this visitor and call
        it with `node`; fall back to `unknown_departure` if there is no
        such method.  The call is logged through the reporter's `debug()`.
        """
        class_name = node.__class__.__name__
        handler = getattr(self, 'depart_' + class_name,
                          self.unknown_departure)
        report = ('docutils.nodes.NodeVisitor.dispatch_departure calling %s '
                  'for %s' % (handler.__name__, class_name))
        self.document.reporter.debug(report)
        return handler(node)

    def unknown_visit(self, node):
        """
        Handle entry into a `Node` type without a ``visit_...`` method.

        Raises NotImplementedError, unless the node class is listed in
        `optional` and the `strict_visitor` setting is off.  Override to
        change this.
        """
        if (not self.document.settings.strict_visitor
            and node.__class__.__name__ in self.optional):
            return
        raise NotImplementedError(
            '%s visiting unknown node type: %s'
            % (self.__class__, node.__class__.__name__))

    def unknown_departure(self, node):
        """
        Handle exit from a `Node` type without a ``depart_...`` method.

        Raises NotImplementedError, unless the node class is listed in
        `optional` and the `strict_visitor` setting is off.  Override to
        change this.
        """
        if (not self.document.settings.strict_visitor
            and node.__class__.__name__ in self.optional):
            return
        raise NotImplementedError(
            '%s departing unknown node type: %s'
            % (self.__class__, node.__class__.__name__))
1639
1640
class SparseNodeVisitor(NodeVisitor):

    """
    Visitor base class for sparse traversals, i.e. traversals that care
    about certain node types only.  Subclass `NodeVisitor` instead when
    ``visit_...`` & ``depart_...`` methods should be implemented for *all*
    node types (such as for `docutils.writers.Writer` subclasses).
    """
1649
1650
class GenericNodeVisitor(NodeVisitor):

    """
    Abstract "Visitor" superclass for simple, generic traversals.

    Each ``visit_...`` method calls `default_visit()` unless overridden,
    and each ``depart_...`` method (when using `Node.walkabout()`) calls
    `default_departure()`.  Subclasses must override `default_visit()`
    (and `default_departure()`).

    A fully generic visitor overrides `default_visit()` (and
    `default_departure()`) only.  A semi-generic visitor additionally
    overrides individual ``visit_...()`` (and ``depart_...()``) methods.

    `NodeVisitor.unknown_visit()` (`NodeVisitor.unknown_departure()`)
    should be overridden for default behavior.
    """

    def default_visit(self, node):
        """Hook for generic, uniform traversals; must be overridden."""
        raise NotImplementedError()

    def default_departure(self, node):
        """Hook for generic, uniform traversals; must be overridden."""
        raise NotImplementedError()
1676
def _call_default_visit(self, node):
    """Forward `node` to ``self.default_visit()``; the result is dropped."""
    handler = self.default_visit
    handler(node)
1679
def _call_default_departure(self, node):
    """Forward `node` to ``self.default_departure()``; the result is dropped."""
    handler = self.default_departure
    handler(node)
1682
def _nop(self, node):
    """Do nothing; installed as the visit/depart method of sparse visitors."""
    return None
1685
def _add_node_class_names(names):
    """
    Install ``visit_<name>`` and ``depart_<name>`` methods for each name.

    On `GenericNodeVisitor` they delegate to the default handlers; on
    `SparseNodeVisitor` they do nothing.  Generating the methods here
    saves writing them out by hand.
    """
    for class_name in names:
        visit = 'visit_' + class_name
        depart = 'depart_' + class_name
        setattr(GenericNodeVisitor, visit, _call_default_visit)
        setattr(GenericNodeVisitor, depart, _call_default_departure)
        setattr(SparseNodeVisitor, visit, _nop)
        setattr(SparseNodeVisitor, depart, _nop)

_add_node_class_names(node_class_names)
1695
1696
class TreeCopyVisitor(GenericNodeVisitor):

    """
    Visitor that builds a complete copy of a tree or branch, element
    attributes included.
    """

    def __init__(self, document):
        GenericNodeVisitor.__init__(self, document)
        # `parent` starts out as a plain list, so the copy of the first
        # node visited ends up as its first item.
        self.parent = []
        # Acting parents that were in effect before each visited node.
        self.parent_stack = []

    def get_tree_copy(self):
        """Return the first item of the current acting parent."""
        return self.parent[0]

    def default_visit(self, node):
        """Copy `node`, attach the copy to the acting parent, and make the
        copy the new acting parent."""
        duplicate = node.copy()
        enclosing = self.parent
        enclosing.append(duplicate)
        self.parent_stack.append(enclosing)
        self.parent = duplicate

    def default_departure(self, node):
        """Make the previous acting parent current again."""
        previous = self.parent_stack.pop()
        self.parent = previous
1721
1722
class TreePruningException(Exception):

    """
    Common base of the tree pruning exceptions used with `NodeVisitor`.

    To prune the tree being traversed, raise one of the subclasses from
    within a ``visit_...`` or ``depart_...`` method called from a
    `Node.walk()` or `Node.walkabout()` tree traversal.
    """
1734
1735
class SkipChildren(TreePruningException):

    """
    Skip all children of the current node.  Its siblings and its
    ``depart_...`` method are not affected.
    """
1744
1745
class SkipSiblings(TreePruningException):

    """
    Skip the remaining siblings (those to the right) of the current node.
    Its children and its ``depart_...`` method are not affected.
    """
1754
1755
class SkipNode(TreePruningException):

    """
    Skip the children of the current node as well as the call to its
    ``depart_...`` method.
    """
1764
1765
class SkipDeparture(TreePruningException):

    """
    Skip the call to the current node's ``depart_...`` method.  Its
    children and siblings are not affected.
    """
1774
1775
class NodeFound(TreePruningException):

    """
    Signal that the target of a search has been found.  The traversal
    code does not catch this exception; the client must catch it.
    """
1785
1786
class StopTraversal(TreePruningException):

    """
    Stop the traversal altogether.  The current node's ``depart_...``
    method is not affected, and the ``depart_...`` methods of the parent
    nodes are called as usual, but no other nodes are visited.  Unlike
    `NodeFound`, this does not cause exception handling to trickle up to
    the caller.
    """
1798
1799
def make_id(string):
    """
    Return an identifier derived from `string`.

    Docutils identifiers will conform to the regular expression
    ``[a-z](-?[a-z0-9]+)*``.  For CSS compatibility, identifiers (the "class"
    and "id" attributes) should have no underscores, colons, or periods.
    Hyphens may be used.

    - The `HTML 4.01 spec`_ defines identifiers based on SGML tokens:

          ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
          followed by any number of letters, digits ([0-9]), hyphens ("-"),
          underscores ("_"), colons (":"), and periods (".").

    - However the `CSS1 spec`_ defines identifiers based on the "name" token,
      a tighter interpretation ("flex" tokenizer notation; "latin1" and
      "escape" 8-bit characters have been replaced with entities)::

          unicode     \\[0-9a-f]{1,4}
          latin1      [&iexcl;-&yuml;]
          escape      {unicode}|\\[ -~&iexcl;-&yuml;]
          nmchar      [-a-z0-9]|{latin1}|{escape}
          name        {nmchar}+

    The CSS1 "nmchar" rule does not include underscores ("_"), colons (":"),
    or periods ("."), therefore "class" and "id" attributes should not contain
    these characters.  They should be replaced with hyphens ("-").  Combined
    with HTML's requirements (the first character must be a letter; no
    "unicode", "latin1", or "escape" characters), this results in the
    ``[a-z](-?[a-z0-9]+)*`` pattern.

    .. _HTML 4.01 spec: http://www.w3.org/TR/html401
    .. _CSS1 spec: http://www.w3.org/TR/REC-CSS1
    """
    ident = string.lower()
    if not isinstance(ident, unicode):
        ident = ident.decode()
    # Replace the characters listed in the two module-level tables,
    # digraphs first.
    for table in (_non_id_translate_digraphs, _non_id_translate):
        ident = ident.translate(table)
    # Decompose what is left and drop every non-ASCII character.
    decomposed = unicodedata.normalize('NFKD', ident)
    ident = decomposed.encode('ascii', 'ignore').decode('ascii')
    # Collapse whitespace runs, then turn each run of characters other
    # than a-z and 0-9 into a single hyphen.
    collapsed = ' '.join(ident.split())
    hyphenated = _non_id_chars.sub('-', collapsed)
    # Strip leading hyphens/digits and trailing hyphens.
    return str(_non_id_at_ends.sub('', hyphenated))
1848
# Helper data for `make_id()`.

# Matches a run of characters other than lower-case ASCII letters and
# digits; `make_id()` replaces each such run with a single hyphen.
_non_id_chars = re.compile('[^a-z0-9]+')
# Matches a leading run of hyphens and digits, or a trailing run of
# hyphens; `make_id()` removes both.
_non_id_at_ends = re.compile('^[-0-9]+|-+$')
# Translation table (code point -> replacement) applied by `make_id()`
# before the NFKD normalization step; each letter maps to a plain ASCII
# letter.
# NOTE(review): presumably these are letters that NFKD normalization does
# not decompose -- confirm.
_non_id_translate = {
    0x00f8: u'o',       # o with stroke
    0x0111: u'd',       # d with stroke
    0x0127: u'h',       # h with stroke
    0x0131: u'i',       # dotless i
    0x0142: u'l',       # l with stroke
    0x0167: u't',       # t with stroke
    0x0180: u'b',       # b with stroke
    0x0183: u'b',       # b with topbar
    0x0188: u'c',       # c with hook
    0x018c: u'd',       # d with topbar
    0x0192: u'f',       # f with hook
    0x0199: u'k',       # k with hook
    0x019a: u'l',       # l with bar
    0x019e: u'n',       # n with long right leg
    0x01a5: u'p',       # p with hook
    0x01ab: u't',       # t with palatal hook
    0x01ad: u't',       # t with hook
    0x01b4: u'y',       # y with hook
    0x01b6: u'z',       # z with stroke
    0x01e5: u'g',       # g with stroke
    0x0225: u'z',       # z with hook
    0x0234: u'l',       # l with curl
    0x0235: u'n',       # n with curl
    0x0236: u't',       # t with curl
    0x0237: u'j',       # dotless j
    0x023c: u'c',       # c with stroke
    0x023f: u's',       # s with swash tail
    0x0240: u'z',       # z with swash tail
    0x0247: u'e',       # e with stroke
    0x0249: u'j',       # j with stroke
    0x024b: u'q',       # q with hook tail
    0x024d: u'r',       # r with stroke
    0x024f: u'y',       # y with stroke
}
# Translation table for characters that are replaced by two ASCII letters;
# `make_id()` applies it before `_non_id_translate`.
_non_id_translate_digraphs = {
    0x00df: u'sz',      # ligature sz
    0x00e6: u'ae',      # ae
    0x0153: u'oe',      # ligature oe
    0x0238: u'db',      # db digraph
    0x0239: u'qp',      # qp digraph
}
1893
def dupname(node, name):
    """
    Move `name` from ``node['names']`` to ``node['dupnames']`` and flag
    `node` as referenced.
    """
    duplicates = node['dupnames']
    duplicates.append(name)
    active = node['names']
    active.remove(name)
    # Pretend that the node is referenced, even though it isn't; we don't
    # want to throw unnecessary system_messages.
    setattr(node, 'referenced', 1)
1900
def fully_normalize_name(name):
    """Return `name` lower-cased, with whitespace runs collapsed to single
    spaces and surrounding whitespace removed."""
    words = name.lower().split()
    return ' '.join(words)
1904
def whitespace_normalize_name(name):
    """Return `name` with whitespace runs collapsed to single spaces and
    surrounding whitespace removed; case is kept."""
    words = name.split()
    return ' '.join(words)
1908
def serial_escape(value):
    """
    Escape a string that is an element of a list, for serialization.

    Backslashes are doubled first; then every space gets a backslash in
    front of it.
    """
    escaped = value.replace('\\', r'\\')
    return escaped.replace(' ', r'\ ')
1912
1913#
1914#
1915# Local Variables:
1916# indent-tabs-mode: nil
1917# sentence-end-double-space: t
1918# fill-column: 78
1919# End: