| Tor Norbye | 3a2425a | 2013-11-04 10:16:08 -0800 | [diff] [blame^] | 1 | # |
| 2 | # epytext.py: epydoc formatted docstring parsing |
| 3 | # Edward Loper |
| 4 | # |
| 5 | # Created [04/10/01 12:00 AM] |
| 6 | # $Id: epytext.py 1652 2007-09-26 04:45:34Z edloper $ |
| 7 | # |
| 8 | |
| 9 | """ |
| 10 | Parser for epytext strings. Epytext is a lightweight markup whose |
| 11 | primary intended application is Python documentation strings. This |
| 12 | parser converts Epytext strings to a simple DOM-like representation |
| 13 | (encoded as a tree of L{Element} objects and strings). Epytext |
| 14 | strings can contain the following X{structural blocks}: |
| 15 | |
| 16 | - X{epytext}: The top-level element of the DOM tree. |
| 17 | - X{para}: A paragraph of text. Paragraphs contain no newlines, |
| 18 | and all spaces are soft. |
| 19 | - X{section}: A section or subsection. |
| 20 | - X{field}: A tagged field. These fields provide information |
| 21 | about specific aspects of a Python object, such as the |
| 22 | description of a function's parameter, or the author of a |
| 23 | module. |
| 24 | - X{literalblock}: A block of literal text. This text should be |
| 25 | displayed as it would be displayed in plaintext. The |
| 26 | parser removes the appropriate amount of leading whitespace |
| 27 | from each line in the literal block. |
| 28 | - X{doctestblock}: A block containing sample python code, |
| 29 | formatted according to the specifications of the C{doctest} |
| 30 | module. |
| 31 | - X{ulist}: An unordered list. |
| 32 | - X{olist}: An ordered list. |
| 33 | - X{li}: A list item. This tag is used both for unordered list |
| 34 | items and for ordered list items. |
| 35 | |
| 36 | Additionally, the following X{inline regions} may be used within |
| 37 | C{para} blocks: |
| 38 | |
| 39 | - X{code}: Source code and identifiers. |
| 40 | - X{math}: Mathematical expressions. |
| 41 | - X{index}: A term which should be included in an index, if one |
| 42 | is generated. |
| 43 | - X{italic}: Italicized text. |
| 44 | - X{bold}: Bold-faced text. |
| 45 | - X{uri}: A Universal Resource Indicator (URI) or Universal |
| 46 | Resource Locator (URL) |
| 47 | - X{link}: A Python identifier which should be hyperlinked to |
| 48 | the named object's documentation, when possible. |
| 49 | |
The returned DOM tree will conform to the following Document Type
| 51 | Description:: |
| 52 | |
| 53 | <!ENTITY % colorized '(code | math | index | italic | |
| 54 | bold | uri | link | symbol)*'> |
| 55 | |
| 56 | <!ELEMENT epytext ((para | literalblock | doctestblock | |
| 57 | section | ulist | olist)*, fieldlist?)> |
| 58 | |
| 59 | <!ELEMENT para (#PCDATA | %colorized;)*> |
| 60 | |
| 61 | <!ELEMENT section (para | listblock | doctestblock | |
| 62 | section | ulist | olist)+> |
| 63 | |
| 64 | <!ELEMENT fieldlist (field+)> |
    <!ELEMENT field (tag, arg?, (para | listblock | doctestblock |
                     ulist | olist)+)>
| 67 | <!ELEMENT tag (#PCDATA)> |
| 68 | <!ELEMENT arg (#PCDATA)> |
| 69 | |
| 70 | <!ELEMENT literalblock (#PCDATA | %colorized;)*> |
| 71 | <!ELEMENT doctestblock (#PCDATA)> |
| 72 | |
| 73 | <!ELEMENT ulist (li+)> |
| 74 | <!ELEMENT olist (li+)> |
| 75 | <!ELEMENT li (para | literalblock | doctestblock | ulist | olist)+> |
| 76 | <!ATTLIST li bullet NMTOKEN #IMPLIED> |
| 77 | <!ATTLIST olist start NMTOKEN #IMPLIED> |
| 78 | |
| 79 | <!ELEMENT uri (name, target)> |
| 80 | <!ELEMENT link (name, target)> |
| 81 | <!ELEMENT name (#PCDATA | %colorized;)*> |
| 82 | <!ELEMENT target (#PCDATA)> |
| 83 | |
| 84 | <!ELEMENT code (#PCDATA | %colorized;)*> |
| 85 | <!ELEMENT math (#PCDATA | %colorized;)*> |
| 86 | <!ELEMENT italic (#PCDATA | %colorized;)*> |
| 87 | <!ELEMENT bold (#PCDATA | %colorized;)*> |
| 88 | <!ELEMENT indexed (#PCDATA | %colorized;)> |
| 89 | <!ATTLIST code style CDATA #IMPLIED> |
| 90 | |
| 91 | <!ELEMENT symbol (#PCDATA)> |
| 92 | |
| 93 | @var SYMBOLS: A list of the of escape symbols that are supported |
| 94 | by epydoc. Currently the following symbols are supported: |
| 95 | <<<SYMBOLS>>> |
| 96 | """ |
| 97 | # Note: the symbol list is appended to the docstring automatically, |
| 98 | # below. |
| 99 | |
| 100 | __docformat__ = 'epytext en' |
| 101 | |
| 102 | # Code organization.. |
| 103 | # 1. parse() |
| 104 | # 2. tokenize() |
| 105 | # 3. colorize() |
| 106 | # 4. helpers |
| 107 | # 5. testing |
| 108 | |
| 109 | import re, string, types, sys, os.path |
| 110 | from epydoc.markup import * |
| 111 | from epydoc.util import wordwrap, plaintext_to_html, plaintext_to_latex |
| 112 | from epydoc.markup.doctest import doctest_to_html, doctest_to_latex |
| 113 | |
| 114 | ################################################## |
| 115 | ## DOM-Like Encoding |
| 116 | ################################################## |
| 117 | |
class Element:
    """
    A very simple DOM-like representation for parsed epytext
    documents.  Each document is a tree whose interior nodes are
    C{Element}s and whose leaves are plain C{string}s.  Every node
    carries a I{tag} plus zero or more string-valued I{attributes}.
    """
    def __init__(self, tag, *children, **attribs):
        # A string tag indicating the type of this element
        # (e.g. 'para', 'section', 'epytext').
        self.tag = tag
        # The child nodes: a mix of strings and Elements.
        self.children = list(children)
        # Mapping from attribute name to attribute value.
        self.attribs = attribs

    def __str__(self):
        """
        Return a string representation of this element, using XML
        notation.
        @bug: Doesn't escape '<' or '&' or '>'.
        """
        parts = ['<%s' % self.tag]
        for name, value in self.attribs.items():
            parts.append(' %s=%r' % (name, value))
        parts.append('>')
        for child in self.children:
            parts.append(str(child))
        parts.append('</%s>' % self.tag)
        return ''.join(parts)

    def __repr__(self):
        pieces = [self.tag]
        pieces.extend(['%r' % child for child in self.children])
        pieces.extend(['%s=%r' % item for item in self.attribs.items()])
        return 'Element(%s)' % ', '.join(pieces)
| 155 | |
| 156 | ################################################## |
| 157 | ## Constants |
| 158 | ################################################## |
| 159 | |
# The possible heading underline characters, listed in order of
# heading depth (position 0 = top-level heading).
_HEADING_CHARS = "=-~"

# Escape codes.  These should be needed very rarely.  'lb'/'rb'
# expand to literal left/right curly braces.
_ESCAPES = {'lb':'{', 'rb': '}'}

# Symbols.  These can be generated via S{...} escapes.
SYMBOLS = [
    # Arrows
    '<-', '->', '^', 'v',

    # Greek letters
    'alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta',
    'eta', 'theta', 'iota', 'kappa', 'lambda', 'mu',
    'nu', 'xi', 'omicron', 'pi', 'rho', 'sigma',
    'tau', 'upsilon', 'phi', 'chi', 'psi', 'omega',
    'Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon', 'Zeta',
    'Eta', 'Theta', 'Iota', 'Kappa', 'Lambda', 'Mu',
    'Nu', 'Xi', 'Omicron', 'Pi', 'Rho', 'Sigma',
    'Tau', 'Upsilon', 'Phi', 'Chi', 'Psi', 'Omega',

    # HTML character entities
    'larr', 'rarr', 'uarr', 'darr', 'harr', 'crarr',
    'lArr', 'rArr', 'uArr', 'dArr', 'hArr',
    'copy', 'times', 'forall', 'exist', 'part',
    'empty', 'isin', 'notin', 'ni', 'prod', 'sum',
    'prop', 'infin', 'ang', 'and', 'or', 'cap', 'cup',
    'int', 'there4', 'sim', 'cong', 'asymp', 'ne',
    'equiv', 'le', 'ge', 'sub', 'sup', 'nsub',
    'sube', 'supe', 'oplus', 'otimes', 'perp',

    # Alternate (long) names
    'infinity', 'integral', 'product',
    '>=', '<=',
    ]
# Convert to a dictionary, for quick lookup (values are unused; the
# dict is effectively a set of valid symbol names).
_SYMBOLS = {}
for symbol in SYMBOLS: _SYMBOLS[symbol] = 1

# Add symbols to the docstring: build an epytext list of all symbols
# and substitute it for the <<<SYMBOLS>>> placeholder left in the
# module docstring above.
symblist = ' '
symblist += ';\n '.join([' - C{E{S}{%s}}=S{%s}' % (symbol, symbol)
                         for symbol in SYMBOLS])
__doc__ = __doc__.replace('<<<SYMBOLS>>>', symblist)
# Clean the loop/scratch variables out of the module namespace.
del symbol, symblist

# Tags for colorizing text.  Maps the one-letter inline-markup prefix
# (as in C{...}, B{...}) to the DOM tag it produces.
_COLORIZING_TAGS = {
    'C': 'code',
    'M': 'math',
    'X': 'indexed',
    'I': 'italic',
    'B': 'bold',
    'U': 'uri',
    'L': 'link',       # A Python identifier that should be linked to
    'E': 'escape',     # escapes characters or creates symbols
    'S': 'symbol',
    'G': 'graph',
    }

# Which tags can use "link syntax" (e.g., U{Python<www.python.org>})?
_LINK_COLORIZING_TAGS = ['link', 'uri']
| 223 | |
| 224 | ################################################## |
| 225 | ## Structuring (Top Level) |
| 226 | ################################################## |
| 227 | |
def parse(str, errors = None):
    """
    Return a DOM tree encoding the contents of an epytext string.  Any
    errors generated during parsing will be stored in C{errors}.

    @param str: The epytext string to parse.
    @type str: C{string}
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then fatal errors
        will generate exceptions, and non-fatal errors will be
        ignored.
    @type errors: C{list} of L{ParseError}
    @return: a DOM tree encoding the contents of an epytext string,
        or C{None} if an C{errors} list was supplied and a fatal
        error was encountered.
    @rtype: C{Element}
    @raise ParseError: If C{errors} is C{None} and an error is
        encountered while parsing.
    """
    # Initialize errors list.  If the caller did not supply one, we
    # collect into a private list and raise on the first fatal error.
    if errors is None:
        errors = []
        raise_on_error = True
    else:
        raise_on_error = False

    # Preprocess the string: normalize CRLF line endings and expand
    # tabs.  (str.expandtabs() replaces the deprecated
    # string.expandtabs(), which no longer exists in Python 3.)
    str = str.replace('\r\n', '\n')
    str = str.expandtabs()

    # Tokenize the input string.
    tokens = _tokenize(str, errors)

    # Have we encountered a field yet?  Fields must come last, so any
    # non-field element after a field is a structuring error.
    encountered_field = False

    # Create a document to hold the epytext.
    doc = Element('epytext')

    # Maintain two parallel stacks: one contains DOM elements, and
    # gives the ancestors of the current block.  The other contains
    # indentation values, and gives the indentation of the
    # corresponding DOM elements.  An indentation of "None" reflects
    # an unknown indentation.  However, the indentation must be
    # greater than, or greater than or equal to, the indentation of
    # the prior element (depending on what type of DOM element it
    # corresponds to).  No 2 consecutive indent_stack values will
    # ever be "None."  Use initial dummy elements in the stack, so we
    # don't have to worry about bounds checking.
    stack = [None, doc]
    indent_stack = [-1, None]

    for token in tokens:
        # Pop any completed blocks off the stack.
        _pop_completed_blocks(token, stack, indent_stack)

        # If Token has type PARA, colorize and add the new paragraph
        if token.tag == Token.PARA:
            _add_para(doc, token, stack, indent_stack, errors)

        # If Token has type HEADING, add the new section
        elif token.tag == Token.HEADING:
            _add_section(doc, token, stack, indent_stack, errors)

        # If Token has type LBLOCK, add the new literal block
        elif token.tag == Token.LBLOCK:
            stack[-1].children.append(token.to_dom(doc))

        # If Token has type DTBLOCK, add the new doctest block
        elif token.tag == Token.DTBLOCK:
            stack[-1].children.append(token.to_dom(doc))

        # If Token has type BULLET, add the new list/list item/field
        elif token.tag == Token.BULLET:
            _add_list(doc, token, stack, indent_stack, errors)
        else:
            # Raise explicitly rather than `assert 0`, which is
            # silently stripped when running under -O.
            raise AssertionError('Unknown token type: %s' % token.tag)

        # Check if the DOM element we just added was a field..
        if stack[-1].tag == 'field':
            encountered_field = True
        elif encountered_field:
            if len(stack) <= 3:
                estr = ("Fields must be the final elements in an "
                        "epytext string.")
                errors.append(StructuringError(estr, token.startline))

    # Graphs use inline markup (G{...}) but are really block-level
    # elements; so "raise" any graphs we generated.  This is a bit of
    # a hack, but the alternative is to define a new markup for
    # block-level elements, which I'd rather not do.  (See sourceforge
    # bug #1673017.)
    for child in doc.children:
        _raise_graphs(child, doc)

    # If there was a fatal error, then signal it!
    if any(e.is_fatal() for e in errors):
        if raise_on_error:
            raise errors[0]
        else:
            return None

    # Return the top-level epytext DOM element.
    return doc
| 336 | |
def _raise_graphs(tree, parent):
    """
    Hoist any 'graph' elements found inside C{tree} up to block level:
    a graph nested in a non-block element (e.g. a paragraph) is spliced
    into C{parent}'s child list, with the enclosing element split into
    a "left" and "right" copy around it.

    @param tree: The element to scan (recursively) for graph children.
    @param parent: The element whose C{children} list contains C{tree}.
    """
    # Recurse to children first, so graphs bubble up one level at a time.
    have_graph_child = False
    for elt in tree.children:
        if isinstance(elt, Element):
            _raise_graphs(elt, tree)
            if elt.tag == 'graph': have_graph_child = True

    # These block-level containers may hold graphs directly; anything
    # else must be split so the graph can be hoisted into the parent.
    block = ('section', 'fieldlist', 'field', 'ulist', 'olist', 'li')
    if have_graph_child and tree.tag not in block:
        child_index = 0
        for elt in tree.children:
            if isinstance(elt, Element) and elt.tag == 'graph':
                # We found a graph: splice it into the parent.
                # NOTE(review): after this splice, C{tree} itself is no
                # longer in C{parent.children} (it was replaced by two
                # new Elements), so the C{index(tree)} call below looks
                # like it would raise ValueError for a *second* graph in
                # the same element -- confirm with a two-graph paragraph.
                parent_index = parent.children.index(tree)
                left = tree.children[:child_index]
                right = tree.children[child_index+1:]
                parent.children[parent_index:parent_index+1] = [
                    Element(tree.tag, *left, **tree.attribs),
                    elt,
                    Element(tree.tag, *right, **tree.attribs)]
                child_index = 0
                parent_index += 2
            else:
                child_index += 1
| 362 | |
| 363 | def _pop_completed_blocks(token, stack, indent_stack): |
| 364 | """ |
| 365 | Pop any completed blocks off the stack. This includes any |
| 366 | blocks that we have dedented past, as well as any list item |
| 367 | blocks that we've dedented to. The top element on the stack |
| 368 | should only be a list if we're about to start a new list |
| 369 | item (i.e., if the next token is a bullet). |
| 370 | """ |
| 371 | indent = token.indent |
| 372 | if indent != None: |
| 373 | while (len(stack) > 2): |
| 374 | pop = 0 |
| 375 | |
| 376 | # Dedent past a block |
| 377 | if indent_stack[-1]!=None and indent<indent_stack[-1]: pop=1 |
| 378 | elif indent_stack[-1]==None and indent<indent_stack[-2]: pop=1 |
| 379 | |
| 380 | # Dedent to a list item, if it is follwed by another list |
| 381 | # item with the same indentation. |
| 382 | elif (token.tag == 'bullet' and indent==indent_stack[-2] and |
| 383 | stack[-1].tag in ('li', 'field')): pop=1 |
| 384 | |
| 385 | # End of a list (no more list items available) |
| 386 | elif (stack[-1].tag in ('ulist', 'olist') and |
| 387 | (token.tag != 'bullet' or token.contents[-1] == ':')): |
| 388 | pop=1 |
| 389 | |
| 390 | # Pop the block, if it's complete. Otherwise, we're done. |
| 391 | if pop == 0: return |
| 392 | stack.pop() |
| 393 | indent_stack.pop() |
| 394 | |
def _add_para(doc, para_token, stack, indent_stack, errors):
    """Colorize the given paragraph, and add it to the DOM tree.

    @param doc: The top-level 'epytext' DOM element (passed through
        to the colorizer).
    @param para_token: The paragraph token to add.
    @param stack: The stack of open DOM elements; the paragraph is
        appended to the innermost open block's children.
    @param indent_stack: The parallel stack of indentations; an
        unknown (C{None}) top entry is resolved to this paragraph's
        indentation.
    @param errors: A list to which any L{StructuringError}s are
        appended.
    """
    # Check indentation, and update the parent's indentation
    # when appropriate: the first paragraph defines an unknown indent.
    if indent_stack[-1] is None:
        indent_stack[-1] = para_token.indent
    if para_token.indent == indent_stack[-1]:
        # Colorize the paragraph and add it.
        para = _colorize(doc, para_token, errors)
        if para_token.inline:
            para.attribs['inline'] = True
        stack[-1].children.append(para)
    else:
        estr = "Improper paragraph indentation."
        errors.append(StructuringError(estr, para_token.startline))
| 410 | |
def _add_section(doc, heading_token, stack, indent_stack, errors):
    """Add a new section to the DOM tree, with the given heading.

    @param doc: The top-level 'epytext' DOM element (passed through
        to the colorizer).
    @param heading_token: The heading token; its C{level} gives the
        section's nesting depth.
    @param stack: The stack of open DOM elements (modified in place).
    @param indent_stack: The parallel stack of indentations
        (modified in place).
    @param errors: A list to which L{StructuringError}s are appended.
    """
    # An unknown parent indentation is defined by this heading; a
    # known one must match it.
    if indent_stack[-1] is None:
        indent_stack[-1] = heading_token.indent
    elif indent_stack[-1] != heading_token.indent:
        estr = "Improper heading indentation."
        errors.append(StructuringError(estr, heading_token.startline))

    # Check for errors: headings may only nest inside other sections...
    for elt in stack[2:]:
        if elt.tag != "section":
            estr = "Headings must occur at the top level."
            errors.append(StructuringError(estr, heading_token.startline))
            break
    # ...and the underline character must match the current depth.
    if (heading_token.level + 2) > len(stack):
        estr = "Wrong underline character for heading."
        errors.append(StructuringError(estr, heading_token.startline))

    # Pop the appropriate number of sections so we're at the
    # correct level.
    stack[heading_token.level+2:] = []
    indent_stack[heading_token.level+2:] = []

    # Colorize the heading.
    head = _colorize(doc, heading_token, errors, 'heading')

    # Add the section's and heading's DOM elements.  The new section
    # becomes the innermost open block, with unknown indentation.
    sec = Element("section")
    stack[-1].children.append(sec)
    stack.append(sec)
    sec.children.append(head)
    indent_stack.append(None)
| 443 | |
def _add_list(doc, bullet_token, stack, indent_stack, errors):
    """
    Add a new list item or field to the DOM tree, with the given
    bullet or field tag.  When necessary, create the associated
    list.

    @param doc: The top-level 'epytext' DOM element.
    @param bullet_token: The bullet token ('-', 'N.', or '@tag:')
        that starts the new list item or field.
    @param stack: The stack of open DOM elements (modified in place).
    @param indent_stack: The parallel stack of indentations
        (modified in place).
    @param errors: A list to which L{StructuringError}s are appended.
    """
    # Determine what type of bullet it is from its final character.
    if bullet_token.contents[-1] == '-':
        list_type = 'ulist'
    elif bullet_token.contents[-1] == '.':
        list_type = 'olist'
    elif bullet_token.contents[-1] == ':':
        list_type = 'fieldlist'
    else:
        raise AssertionError('Bad Bullet: %r' % bullet_token.contents)

    # Is this a new list?
    newlist = False
    if stack[-1].tag != list_type:
        newlist = True
    elif list_type == 'olist' and stack[-1].tag == 'olist':
        # An ordered-list bullet continues the current list only if
        # its number follows directly from the previous item's number.
        old_listitem = stack[-1].children[-1]
        old_bullet = old_listitem.attribs.get("bullet").split('.')[:-1]
        new_bullet = bullet_token.contents.split('.')[:-1]
        if (new_bullet[:-1] != old_bullet[:-1] or
            int(new_bullet[-1]) != int(old_bullet[-1])+1):
            newlist = True

    # Create the new list.
    if newlist:
        # BUG FIX: was "stack[-1].tag is 'fieldlist'" -- a string
        # *identity* test, which is implementation-dependent (and a
        # SyntaxWarning on modern Python).  Equality is intended.
        if stack[-1].tag == 'fieldlist':
            # The new list item is not a field list item (since this
            # is a new list); but it's indented the same as the field
            # list.  This either means that they forgot to indent the
            # list, or they are trying to put something after the
            # field list.  The first one seems more likely, so we'll
            # just warn about that (to avoid confusion).
            estr = "Lists must be indented."
            errors.append(StructuringError(estr, bullet_token.startline))
        if stack[-1].tag in ('ulist', 'olist', 'fieldlist'):
            stack.pop()
            indent_stack.pop()

        if (list_type != 'fieldlist' and indent_stack[-1] is not None and
            bullet_token.indent == indent_stack[-1]):
            # Ignore this error if there's text on the same line as
            # the comment-opening quote -- epydoc can't reliably
            # determine the indentation for that line.
            if bullet_token.startline != 1 or bullet_token.indent != 0:
                estr = "Lists must be indented."
                errors.append(StructuringError(estr, bullet_token.startline))

        if list_type == 'fieldlist':
            # Fieldlist should be at the top-level.
            for elt in stack[2:]:
                if elt.tag != "section":
                    estr = "Fields must be at the top level."
                    errors.append(
                        StructuringError(estr, bullet_token.startline))
                    break
            stack[2:] = []
            indent_stack[2:] = []

        # Add the new list.
        lst = Element(list_type)
        stack[-1].children.append(lst)
        stack.append(lst)
        indent_stack.append(bullet_token.indent)
        if list_type == 'olist':
            # Record a non-default starting number (e.g. "4.").
            # BUG FIX: the original compared the *list* of bullet
            # components against the string '1'; that is always
            # unequal, so "start" was recorded even for lists that
            # begin at 1.  Compare the last numeric component instead.
            start = bullet_token.contents.split('.')[:-1]
            if start[-1] != '1':
                lst.attribs["start"] = start[-1]

    # Fields are treated somewhat specially: A "fieldlist"
    # node is created to make the parsing simpler, but fields
    # are adjoined directly into the "epytext" node, not into
    # the "fieldlist" node.
    if list_type == 'fieldlist':
        li = Element("field")
        token_words = bullet_token.contents[1:-1].split(None, 1)
        tag_elt = Element("tag")
        tag_elt.children.append(token_words[0])
        li.children.append(tag_elt)

        if len(token_words) > 1:
            arg_elt = Element("arg")
            arg_elt.children.append(token_words[1])
            li.children.append(arg_elt)
    else:
        li = Element("li")
        if list_type == 'olist':
            li.attribs["bullet"] = bullet_token.contents

    # Add the bullet.  The new list item becomes the innermost open
    # block, with (as yet) unknown indentation.
    stack[-1].children.append(li)
    stack.append(li)
    indent_stack.append(None)
| 541 | |
| 542 | ################################################## |
| 543 | ## Tokenization |
| 544 | ################################################## |
| 545 | |
class Token:
    """
    An intermediate data structure used while constructing the
    structuring DOM tree for a formatted docstring.  There are five
    types of C{Token}:

      - Paragraphs
      - Literal blocks
      - Doctest blocks
      - Headings
      - Bullets

    The text contained in each C{Token} is stored in the C{contents}
    variable, in normalized form: paragraphs are collapsed into a
    single line of text, with newline/indentation replaced by single
    spaces; literal blocks and doctest blocks have had the
    appropriate amount of leading whitespace removed from each line.

    Each C{Token} also carries an indentation level (C{indent}),
    which the structuring procedure uses to assemble hierarchical
    blocks.

    @type tag: C{string}
    @ivar tag: This C{Token}'s type.  Possible values are
        C{Token.PARA} (paragraph), C{Token.LBLOCK} (literal block),
        C{Token.DTBLOCK} (doctest block), C{Token.HEADING}, and
        C{Token.BULLET}.

    @type startline: C{int}
    @ivar startline: The line on which this C{Token} begins.  This
        line number is only used for issuing errors.

    @type contents: C{string}
    @ivar contents: The normalized text contained in this C{Token}.

    @type indent: C{int} or C{None}
    @ivar indent: The indentation level of this C{Token} (in number
        of leading spaces).  A value of C{None} indicates an unknown
        indentation; this is used for list items and fields that
        begin with one-line paragraphs.

    @type level: C{int} or C{None}
    @ivar level: The heading-level of this C{Token} if it is a
        heading; C{None} otherwise.  Valid heading levels are 0, 1,
        and 2.

    @type inline: C{bool}
    @ivar inline: If true, the element is an inline-level element,
        comparable to an HTML C{<span>} tag.  Else, it is block
        level, comparable to an HTML C{<div>}.

    @type PARA: C{string}
    @cvar PARA: The C{tag} value for paragraph C{Token}s.
    @type LBLOCK: C{string}
    @cvar LBLOCK: The C{tag} value for literal C{Token}s.
    @type DTBLOCK: C{string}
    @cvar DTBLOCK: The C{tag} value for doctest C{Token}s.
    @type HEADING: C{string}
    @cvar HEADING: The C{tag} value for heading C{Token}s.
    @type BULLET: C{string}
    @cvar BULLET: The C{tag} value for bullet C{Token}s.  This C{tag}
        value is also used for field tag C{Token}s, since fields
        function syntactically the same as list items.
    """
    # The possible token types.
    PARA = "para"
    LBLOCK = "literalblock"
    DTBLOCK = "doctestblock"
    HEADING = "heading"
    BULLET = "bullet"

    def __init__(self, tag, startline, contents, indent, level=None,
                 inline=False):
        """
        Create a new C{Token}.

        @param tag: The type of the new C{Token}.
        @type tag: C{string}
        @param startline: The line on which the new C{Token} begins.
        @type startline: C{int}
        @param contents: The normalized contents of the new C{Token}.
        @type contents: C{string}
        @param indent: The indentation of the new C{Token} (in number
            of leading spaces); C{None} for unknown.
        @type indent: C{int} or C{None}
        @param level: The heading-level of this C{Token} if it is a
            heading; C{None} otherwise.
        @type level: C{int} or C{None}
        @param inline: Is this C{Token} inline, like a C{<span>}?
        @type inline: C{bool}
        """
        self.tag = tag
        self.startline = startline
        self.contents = contents
        self.indent = indent
        self.level = level
        self.inline = inline

    def __repr__(self):
        """
        @rtype: C{string}
        @return: the formal representation of this C{Token}, of the
            form C{<Token: para at line 12>}.
        """
        return '<Token: %s at line %s>' % (self.tag, self.startline)

    def to_dom(self, doc):
        """
        @param doc: The top-level DOM document (not referenced here;
            kept for interface compatibility).
        @return: a DOM representation of this C{Token}: an L{Element}
            bearing this token's tag, whose only child is the token's
            contents string.
        @rtype: L{Element}
        """
        node = Element(self.tag)
        node.children.append(self.contents)
        return node
| 663 | |
# Construct regular expressions for recognizing bullets.  These are
# global so they don't have to be reconstructed each time we tokenize
# a docstring.
# An unordered-list bullet: a dash followed by whitespace or EOL.
_ULIST_BULLET = '[-]( +|$)'
# An ordered-list bullet: one or more dotted numbers ("1.", "1.2.")
# followed by whitespace or EOL.
_OLIST_BULLET = '(\d+[.])+( +|$)'
# A field bullet: "@tag:" or "@tag arg:".
_FIELD_BULLET = '@\w+( [^{}:\n]+)?:'
_BULLET_RE = re.compile(_ULIST_BULLET + '|' +
                        _OLIST_BULLET + '|' +
                        _FIELD_BULLET)
_LIST_BULLET_RE = re.compile(_ULIST_BULLET + '|' + _OLIST_BULLET)
_FIELD_BULLET_RE = re.compile(_FIELD_BULLET)
# The component pattern strings are no longer needed once compiled.
del _ULIST_BULLET, _OLIST_BULLET, _FIELD_BULLET
| 676 | |
def _tokenize_doctest(lines, start, block_indent, tokens, errors):
    """
    Construct a L{Token} containing the doctest block starting at
    C{lines[start]}, and append it to C{tokens}.  C{block_indent}
    should be the indentation of the doctest block.  Any errors
    generated while tokenizing the doctest block will be appended to
    C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        doctest block to be tokenized.
    @param block_indent: The indentation of C{lines[start]}.  This is
        the indentation of the doctest block.
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then errors will
        generate exceptions.
    @return: The line number of the first line following the doctest
        block.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type block_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    # Dedenting past block_indent is an error, but such lines are
    # still included; track the minimum indentation actually seen so
    # the common leading whitespace can be stripped below.
    min_indent = block_indent

    linenum = start + 1
    while linenum < len(lines):
        cur = lines[linenum]
        stripped = cur.lstrip()
        indent = len(cur) - len(stripped)

        # A blank line ends the doctest block.
        if not stripped:
            break

        # A dedent past block_indent is an error.
        if indent < block_indent:
            min_indent = min(min_indent, indent)
            estr = 'Improper doctest block indentation.'
            errors.append(TokenizationError(estr, linenum))

        # Go on to the next line.
        linenum += 1

    # Strip the common leading whitespace, emit the token, and return
    # the line number just past the block.
    body = '\n'.join([ln[min_indent:] for ln in lines[start:linenum]])
    tokens.append(Token(Token.DTBLOCK, start, body, block_indent))
    return linenum
| 731 | |
def _tokenize_literal(lines, start, block_indent, tokens, errors):
    """
    Construct a L{Token} containing the literal block starting at
    C{lines[start]}, and append it to C{tokens}.  C{block_indent}
    should be the indentation of the literal block.  Any errors
    generated while tokenizing the literal block will be appended to
    C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        literal block to be tokenized.
    @param block_indent: The indentation of C{lines[start]}.  This is
        the indentation of the literal block.
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this block will be
        appended to this list.
    @return: The line number of the first line following the literal
        block.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type block_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    linenum = start + 1
    while linenum < len(lines):
        cur = lines[linenum]
        stripped = cur.lstrip()
        indent = len(cur) - len(stripped)

        # A non-blank line dedented to (or past) block_indent ends the
        # literal block; blank lines never terminate it.
        if stripped and indent <= block_indent:
            break

        # Go on to the next line.
        linenum += 1

    # Drop one extra column of indentation beyond the block marker,
    # trim leading/trailing blank lines, and emit the token.
    body = '\n'.join([ln[block_indent+1:] for ln in lines[start:linenum]])
    body = re.sub('(\A[ \n]*\n)|(\n[ \n]*\Z)', '', body)
    tokens.append(Token(Token.LBLOCK, start, body, block_indent))
    return linenum
| 778 | |
def _tokenize_listart(lines, start, bullet_indent, tokens, errors):
    """
    Construct L{Token}s for the bullet and the first paragraph of the
    list item (or field) starting at C{lines[start]}, and append them
    to C{tokens}.  C{bullet_indent} gives the indentation of the list
    item.  Any errors encountered while tokenizing are appended to
    C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        list item to be tokenized.
    @param bullet_indent: The indentation of C{lines[start]}.  This is
        the indentation of the list item.
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this list item
        will be appended to this list.
    @return: The line number of the first line following the list
        item's first paragraph.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type bullet_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    end = start + 1
    para_indent = None
    doublecolon = lines[start].rstrip().endswith('::')

    # Extract the bullet itself; the paragraph text begins right
    # after it.
    para_start = _BULLET_RE.match(lines[start], bullet_indent).end()
    bcontents = lines[start][bullet_indent:para_start].strip()

    while end < len(lines):
        text = lines[end]
        indent = len(text) - len(text.lstrip())

        # A "::" marker on the previous line ends the paragraph.
        if doublecolon:
            break
        if text.rstrip().endswith('::'):
            doublecolon = True

        # The token also ends at a blank line...
        if indent == len(text):
            break
        # ...at a dedent past the bullet's indentation...
        if indent < bullet_indent:
            break
        # ...or at the start of another bullet.
        if _BULLET_RE.match(text, indent):
            break

        # The second line fixes the paragraph indentation; any later
        # change in indentation ends the token.
        if para_indent is None:
            para_indent = indent
        if indent != para_indent:
            break

        end += 1

    # Emit the bullet token.
    tokens.append(Token(Token.BULLET, start, bcontents, bullet_indent,
                        inline=True))

    # Emit the first-paragraph token (only if it is nonempty).
    pcontents = ' '.join([lines[start][para_start:].strip()] +
                         [ln.strip() for ln in lines[start+1:end]]).strip()
    if pcontents:
        tokens.append(Token(Token.PARA, start, pcontents, para_indent,
                            inline=True))

    # Return the linenum after the paragraph token ends.
    return end
| 855 | |
def _tokenize_para(lines, start, para_indent, tokens, errors):
    """
    Construct a L{Token} for the paragraph (or heading) starting at
    C{lines[start]}, and append it to C{tokens}.  C{para_indent}
    gives the indentation of the paragraph.  Any errors encountered
    while tokenizing are appended to C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        paragraph to be tokenized.
    @param para_indent: The indentation of C{lines[start]}.  This is
        the indentation of the paragraph.
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this paragraph
        will be appended to this list.
    @return: The line number of the first line following the
        paragraph.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type para_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    end = start + 1
    doublecolon = 0
    while end < len(lines):
        text = lines[end]
        indent = len(text) - len(text.lstrip())

        # A "::" marker on the previous line ends the paragraph.
        if doublecolon:
            break
        if text.rstrip()[-2:] == '::':
            doublecolon = 1

        # Blank lines, indentation changes, and list bullets all end
        # the paragraph.
        if indent == len(text):
            break
        if indent != para_indent:
            break
        if _BULLET_RE.match(text, indent):
            break

        # Warn (non-fatally) about lines that look like field items.
        if text[indent] == '@':
            estr = "Possible mal-formatted field item."
            errors.append(TokenizationError(estr, end, is_fatal=0))

        end += 1

    contents = [ln.strip() for ln in lines[start:end]]

    # A two-line token whose second line is a run of one heading
    # character, roughly matching the first line's length, is a
    # heading.
    if (len(contents) >= 2 and
        contents[1][0] in _HEADING_CHARS and
        abs(len(contents[0]) - len(contents[1])) <= 5 and
        contents[1] == contents[1][0] * len(contents[1])):
        if len(contents[0]) != len(contents[1]):
            # Warn, then fall through and treat it as a paragraph.
            estr = ("Possible heading typo: the number of "+
                    "underline characters must match the "+
                    "number of heading characters.")
            errors.append(TokenizationError(estr, start, is_fatal=0))
        else:
            level = _HEADING_CHARS.index(contents[1][0])
            tokens.append(Token(Token.HEADING, start,
                                contents[0], para_indent, level))
            # Skip past the heading text and its underline.
            return start + 2

    # Otherwise emit an ordinary paragraph token.
    tokens.append(Token(Token.PARA, start, ' '.join(contents), para_indent))
    return end
| 940 | |
def _tokenize(str, errors):
    """
    Split a given formatted docstring into an ordered list of
    C{Token}s, according to the epytext markup rules.

    @param str: The epytext string
    @type str: C{string}
    @param errors: A list to which any errors generated during
        tokenizing will be appended.
    @type errors: C{list} of L{ParseError}
    @return: a list of the C{Token}s that make up the given string.
    @rtype: C{list} of L{Token}
    """
    tokens = []
    lines = str.split('\n')

    # Walk the lines, dispatching on what kind of block each
    # non-blank line begins; each helper consumes its block and
    # returns the next unprocessed line number.
    linenum = 0
    while linenum < len(lines):
        line = lines[linenum]
        indent = len(line) - len(line.lstrip())

        if indent == len(line):
            # Skip blank lines.
            linenum += 1
            continue

        if line.startswith('>>> ', indent):
            # ">>> " introduces a doctest block.
            linenum = _tokenize_doctest(lines, linenum, indent,
                                        tokens, errors)
        elif _BULLET_RE.match(line, indent):
            # A bullet introduces a list item (or field).
            linenum = _tokenize_listart(lines, linenum, indent,
                                        tokens, errors)
            if tokens[-1].indent is not None:
                indent = tokens[-1].indent
        else:
            # Warn (non-fatally) about lines that look like field
            # items.
            if line[indent] == '@':
                estr = "Possible mal-formatted field item."
                errors.append(TokenizationError(estr, linenum, is_fatal=0))

            # Anything else is a paragraph or a heading.
            linenum = _tokenize_para(lines, linenum, indent, tokens, errors)

        # A paragraph token ending in '::' initiates a literal block;
        # drop one of the two colons before consuming the block.
        if (tokens[-1].tag == Token.PARA and
            tokens[-1].contents[-2:] == '::'):
            tokens[-1].contents = tokens[-1].contents[:-1]
            linenum = _tokenize_literal(lines, linenum, indent,
                                        tokens, errors)

    return tokens
| 996 | |
| 997 | |
| 998 | ################################################## |
| 999 | ## Inline markup ("colorizing") |
| 1000 | ################################################## |
| 1001 | |
# Assorted regular expressions used for colorizing.  (Raw strings,
# so regex escapes like \s are not interpreted as string escapes.)
_BRACE_RE = re.compile(r'{|}')
# Matches "text <target>", where the target may carry an optional
# "URI:" or "URL:" prefix; groups are (text, target).
_TARGET_RE = re.compile(r'^(.*?)\s*<(?:URI:|URL:)?([^<>]+)>$')
| 1005 | |
def _colorize(doc, token, errors, tagName='para'):
    """
    Given a string containing the contents of a paragraph, produce a
    DOM C{Element} encoding that paragraph.  Colorized regions are
    represented using DOM C{Element}s, and text is represented using
    DOM C{Text}s.

    @param doc: The DOM document being constructed.  It is not used
        directly here, but is passed through to the graph and link
        colorizing helpers.
    @param token: The token whose contents should be colorized; used
        both for its C{contents} and for error reporting.
    @type token: L{Token}

    @param errors: A list of errors.  Any newly generated errors will
        be appended to this list.
    @type errors: C{list} of L{ParseError}

    @param tagName: The element tag for the DOM C{Element} that should
        be generated.
    @type tagName: C{string}

    @return: a DOM C{Element} encoding the given paragraph.
    @returntype: C{Element}
    """
    str = token.contents
    linenum = 0  # (not used below)

    # Maintain a stack of DOM elements, containing the ancestors of
    # the text currently being analyzed.  New elements are pushed when
    # "{" is encountered, and old elements are popped when "}" is
    # encountered.
    stack = [Element(tagName)]

    # This is just used to make error-reporting friendlier.  It's a
    # stack parallel to "stack" containing the index of each element's
    # open brace.
    openbrace_stack = [0]

    # Process the string, scanning for '{' and '}'s.  start is the
    # index of the first unprocessed character.  Each time through the
    # loop, we process the text from the first unprocessed character
    # to the next open or close brace.
    start = 0
    while 1:
        match = _BRACE_RE.search(str, start)
        if match == None: break
        end = match.start()

        # Open braces start new colorizing elements.  When preceeded
        # by a capital letter, they specify a colored region, as
        # defined by the _COLORIZING_TAGS dictionary.  Otherwise,
        # use a special "literal braces" element (with tag "litbrace"),
        # and convert them to literal braces once we find the matching
        # close-brace.
        if match.group() == '{':
            if (end>0) and 'A' <= str[end-1] <= 'Z':
                # Tagged region: the capital letter belongs to the
                # markup, so exclude it from the preceding text.
                if (end-1) > start:
                    stack[-1].children.append(str[start:end-1])
                if str[end-1] not in _COLORIZING_TAGS:
                    estr = "Unknown inline markup tag."
                    errors.append(ColorizingError(estr, token, end-1))
                    stack.append(Element('unknown'))
                else:
                    tag = _COLORIZING_TAGS[str[end-1]]
                    stack.append(Element(tag))
            else:
                if end > start:
                    stack[-1].children.append(str[start:end])
                stack.append(Element('litbrace'))
            openbrace_stack.append(end)
            # Attach the new element to its parent immediately, so it
            # is in place even if the close brace never arrives.
            stack[-2].children.append(stack[-1])

        # Close braces end colorizing elements.
        elif match.group() == '}':
            # Check for (and ignore) unbalanced braces.
            if len(stack) <= 1:
                estr = "Unbalanced '}'."
                errors.append(ColorizingError(estr, token, end))
                start = end + 1
                continue

            # Add any remaining text.
            if end > start:
                stack[-1].children.append(str[start:end])

            # Special handling for symbols: an S{...} region must
            # contain exactly one string, naming a known symbol.
            if stack[-1].tag == 'symbol':
                if (len(stack[-1].children) != 1 or
                    not isinstance(stack[-1].children[0], basestring)):
                    estr = "Invalid symbol code."
                    errors.append(ColorizingError(estr, token, end))
                else:
                    symb = stack[-1].children[0]
                    if symb in _SYMBOLS:
                        # It's a symbol
                        stack[-2].children[-1] = Element('symbol', symb)
                    else:
                        estr = "Invalid symbol code."
                        errors.append(ColorizingError(estr, token, end))

            # Special handling for escape elements: an E{...} region
            # must contain exactly one string -- either a known escape
            # name or a single character.
            if stack[-1].tag == 'escape':
                if (len(stack[-1].children) != 1 or
                    not isinstance(stack[-1].children[0], basestring)):
                    estr = "Invalid escape code."
                    errors.append(ColorizingError(estr, token, end))
                else:
                    escp = stack[-1].children[0]
                    if escp in _ESCAPES:
                        # It's an escape from _ESCAPES
                        stack[-2].children[-1] = _ESCAPES[escp]
                    elif len(escp) == 1:
                        # It's a single-character escape (eg E{.})
                        stack[-2].children[-1] = escp
                    else:
                        estr = "Invalid escape code."
                        errors.append(ColorizingError(estr, token, end))

            # Special handling for literal braces elements: replace
            # the element with its children wrapped in literal braces.
            if stack[-1].tag == 'litbrace':
                stack[-2].children[-1:] = ['{'] + stack[-1].children + ['}']

            # Special handling for graphs:
            if stack[-1].tag == 'graph':
                _colorize_graph(doc, stack[-1], token, end, errors)

            # Special handling for link-type elements:
            if stack[-1].tag in _LINK_COLORIZING_TAGS:
                _colorize_link(doc, stack[-1], token, end, errors)

            # Pop the completed element.
            openbrace_stack.pop()
            stack.pop()

        start = end+1

    # Add any final text.
    if start < len(str):
        stack[-1].children.append(str[start:])

    # Any elements still on the stack were never closed.
    if len(stack) != 1:
        estr = "Unbalanced '{'."
        errors.append(ColorizingError(estr, token, openbrace_stack[-1]))

    return stack[0]
| 1145 | |
# The graph types that G{...} markup may name (see _colorize_graph).
GRAPH_TYPES = ['classtree', 'packagetree', 'importgraph', 'callgraph']
| 1147 | |
def _colorize_graph(doc, graph, token, end, errors):
    """
    Validate and normalize the children of a C{graph} element.  Eg::
        G{classtree}
        G{classtree x, y, z}
        G{importgraph}

    On success, C{graph.children} is replaced by the graph type
    followed by its arguments.  On failure, a L{ColorizingError} is
    appended to C{errors} and the children become C{['none', '']}.

    @param doc: The DOM document being constructed (not used here;
        passed for signature uniformity with the other colorizers).
    @param graph: The C{graph} element to process.
    @param token: The token being colorized (for error reporting).
    @param end: The index of the close brace (for error reporting).
    @param errors: A list to which any new errors are appended.
    """
    bad_graph_spec = False

    children = graph.children[:]
    graph.children = []

    if len(children) != 1 or not isinstance(children[0], basestring):
        bad_graph_spec = "Bad graph specification"
    else:
        pieces = children[0].split(None, 1)
        graphtype = pieces[0].replace(':','').strip().lower()
        if graphtype in GRAPH_TYPES:
            if len(pieces) == 2:
                # Anchor the pattern with \Z so the *entire* arg list
                # must match.  Without the anchor, the starred group
                # can match an empty prefix, so re.match always
                # succeeded and the "Bad graph arg list" branch was
                # unreachable.
                if re.match(r'\s*:?\s*([\w\.]+\s*,?\s*)*\Z', pieces[1]):
                    args = pieces[1].replace(',', ' ').replace(':','').split()
                else:
                    bad_graph_spec = "Bad graph arg list"
            else:
                args = []
        else:
            bad_graph_spec = ("Bad graph type %s -- use one of %s" %
                              (pieces[0], ', '.join(GRAPH_TYPES)))

    if bad_graph_spec:
        errors.append(ColorizingError(bad_graph_spec, token, end))
        graph.children.append('none')
        graph.children.append('')
        return

    graph.children.append(graphtype)
    for arg in args:
        graph.children.append(arg)
| 1186 | |
def _colorize_link(doc, link, token, end, errors):
    """
    Normalize a link-type element (e.g. C{link} or C{uri}) so that
    its children become a C{name} element followed by a C{target}
    element.  If no valid target can be determined, an error is
    appended to C{errors} and the element is left unchanged.
    """
    variables = link.children[:]

    # The target must come from trailing text; anything else is bad.
    if len(variables) == 0 or not isinstance(variables[-1], basestring):
        errors.append(ColorizingError("Bad %s target." % link.tag,
                                      token, end))
        return

    match2 = _TARGET_RE.match(variables[-1])
    if match2:
        # Explicit target: "text <target>".
        (text, target) = match2.groups()
        variables[-1] = text
    elif len(variables) == 1:
        # Implicit target: the link text itself.
        target = variables[0]
    else:
        errors.append(ColorizingError("Bad %s target." % link.tag,
                                      token, end))
        return

    # Construct the name element.
    name_elt = Element('name', *variables)

    # Clean up the target.  For URIs, assume http or mailto if they
    # don't specify (no relative urls)
    target = re.sub(r'\s', '', target)
    if link.tag == 'uri':
        if not re.match(r'\w+:', target):
            if re.match(r'\w+@(\w+)(\.\w+)*', target):
                target = 'mailto:' + target
            else:
                target = 'http://'+target
    elif link.tag == 'link':
        # Remove arg lists for functions (e.g., L{_colorize_link()})
        target = re.sub(r'\(.*\)$', '', target)
        if not re.match(r'^[a-zA-Z_]\w*(\.[a-zA-Z_]\w*)*$', target):
            errors.append(ColorizingError("Bad link target.", token, end))
            return

    # Construct the target element, and install both children.
    target_elt = Element('target', target)
    link.children = [name_elt, target_elt]
| 1234 | |
| 1235 | ################################################## |
| 1236 | ## Formatters |
| 1237 | ################################################## |
| 1238 | |
def to_epytext(tree, indent=0, seclevel=0):
    """
    Convert a DOM document encoding epytext back to an epytext string.
    This is the inverse operation from L{parse}.  I.e., assuming there
    are no errors, the following is true:
        - C{parse(to_epytext(tree)) == tree}

    The inverse is true, except that whitespace, line wrapping, and
    character escaping may be done differently.
        - C{to_epytext(parse(str)) == str} (approximately)

    Internally, literal braces in text are temporarily encoded as the
    placeholder characters C{chr(0)}/C{chr(1)}, and C{chr(2)} marks
    the start of a literal block; the placeholders are rewritten to
    the proper escapes by the enclosing construct.

    @param tree: A DOM document encoding of an epytext string.
    @type tree: C{Element}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The epytext string corresponding to C{tree}.
    @rtype: C{string}
    """
    if isinstance(tree, basestring):
        # Encode literal braces as the \x00/\x01 placeholders.
        str = re.sub(r'\{', '\0', tree)
        str = re.sub(r'\}', '\1', str)
        return str

    if tree.tag == 'epytext': indent -= 2
    if tree.tag == 'section': seclevel += 1
    variables = [to_epytext(c, indent+2, seclevel) for c in tree.children]
    childstr = ''.join(variables)

    # Clean up for literal blocks (add the double "::" back).  The
    # '\2' in the pattern is chr(2), the marker emitted before each
    # literalblock below.
    childstr = re.sub(':(\s*)\2', '::\\1', childstr)

    if tree.tag == 'para':
        # Wrap the paragraph, then escape anything that would
        # otherwise be re-parsed as markup in that position.
        str = wordwrap(childstr, indent)+'\n'
        str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str)
        str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str)
        str = re.sub(r'((^|\n)\s*)@', r'\1E{@}', str)
        str = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', str)
        str = re.sub('\0', 'E{lb}', str)
        str = re.sub('\1', 'E{rb}', str)
        return str
    elif tree.tag == 'li':
        bullet = tree.attribs.get('bullet') or '-'
        return indent*' '+ bullet + ' ' + childstr.lstrip()
    elif tree.tag == 'heading':
        str = re.sub('\0', 'E{lb}',childstr)
        str = re.sub('\1', 'E{rb}', str)
        uline = len(childstr)*_HEADING_CHARS[seclevel-1]
        return (indent-2)*' ' + str + '\n' + (indent-2)*' '+uline+'\n'
    elif tree.tag == 'doctestblock':
        # Doctest text is literal, so placeholders revert to braces.
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = [' '+indent*' '+line for line in str.split('\n')]
        return '\n'.join(lines) + '\n\n'
    elif tree.tag == 'literalblock':
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = [(indent+1)*' '+line for line in str.split('\n')]
        # Lead with chr(2) so the parent can restore the '::' (see
        # the re.sub above).
        return '\2' + '\n'.join(lines) + '\n\n'
    elif tree.tag == 'field':
        # Children are: tag, zero or more args, then the body.
        numargs = 0
        while tree.children[numargs+1].tag == 'arg': numargs += 1
        tag = variables[0]
        args = variables[1:1+numargs]
        body = variables[1+numargs:]
        str = (indent)*' '+'@'+variables[0]
        if args: str += '(' + ', '.join(args) + ')'
        return str + ':\n' + ''.join(body)
    elif tree.tag == 'target':
        return '<%s>' % childstr
    elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext',
                      'section', 'olist', 'ulist', 'name'):
        return childstr
    elif tree.tag == 'symbol':
        return 'E{%s}' % childstr
    elif tree.tag == 'graph':
        return 'G{%s}' % ' '.join(variables)
    else:
        # Inline markup: map the tag back to its single-letter form.
        for (tag, name) in _COLORIZING_TAGS.items():
            if name == tree.tag:
                return '%s{%s}' % (tag, childstr)
        raise ValueError('Unknown DOM element %r' % tree.tag)
| 1325 | |
# Plaintext renderings for symbols that have a reasonable ASCII
# form; symbols not listed here are rendered as their code (see
# to_plaintext).
SYMBOL_TO_PLAINTEXT = {
    'crarr': '\\',
    }
| 1329 | |
def to_plaintext(tree, indent=0, seclevel=0):
    """
    Convert a DOM document encoding epytext to a string representation.
    This representation is similar to the string generated by
    C{to_epytext}, but C{to_plaintext} removes inline markup, prints
    escaped characters in unescaped form, etc.

    @param tree: A DOM document encoding of an epytext string.
    @type tree: C{Element}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The plaintext string corresponding to C{tree}.
    @rtype: C{string}
    """
    # Plain text nodes need no escaping in plaintext output.
    if isinstance(tree, basestring): return tree

    if tree.tag == 'section': seclevel += 1

    # Figure out the child indent level.
    if tree.tag == 'epytext': cindent = indent
    elif tree.tag == 'li' and tree.attribs.get('bullet'):
        cindent = indent + 1 + len(tree.attribs.get('bullet'))
    else:
        cindent = indent + 2
    variables = [to_plaintext(c, cindent, seclevel) for c in tree.children]
    childstr = ''.join(variables)

    if tree.tag == 'para':
        return wordwrap(childstr, indent)+'\n'
    elif tree.tag == 'li':
        # We should be able to use getAttribute here; but there's no
        # convenient way to test if an element has an attribute..
        bullet = tree.attribs.get('bullet') or '-'
        return indent*' ' + bullet + ' ' + childstr.lstrip()
    elif tree.tag == 'heading':
        uline = len(childstr)*_HEADING_CHARS[seclevel-1]
        return ((indent-2)*' ' + childstr + '\n' +
                (indent-2)*' ' + uline + '\n')
    elif tree.tag == 'doctestblock':
        lines = [(indent+2)*' '+line for line in childstr.split('\n')]
        return '\n'.join(lines) + '\n\n'
    elif tree.tag == 'literalblock':
        lines = [(indent+1)*' '+line for line in childstr.split('\n')]
        return '\n'.join(lines) + '\n\n'
    elif tree.tag == 'fieldlist':
        return childstr
    elif tree.tag == 'field':
        # Children are: tag, zero or more args, then the body.
        # (tag is assigned but variables[0] is used directly below.)
        numargs = 0
        while tree.children[numargs+1].tag == 'arg': numargs += 1
        tag = variables[0]
        args = variables[1:1+numargs]
        body = variables[1+numargs:]
        str = (indent)*' '+'@'+variables[0]
        if args: str += '(' + ', '.join(args) + ')'
        return str + ':\n' + ''.join(body)
    elif tree.tag == 'uri':
        if len(variables) != 2: raise ValueError('Bad URI ')
        elif variables[0] == variables[1]: return '<%s>' % variables[1]
        else: return '%r<%s>' % (variables[0], variables[1])
    elif tree.tag == 'link':
        if len(variables) != 2: raise ValueError('Bad Link')
        return '%s' % variables[0]
    elif tree.tag in ('olist', 'ulist'):
        # [xx] always use condensed lists.
        ## Use a condensed list if each list item is 1 line long.
        #for child in variables:
        #    if child.count('\n') > 2: return childstr
        return childstr.replace('\n\n', '\n')+'\n'
    elif tree.tag == 'symbol':
        return '%s' % SYMBOL_TO_PLAINTEXT.get(childstr, childstr)
    elif tree.tag == 'graph':
        return '<<%s graph: %s>>' % (variables[0], ', '.join(variables[1:]))
    else:
        # Assume that anything else can be passed through.
        return childstr
| 1410 | |
def to_debug(tree, indent=4, seclevel=0):
    """
    Convert a DOM document encoding epytext back to an epytext string,
    annotated with extra debugging information.  This function is
    similar to L{to_epytext}, but it adds explicit information about
    where different blocks begin, along the left margin.

    Like C{to_epytext}, it uses C{chr(0)}/C{chr(1)} as temporary
    placeholders for literal braces, and C{chr(2)} to mark literal
    blocks.

    @param tree: A DOM document encoding of an epytext string.
    @type tree: C{Element}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The epytext string corresponding to C{tree}.
    @rtype: C{string}
    """
    if isinstance(tree, basestring):
        # Encode literal braces as the \x00/\x01 placeholders.
        str = re.sub(r'\{', '\0', tree)
        str = re.sub(r'\}', '\1', str)
        return str

    if tree.tag == 'section': seclevel += 1
    variables = [to_debug(c, indent+2, seclevel) for c in tree.children]
    childstr = ''.join(variables)

    # Clean up for literal blocks (add the double "::" back).  The
    # '\2' in the pattern is chr(2), the marker emitted before each
    # literalblock below.
    childstr = re.sub(':( *\n \|\n)\2', '::\\1', childstr)

    if tree.tag == 'para':
        # Wrap the paragraph, escape markup-significant characters,
        # then annotate the left margin with "P>|".
        str = wordwrap(childstr, indent-6, 69)+'\n'
        str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str)
        str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str)
        str = re.sub(r'((^|\n)\s*)@', r'\1E{@}', str)
        str = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', str)
        str = re.sub('\0', 'E{lb}', str)
        str = re.sub('\1', 'E{rb}', str)
        lines = str.rstrip().split('\n')
        lines[0] = '   P>|' + lines[0]
        lines[1:] = ['     |'+l for l in lines[1:]]
        return '\n'.join(lines)+'\n     |\n'
    elif tree.tag == 'li':
        bullet = tree.attribs.get('bullet') or '-'
        return '  LI>|'+ (indent-6)*' '+ bullet + ' ' + childstr[6:].lstrip()
    elif tree.tag in ('olist', 'ulist'):
        return 'LIST>|'+(indent-4)*' '+childstr[indent+2:]
    elif tree.tag == 'heading':
        str = re.sub('\0', 'E{lb}', childstr)
        str = re.sub('\1', 'E{rb}', str)
        uline = len(childstr)*_HEADING_CHARS[seclevel-1]
        return ('SEC'+`seclevel`+'>|'+(indent-8)*' ' + str + '\n' +
                '     |'+(indent-8)*' ' + uline + '\n')
    elif tree.tag == 'doctestblock':
        # Doctest text is literal, so placeholders revert to braces.
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = ['     |'+(indent-4)*' '+line for line in str.split('\n')]
        lines[0] = 'DTST>'+lines[0][5:]
        return '\n'.join(lines) + '\n     |\n'
    elif tree.tag == 'literalblock':
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = ['     |'+(indent-5)*' '+line for line in str.split('\n')]
        lines[0] = ' LIT>'+lines[0][5:]
        # Lead with chr(2) so the parent can restore the '::'.
        return '\2' + '\n'.join(lines) + '\n     |\n'
    elif tree.tag == 'field':
        # Children are: tag, zero or more args, then the body.
        numargs = 0
        while tree.children[numargs+1].tag == 'arg': numargs += 1
        tag = variables[0]
        args = variables[1:1+numargs]
        body = variables[1+numargs:]
        str = ' FLD>|'+(indent-6)*' '+'@'+variables[0]
        if args: str += '(' + ', '.join(args) + ')'
        return str + ':\n' + ''.join(body)
    elif tree.tag == 'target':
        return '<%s>' % childstr
    elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext',
                      'section', 'olist', 'ulist', 'name'):
        return childstr
    elif tree.tag == 'symbol':
        return 'E{%s}' % childstr
    elif tree.tag == 'graph':
        return 'G{%s}' % ' '.join(variables)
    else:
        # Inline markup: map the tag back to its single-letter form.
        for (tag, name) in _COLORIZING_TAGS.items():
            if name == tree.tag:
                return '%s{%s}' % (tag, childstr)
        raise ValueError('Unknown DOM element %r' % tree.tag)
| 1500 | |
| 1501 | ################################################## |
| 1502 | ## Top-Level Wrapper function |
| 1503 | ################################################## |
def pparse(str, show_warnings=1, show_errors=1, stream=sys.stderr):
    """
    Pretty-parse the string.  This parses the string, and catches any
    warnings or errors produced.  Any warnings and errors are
    displayed, and the resulting DOM parse structure is returned.

    @param str: The string to parse.
    @type str: C{string}
    @param show_warnings: Whether or not to display non-fatal errors
        generated by parsing C{str}.
    @type show_warnings: C{boolean}
    @param show_errors: Whether or not to display fatal errors
        generated by parsing C{str}.
    @type show_errors: C{boolean}
    @param stream: The stream that warnings and errors should be
        written to.
    @type stream: C{stream}
    @return: a DOM document encoding the contents of C{str}.
    @rtype: C{Element}
    @raise SyntaxError: If any fatal errors were encountered.
    """
    errors = []
    # Initialize warnings before the try: if parse() raises, the
    # assignment inside the try never runs, and the warnings.sort()
    # call below would die with a NameError -- masking the original
    # exception that the bare "raise" at the end is meant to re-raise.
    warnings = []
    confused = 0
    try:
        val = parse(str, errors)
        warnings = [e for e in errors if not e.is_fatal()]
        errors = [e for e in errors if e.is_fatal()]
    except:
        # Deliberately broad: remember that parsing blew up, report
        # whatever was collected so far, then re-raise below.
        confused = 1

    if not show_warnings: warnings = []
    warnings.sort()
    errors.sort()
    if warnings:
        print >>stream, '='*SCRWIDTH
        print >>stream, "WARNINGS"
        print >>stream, '-'*SCRWIDTH
        for warning in warnings:
            print >>stream, warning.as_warning()
        print >>stream, '='*SCRWIDTH
    if errors and show_errors:
        if not warnings: print >>stream, '='*SCRWIDTH
        print >>stream, "ERRORS"
        print >>stream, '-'*SCRWIDTH
        for error in errors:
            print >>stream, error
        print >>stream, '='*SCRWIDTH

    if confused: raise
    elif errors: raise SyntaxError('Encountered Errors')
    else: return val
| 1555 | |
| 1556 | ################################################## |
| 1557 | ## Parse Errors |
| 1558 | ################################################## |
| 1559 | |
class TokenizationError(ParseError):
    """
    An error raised while tokenizing a formatted documentation
    string.
    """
| 1565 | |
class StructuringError(ParseError):
    """
    An error raised while structuring a formatted documentation
    string.
    """
| 1571 | |
class ColorizingError(ParseError):
    """
    An error generated while colorizing a paragraph.
    """
    def __init__(self, descr, token, charnum, is_fatal=1):
        """
        Construct a new colorizing exception.

        @param descr: A short description of the error.
        @type descr: C{string}
        @param token: The token where the error occured
        @type token: L{Token}
        @param charnum: The character index of the position in
            C{token} where the error occured.
        @type charnum: C{int}
        """
        ParseError.__init__(self, descr, token.startline, is_fatal)
        self.token = token
        self.charnum = charnum

    # Maximum number of characters of token context shown on each
    # side of the error position; anything longer is elided.
    CONTEXT_RANGE = 20
    def descr(self):
        """
        Return the error description followed by a snippet of the
        token's contents, with a caret marking the error position.
        """
        contents = self.token.contents
        pos = self.charnum
        window = self.CONTEXT_RANGE
        # Context to the left of the error position.
        if pos <= window:
            left = contents[0:pos]
        else:
            left = '...' + contents[pos-window:pos]
        # Context at and after the error position.
        if len(contents) - pos <= window:
            right = contents[pos:]
        else:
            right = contents[pos:pos+window] + '...'
        return ('%s\n\n%s%s\n%s^' % (self._descr, left, right, ' '*len(left)))
| 1605 | |
| 1606 | ################################################## |
| 1607 | ## Convenience parsers |
| 1608 | ################################################## |
| 1609 | |
def parse_as_literal(str):
    """
    Return a DOM document matching the epytext DTD whose only content
    is a single literal block holding the given string.  This is
    typically used as a fall-back when the parser fails.

    @param str: The string which should be enclosed in a literal
        block.
    @type str: C{string}

    @return: A DOM document containing C{str} in a single literal
        block.
    @rtype: C{Element}
    """
    literal = Element('literalblock', str)
    return Element('epytext', literal)
| 1626 | |
def parse_as_para(str):
    """
    Return a DOM document matching the epytext DTD whose only content
    is a single paragraph holding the given string.  This can be used
    to wrap some forms of automatically generated information (such
    as type names) in paragraphs.

    @param str: The string which should be enclosed in a paragraph.
    @type str: C{string}

    @return: A DOM document containing C{str} in a single paragraph.
    @rtype: C{Element}
    """
    para = Element('para', str)
    return Element('epytext', para)
| 1642 | |
| 1643 | ################################################################# |
| 1644 | ## SUPPORT FOR EPYDOC |
| 1645 | ################################################################# |
| 1646 | |
def parse_docstring(docstring, errors, **options):
    """
    Parse the given docstring, which is formatted using epytext, and
    return a C{ParsedDocstring} representation of its contents.

    @param docstring: The docstring to parse
    @type docstring: C{string}
    @param errors: A list where any errors generated during parsing
        will be stored.
    @type errors: C{list} of L{ParseError}
    @param options: Extra options.  Unknown options are ignored.
        Currently, no extra options are defined.
    @rtype: L{ParsedDocstring}
    """
    tree = parse(docstring, errors)
    return ParsedEpytextDocstring(tree, **options)
| 1661 | |
class ParsedEpytextDocstring(ParsedDocstring):
    """
    A parsed docstring whose contents are stored as an epytext DOM
    tree (a tree of L{Element} objects and strings), with renderers
    for HTML, LaTeX, and plaintext output.
    """
    # Map from an epytext symbol name (the NAME in E{NAME}) to the
    # character used to render it in HTML output.
    SYMBOL_TO_HTML = {
        # Symbols
        '<-': '←', '->': '→', '^': '↑', 'v': '↓',

        # Greek letters
        'alpha': 'α', 'beta': 'β', 'gamma': 'γ',
        'delta': 'δ', 'epsilon': 'ε', 'zeta': 'ζ',
        'eta': 'η', 'theta': 'θ', 'iota': 'ι',
        'kappa': 'κ', 'lambda': 'λ', 'mu': 'μ',
        'nu': 'ν', 'xi': 'ξ', 'omicron': 'ο',
        'pi': 'π', 'rho': 'ρ', 'sigma': 'σ',
        'tau': 'τ', 'upsilon': 'υ', 'phi': 'φ',
        'chi': 'χ', 'psi': 'ψ', 'omega': 'ω',
        'Alpha': 'Α', 'Beta': 'Β', 'Gamma': 'Γ',
        'Delta': 'Δ', 'Epsilon': 'Ε', 'Zeta': 'Ζ',
        'Eta': 'Η', 'Theta': 'Θ', 'Iota': 'Ι',
        'Kappa': 'Κ', 'Lambda': 'Λ', 'Mu': 'Μ',
        'Nu': 'Ν', 'Xi': 'Ξ', 'Omicron': 'Ο',
        'Pi': 'Π', 'Rho': 'Ρ', 'Sigma': 'Σ',
        'Tau': 'Τ', 'Upsilon': 'Υ', 'Phi': 'Φ',
        'Chi': 'Χ', 'Psi': 'Ψ', 'Omega': 'Ω',

        # HTML character entities
        'larr': '←', 'rarr': '→', 'uarr': '↑',
        'darr': '↓', 'harr': '↔', 'crarr': '↵',
        'lArr': '⇐', 'rArr': '⇒', 'uArr': '⇑',
        'dArr': '⇓', 'hArr': '⇔',
        'copy': '©', 'times': '×', 'forall': '∀',
        'exist': '∃', 'part': '∂',
        'empty': '∅', 'isin': '∈', 'notin': '∉',
        'ni': '∋', 'prod': '∏', 'sum': '∑',
        'prop': '∝', 'infin': '∞', 'ang': '∠',
        'and': '∧', 'or': '∨', 'cap': '∩', 'cup': '∪',
        'int': '∫', 'there4': '∴', 'sim': '∼',
        'cong': '≅', 'asymp': '≈', 'ne': '≠',
        'equiv': '≡', 'le': '≤', 'ge': '≥',
        'sub': '⊂', 'sup': '⊃', 'nsub': '⊄',
        'sube': '⊆', 'supe': '⊇', 'oplus': '⊕',
        'otimes': '⊗', 'perp': '⊥',

        # Alternate (long) names
        'infinity': '∞', 'integral': '∫', 'product': '∏',
        '<=': '≤', '>=': '≥',
        }
| 1707 | |
| 1708 | SYMBOL_TO_LATEX = { |
| 1709 | # Symbols |
| 1710 | '<-': r'\(\leftarrow\)', '->': r'\(\rightarrow\)', |
| 1711 | '^': r'\(\uparrow\)', 'v': r'\(\downarrow\)', |
| 1712 | |
| 1713 | # Greek letters (use lower case when upcase not available) |
| 1714 | |
| 1715 | 'alpha': r'\(\alpha\)', 'beta': r'\(\beta\)', 'gamma': |
| 1716 | r'\(\gamma\)', 'delta': r'\(\delta\)', 'epsilon': |
| 1717 | r'\(\epsilon\)', 'zeta': r'\(\zeta\)', 'eta': r'\(\eta\)', |
| 1718 | 'theta': r'\(\theta\)', 'iota': r'\(\iota\)', 'kappa': |
| 1719 | r'\(\kappa\)', 'lambda': r'\(\lambda\)', 'mu': r'\(\mu\)', |
| 1720 | 'nu': r'\(\nu\)', 'xi': r'\(\xi\)', 'omicron': r'\(o\)', 'pi': |
| 1721 | r'\(\pi\)', 'rho': r'\(\rho\)', 'sigma': r'\(\sigma\)', 'tau': |
| 1722 | r'\(\tau\)', 'upsilon': r'\(\upsilon\)', 'phi': r'\(\phi\)', |
| 1723 | 'chi': r'\(\chi\)', 'psi': r'\(\psi\)', 'omega': |
| 1724 | r'\(\omega\)', |
| 1725 | |
| 1726 | 'Alpha': r'\(\alpha\)', 'Beta': r'\(\beta\)', 'Gamma': |
| 1727 | r'\(\Gamma\)', 'Delta': r'\(\Delta\)', 'Epsilon': |
| 1728 | r'\(\epsilon\)', 'Zeta': r'\(\zeta\)', 'Eta': r'\(\eta\)', |
| 1729 | 'Theta': r'\(\Theta\)', 'Iota': r'\(\iota\)', 'Kappa': |
| 1730 | r'\(\kappa\)', 'Lambda': r'\(\Lambda\)', 'Mu': r'\(\mu\)', |
| 1731 | 'Nu': r'\(\nu\)', 'Xi': r'\(\Xi\)', 'Omicron': r'\(o\)', 'Pi': |
| 1732 | r'\(\Pi\)', 'ho': r'\(\rho\)', 'Sigma': r'\(\Sigma\)', 'Tau': |
| 1733 | r'\(\tau\)', 'Upsilon': r'\(\Upsilon\)', 'Phi': r'\(\Phi\)', |
| 1734 | 'Chi': r'\(\chi\)', 'Psi': r'\(\Psi\)', 'Omega': |
| 1735 | r'\(\Omega\)', |
| 1736 | |
| 1737 | # HTML character entities |
| 1738 | 'larr': r'\(\leftarrow\)', 'rarr': r'\(\rightarrow\)', 'uarr': |
| 1739 | r'\(\uparrow\)', 'darr': r'\(\downarrow\)', 'harr': |
| 1740 | r'\(\leftrightarrow\)', 'crarr': r'\(\hookleftarrow\)', |
| 1741 | 'lArr': r'\(\Leftarrow\)', 'rArr': r'\(\Rightarrow\)', 'uArr': |
| 1742 | r'\(\Uparrow\)', 'dArr': r'\(\Downarrow\)', 'hArr': |
| 1743 | r'\(\Leftrightarrow\)', 'copy': r'{\textcopyright}', |
| 1744 | 'times': r'\(\times\)', 'forall': r'\(\forall\)', 'exist': |
| 1745 | r'\(\exists\)', 'part': r'\(\partial\)', 'empty': |
| 1746 | r'\(\emptyset\)', 'isin': r'\(\in\)', 'notin': r'\(\notin\)', |
| 1747 | 'ni': r'\(\ni\)', 'prod': r'\(\prod\)', 'sum': r'\(\sum\)', |
| 1748 | 'prop': r'\(\propto\)', 'infin': r'\(\infty\)', 'ang': |
| 1749 | r'\(\angle\)', 'and': r'\(\wedge\)', 'or': r'\(\vee\)', 'cap': |
| 1750 | r'\(\cap\)', 'cup': r'\(\cup\)', 'int': r'\(\int\)', 'there4': |
| 1751 | r'\(\therefore\)', 'sim': r'\(\sim\)', 'cong': r'\(\cong\)', |
| 1752 | 'asymp': r'\(\approx\)', 'ne': r'\(\ne\)', 'equiv': |
| 1753 | r'\(\equiv\)', 'le': r'\(\le\)', 'ge': r'\(\ge\)', 'sub': |
| 1754 | r'\(\subset\)', 'sup': r'\(\supset\)', 'nsub': r'\(\supset\)', |
| 1755 | 'sube': r'\(\subseteq\)', 'supe': r'\(\supseteq\)', 'oplus': |
| 1756 | r'\(\oplus\)', 'otimes': r'\(\otimes\)', 'perp': r'\(\perp\)', |
| 1757 | |
| 1758 | # Alternate (long) names |
| 1759 | 'infinity': r'\(\infty\)', 'integral': r'\(\int\)', 'product': |
| 1760 | r'\(\prod\)', '<=': r'\(\le\)', '>=': r'\(\ge\)', |
| 1761 | } |
| 1762 | |
| 1763 | def __init__(self, dom_tree, **options): |
| 1764 | self._tree = dom_tree |
| 1765 | # Caching: |
| 1766 | self._html = self._latex = self._plaintext = None |
| 1767 | self._terms = None |
| 1768 | # inline option -- mark top-level children as inline. |
| 1769 | if options.get('inline') and self._tree is not None: |
| 1770 | for elt in self._tree.children: |
| 1771 | elt.attribs['inline'] = True |
| 1772 | |
| 1773 | def __str__(self): |
| 1774 | return str(self._tree) |
| 1775 | |
| 1776 | def to_html(self, docstring_linker, directory=None, docindex=None, |
| 1777 | context=None, **options): |
| 1778 | if self._html is not None: return self._html |
| 1779 | if self._tree is None: return '' |
| 1780 | indent = options.get('indent', 0) |
| 1781 | self._html = self._to_html(self._tree, docstring_linker, directory, |
| 1782 | docindex, context, indent) |
| 1783 | return self._html |
| 1784 | |
| 1785 | def to_latex(self, docstring_linker, **options): |
| 1786 | if self._latex is not None: return self._latex |
| 1787 | if self._tree is None: return '' |
| 1788 | indent = options.get('indent', 0) |
| 1789 | self._hyperref = options.get('hyperref', 1) |
| 1790 | self._latex = self._to_latex(self._tree, docstring_linker, indent) |
| 1791 | return self._latex |
| 1792 | |
| 1793 | def to_plaintext(self, docstring_linker, **options): |
| 1794 | # [XX] don't cache -- different options might be used!! |
| 1795 | #if self._plaintext is not None: return self._plaintext |
| 1796 | if self._tree is None: return '' |
| 1797 | if 'indent' in options: |
| 1798 | self._plaintext = to_plaintext(self._tree, |
| 1799 | indent=options['indent']) |
| 1800 | else: |
| 1801 | self._plaintext = to_plaintext(self._tree) |
| 1802 | return self._plaintext |
| 1803 | |
| 1804 | def _index_term_key(self, tree): |
| 1805 | str = to_plaintext(tree) |
| 1806 | str = re.sub(r'\s\s+', '-', str) |
| 1807 | return "index-"+re.sub("[^a-zA-Z0-9]", "_", str) |
| 1808 | |
    def _to_html(self, tree, linker, directory, docindex, context,
                 indent=0, seclevel=0):
        """
        Recursively render the DOM (sub)tree C{tree} as HTML,
        dispatching on the element's tag.  Plain strings are
        HTML-escaped; unknown tags raise C{ValueError}.
        """
        if isinstance(tree, basestring):
            return plaintext_to_html(tree)

        # Children are rendered with indent+2, so the root starts at -2
        # to cancel that out; headings below also subtract 2.
        if tree.tag == 'epytext': indent -= 2
        if tree.tag == 'section': seclevel += 1

        # Process the variables first.
        variables = [self._to_html(c, linker, directory, docindex, context,
                                   indent+2, seclevel)
                     for c in tree.children]

        # Construct the HTML string for the variables.
        childstr = ''.join(variables)

        # Perform the appropriate action for the DOM tree type.
        if tree.tag == 'para':
            # Inline paragraphs are emitted bare; others get <p> tags.
            return wordwrap(
                (tree.attribs.get('inline') and '%s' or '<p>%s</p>') % childstr,
                indent)
        elif tree.tag == 'code':
            style = tree.attribs.get('style')
            if style:
                return '<code class="%s">%s</code>' % (style, childstr)
            else:
                return '<code>%s</code>' % childstr
        elif tree.tag == 'uri':
            # variables[0] is the link text, variables[1] the target.
            return ('<a href="%s" target="_top">%s</a>' %
                    (variables[1], variables[0]))
        elif tree.tag == 'link':
            return linker.translate_identifier_xref(variables[1], variables[0])
        elif tree.tag == 'italic':
            return '<i>%s</i>' % childstr
        elif tree.tag == 'math':
            return '<i class="math">%s</i>' % childstr
        elif tree.tag == 'indexed':
            # Wrap the term in its own document and let the linker
            # decide how to render the index entry.
            term = Element('epytext', *tree.children, **tree.attribs)
            return linker.translate_indexterm(ParsedEpytextDocstring(term))
            #term_key = self._index_term_key(tree)
            #return linker.translate_indexterm(childstr, term_key)
        elif tree.tag == 'bold':
            return '<b>%s</b>' % childstr
        elif tree.tag == 'ulist':
            return '%s<ul>\n%s%s</ul>\n' % (indent*' ', childstr, indent*' ')
        elif tree.tag == 'olist':
            start = tree.attribs.get('start') or ''
            return ('%s<ol start="%s">\n%s%s</ol>\n' %
                    (indent*' ', start, childstr, indent*' '))
        elif tree.tag == 'li':
            return indent*' '+'<li>\n%s%s</li>\n' % (childstr, indent*' ')
        elif tree.tag == 'heading':
            # Heading level follows the section nesting depth.
            return ('%s<h%s class="heading">%s</h%s>\n' %
                    ((indent-2)*' ', seclevel, childstr, seclevel))
        elif tree.tag == 'literalblock':
            return '<pre class="literalblock">\n%s\n</pre>\n' % childstr
        elif tree.tag == 'doctestblock':
            return doctest_to_html(tree.children[0].strip())
        elif tree.tag == 'fieldlist':
            # Field lists are extracted by split_fields() before
            # rendering, so reaching one here is a logic error.
            raise AssertionError("There should not be any field lists left")
        elif tree.tag in ('epytext', 'section', 'tag', 'arg',
                          'name', 'target', 'html'):
            # Structural containers contribute only their children.
            return childstr
        elif tree.tag == 'symbol':
            symbol = tree.children[0]
            return self.SYMBOL_TO_HTML.get(symbol, '[%s]' % symbol)
        elif tree.tag == 'graph':
            # Generate the graph.
            graph = self._build_graph(variables[0], variables[1:], linker,
                                      docindex, context)
            if not graph: return ''
            # Write the graph.
            image_url = '%s.gif' % graph.uid
            image_file = os.path.join(directory, image_url)
            return graph.to_html(image_file, image_url)
        else:
            raise ValueError('Unknown epytext DOM element %r' % tree.tag)
| 1886 | |
| 1887 | #GRAPH_TYPES = ['classtree', 'packagetree', 'importgraph'] |
    def _build_graph(self, graph_type, graph_args, linker,
                     docindex, context):
        """
        Construct the dot graph requested by a G{...} markup element.
        Returns a graph object from epydoc.docwriter.dotgraph, or
        C{None} if the graph cannot be built (a warning is logged).
        Supported graph types: classtree, packagetree, importgraph,
        callgraph.
        """
        # Imports are deferred to avoid the dotgraph/apidoc dependency
        # unless a graph is actually requested.
        # Generate the graph
        if graph_type == 'classtree':
            from epydoc.apidoc import ClassDoc
            # Explicit args name the base classes; otherwise fall back
            # to the containing class, if there is one.
            if graph_args:
                bases = [docindex.find(name, context)
                         for name in graph_args]
            elif isinstance(context, ClassDoc):
                bases = [context]
            else:
                log.warning("Could not construct class tree: you must "
                            "specify one or more base classes.")
                return None
            from epydoc.docwriter.dotgraph import class_tree_graph
            return class_tree_graph(bases, linker, context)
        elif graph_type == 'packagetree':
            from epydoc.apidoc import ModuleDoc
            # Explicit args name the root packages; otherwise fall back
            # to the containing module, if there is one.
            if graph_args:
                packages = [docindex.find(name, context)
                            for name in graph_args]
            elif isinstance(context, ModuleDoc):
                packages = [context]
            else:
                log.warning("Could not construct package tree: you must "
                            "specify one or more root packages.")
                return None
            from epydoc.docwriter.dotgraph import package_tree_graph
            return package_tree_graph(packages, linker, context)
        elif graph_type == 'importgraph':
            from epydoc.apidoc import ModuleDoc
            # The import graph always covers every documented module.
            modules = [d for d in docindex.root if isinstance(d, ModuleDoc)]
            from epydoc.docwriter.dotgraph import import_graph
            return import_graph(modules, docindex, linker, context)

        elif graph_type == 'callgraph':
            if graph_args:
                docs = [docindex.find(name, context) for name in graph_args]
                docs = [doc for doc in docs if doc is not None]
            else:
                docs = [context]
            from epydoc.docwriter.dotgraph import call_graph
            return call_graph(docs, docindex, linker, context)
        else:
            log.warning("Unknown graph type %s" % graph_type)
| 1933 | |
| 1934 | |
    def _to_latex(self, tree, linker, indent=0, seclevel=0, breakany=0):
        """
        Recursively render the DOM (sub)tree C{tree} as LaTeX,
        dispatching on the element's tag.  Plain strings are
        LaTeX-escaped.  Unlike L{_to_html}, unknown tags are passed
        through as their rendered children.
        """
        if isinstance(tree, basestring):
            return plaintext_to_latex(tree, breakany=breakany)

        if tree.tag == 'section': seclevel += 1

        # Figure out the child indent level.
        if tree.tag == 'epytext': cindent = indent
        else: cindent = indent + 2
        variables = [self._to_latex(c, linker, cindent, seclevel, breakany)
                     for c in tree.children]
        childstr = ''.join(variables)

        if tree.tag == 'para':
            return wordwrap(childstr, indent)+'\n'
        elif tree.tag == 'code':
            return '\\texttt{%s}' % childstr
        elif tree.tag == 'uri':
            if len(variables) != 2: raise ValueError('Bad URI ')
            # self._hyperref is set by to_latex() before rendering.
            if self._hyperref:
                # ~ and # should not be escaped in the URI.
                uri = tree.children[1].children[0]
                uri = uri.replace('{\\textasciitilde}', '~')
                uri = uri.replace('\\#', '#')
                if variables[0] == variables[1]:
                    return '\\href{%s}{\\textit{%s}}' % (uri, variables[1])
                else:
                    return ('%s\\footnote{\\href{%s}{%s}}' %
                            (variables[0], uri, variables[1]))
            else:
                # Without hyperref, fall back to a plain footnote.
                if variables[0] == variables[1]:
                    return '\\textit{%s}' % variables[1]
                else:
                    return '%s\\footnote{%s}' % (variables[0], variables[1])
        elif tree.tag == 'link':
            if len(variables) != 2: raise ValueError('Bad Link')
            return linker.translate_identifier_xref(variables[1], variables[0])
        elif tree.tag == 'italic':
            return '\\textit{%s}' % childstr
        elif tree.tag == 'math':
            return '\\textit{%s}' % childstr
        elif tree.tag == 'indexed':
            # Wrap the term in its own document and let the linker
            # decide how to render the index entry.
            term = Element('epytext', *tree.children, **tree.attribs)
            return linker.translate_indexterm(ParsedEpytextDocstring(term))
        elif tree.tag == 'bold':
            return '\\textbf{%s}' % childstr
        elif tree.tag == 'li':
            return indent*' ' + '\\item ' + childstr.lstrip()
        elif tree.tag == 'heading':
            return ' '*(indent-2) + '(section) %s\n\n' % childstr
        elif tree.tag == 'doctestblock':
            return doctest_to_latex(tree.children[0].strip())
        elif tree.tag == 'literalblock':
            return '\\begin{alltt}\n%s\\end{alltt}\n\n' % childstr
        elif tree.tag == 'fieldlist':
            # Field lists are normally extracted by split_fields();
            # any leftovers are explicitly omitted from LaTeX output.
            return indent*' '+'{omitted fieldlist}\n'
        elif tree.tag == 'olist':
            return (' '*indent + '\\begin{enumerate}\n\n' +
                    ' '*indent + '\\setlength{\\parskip}{0.5ex}\n' +
                    childstr +
                    ' '*indent + '\\end{enumerate}\n\n')
        elif tree.tag == 'ulist':
            return (' '*indent + '\\begin{itemize}\n' +
                    ' '*indent + '\\setlength{\\parskip}{0.6ex}\n' +
                    childstr +
                    ' '*indent + '\\end{itemize}\n\n')
        elif tree.tag == 'symbol':
            symbol = tree.children[0]
            return self.SYMBOL_TO_LATEX.get(symbol, '[%s]' % symbol)
        elif tree.tag == 'graph':
            # Graphs are not supported in the LaTeX output.
            return '(GRAPH)'
            #raise ValueError, 'graph not implemented yet for latex'
        else:
            # Assume that anything else can be passed through.
            return childstr
| 2010 | |
    # Matches the first sentence of a paragraph: the shortest prefix
    # ending in a period that is followed by whitespace or the end of
    # the string.
    _SUMMARY_RE = re.compile(r'(\s*[\w\W]*?\.)(\s|$)')

    def summary(self):
        """
        Return a C{(summary, long_docs)} pair, where C{summary} is a
        L{ParsedEpytextDocstring} holding the first sentence of this
        docstring, and C{long_docs} is true if the docstring contains
        further content beyond that sentence.
        """
        if self._tree is None: return self, False
        tree = self._tree
        doc = Element('epytext')

        # Find the first paragraph.
        variables = tree.children
        while (len(variables) > 0) and (variables[0].tag != 'para'):
            # Descend into containers; skip anything else.
            if variables[0].tag in ('section', 'ulist', 'olist', 'li'):
                variables = variables[0].children
            else:
                variables = variables[1:]

        # Special case: if the docstring contains a single literal block,
        # then try extracting the summary from it.
        if (len(variables) == 0 and len(tree.children) == 1 and
            tree.children[0].tag == 'literalblock'):
            str = re.split(r'\n\s*(\n|$).*',
                           tree.children[0].children[0], 1)[0]
            variables = [Element('para')]
            variables[0].children.append(str)

        # If we didn't find a paragraph, return an empty epytext.
        if len(variables) == 0: return ParsedEpytextDocstring(doc), False

        # Is there anything else, excluding tags, after the first variable?
        long_docs = False
        for var in variables[1:]:
            # Field lists don't count as additional documentation.
            if isinstance(var, Element) and var.tag == 'fieldlist':
                continue
            long_docs = True
            break

        # Extract the first sentence.
        parachildren = variables[0].children
        para = Element('para', inline=True)
        doc.children.append(para)
        for parachild in parachildren:
            if isinstance(parachild, basestring):
                m = self._SUMMARY_RE.match(parachild)
                if m:
                    para.children.append(m.group(1))
                    # Anything after this child also counts as "long".
                    long_docs |= parachild is not parachildren[-1]
                    if not long_docs:
                        other = parachild[m.end():]
                        if other and not other.isspace():
                            long_docs = True
                    return ParsedEpytextDocstring(doc), long_docs
            para.children.append(parachild)

        return ParsedEpytextDocstring(doc), long_docs
| 2064 | |
    def split_fields(self, errors=None):
        """
        Split this docstring into a C{(body, fields)} pair, where
        C{body} is a L{ParsedEpytextDocstring} without the trailing
        field list (or C{None} if there is no description), and
        C{fields} is a list of L{Field} objects built from that field
        list.  C{self} is not modified; a shallow copy of the tree is
        split instead.

        @param errors: Unused here; kept for interface compatibility.
        """
        if self._tree is None: return (self, ())
        # Work on a copy so the original tree stays intact.
        tree = Element(self._tree.tag, *self._tree.children,
                       **self._tree.attribs)
        fields = []

        if (tree.children and
            tree.children[-1].tag == 'fieldlist' and
            tree.children[-1].children):
            # Detach the trailing field list from the copy.
            field_nodes = tree.children[-1].children
            del tree.children[-1]

            for field in field_nodes:
                # Get the tag
                tag = field.children[0].children[0].lower()
                del field.children[0]

                # Get the argument.
                if field.children and field.children[0].tag == 'arg':
                    arg = field.children[0].children[0]
                    del field.children[0]
                else:
                    arg = None

                # Process the field.
                field.tag = 'epytext'
                fields.append(Field(tag, arg, ParsedEpytextDocstring(field)))

        # Save the remaining docstring as the description..
        if tree.children and tree.children[0].children:
            return ParsedEpytextDocstring(tree), fields
        else:
            return None, fields
| 2098 | |
| 2099 | |
| 2100 | def index_terms(self): |
| 2101 | if self._terms is None: |
| 2102 | self._terms = [] |
| 2103 | self._index_terms(self._tree, self._terms) |
| 2104 | return self._terms |
| 2105 | |
    def _index_terms(self, tree, terms):
        """
        Recursively collect every X{indexed} element in C{tree},
        appending each one (wrapped as a L{ParsedEpytextDocstring})
        to the C{terms} list.
        """
        if tree is None or isinstance(tree, basestring):
            return

        if tree.tag == 'indexed':
            term = Element('epytext', *tree.children, **tree.attribs)
            terms.append(ParsedEpytextDocstring(term))

        # Look for index items in child nodes.
        for child in tree.children:
            self._index_terms(child, terms)