| Tor Norbye | 3a2425a | 2013-11-04 10:16:08 -0800 | [diff] [blame^] | 1 | # |
| 2 | # doctest.py: Syntax Highlighting for doctest blocks |
| 3 | # Edward Loper |
| 4 | # |
| 5 | # Created [06/28/03 02:52 AM] |
| 6 | # $Id: restructuredtext.py 1210 2006-04-10 13:25:50Z edloper $ |
| 7 | # |
| 8 | |
| 9 | """ |
| 10 | Syntax highlighting for doctest blocks. This module defines two |
| 11 | functions, L{doctest_to_html()} and L{doctest_to_latex()}, which can |
| 12 | be used to perform syntax highlighting on doctest blocks. It also |
| 13 | defines the more general C{colorize_doctest()}, which could be used to |
| 14 | do syntax highlighting on doctest blocks with other output formats. |
| 15 | (Both C{doctest_to_html()} and C{doctest_to_latex()} are defined using |
| 16 | C{colorize_doctest()}.) |
| 17 | """ |
| 18 | __docformat__ = 'epytext en' |
| 19 | |
| 20 | import re |
| 21 | from epydoc.util import plaintext_to_html, plaintext_to_latex |
| 22 | |
# Names exported by a star-import of this module: the two convenience
# functions, the abstract colorizer base class, and its three concrete
# subclasses (XML, HTML and LaTeX output).
__all__ = ['doctest_to_html', 'doctest_to_latex',
           'DoctestColorizer', 'XMLDoctestColorizer',
           'HTMLDoctestColorizer', 'LaTeXDoctestColorizer']
| 26 | |
def doctest_to_html(s):
    """
    Colorize the doctest string C{s} and return the result as HTML.

    The work is delegated to a fresh L{HTMLDoctestColorizer}, so the
    returned markup is a C{<pre>} block with class=py-doctest.  Each
    highlighted region is wrapped in a C{<span>} carrying one of the
    following css classes:

      - C{py-prompt}  -- the Python PS1 prompt (>>>)
      - C{py-more}    -- the Python PS2 prompt (...)
      - C{py-keyword} -- a Python keyword (for, if, etc.)
      - C{py-builtin} -- a Python builtin name (abs, dir, etc.)
      - C{py-string}  -- a string literal
      - C{py-comment} -- a comment
      - C{py-except}  -- an exception traceback (up to the next >>>)
      - C{py-output}  -- the output from a doctest block.
      - C{py-defname} -- the name of a function or class defined by
        a C{def} or C{class} statement.

    @param s: The text containing one or more doctest examples.
    @return: The HTML markup for C{s}.
    """
    colorizer = HTMLDoctestColorizer()
    return colorizer.colorize_doctest(s)
| 46 | |
def doctest_to_latex(s):
    r"""
    Perform syntax highlighting on the given doctest string, and
    return the resulting LaTeX code.  This code consists of an
    C{alltt} environment.  Syntax highlighting is performed using
    the following new latex commands, which must be defined externally:

      - C{\pysrcprompt}  -- the Python PS1 prompt (>>>)
      - C{\pysrcmore}    -- the Python PS2 prompt (...)
      - C{\pysrckeyword} -- a Python keyword (for, if, etc.)
      - C{\pysrcbuiltin} -- a Python builtin name (abs, dir, etc.)
      - C{\pysrcstring}  -- a string literal
      - C{\pysrccomment} -- a comment
      - C{\pysrcexcept}  -- an exception traceback (up to the next >>>)
      - C{\pysrcoutput}  -- the output from a doctest block.
      - C{\pysrcdefname} -- the name of a function or class defined by
        a C{def} or C{class} statement.

    @param s: The text containing one or more doctest examples.
    @return: The LaTeX markup for C{s}, as produced by
        L{LaTeXDoctestColorizer.colorize_doctest}.
    """
    # NOTE: the docstring above is a raw string because it contains
    # literal backslashes (``\pysrc...``) that are not valid string
    # escape sequences.
    return LaTeXDoctestColorizer().colorize_doctest(s)
| 65 | |
class DoctestColorizer:
    """
    An abstract base class for performing syntax highlighting on
    doctest blocks and other bits of Python code.  Subclasses should
    provide definitions for:

      - The L{markup()} method, which takes a substring and a tag, and
        returns a colorized version of the substring.
      - The L{PREFIX} and L{SUFFIX} variables, which will be added
        to the beginning and end of the strings returned by
        L{colorize_codeblock} and L{colorize_doctest}.
    """

    #: A string that is added to the beginning of the strings
    #: returned by L{colorize_codeblock} and L{colorize_doctest}.
    #: Typically, this string begins a preformatted area.  Must be
    #: overridden with a string by subclasses (the base value C{None}
    #: cannot be concatenated).
    PREFIX = None

    #: A string that is added to the end of the strings
    #: returned by L{colorize_codeblock} and L{colorize_doctest}.
    #: Typically, this string ends a preformatted area.  Must be
    #: overridden with a string by subclasses.
    SUFFIX = None

    #: A list of the names of all Python keywords.  ('as' is included
    #: even though it is technically not a keyword.)  Every fragment
    #: but the last ends with a space so that implicit string
    #: concatenation cannot glue two keywords together.
    _KEYWORDS = ("and del for is raise "
                 "assert elif from lambda return "
                 "break else global not try "
                 "class except if or while "
                 "continue exec import pass yield "
                 "def finally in print as").split()

    # ``__builtins__`` is the builtins *module* when this file runs as
    # ``__main__`` but the module's *dict* when it is imported; calling
    # dir() on the dict would list dict method names, not builtins.
    if isinstance(__builtins__, dict):
        _BUILTIN_NAMES = list(__builtins__)
    else:
        _BUILTIN_NAMES = dir(__builtins__)

    #: A (sorted) list of all Python builtins.
    _BUILTINS = sorted([_BI for _BI in _BUILTIN_NAMES
                        if not _BI.startswith('__')])

    #: A regexp group that matches keywords.
    _KEYWORD_GRP = '|'.join([r'\b%s\b' % _KW for _KW in _KEYWORDS])

    #: A regexp group that matches Python builtins (but not attribute
    #: accesses such as C{foo.len}).
    _BUILTIN_GRP = (r'(?<!\.)(?:%s)' % '|'.join([r'\b%s\b' % _BI
                                                 for _BI in _BUILTINS]))

    #: A regexp group that matches Python strings.
    _STRING_GRP = '|'.join(
        [r'("""("""|.*?((?!").)"""))', r'("("|.*?((?!").)"))',
         r"('''('''|.*?[^\\']'''))", r"('('|.*?[^\\']'))"])

    #: A regexp group that matches Python comments.
    _COMMENT_GRP = '(#.*?$)'

    #: A regexp group that matches Python ">>>" prompts.
    _PROMPT1_GRP = r'^[ \t]*>>>(?:[ \t]|$)'

    #: A regexp group that matches Python "..." prompts.
    _PROMPT2_GRP = r'^[ \t]*\.\.\.(?:[ \t]|$)'

    #: A regexp group that matches function and class definitions.
    _DEFINE_GRP = r'\b(?:def|class)[ \t]+\w+'

    #: A regexp that splits the text matched by L{_DEFINE_GRP} into
    #: the C{def}/C{class} keyword, the whitespace, and the name.
    _DEFINE_PARTS_RE = re.compile(
        r'(?P<def>\w+)(?P<space>\s+)(?P<name>\w+)')

    #: A regexp that matches Python prompts
    PROMPT_RE = re.compile('(%s|%s)' % (_PROMPT1_GRP, _PROMPT2_GRP),
                           re.MULTILINE | re.DOTALL)

    #: A regexp that matches Python "..." prompts.
    PROMPT2_RE = re.compile('(%s)' % _PROMPT2_GRP,
                            re.MULTILINE | re.DOTALL)

    #: A regexp that matches doctest exception blocks.
    EXCEPT_RE = re.compile(r'^[ \t]*Traceback \(most recent call last\):.*',
                           re.DOTALL | re.MULTILINE)

    #: A regexp that matches doctest directives.
    DOCTEST_DIRECTIVE_RE = re.compile(r'#[ \t]*doctest:.*')

    #: A regexp that matches all of the regions of a doctest block
    #: that should be colored.  Group 1 is the uncolored text that
    #: precedes the region; group 2 is the region itself.
    DOCTEST_RE = re.compile(
        r'(.*?)((?P<STRING>%s)|(?P<COMMENT>%s)|(?P<DEFINE>%s)|'
        r'(?P<KEYWORD>%s)|(?P<BUILTIN>%s)|'
        r'(?P<PROMPT1>%s)|(?P<PROMPT2>%s)|(?P<EOS>\Z))' % (
            _STRING_GRP, _COMMENT_GRP, _DEFINE_GRP, _KEYWORD_GRP,
            _BUILTIN_GRP, _PROMPT1_GRP, _PROMPT2_GRP),
        re.MULTILINE | re.DOTALL)

    #: This regular expression is used to find doctest examples in a
    #: string.  This is copied from the standard Python doctest.py
    #: module (after the refactoring in Python 2.4+).
    DOCTEST_EXAMPLE_RE = re.compile(r'''
        # Source consists of a PS1 line followed by zero or more PS2 lines.
        (?P<source>
            (?:^(?P<indent> [ ]*) >>>    .*)    # PS1 line
            (?:\n           [ ]*  \.\.\. .*)*   # PS2 lines
          \n?)
        # Want consists of any non-blank lines that do not start with PS1.
        (?P<want> (?:(?![ ]*$)    # Not a blank line
                     (?![ ]*>>>)  # Not a line starting with PS1
                     .*$\n?       # But any other line
                  )*)
        ''', re.MULTILINE | re.VERBOSE)

    #: Pairs of (DOCTEST_RE group name, markup tag) for the regions
    #: whose matched text is passed to L{markup} unchanged.
    _SIMPLE_TAGS = (('PROMPT1', 'prompt'),
                    ('PROMPT2', 'more'),
                    ('KEYWORD', 'keyword'),
                    ('BUILTIN', 'builtin'),
                    ('COMMENT', 'comment'))

    def colorize_inline(self, s):
        """
        Colorize a string containing Python code.  Do not add the
        L{PREFIX} and L{SUFFIX} strings to the returned value.  This
        method is intended for generating syntax-highlighted strings
        that are appropriate for inclusion as inline expressions.

        @param s: The Python source text to colorize.
        @return: C{s} with every region marked up via L{markup}.
        """
        return self.DOCTEST_RE.sub(self.subfunc, s)

    def colorize_codeblock(self, s):
        """
        Colorize a string containing only Python code.  This method
        differs from L{colorize_doctest} in that it will not search
        for doctest prompts when deciding how to colorize the string.

        @param s: The Python source text to colorize.
        @return: The colorized text, wrapped in L{PREFIX} and
            L{SUFFIX}.
        """
        body = self.DOCTEST_RE.sub(self.subfunc, s)
        return self.PREFIX + body + self.SUFFIX

    def colorize_doctest(self, s, strip_directives=False):
        """
        Colorize a string containing one or more doctest examples.

        Each example found by L{DOCTEST_EXAMPLE_RE} is split into its
        source (colorized as Python code) and its expected output
        (marked up line by line as C{except} if it starts with a
        traceback header, otherwise as C{output}).  Text between and
        around the examples is copied to the result as-is, without
        being passed through L{markup}.

        @param s: The text containing the doctest examples.
        @param strip_directives: If true, remove doctest directive
            comments (C{# doctest: ...}) from the source of each
            example before colorizing it.
        @return: The colorized text, wrapped in L{PREFIX} and
            L{SUFFIX}.
        """
        output = []
        charno = 0
        for m in self.DOCTEST_EXAMPLE_RE.finditer(s):
            # Parse the doctest example:
            pysrc, want = m.group('source', 'want')
            if strip_directives:
                pysrc = self.DOCTEST_DIRECTIVE_RE.sub('', pysrc)
            # Pre-example text:
            output.append(s[charno:m.start()])
            # Example source code:
            output.append(self.DOCTEST_RE.sub(self.subfunc, pysrc))
            # Example output:
            if want:
                if self.EXCEPT_RE.match(want):
                    tag = 'except'
                else:
                    tag = 'output'
                output.append('\n'.join([self.markup(line, tag)
                                         for line in want.split('\n')]))
            # Update charno
            charno = m.end()
        # Add any remaining post-example text.
        output.append(s[charno:])

        return self.PREFIX + ''.join(output) + self.SUFFIX

    def subfunc(self, match):
        """
        Replacement callback for L{DOCTEST_RE}: return the marked-up
        form of a single match.

        @param match: A match object produced by L{DOCTEST_RE}.
        @return: The marked-up leading text (tag C{other}) followed by
            the marked-up region.
        @raise AssertionError: If none of the named groups matched.
        """
        other, text = match.group(1, 2)
        if other:
            # Mark up line by line so no markup spans a newline.
            other = '\n'.join([self.markup(line, 'other')
                               for line in other.split('\n')])

        for group, tag in self._SIMPLE_TAGS:
            if match.group(group):
                return other + self.markup(text, tag)

        if match.group('STRING'):
            if '\n' not in text:
                return other + self.markup(text, 'string')
            return other + self._markup_multiline_string(text)
        if match.group('DEFINE'):
            m = self._DEFINE_PARTS_RE.match(text)
            return other + (self.markup(m.group('def'), 'keyword') +
                            self.markup(m.group('space'), 'other') +
                            self.markup(m.group('name'), 'defname'))
        # EOS matches the empty string, so test against None.
        if match.group('EOS') is not None:
            return other
        raise AssertionError('Unexpected match!')

    def _markup_multiline_string(self, text):
        """
        Mark up a string literal that spans several lines, colorizing
        the "..." prompt and the string portion of each line
        separately.  Empty lines are left empty.
        """
        pieces = []
        for line in text.split('\n'):
            prompt = self.PROMPT2_RE.match(line)
            if prompt:
                # Split where the prompt actually ends, so indented
                # continuation lines are handled too.
                end = prompt.end()
                piece = self.markup(line[:end], 'more')
                if len(line) > end:
                    piece += self.markup(line[end:], 'string')
                pieces.append(piece)
            elif line:
                pieces.append(self.markup(line, 'string'))
            else:
                pieces.append('')
        return '\n'.join(pieces)

    def markup(self, s, tag):
        """
        Apply syntax highlighting to a single substring from a doctest
        block.  C{s} is the substring, and C{tag} is the tag that
        should be applied to the substring.  C{tag} will be one of the
        following strings:

          - C{prompt} -- the Python PS1 prompt (>>>)
          - C{more} -- the Python PS2 prompt (...)
          - C{keyword} -- a Python keyword (for, if, etc.)
          - C{builtin} -- a Python builtin name (abs, dir, etc.)
          - C{string} -- a string literal
          - C{comment} -- a comment
          - C{except} -- an exception traceback (up to the next >>>)
          - C{output} -- the output from a doctest block.
          - C{defname} -- the name of a function or class defined by
            a C{def} or C{class} statement.
          - C{other} -- anything else (does *not* include output.)

        @raise AssertionError: Always; subclasses must override this
            method.
        """
        raise AssertionError("Abstract method")
| 277 | |
class XMLDoctestColorizer(DoctestColorizer):
    """
    A subclass of DoctestColorizer that generates XML-like output.
    This class is mainly intended to be used for testing purposes.
    """
    #: Opening tag of the element that wraps the colorized text.
    PREFIX = '<colorized>\n'
    #: Closing tag of the element that wraps the colorized text.
    SUFFIX = '</colorized>\n'

    def markup(self, s, tag):
        """
        Escape C{s} for XML and wrap it in a C{<tag>} element.

        @param s: The substring to mark up.
        @param tag: The tag name; text tagged C{other} is escaped but
            not wrapped in an element.
        @return: The escaped (and possibly wrapped) substring.
        """
        # '&' must be replaced first so the ampersands introduced by
        # the other two entities are not escaped a second time.
        s = s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
        if tag == 'other':
            return s
        return '<%s>%s</%s>' % (tag, s, tag)
| 289 | |
class HTMLDoctestColorizer(DoctestColorizer):
    """A subclass of DoctestColorizer that generates HTML output."""
    #: Opens the C{<pre>} block that holds the colorized text.
    PREFIX = '<pre class="py-doctest">\n'
    #: Closes the C{<pre>} block.
    SUFFIX = '</pre>\n'

    def markup(self, s, tag):
        """
        Convert C{s} with C{plaintext_to_html} and, unless C{tag} is
        C{other}, wrap it in a C{<span>} whose css class is C{py-}
        followed by the tag name.

        @param s: The substring to mark up.
        @param tag: The tag name selecting the css class.
        @return: The HTML for the substring.
        """
        html = plaintext_to_html(s)
        if tag != 'other':
            return '<span class="py-%s">%s</span>' % (tag, html)
        return html
| 300 | |
class LaTeXDoctestColorizer(DoctestColorizer):
    """A subclass of DoctestColorizer that generates LaTeX output."""
    #: Opens the C{alltt} environment that holds the colorized text.
    PREFIX = '\\begin{alltt}\n'
    #: Closes the C{alltt} environment.
    SUFFIX = '\\end{alltt}\n'

    def markup(self, s, tag):
        """
        Convert C{s} with C{plaintext_to_latex} and, unless C{tag} is
        C{other}, pass it as the argument of the LaTeX command named
        C{pysrc} followed by the tag name.

        @param s: The substring to mark up.
        @param tag: The tag name selecting the LaTeX command.
        @return: The LaTeX for the substring.
        """
        latex = plaintext_to_latex(s)
        if tag != 'other':
            return '\\pysrc%s{%s}' % (tag, latex)
        return latex
| 310 | |
| 311 | |