blob: 987df405a25ed7d58c0478c332d6a8d1ea4be631 [file] [log] [blame]
Tor Norbye3a2425a2013-11-04 10:16:08 -08001#
2# doctest.py: Syntax Highlighting for doctest blocks
3# Edward Loper
4#
5# Created [06/28/03 02:52 AM]
6# $Id: restructuredtext.py 1210 2006-04-10 13:25:50Z edloper $
7#
8
9"""
10Syntax highlighting for doctest blocks. This module defines two
11functions, L{doctest_to_html()} and L{doctest_to_latex()}, which can
12be used to perform syntax highlighting on doctest blocks. It also
13defines the more general C{colorize_doctest()}, which could be used to
14do syntax highlighting on doctest blocks with other output formats.
15(Both C{doctest_to_html()} and C{doctest_to_latex()} are defined using
16C{colorize_doctest()}.)
17"""
# Declares the markup language (epytext) and natural language (en) used by
# the docstrings in this module.
__docformat__ = 'epytext en'
19
20import re
21from epydoc.util import plaintext_to_html, plaintext_to_latex
22
# Public API of this module: the two convenience functions plus the
# colorizer base class and its concrete subclasses.
__all__ = [
    'doctest_to_html',
    'doctest_to_latex',
    'DoctestColorizer',
    'XMLDoctestColorizer',
    'HTMLDoctestColorizer',
    'LaTeXDoctestColorizer',
]
26
def doctest_to_html(s):
    """
    Syntax-highlight the doctest string C{s} and return it as HTML.

    The result is produced by L{HTMLDoctestColorizer} and consists of
    a C{<pre>} block with class=py-doctest.  The highlighted regions
    are wrapped in elements carrying one of these css classes:

      - C{py-prompt}  -- the Python PS1 prompt (>>>)
      - C{py-more}    -- the Python PS2 prompt (...)
      - C{py-keyword} -- a Python keyword (for, if, etc.)
      - C{py-builtin} -- a Python builtin name (abs, dir, etc.)
      - C{py-string}  -- a string literal
      - C{py-comment} -- a comment
      - C{py-except}  -- an exception traceback (up to the next >>>)
      - C{py-output}  -- the output from a doctest block.
      - C{py-defname} -- the name of a function or class defined by
        a C{def} or C{class} statement.

    @param s: The doctest block to colorize.
    @return: The HTML markup for the colorized block.
    """
    colorizer = HTMLDoctestColorizer()
    return colorizer.colorize_doctest(s)
46
def doctest_to_latex(s):
    r"""
    Perform syntax highlighting on the given doctest string, and
    return the resulting LaTeX code.  This code consists of an
    C{alltt} environment.  Syntax highlighting is performed using
    the following new latex commands, which must be defined externally:

      - C{\pysrcprompt}  -- the Python PS1 prompt (>>>)
      - C{\pysrcmore}    -- the Python PS2 prompt (...)
      - C{\pysrckeyword} -- a Python keyword (for, if, etc.)
      - C{\pysrcbuiltin} -- a Python builtin name (abs, dir, etc.)
      - C{\pysrcstring}  -- a string literal
      - C{\pysrccomment} -- a comment
      - C{\pysrcexcept}  -- an exception traceback (up to the next >>>)
      - C{\pysrcoutput}  -- the output from a doctest block.
      - C{\pysrcdefname} -- the name of a function or class defined by
        a C{def} or C{class} statement.

    @param s: The doctest block to colorize.
    @return: The LaTeX markup for the colorized block.
    """
    # NOTE: the docstring is a raw string because it contains literal
    # backslashes (``\pysrc...``) that are not valid escape sequences.
    return LaTeXDoctestColorizer().colorize_doctest(s)
65
class DoctestColorizer:
    """
    An abstract base class for performing syntax highlighting on
    doctest blocks and other bits of Python code.  Subclasses should
    provide definitions for:

      - The L{markup()} method, which takes a substring and a tag, and
        returns a colorized version of the substring.
      - The L{PREFIX} and L{SUFFIX} variables, which will be added
        to the beginning and end of the strings returned by
        L{colorize_codeblock} and L{colorize_doctest}.
    """

    #: A string that is added to the beginning of the strings
    #: returned by L{colorize_codeblock} and L{colorize_doctest}.
    #: Typically, this string begins a preformatted area.
    PREFIX = None

    #: A string that is added to the end of the strings
    #: returned by L{colorize_codeblock} and L{colorize_doctest}.
    #: Typically, this string ends a preformatted area.
    SUFFIX = None

    #: A list of the names of all Python keywords.  ('as' is included
    #: even though it is technically not a keyword in old Pythons.)
    #: Each fragment ends with a space so that adjacent string literals
    #: do not fuse the last word of one row with the first of the next.
    _KEYWORDS = ("and       del       for       is        raise "
                 "assert    elif      from      lambda    return "
                 "break     else      global    not       try "
                 "class     except    if        or        while "
                 "continue  exec      import    pass      yield "
                 "def       finally   in        print     as "
                 "with").split()

    #: A list of all Python builtins.  C{__builtins__} is the builtins
    #: module in C{__main__} but that module's C{__dict__} in imported
    #: modules; C{dir()} of a dict would list dict methods, so the dict
    #: case is handled by iterating over its keys instead.
    _BUILTINS = sorted(
        _BI for _BI in (__builtins__ if isinstance(__builtins__, dict)
                        else dir(__builtins__))
        if not _BI.startswith('__'))

    #: A regexp group that matches keywords.
    _KEYWORD_GRP = '|'.join([r'\b%s\b' % _KW for _KW in _KEYWORDS])

    #: A regexp group that matches Python builtins.
    _BUILTIN_GRP = (r'(?<!\.)(?:%s)' % '|'.join([r'\b%s\b' % _BI
                                                 for _BI in _BUILTINS]))

    #: A regexp group that matches Python strings.
    _STRING_GRP = '|'.join(
        [r'("""("""|.*?((?!").)"""))', r'("("|.*?((?!").)"))',
         r"('''('''|.*?[^\\']'''))", r"('('|.*?[^\\']'))"])

    #: A regexp group that matches Python comments.
    _COMMENT_GRP = '(#.*?$)'

    #: A regexp group that matches Python ">>>" prompts.
    _PROMPT1_GRP = r'^[ \t]*>>>(?:[ \t]|$)'

    #: A regexp group that matches Python "..." prompts.
    _PROMPT2_GRP = r'^[ \t]*\.\.\.(?:[ \t]|$)'

    #: A regexp group that matches function and class definitions.
    _DEFINE_GRP = r'\b(?:def|class)[ \t]+\w+'

    #: A regexp that matches Python prompts
    PROMPT_RE = re.compile('(%s|%s)' % (_PROMPT1_GRP, _PROMPT2_GRP),
                           re.MULTILINE | re.DOTALL)

    #: A regexp that matches Python "..." prompts.
    PROMPT2_RE = re.compile('(%s)' % _PROMPT2_GRP,
                            re.MULTILINE | re.DOTALL)

    #: A regexp that matches doctest exception blocks.
    EXCEPT_RE = re.compile(r'^[ \t]*Traceback \(most recent call last\):.*',
                           re.DOTALL | re.MULTILINE)

    #: A regexp that matches doctest directives.
    DOCTEST_DIRECTIVE_RE = re.compile(r'#[ \t]*doctest:.*')

    #: A regexp that matches all of the regions of a doctest block
    #: that should be colored.  Group 1 is the uncolored text that
    #: precedes the region; group 2 is the region itself.
    DOCTEST_RE = re.compile(
        r'(.*?)((?P<STRING>%s)|(?P<COMMENT>%s)|(?P<DEFINE>%s)|'
        r'(?P<KEYWORD>%s)|(?P<BUILTIN>%s)|'
        r'(?P<PROMPT1>%s)|(?P<PROMPT2>%s)|(?P<EOS>\Z))' % (
            _STRING_GRP, _COMMENT_GRP, _DEFINE_GRP, _KEYWORD_GRP,
            _BUILTIN_GRP, _PROMPT1_GRP, _PROMPT2_GRP),
        re.MULTILINE | re.DOTALL)

    #: This regular expression is used to find doctest examples in a
    #: string.  This is copied from the standard Python doctest.py
    #: module (after the refactoring in Python 2.4+).
    DOCTEST_EXAMPLE_RE = re.compile(r'''
        # Source consists of a PS1 line followed by zero or more PS2 lines.
        (?P<source>
            (?:^(?P<indent> [ ]*) >>>    .*)    # PS1 line
            (?:\n           [ ]*  \.\.\. .*)*   # PS2 lines
          \n?)
        # Want consists of any non-blank lines that do not start with PS1.
        (?P<want> (?:(?![ ]*$)    # Not a blank line
                     (?![ ]*>>>)  # Not a line starting with PS1
                     .*$\n?       # But any other line
                  )*)
        ''', re.MULTILINE | re.VERBOSE)

    def colorize_inline(self, s):
        """
        Colorize a string containing Python code.  Do not add the
        L{PREFIX} and L{SUFFIX} strings to the returned value.  This
        method is intended for generating syntax-highlighted strings
        that are appropriate for inclusion as inline expressions.

        @param s: The Python source text to colorize.
        @return: C{s} with every region matched by L{DOCTEST_RE}
            passed through L{markup()}.
        """
        return self.DOCTEST_RE.sub(self.subfunc, s)

    def colorize_codeblock(self, s):
        """
        Colorize a string containing only Python code.  This method
        differs from L{colorize_doctest} in that it does not split the
        string into doctest examples and expected output: the whole
        string is colorized as source code.

        @param s: The Python source text to colorize.
        @return: The colorized text, wrapped in L{PREFIX} and L{SUFFIX}.
        """
        body = self.DOCTEST_RE.sub(self.subfunc, s)
        return self.PREFIX + body + self.SUFFIX

    def colorize_doctest(self, s, strip_directives=False):
        """
        Colorize a string containing one or more doctest examples.

        Each example found by L{DOCTEST_EXAMPLE_RE} is split into its
        source, which is colorized with L{DOCTEST_RE}, and its expected
        output, which is marked up line by line as C{'except'} (when it
        starts with a traceback header) or C{'output'}.  Text between
        examples is copied to the result unchanged.

        @param s: The doctest block to colorize.
        @param strip_directives: If true, then remove any doctest
            directive comments (C{# doctest: ...}) from the example
            source before colorizing it.
        @return: The colorized text, wrapped in L{PREFIX} and L{SUFFIX}.
        """
        output = []
        charno = 0
        for m in self.DOCTEST_EXAMPLE_RE.finditer(s):
            # Parse the doctest example:
            pysrc, want = m.group('source', 'want')
            if strip_directives:
                pysrc = self.DOCTEST_DIRECTIVE_RE.sub('', pysrc)
            # Pre-example text:
            output.append(s[charno:m.start()])
            # Example source code:
            output.append(self.DOCTEST_RE.sub(self.subfunc, pysrc))
            # Example output:
            if want:
                if self.EXCEPT_RE.match(want):
                    tag = 'except'
                else:
                    tag = 'output'
                output.append('\n'.join([self.markup(line, tag)
                                         for line in want.split('\n')]))
            # Update charno
            charno = m.end()
        # Add any remaining post-example text.
        output.append(s[charno:])

        return self.PREFIX + ''.join(output) + self.SUFFIX

    def subfunc(self, match):
        """
        Substitution callback used with L{DOCTEST_RE}.

        @param match: A match object produced by L{DOCTEST_RE}.  Group 1
            is the plain text preceding the colored region and group 2
            is the region itself.
        @return: The marked-up replacement for the matched text.
        @raise AssertionError: If none of the named groups matched.
        """
        other, text = match.group(1, 2)
        if other:
            # Mark up line by line so that no markup spans a newline.
            other = '\n'.join([self.markup(line, 'other')
                               for line in other.split('\n')])

        if match.group('PROMPT1'):
            return other + self.markup(text, 'prompt')
        elif match.group('PROMPT2'):
            return other + self.markup(text, 'more')
        elif match.group('KEYWORD'):
            return other + self.markup(text, 'keyword')
        elif match.group('BUILTIN'):
            return other + self.markup(text, 'builtin')
        elif match.group('COMMENT'):
            return other + self.markup(text, 'comment')
        elif match.group('STRING') and '\n' not in text:
            return other + self.markup(text, 'string')
        elif match.group('STRING'):
            # It's a multiline string; colorize the string & prompt
            # portion of each line.
            pieces = []
            for line in text.split('\n'):
                prompt = self.PROMPT2_RE.match(line)
                if prompt:
                    # Split at the end of the matched prompt rather than
                    # at a fixed width, so indented prompts work too.
                    split = prompt.end()
                    piece = self.markup(line[:split], 'more')
                    if line[split:]:
                        piece += self.markup(line[split:], 'string')
                    pieces.append(piece)
                elif line:
                    pieces.append(self.markup(line, 'string'))
                else:
                    pieces.append('')
            return other + '\n'.join(pieces)
        elif match.group('DEFINE'):
            m = re.match(r'(?P<def>\w+)(?P<space>\s+)(?P<name>\w+)', text)
            return other + (self.markup(m.group('def'), 'keyword') +
                            self.markup(m.group('space'), 'other') +
                            self.markup(m.group('name'), 'defname'))
        elif match.group('EOS') is not None:
            # EOS matches the empty string, so test against None.
            return other
        else:
            raise AssertionError('Unexpected match!')

    def markup(self, s, tag):
        """
        Apply syntax highlighting to a single substring from a doctest
        block.  C{s} is the substring, and C{tag} is the tag that
        should be applied to the substring.  C{tag} will be one of the
        following strings:

          - C{prompt}  -- the Python PS1 prompt (>>>)
          - C{more}    -- the Python PS2 prompt (...)
          - C{keyword} -- a Python keyword (for, if, etc.)
          - C{builtin} -- a Python builtin name (abs, dir, etc.)
          - C{string}  -- a string literal
          - C{comment} -- a comment
          - C{except}  -- an exception traceback (up to the next >>>)
          - C{output}  -- the output from a doctest block.
          - C{defname} -- the name of a function or class defined by
            a C{def} or C{class} statement.
          - C{other}   -- anything else (does *not* include output.)

        @raise AssertionError: Always; subclasses must override this.
        """
        raise AssertionError("Abstract method")
277
class XMLDoctestColorizer(DoctestColorizer):
    """
    A L{DoctestColorizer} subclass whose output is XML-like: each
    colored region is wrapped in an element named after its tag.
    This class is mainly intended to be used for testing purposes.
    """
    PREFIX = '<colorized>\n'
    SUFFIX = '</colorized>\n'

    def markup(self, s, tag):
        """
        Escape C{&}, C{<} and C{>} in C{s}, then wrap the result in a
        C{<tag>} element unless C{tag} is C{'other'}.
        """
        escaped = s
        # '&' must be replaced first so the entities added for '<' and
        # '>' are not escaped a second time.
        for char, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;')):
            escaped = escaped.replace(char, entity)
        if tag != 'other':
            return '<%s>%s</%s>' % (tag, escaped, tag)
        return escaped
289
class HTMLDoctestColorizer(DoctestColorizer):
    """
    A L{DoctestColorizer} subclass that produces HTML: the result is
    a C{<pre class="py-doctest">} block whose colored regions are
    C{<span>} elements with class C{py-I{tag}}.
    """
    PREFIX = '<pre class="py-doctest">\n'
    SUFFIX = '</pre>\n'

    def markup(self, s, tag):
        """
        Convert C{s} with C{plaintext_to_html}, and wrap it in a
        C{<span>} for C{tag} unless C{tag} is C{'other'}.
        """
        html = plaintext_to_html(s)
        if tag != 'other':
            html = '<span class="py-%s">%s</span>' % (tag, html)
        return html
300
class LaTeXDoctestColorizer(DoctestColorizer):
    r"""
    A L{DoctestColorizer} subclass that produces LaTeX: the result is
    an C{alltt} environment whose colored regions are wrapped in
    C{\pysrcI{tag}} commands.
    """
    PREFIX = '\\begin{alltt}\n'
    SUFFIX = '\\end{alltt}\n'

    def markup(self, s, tag):
        """
        Convert C{s} with C{plaintext_to_latex}, and wrap it in the
        command for C{tag} unless C{tag} is C{'other'}.
        """
        latex = plaintext_to_latex(s)
        if tag != 'other':
            latex = '\\pysrc%s{%s}' % (tag, latex)
        return latex
310
311