| Raymond Hettinger | bc09cf1 | 2012-06-30 16:58:06 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python3 | 
| Raymond Hettinger | 3a96161 | 2012-07-03 14:11:40 -0700 | [diff] [blame^] | 2 | 'Add syntax highlighting to Python source code' | 
| Raymond Hettinger | bc09cf1 | 2012-06-30 16:58:06 -0700 | [diff] [blame] | 3 |  | 
# Public API.  Every entry must name an object defined in this module;
# otherwise ``from <module> import *`` fails with AttributeError.
__all__ = ['colorize_html', 'colorize_ansi', 'build_page',
           'default_css', 'default_html', 'default_ansi']
| Raymond Hettinger | 9b8ede6 | 2012-06-30 23:19:30 -0700 | [diff] [blame] | 6 | __author__ = 'Raymond Hettinger' | 
| Raymond Hettinger | bc09cf1 | 2012-06-30 16:58:06 -0700 | [diff] [blame] | 7 |  | 
 | 8 | import keyword, tokenize, cgi, functools | 
 | 9 |  | 
def is_builtin(s):
    'Return True if s is the name of a builtin'
    # The ``__builtins__`` global is an implementation detail: it is the
    # builtins module when this file runs as __main__, but a plain dict when
    # the file is imported, and vars() on a dict raises TypeError.  Query the
    # real builtins module so both cases behave the same.
    import builtins
    return s in vars(builtins)
 | 13 |  | 
def combine_range(lines, start, end):
    'Return the text between start and end, paired with the end position'
    first_row, first_col = start
    last_row, last_col = end
    # Rows are 1-based indexes into lines; columns index directly into a line.
    pieces = []
    if first_row != last_row:
        # Tail of the first line, every whole line in between, then the
        # head of the last line.
        pieces.append(lines[first_row - 1][first_col:])
        pieces.extend(lines[first_row:last_row - 1])
        pieces.append(lines[last_row - 1][:last_col])
    else:
        pieces.append(lines[first_row - 1][first_col:last_col])
    return ''.join(pieces), end
| Raymond Hettinger | f2cc352 | 2012-07-02 13:29:57 -0700 | [diff] [blame] | 22 |  | 
def _token_kind(tok_type, tok_str, scol, prev_type, prev_str):
    'Return the highlight category for one token, or an empty string for none'
    if tok_type == tokenize.COMMENT:
        return 'comment'
    if tok_type == tokenize.OP:
        # Brackets and punctuation are left unstyled.
        return '' if tok_str[:1] in '{}[](),.:;' else 'operator'
    if tok_type == tokenize.STRING:
        # A string right after an INDENT token or starting in column 0 is
        # treated as a docstring.
        if prev_type == tokenize.INDENT or scol == 0:
            return 'docstring'
        return 'string'
    if tok_type == tokenize.NAME:
        if tok_str in ('def', 'class', 'import', 'from'):
            return 'definition'
        if prev_str in ('def', 'class'):
            return 'defname'
        if keyword.iskeyword(tok_str):
            return 'keyword'
        # A builtin name that follows a dot is an attribute access.
        if is_builtin(tok_str) and prev_str != '.':
            return 'builtin'
    return ''

def isolate_tokens(source):
    '''Generate chunks of source and identify chunks to be highlighted

    Yields (kind, line_upto_token, line_thru_token) triples: the highlight
    category ('' when the token is not highlighted), the text lying between
    the previous token and this one, and the text of the token itself.
    '''
    lines = source.splitlines(True)
    lines.append('')
    feed = functools.partial(next, iter(lines), '')
    # Seed the "previous token" so the first real token sees a comment with
    # empty text before it.
    last_type, last_str = tokenize.COMMENT, ''
    cursor = (1, 0)             # position up to which text has been emitted
    for tok_type, tok_str, tok_start, tok_end, _ in tokenize.generate_tokens(feed):
        kind = _token_kind(tok_type, tok_str, tok_start[1], last_type, last_str)
        last_type, last_str = tok_type, tok_str
        before, cursor = combine_range(lines, cursor, tok_start)
        token_text, cursor = combine_range(lines, cursor, tok_end)
        yield kind, before, token_text
| Raymond Hettinger | bc09cf1 | 2012-06-30 16:58:06 -0700 | [diff] [blame] | 55 |  | 
# Default color table for colorize_ansi: maps each token kind yielded by
# isolate_tokens to the ANSI escape sequence emitted before such a token.
default_ansi = {
    'comment': '\033[0;31m',        # red
    'string': '\033[0;32m',         # green
    'docstring': '\033[0;32m',      # green
    'keyword': '\033[0;33m',        # yellow
    'builtin': '\033[0;35m',        # magenta
    'definition': '\033[0;33m',     # yellow
    'defname': '\033[0;34m',        # blue
    'operator': '\033[0;33m',       # yellow
}
 | 66 |  | 
 | 67 | def colorize_ansi(source, colors=default_ansi): | 
 | 68 |     'Add syntax highlighting to Python source code using ANSI escape sequences' | 
 | 69 |     # http://en.wikipedia.org/wiki/ANSI_escape_code | 
 | 70 |     result = [] | 
 | 71 |     for kind, line_upto_token, line_thru_token in isolate_tokens(source): | 
 | 72 |         if kind: | 
 | 73 |             result += [line_upto_token, colors[kind], line_thru_token, '\033[0m'] | 
 | 74 |         else: | 
 | 75 |             result += [line_upto_token, line_thru_token] | 
 | 76 |     return ''.join(result) | 
 | 77 |  | 
 | 78 | def colorize_html(source): | 
| Raymond Hettinger | 5da6039 | 2012-07-03 13:13:52 -0700 | [diff] [blame] | 79 |     'Convert Python source code to an HTML fragment with colorized markup' | 
 | 80 |     result = ['<pre class="python">\n'] | 
 | 81 |     for kind, line_upto_token, line_thru_token in isolate_tokens(source): | 
 | 82 |         if kind: | 
 | 83 |             result += [cgi.escape(line_upto_token), | 
 | 84 |                        '<span class="%s">' % kind, | 
 | 85 |                        cgi.escape(line_thru_token), | 
 | 86 |                        '</span>'] | 
 | 87 |         else: | 
 | 88 |             result += [cgi.escape(line_upto_token), | 
 | 89 |                        cgi.escape(line_thru_token)] | 
 | 90 |     result += ['</pre>\n'] | 
| Raymond Hettinger | f2cc352 | 2012-07-02 13:29:57 -0700 | [diff] [blame] | 91 |     return ''.join(result) | 
| Raymond Hettinger | bc09cf1 | 2012-06-30 16:58:06 -0700 | [diff] [blame] | 92 |  | 
# Default style sheet for build_page: maps a CSS class selector to its
# declaration block.  build_page writes each pair as "selector block" on its
# own line; the selectors correspond to the span classes that colorize_html
# emits.
default_css = {
    '.comment': '{color: crimson;}',
    '.string':  '{color: forestgreen;}',
    '.docstring': '{color: forestgreen; font-style:italic;}',
    '.keyword': '{color: darkorange;}',
    '.builtin': '{color: purple;}',
    '.definition': '{color: darkorange; font-weight:bold;}',
    '.defname': '{color: blue;}',
    '.operator': '{color: brown;}',
}
 | 103 |  | 
# Default page template for build_page.  It is filled in with str.format(),
# which substitutes the {title}, {css} and {body} fields.
default_html = '''\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
          "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8">
<title> {title} </title>
<style type="text/css">
{css}
</style>
</head>
<body>
{body}
</body>
</html>
'''
 | 120 |  | 
def build_page(source, title='python', css=default_css, html=default_html):
    '''Create a complete HTML page with colorized Python source code

    source -- Python source text to highlight
    title  -- page title; it is HTML-escaped before insertion
    css    -- mapping of CSS selector to declaration block
    html   -- str.format() template with {title}, {css} and {body} fields

    Returns the formatted page as a string.
    '''
    # cgi.escape() was removed in Python 3.8; html.escape(quote=False) is the
    # equivalent.  The import is aliased because the html parameter shadows
    # the module name inside this function.
    from html import escape as html_escape
    css_str = '\n'.join(['%s %s' % item for item in css.items()])
    result = colorize_html(source)
    title = html_escape(title, quote=False)
    return html.format(title=title, css=css_str, body=result)
| Raymond Hettinger | bc09cf1 | 2012-06-30 16:58:06 -0700 | [diff] [blame] | 127 |  | 
 | 128 |  | 
 | 129 | if __name__ == '__main__': | 
 | 130 |     import sys, argparse, webbrowser, os | 
 | 131 |  | 
 | 132 |     parser = argparse.ArgumentParser( | 
| Raymond Hettinger | 3a96161 | 2012-07-03 14:11:40 -0700 | [diff] [blame^] | 133 |             description = 'Add syntax highlighting to Python source') | 
| Raymond Hettinger | cf6eac4 | 2012-07-03 00:12:27 -0700 | [diff] [blame] | 134 |     parser.add_argument('sourcefile', metavar = 'SOURCEFILE', | 
| Raymond Hettinger | bc09cf1 | 2012-06-30 16:58:06 -0700 | [diff] [blame] | 135 |             help = 'File containing Python sourcecode') | 
| Raymond Hettinger | 3a96161 | 2012-07-03 14:11:40 -0700 | [diff] [blame^] | 136 |     parser.add_argument('-a', '--ansi', action = 'store_true', | 
 | 137 |             help = 'emit ANSI escape highlighted source') | 
| Raymond Hettinger | bc09cf1 | 2012-06-30 16:58:06 -0700 | [diff] [blame] | 138 |     parser.add_argument('-b', '--browser', action = 'store_true', | 
 | 139 |             help = 'launch a browser to show results') | 
| Raymond Hettinger | cf6eac4 | 2012-07-03 00:12:27 -0700 | [diff] [blame] | 140 |     parser.add_argument('-s', '--section', action = 'store_true', | 
 | 141 |             help = 'show an HTML section rather than a complete webpage') | 
| Raymond Hettinger | bc09cf1 | 2012-06-30 16:58:06 -0700 | [diff] [blame] | 142 |     args = parser.parse_args() | 
| Raymond Hettinger | 3a96161 | 2012-07-03 14:11:40 -0700 | [diff] [blame^] | 143 |  | 
| Raymond Hettinger | cf6eac4 | 2012-07-03 00:12:27 -0700 | [diff] [blame] | 144 |     if args.browser and args.section: | 
 | 145 |         parser.error('The -s/--section option is incompatible with ' | 
| Raymond Hettinger | bc09cf1 | 2012-06-30 16:58:06 -0700 | [diff] [blame] | 146 |                      'the -b/--browser option') | 
| Raymond Hettinger | 3a96161 | 2012-07-03 14:11:40 -0700 | [diff] [blame^] | 147 |     if args.ansi and (args.browser or args.section): | 
 | 148 |         parser.error('The -a/--ansi option is incompatible with ' | 
 | 149 |                      'the -b/--browser and -s/--section options') | 
| Raymond Hettinger | bc09cf1 | 2012-06-30 16:58:06 -0700 | [diff] [blame] | 150 |  | 
| Raymond Hettinger | cf6eac4 | 2012-07-03 00:12:27 -0700 | [diff] [blame] | 151 |     sourcefile = args.sourcefile | 
| Raymond Hettinger | bc09cf1 | 2012-06-30 16:58:06 -0700 | [diff] [blame] | 152 |     with open(sourcefile) as f: | 
 | 153 |         page = f.read() | 
| Raymond Hettinger | 3a96161 | 2012-07-03 14:11:40 -0700 | [diff] [blame^] | 154 |  | 
 | 155 |     if args.ansi: | 
 | 156 |         encoded = colorize_ansi(page) | 
 | 157 |     elif args.section: | 
 | 158 |         encoded = colorize_html(page) | 
 | 159 |     else: | 
 | 160 |         encoded = build_page(page, title=sourcefile) | 
 | 161 |  | 
| Raymond Hettinger | bc09cf1 | 2012-06-30 16:58:06 -0700 | [diff] [blame] | 162 |     if args.browser: | 
 | 163 |         htmlfile = os.path.splitext(os.path.basename(sourcefile))[0] + '.html' | 
 | 164 |         with open(htmlfile, 'w') as f: | 
| Raymond Hettinger | 3a96161 | 2012-07-03 14:11:40 -0700 | [diff] [blame^] | 165 |             f.write(encoded) | 
| Raymond Hettinger | bc09cf1 | 2012-06-30 16:58:06 -0700 | [diff] [blame] | 166 |         webbrowser.open('file://' + os.path.abspath(htmlfile)) | 
 | 167 |     else: | 
| Raymond Hettinger | 3a96161 | 2012-07-03 14:11:40 -0700 | [diff] [blame^] | 168 |         sys.stdout.write(encoded) |