animalize | 6261ae9 | 2018-10-08 16:20:54 -0500 | [diff] [blame^] | 1 | """ |
| 2 | Escape the `body` part of each .chm source page to 7-bit ASCII, so that |
| 3 | non-ASCII characters display correctly on some MBCS Windows systems. |
| 4 | |
| 5 | https://bugs.python.org/issue32174 |
| 6 | """ |
| 7 | |
| 8 | import re |
| 9 | from html.entities import codepoint2name |
| 10 | |
| 11 | # escape the characters whose code point is > 0x7F |
| 12 | def _process(string): |
| 13 | def escape(matchobj): |
| 14 | codepoint = ord(matchobj.group(0)) |
| 15 | |
| 16 | name = codepoint2name.get(codepoint) |
| 17 | if name is None: |
| 18 | return '&#%d;' % codepoint |
| 19 | else: |
| 20 | return '&%s;' % name |
| 21 | |
| 22 | return re.sub(r'[^\x00-\x7F]', escape, string) |
| 23 | |
def escape_for_chm(app, pagename, templatename, context, doctree):
    """Rewrite ``context['body']`` as 7-bit ASCII for .chm builds.

    Handler for the ``html-page-context`` event.  It does nothing unless
    ``app.builder.name`` is ``'htmlhelp'``.  When it does apply and the
    context carries a non-None ``body``, that entry is replaced in place
    by its escaped form.  ``pagename``, ``templatename`` and ``doctree``
    are accepted to match the event signature but are not used.
    """
    # A builder without a `name` attribute is treated as "not htmlhelp".
    if getattr(app.builder, 'name', None) != 'htmlhelp':
        return

    page_body = context.get('body')
    if page_body is None:
        # Nothing to escape on this page.
        return

    context['body'] = _process(page_body)
| 33 | |
def setup(app):
    """Register this extension's event handler with *app*.

    Connects ``escape_for_chm`` to the ``html-page-context`` event, which
    is emitted when the HTML builder has created a context dictionary to
    render a template with.  Returns the extension metadata dictionary
    (version and ``parallel_read_safe`` flag).
    """
    app.connect('html-page-context', escape_for_chm)

    metadata = {'version': '1.0', 'parallel_read_safe': True}
    return metadata