blob: 6f2e35725b3791b3c518c5e99ad3b72fad1e6350 [file] [log] [blame]
animalize6261ae92018-10-08 16:20:54 -05001"""
2Escape the `body` part of .chm source file to 7-bit ASCII, to fix visual
3effect on some MBCS Windows systems.
4
5https://bugs.python.org/issue32174
6"""
7
8import re
9from html.entities import codepoint2name
10
# Escape every character whose code point is greater than 0x7F.
12def _process(string):
13 def escape(matchobj):
14 codepoint = ord(matchobj.group(0))
15
16 name = codepoint2name.get(codepoint)
17 if name is None:
18 return '&#%d;' % codepoint
19 else:
20 return '&%s;' % name
21
22 return re.sub(r'[^\x00-\x7F]', escape, string)
23
def escape_for_chm(app, pagename, templatename, context, doctree):
    """Escape the rendered page body to 7-bit ASCII for the htmlhelp builder.

    Handler for the ``html-page-context`` event.  It does nothing unless
    ``app.builder`` has a ``name`` attribute equal to ``'htmlhelp'``.
    When it does, and ``context`` holds a non-None ``'body'`` entry, that
    entry is replaced in place by the result of ``_process``.

    *pagename*, *templatename* and *doctree* are accepted as part of the
    handler signature but are not used.  Always returns None.
    """
    # Only .chm output needs this; a builder without a name is skipped too.
    if getattr(app.builder, 'name', None) != 'htmlhelp':
        return

    page_body = context.get('body')
    if page_body is None:
        # Nothing to escape for this page.
        return

    context['body'] = _process(page_body)
33
def setup(app):
    """Register the extension with Sphinx and return its metadata.

    Connects ``escape_for_chm`` to the ``html-page-context`` event, which
    is emitted when the HTML builder has created a context dictionary to
    render a template with.
    """
    app.connect('html-page-context', escape_for_chm)

    metadata = {'version': '1.0', 'parallel_read_safe': True}
    return metadata