blob: d1b61c9ba380a57449d0d775a337bcad741bfb1c [file] [log] [blame]
#!/usr/bin/env python3
#
#  Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..]
#
#  Extract documentation from C++ header files to use it in Python bindings
#
7
8import os, sys, platform, re, textwrap
9from clang import cindex
10from clang.cindex import CursorKind
11from collections import OrderedDict
Wenzel Jakobfa70d302015-07-23 14:43:34 +020012from threading import Thread, Semaphore
13from multiprocessing import cpu_count
Wenzel Jakobad06e762015-07-22 01:01:52 +020014
# On macOS, point the clang bindings at the libclang library installed
# under /opt/llvm, but only when that library file is actually present.
if platform.system() == 'Darwin':
    libclang = '/opt/llvm/lib/libclang.dylib'
    if os.path.exists(libclang):
        cindex.Config.set_library_path(os.path.dirname(libclang))
19
# Cursor kinds whose children are visited recursively by extract().
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.CLASS_TEMPLATE,
]
27
# Cursor kinds for which extract() emits a docstring variable.
PRINT_LIST = [
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.CLASS_TEMPLATE,
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    CursorKind.FIELD_DECL,
]
38
# Maps a C++ operator symbol to the word used for it in generated
# identifiers.  The mapping is ordered longest symbol first so that
# sanitize_name() substitutes e.g. '<<=' before '<<' and '<<' before '<'.
CPP_OPERATORS = OrderedDict(sorted(
    (
        ('<=', 'le'), ('>=', 'ge'), ('==', 'eq'), ('!=', 'ne'),
        ('[]', 'array'),
        ('+=', 'iadd'), ('-=', 'isub'), ('*=', 'imul'), ('/=', 'idiv'),
        ('%=', 'imod'), ('&=', 'iand'), ('|=', 'ior'), ('^=', 'ixor'),
        ('<<=', 'ilshift'), ('>>=', 'irshift'),
        ('++', 'inc'), ('--', 'dec'),
        ('<<', 'lshift'), ('>>', 'rshift'),
        ('&&', 'land'), ('||', 'lor'), ('!', 'lnot'),
        ('~', 'bnot'), ('&', 'band'), ('|', 'bor'),
        ('+', 'add'), ('-', 'sub'), ('*', 'mul'), ('/', 'div'),
        ('%', 'mod'), ('<', 'lt'), ('>', 'gt'), ('=', 'assign'),
    ),
    key=lambda pair: -len(pair[0])))
49
# Upper bound on simultaneously running extraction jobs: one per CPU.
job_count = cpu_count()
# Counting semaphore holding one permit per allowed job.
job_semaphore = Semaphore(job_count)

# Number of times each sanitized name has been handed out so far; used by
# sanitize_name() to disambiguate repeated names.
registered_names = {}
54
def d(s):
    """Return *s* as text, decoding it as UTF-8 when it is a ``bytes`` object.

    Values that are already ``str`` are returned unchanged instead of
    failing on the missing ``decode`` method.
    NOTE(review): presumably needed because libclang binding versions
    differ in whether they return ``bytes`` or ``str`` -- confirm against
    the installed ``clang.cindex``.
    """
    return s.decode('utf8') if isinstance(s, bytes) else s
57
def sanitize_name(name):
    """Turn a prefixed C++ entity name into a unique ``__doc_*`` identifier.

    Operator spellings are replaced by words (``operator==`` becomes
    ``operator_eq``), every remaining character that is not valid in a C
    identifier is replaced by an underscore, and repeated names get a
    numeric suffix (``_2``, ``_3``, ...) based on the module-level
    ``registered_names`` counter.

    Parameters:
        name: the name to sanitize, e.g. ``'Foo_operator=='``.

    Returns:
        The sanitized name prefixed with ``'__doc_'``.
    """
    # The call operator has no entry in CPP_OPERATORS; translate it here so
    # that its parentheses do not end up as anonymous underscores.
    name = name.replace('operator()', 'operator_call')
    for symbol, word in CPP_OPERATORS.items():
        name = name.replace('operator%s' % symbol, 'operator_%s' % word)

    # Anything still outside [0-9A-Za-z_] (template brackets, spaces,
    # commas, '*', '&', ':', ...) cannot appear in the generated C
    # variable name.
    name = re.sub(r'[^0-9A-Za-z_]', '_', name)

    if name in registered_names:
        registered_names[name] += 1
        name += '_' + str(registered_names[name])
    else:
        registered_names[name] = 1
    return '__doc_' + name
72
def process_comment(comment):
    r"""Convert a raw C++/Doxygen comment into re-flowed plain text.

    The comment delimiters (``/* ... */``, ``///`` and the leading ``*`` of
    continuation lines) are removed, a set of backslash-style Doxygen
    commands and a few HTML/TeX constructs are rewritten to reST-like
    markup, and the text is re-wrapped paragraph by paragraph to 75
    columns.  Tag headings such as ``Parameter ``x``:`` or ``Returns:`` are
    emitted on their own line and the paragraph that follows each heading
    is indented by four spaces.

    Parameters:
        comment: the raw comment text (may be empty).

    Returns:
        The processed text without leading or trailing whitespace.
    """
    # Remove C++ comment syntax
    stripped = []
    for line in comment.splitlines():
        line = line.strip()
        if line.startswith('///'):
            line = line[3:]
        else:
            opens_block = line.startswith('/*')
            if opens_block:
                line = line[2:]
            # Tested independently of the opener so that a one-line
            # '/** text */' comment loses its terminator as well.
            if line.endswith('*/'):
                line = line[:-2].rstrip('* \t')
            if opens_block:
                line = line.lstrip('* \t')
        if line.startswith('*'):
            line = line[1:]
        stripped.append(line.strip() + '\n')
    s = ''.join(stripped)

    # Doxygen tags
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    # A leading '$' marks a heading for the re-flow step below.
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)

    section_tags = (
        ('return', 'Returns'),
        ('returns', 'Returns'),
        ('author', 'Author'),
        ('authors', 'Authors'),
        ('copyright', 'Copyright'),
        ('date', 'Date'),
        ('remark', 'Remark'),
        ('remarks', 'Remarks'),
        ('sa', 'See also'),
        ('see', 'See also'),
        ('extends', 'Extends'),
        ('throw', 'Throws'),
        ('throws', 'Throws'),
    )
    for tag, title in section_tags:
        # The word boundary keeps a short tag ('throw') from matching the
        # first letters of a longer one ('throws') and leaving a stray 's'.
        s = re.sub(r'\\%s\b\s*' % tag, r'\n\n$%s:\n\n' % title, s)

    s = re.sub(r'\\details\s*', r'\n\n', s)
    s = re.sub(r'\\brief\s*', r'', s)
    s = re.sub(r'\\short\s*', r'', s)
    s = re.sub(r'\\ref\s*', r'', s)

    # HTML/TeX tags
    s = re.sub(r'<tt>([^<]*)</tt>', r'``\1``', s)
    s = re.sub(r'<em>([^<]*)</em>', r'*\1*', s)
    s = re.sub(r'<b>([^<]*)</b>', r'**\1**', s)
    s = re.sub(r'\\f\$([^\$]*)\\f\$', r'$\1$', s)

    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')

    # Re-flow text
    wrapper = textwrap.TextWrapper(width=75, expand_tabs=True,
                                   replace_whitespace=True)
    wrapper.initial_indent = wrapper.subsequent_indent = ''

    # NOTE(review): a paragraph that starts with a literal '$' (for example
    # inline math converted above) is indistinguishable from a heading here.
    chunks = []
    for paragraph in re.split(r'\n{2,}', s):
        wrapped = wrapper.fill(paragraph.strip())
        if wrapped.startswith('$'):
            # Heading: drop the marker and indent the paragraph after it.
            chunks.append(wrapped[1:] + '\n')
            wrapper.initial_indent = wrapper.subsequent_indent = ' ' * 4
        else:
            chunks.append(wrapped + '\n\n')
            wrapper.initial_indent = wrapper.subsequent_indent = ''

    # strip() rather than rstrip(): a comment that begins with a tag
    # produces an empty first paragraph, which must not become leading
    # blank lines in the docstring.
    return ''.join(chunks).strip()
146
147
def extract(filename, node, prefix, output):
    """Recursively collect docstring definitions for *node* and its children.

    Nodes located in a file other than *filename* are skipped.  For nodes
    whose kind is in ``RECURSE_LIST`` the children are visited first, with
    the node's own spelling appended to the name prefix (except for the
    translation unit itself); if none of the children yields anything, the
    node is skipped as well.  For nodes whose kind is in ``PRINT_LIST`` a
    ``static const char *`` definition holding the processed comment is
    appended to *output*.

    Parameters:
        filename: path of the header being processed.
        node: the clang cursor to inspect.
        prefix: underscore-joined names of the enclosing scopes ('' at
            global scope).
        output: list that receives the generated C++ source lines.

    Returns:
        The number of definitions appended for this node and its subtree.
    """
    num_extracted = 0
    if not (node.location.file is None or
            os.path.samefile(d(node.location.file.name), filename)):
        return 0
    if node.kind in RECURSE_LIST:
        sub_prefix = prefix
        if node.kind != CursorKind.TRANSLATION_UNIT:
            if len(sub_prefix) > 0:
                sub_prefix += '_'
            sub_prefix += d(node.spelling)
        for child in node.get_children():
            num_extracted += extract(filename, child, sub_prefix, output)
        if num_extracted == 0:
            return 0
    if node.kind in PRINT_LIST:
        comment = d(node.raw_comment) if node.raw_comment is not None else ''
        comment = process_comment(comment)
        # Only insert the separator when there is an enclosing scope;
        # otherwise a global-scope entity would be named '__doc__Foo', which
        # the generated DOC(Foo) macro (expanding to '__doc_Foo') cannot
        # reference.
        spelling = d(node.spelling)
        qualified = prefix + '_' + spelling if len(prefix) > 0 else spelling
        name = sanitize_name(qualified)
        output.append('\nstatic const char *%s = %sR"doc(%s)doc";' %
                      (name, '\n' if '\n' in comment else '', comment))
        num_extracted += 1
    return num_extracted
169
class ExtractionThread(Thread):
    """Worker thread that parses one header and extracts its docstrings.

    Constructing an instance takes one permit from the module-level
    ``job_semaphore`` (blocking the caller while none is available); the
    permit is given back when ``run`` finishes, whether or not it raised.
    """

    def __init__(self, filename, parameters, output):
        super().__init__()
        self.filename, self.parameters, self.output = \
            filename, parameters, output
        job_semaphore.acquire()

    def run(self):
        """Parse ``self.filename`` with clang and append results to ``self.output``."""
        print('Processing "%s" ..' % self.filename, file=sys.stderr)
        try:
            raw_index = cindex.conf.lib.clang_createIndex(False, True)
            index = cindex.Index(raw_index)
            tu = index.parse(self.filename, self.parameters)
            extract(self.filename, tu.cursor, '', self.output)
        finally:
            # Always hand the permit back so the main thread can finish.
            job_semaphore.release()
186
if __name__ == '__main__':
    # Arguments starting with '-' are forwarded to clang unchanged; every
    # other argument is a header file to process.
    parameters = ['-x', 'c++', '-std=c++11']
    filenames = []

    for item in sys.argv[1:]:
        if item.startswith('-'):
            parameters.append(item)
        else:
            filenames.append(item)

    if not filenames:
        # stdout carries the generated header, so usage errors go to stderr.
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0],
              file=sys.stderr)
        sys.exit(-1)

    # Fixed preamble of the generated header.  __DOC5 needs '##' on both
    # sides of the underscore after n4; without it 'n4_' is a single token
    # and the fourth argument is never substituted.
    print('''/*
 This file contains docstrings for the Python bindings.
 Do not edit! These were automatically extracted by mkdoc.py
 */

#define __COUNT(_1, _2, _3, _4, _5, COUNT, ...) COUNT
#define __VA_SIZE(...) __COUNT(__VA_ARGS__, 5, 4, 3, 2, 1)
#define __CAT1(a, b) a ## b
#define __CAT2(a, b) __CAT1(a, b)
#define __DOC1(n1) __doc_##n1
#define __DOC2(n1, n2) __doc_##n1##_##n2
#define __DOC3(n1, n2, n3) __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4) __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5) __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define DOC(...) __CAT2(__DOC, __VA_SIZE(__VA_ARGS__))(__VA_ARGS__)

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
''')

    output = []
    for filename in filenames:
        # The constructor blocks while job_count extractions are in flight.
        thr = ExtractionThread(filename, parameters, output)
        thr.start()

    print('Waiting for jobs to finish ..', file=sys.stderr)
    # Taking every permit succeeds only once all workers released theirs.
    for _ in range(job_count):
        job_semaphore.acquire()

    # Sort so the generated file does not depend on thread scheduling.
    output.sort()
    for line in output:
        print(line)

    print('''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
''')