blob: a22aacdefd0171078874bd77bf0175229646656f [file] [log] [blame]
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Syntax: mkdoc.py [-o <output>] [-I<path> ..] [.. a list of header files ..]
#
# Extract documentation from C++ header files to use it in Python bindings
#
8
Wenzel Jakoba57e51c2016-04-27 00:35:03 +02009import os
10import sys
11import platform
12import re
13import textwrap
14
Wenzel Jakobad06e762015-07-22 01:01:52 +020015from clang import cindex
16from clang.cindex import CursorKind
17from collections import OrderedDict
Dan4612db52019-05-15 20:06:08 -040018from glob import glob
Wenzel Jakobfa70d302015-07-23 14:43:34 +020019from threading import Thread, Semaphore
20from multiprocessing import cpu_count
Wenzel Jakobad06e762015-07-22 01:01:52 +020021
# Cursor kinds whose children extract() descends into.
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.CLASS_TEMPLATE,
]

# Cursor kinds for which extract() records a docstring entry.
PRINT_LIST = [
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.ENUM_CONSTANT_DECL,
    CursorKind.CLASS_TEMPLATE,
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CONVERSION_FUNCTION,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    CursorKind.FIELD_DECL,
]

# Cursor kinds whose spelling extract() does not add to the name prefix.
PREFIX_BLACKLIST = [
    CursorKind.TRANSLATION_UNIT,
]
48
# Maps C++ operator tokens to identifier-safe suffixes. The mapping is ordered
# longest token first so that, when sanitize_name() substitutes them one after
# another, 'operator<<=' is handled before 'operator<<' and 'operator<'.
# The sort is stable, so tokens of equal length keep the order listed here.
CPP_OPERATORS = OrderedDict(sorted(
    [
        ('<=', 'le'), ('>=', 'ge'), ('==', 'eq'), ('!=', 'ne'),
        ('[]', 'array'),
        ('+=', 'iadd'), ('-=', 'isub'), ('*=', 'imul'), ('/=', 'idiv'),
        ('%=', 'imod'), ('&=', 'iand'), ('|=', 'ior'), ('^=', 'ixor'),
        ('<<=', 'ilshift'), ('>>=', 'irshift'),
        ('++', 'inc'), ('--', 'dec'),
        ('<<', 'lshift'), ('>>', 'rshift'),
        ('&&', 'land'), ('||', 'lor'), ('!', 'lnot'), ('~', 'bnot'),
        ('&', 'band'), ('|', 'bor'),
        ('+', 'add'), ('-', 'sub'), ('*', 'mul'), ('/', 'div'),
        ('%', 'mod'), ('<', 'lt'), ('>', 'gt'), ('=', 'assign'),
        ('()', 'call'),
    ],
    key=lambda pair: len(pair[0]),
    reverse=True,
))

# One semaphore slot per CPU: ExtractionThread takes a slot when it is
# constructed and gives it back when its run() finishes.
job_count = cpu_count()
job_semaphore = Semaphore(job_count)
64
Dana33212d2019-05-15 20:37:18 -040065
class NoFilenamesError(ValueError):
    """Raised by read_args() when the argument list names no input files."""
68
69
def d(s):
    """Return *s* as ``str``, decoding it as UTF-8 when it is not already one."""
    if isinstance(s, str):
        return s
    return s.decode('utf8')
Wenzel Jakobad06e762015-07-22 01:01:52 +020072
Wenzel Jakoba57e51c2016-04-27 00:35:03 +020073
def sanitize_name(name):
    """Turn a prefixed C++ entity name into a ``__doc_``-prefixed identifier.

    Steps, in order: ``type-parameter-0-N`` becomes ``TN``; every
    ``operator<token>`` is replaced using CPP_OPERATORS; anything between the
    first ``<`` and the last ``>`` is dropped; each non-alphanumeric character
    becomes ``_``; runs of ``_`` are collapsed and a trailing ``_`` removed.
    """
    cleaned = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
    for token, word in CPP_OPERATORS.items():
        cleaned = cleaned.replace('operator%s' % token, 'operator_%s' % word)
    cleaned = re.sub('<.*>', '', cleaned)
    cleaned = ''.join('_' if not ch.isalnum() else ch for ch in cleaned)
    collapsed = re.sub('_+', '_', cleaned)
    return '__doc_' + re.sub('_$', '', collapsed)
82
Wenzel Jakoba57e51c2016-04-27 00:35:03 +020083
def _strip_comment_syntax(comment):
    """Remove C++ comment delimiters and the common left margin from *comment*."""
    lines = []
    for line in comment.expandtabs(tabsize=4).splitlines():
        line = line.strip()
        if line.startswith('/*'):
            line = line[2:]
            # A one-line block comment ("/** text */") both opens and closes
            # on this line, so the terminator has to be removed here as well.
            if line.endswith('*/'):
                line = line[:-2].rstrip('*')
            line = line.lstrip('*')
        elif line.endswith('*/'):
            line = line[:-2].rstrip('*')
        elif line.startswith('///'):
            line = line[3:]
        if line.startswith('*'):
            line = line[1:]
        lines.append(line)

    # Drop the indentation shared by all non-empty lines.
    indents = [len(line) - len(line.lstrip()) for line in lines if line]
    if indents:
        margin = min(indents)
        lines = [line[margin:] for line in lines]
    return ''.join(line + '\n' for line in lines)


def _translate_markup(text):
    """Rewrite Doxygen commands and a few HTML/TeX tags in *text*.

    Section headers are emitted as paragraphs starting with ``$``; that marker
    is consumed later by _reflow().
    """
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    s = text
    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)
    s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Template parameter ``\2``:\n\n', s)

    section_tags = {
        'return': 'Returns',
        'returns': 'Returns',
        'author': 'Author',
        'authors': 'Authors',
        'copyright': 'Copyright',
        'date': 'Date',
        'remark': 'Remark',
        'remarks': 'Remarks',
        'sa': 'See also',
        'see': 'See also',
        'extends': 'Extends',
        'throw': 'Throws',
        'throws': 'Throws'
    }
    for in_, out_ in section_tags.items():
        # The word boundary keeps a short tag from matching as a prefix of a
        # longer one (e.g. '\throw' inside '\throws', which used to leave a
        # stray 's' at the start of the description).
        s = re.sub(r'\\%s\b\s*' % in_, r'\n\n$%s:\n\n' % out_, s)

    s = re.sub(r'\\details\s*', r'\n\n', s)
    s = re.sub(r'\\brief\s*', r'', s)
    s = re.sub(r'\\short\s*', r'', s)
    s = re.sub(r'\\ref\s*', r'', s)

    s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
               r"```\n\1\n```\n", s, flags=re.DOTALL)

    # HTML/TeX tags
    s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
    s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
    s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
    s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
    s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
    s = re.sub(r'<li>', r'\n\n* ', s)
    s = re.sub(r'</?ul>', r'', s)
    s = re.sub(r'</li>', r'\n\n', s)

    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')
    return s


def _reflow(text):
    """Wrap the paragraphs of *text* at 70 columns, leaving code fences as-is."""
    wrapper = textwrap.TextWrapper()
    wrapper.expand_tabs = True
    wrapper.replace_whitespace = True
    wrapper.drop_whitespace = True
    wrapper.width = 70
    wrapper.initial_indent = wrapper.subsequent_indent = ''

    parts = []
    in_code_segment = False
    for chunk in re.split(r'(```)', text):
        if chunk == '```':
            parts.append('\n```\n\n' if in_code_segment else '```\n')
            in_code_segment = not in_code_segment
        elif in_code_segment:
            parts.append(chunk.strip())
        else:
            # Paragraphs are separated by one or more blank lines.
            for paragraph in re.split(r'(?: *\n *){2,}', chunk):
                wrapped = wrapper.fill(re.sub(r'\s+', ' ', paragraph).strip())
                if wrapped.startswith('$'):
                    # Section header: drop the marker and indent the
                    # paragraph that follows it by four spaces.
                    parts.append(wrapped[1:] + '\n')
                    wrapper.initial_indent = \
                        wrapper.subsequent_indent = ' ' * 4
                else:
                    if wrapped:
                        parts.append(wrapped + '\n\n')
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
    return ''.join(parts).rstrip().lstrip('\n')


def process_comment(comment):
    """Convert a raw C++ documentation comment into plain docstring text.

    The comment delimiters and common indentation are stripped, Doxygen
    commands and a handful of HTML/TeX tags are rewritten as lightweight
    markup, and the remaining prose is re-wrapped at 70 columns. Text between
    ``` fences is emitted unwrapped.

    Parameters:
        comment: the comment text (``str``), including its ``/* */`` or
            ``///`` delimiters; may be empty.

    Returns:
        The processed text without leading newlines or trailing whitespace.
    """
    return _reflow(_translate_markup(_strip_comment_syntax(comment)))
Wenzel Jakobad06e762015-07-22 01:01:52 +0200192
193
def extract(filename, node, prefix, output):
    """Collect docstring entries for *node* and its descendants.

    Nodes located in a file other than *filename* are skipped (the function
    then returns 0; otherwise it returns None). For kinds in RECURSE_LIST the
    children are visited with the node's spelling appended to *prefix*, unless
    the kind is in PREFIX_BLACKLIST. For kinds in PRINT_LIST with a non-empty
    spelling, a ``(name, filename, comment)`` tuple is appended to *output*.
    """
    source_file = node.location.file
    if source_file is not None and \
            not os.path.samefile(d(source_file.name), filename):
        return 0

    if node.kind in RECURSE_LIST:
        child_prefix = prefix
        if node.kind not in PREFIX_BLACKLIST:
            if len(child_prefix) > 0:
                child_prefix += '_'
            child_prefix += d(node.spelling)
        for child in node.get_children():
            extract(filename, child, child_prefix, output)

    if node.kind in PRINT_LIST:
        if node.raw_comment is None:
            raw_text = ''
        else:
            raw_text = d(node.raw_comment)
        comment = process_comment(raw_text)
        qualified = prefix + '_' if len(prefix) > 0 else prefix
        if len(node.spelling) > 0:
            entry_name = sanitize_name(qualified + d(node.spelling))
            output.append((entry_name, filename, comment))
Wenzel Jakobad06e762015-07-22 01:01:52 +0200215
Wenzel Jakoba57e51c2016-04-27 00:35:03 +0200216
class ExtractionThread(Thread):
    """Thread that parses one header and appends its docstrings to a list.

    Constructing an instance acquires a slot of the module-level
    ``job_semaphore`` (blocking the constructing thread until one is free);
    the slot is released when run() finishes, whether or not it succeeded.
    """

    def __init__(self, filename, parameters, output):
        super().__init__()
        self.filename = filename
        self.parameters = parameters
        self.output = output
        # Taken here, in the creating thread, so that creating further
        # threads blocks while all slots are in use.
        job_semaphore.acquire()

    def run(self):
        """Parse ``self.filename`` and extract its documentation."""
        print('Processing "%s" ..' % self.filename, file=sys.stderr)
        try:
            # NOTE(review): the two positional flags are presumably libclang's
            # excludeDeclarationsFromPCH / displayDiagnostics -- confirm.
            raw_index = cindex.conf.lib.clang_createIndex(False, True)
            translation_unit = cindex.Index(raw_index).parse(
                self.filename, self.parameters)
            extract(self.filename, translation_unit.cursor, '', self.output)
        finally:
            job_semaphore.release()
233 job_semaphore.release()
234
Dana33212d2019-05-15 20:37:18 -0400235
def read_args(args):
    """Split command-line arguments into compiler parameters and filenames.

    ``-x c++`` and ``-std=c++11`` are added unless *args* already contains
    ``-x`` / a ``-std=`` option. On macOS and Linux the function additionally
    tries to locate libclang and the system/clang include directories; each
    of these steps is skipped when nothing is found.

    Parameters:
        args: iterable of argument strings. Items starting with ``-`` are
            passed through as compiler parameters, all others are filenames.

    Returns:
        A ``(parameters, filenames)`` tuple of lists.

    Raises:
        NoFilenamesError: if *args* contains no filename.
    """
    parameters = []
    filenames = []
    if "-x" not in args:
        parameters.extend(['-x', 'c++'])
    if not any(it.startswith("-std=") for it in args):
        parameters.append('-std=c++11')

    def folder_version(path):
        # Numeric components of a path, ignoring digits that directly follow
        # "lib" (lib64, lib32), so that e.g. llvm-10 ranks above llvm-9.
        return [int(ver) for ver in re.findall(r'(?<!lib)(?<!\d)\d+', path)]

    system = platform.system()
    if system == 'Darwin':
        dev_path = '/Applications/Xcode.app/Contents/Developer/'
        lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
        sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
        libclang = lib_dir + 'libclang.dylib'

        if os.path.exists(libclang):
            cindex.Config.set_library_path(os.path.dirname(libclang))

        if os.path.exists(sdk_dir):
            # Use the first SDK directory listed; tolerate an empty or
            # unreadable SDK folder instead of failing with an IndexError.
            sdk_names = next(os.walk(sdk_dir), (sdk_dir, [], []))[1]
            if sdk_names:
                parameters.append('-isysroot')
                parameters.append(os.path.join(sdk_dir, sdk_names[0]))
    elif system == 'Linux':
        # cython.util.find_library does not find `libclang` for all clang
        # versions and distributions. LLVM switched to a monolithical setup
        # that includes everything under /usr/lib/llvm{version_number}/
        # We therefore glob for the library and select the highest version.
        # If nothing matches, leave the libclang lookup at its default.
        library_files = glob("/usr/lib/llvm-*/lib/libclang.so")
        if library_files:
            cindex.Config.set_library_file(
                max(library_files, key=folder_version))

        # clang doesn't find its own base includes by default on Linux,
        # but different distros install them in different paths.
        # Try to autodetect, preferring the highest numbered version.
        clang_include_dir = max((
            path
            for libdir in ['lib64', 'lib', 'lib32']
            for path in glob('/usr/%s/clang/*/include' % libdir)
            if os.path.isdir(path)
        ), default=None, key=folder_version)
        if clang_include_dir:
            parameters.extend(['-isystem', clang_include_dir])

    for item in args:
        if item.startswith('-'):
            parameters.append(item)
        else:
            filenames.append(item)

    if len(filenames) == 0:
        raise NoFilenamesError("args parameter did not contain any filenames")

    return parameters, filenames
289
290
def extract_all(args):
    """Extract the documentation of every header named in *args*.

    One ExtractionThread is started per filename; the number running at once
    is limited by the semaphore each thread holds while it works.

    Parameters:
        args: argument list as accepted by read_args().

    Returns:
        The list the threads appended their ``(name, filename, comment)``
        entries to.

    Raises:
        NoFilenamesError: propagated from read_args().
    """
    parameters, filenames = read_args(args)
    output = []
    threads = []
    for filename in filenames:
        thr = ExtractionThread(filename, parameters, output)
        thr.start()
        threads.append(thr)

    print('Waiting for jobs to finish ..', file=sys.stderr)
    # Join the threads rather than draining the semaphore: draining left it
    # at zero, so a second call in the same process would block forever.
    for thr in threads:
        thr.join()

    return output
303
304
def write_header(comments, out_file=sys.stdout):
    """Write the collected docstrings to *out_file* as a C++ header.

    *comments* is an iterable of ``(name, filename, comment)`` tuples. Entries
    are emitted sorted by name, then filename; when a name repeats, the second
    and later occurrences get a ``_2``, ``_3``, ... suffix.
    """
    prologue = '''/*
 This file contains docstrings for the Python bindings.
 Do not edit! These were automatically extracted by mkdoc.py
 */

#define __EXPAND(x) x
#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...) COUNT
#define __VA_SIZE(...) __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
#define __CAT1(a, b) a ## b
#define __CAT2(a, b) __CAT1(a, b)
#define __DOC1(n1) __doc_##n1
#define __DOC2(n1, n2) __doc_##n1##_##n2
#define __DOC3(n1, n2, n3) __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4) __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5) __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define __DOC6(n1, n2, n3, n4, n5, n6) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
#define __DOC7(n1, n2, n3, n4, n5, n6, n7) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
#define DOC(...) __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
'''
    epilogue = '''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
'''

    print(prologue, file=out_file)

    previous_name = None
    occurrence = 1
    ordered = sorted(comments, key=lambda entry: (entry[0], entry[1]))
    for base_name, _, comment in ordered:
        if base_name != previous_name:
            previous_name = base_name
            occurrence = 1
            symbol = base_name
        else:
            occurrence += 1
            symbol = base_name + "_%i" % occurrence
        # Multi-line docstrings start on their own line.
        separator = '\n' if '\n' in comment else ' '
        print('\nstatic const char *%s =%sR"doc(%s)doc";' %
              (symbol, separator, comment), file=out_file)

    print(epilogue, file=out_file)
Dana33212d2019-05-15 20:37:18 -0400349
350
def mkdoc(args):
    """Run the extraction for *args* and write the resulting header.

    The first argument starting with ``-o`` selects an output file, given
    either attached (``-oFILE``) or as the following argument (``-o FILE``);
    it is removed before the remaining arguments go to extract_all(). Without
    it the header is written to standard output.

    Exits with status -1 when ``-o`` has no value. If writing the output file
    fails, the partially written file is removed and the error re-raised.
    """
    args = list(args)
    out_path = None
    for idx, arg in enumerate(args):
        if arg.startswith("-o"):
            del args[idx]
            try:
                # After the deletion, index idx holds the argument that
                # followed "-o" (if there is one).
                out_path = arg[2:] or args.pop(idx)
            except IndexError:
                print("-o flag requires an argument", file=sys.stderr)
                sys.exit(-1)
            break

    comments = extract_all(args)

    if out_path:
        try:
            with open(out_path, 'w') as out_file:
                write_header(comments, out_file)
        except BaseException:
            # In the event of an error (including an interrupt), don't leave
            # a partially-written output file.
            try:
                os.unlink(out_path)
            except OSError:
                # Best effort: the file may never have been created.
                pass
            raise
    else:
        write_header(comments)
379 write_header(comments)
380
381
if __name__ == '__main__':
    # Script entry point: print a usage line and exit with a failure status
    # when no header files were given on the command line.
    try:
        mkdoc(sys.argv[1:])
    except NoFilenamesError:
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0],
              file=sys.stderr)
        # sys.exit rather than the `exit` builtin, which is only installed
        # by the `site` module.
        sys.exit(-1)