blob: 1fd8cceed52ba79325519d503f92520fa0164824 [file] [log] [blame]
Wenzel Jakobad06e762015-07-22 01:01:52 +02001#!/usr/bin/env python3
2#
3# Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..]
4#
5# Extract documentation from C++ header files to use it in Python bindings
6#
7
Wenzel Jakoba57e51c2016-04-27 00:35:03 +02008import os
9import sys
10import platform
11import re
12import textwrap
13
Wenzel Jakobad06e762015-07-22 01:01:52 +020014from clang import cindex
15from clang.cindex import CursorKind
16from collections import OrderedDict
Wenzel Jakobfa70d302015-07-23 14:43:34 +020017from threading import Thread, Semaphore
18from multiprocessing import cpu_count
Wenzel Jakobad06e762015-07-22 01:01:52 +020019
# Cursor kinds treated as scopes: extract() walks into their children and,
# for everything except the translation unit, appends the cursor's spelling
# to the name prefix handed down to those children.
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.CLASS_TEMPLATE,
]
28
# Cursor kinds for which extract() records a docstring entry (provided the
# cursor has a non-empty spelling).
PRINT_LIST = [
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.ENUM_CONSTANT_DECL,
    CursorKind.CLASS_TEMPLATE,
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CONVERSION_FUNCTION,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    CursorKind.FIELD_DECL,
]
42
# C++ operator token -> mnemonic used when an ``operatorX`` name is turned
# into an identifier.  The mapping is ordered longest token first so that
# sanitize_name() replaces e.g. '<<=' before '<<' and '<<' before '<'.
CPP_OPERATORS = OrderedDict(
    sorted(
        (
            ('<=', 'le'), ('>=', 'ge'), ('==', 'eq'), ('!=', 'ne'),
            ('[]', 'array'),
            ('+=', 'iadd'), ('-=', 'isub'), ('*=', 'imul'), ('/=', 'idiv'),
            ('%=', 'imod'), ('&=', 'iand'), ('|=', 'ior'), ('^=', 'ixor'),
            ('<<=', 'ilshift'), ('>>=', 'irshift'),
            ('++', 'inc'), ('--', 'dec'),
            ('<<', 'lshift'), ('>>', 'rshift'),
            ('&&', 'land'), ('||', 'lor'), ('!', 'lnot'), ('~', 'bnot'),
            ('&', 'band'), ('|', 'bor'),
            ('+', 'add'), ('-', 'sub'), ('*', 'mul'), ('/', 'div'),
            ('%', 'mod'),
            ('<', 'lt'), ('>', 'gt'), ('=', 'assign'), ('()', 'call'),
        ),
        key=lambda pair: -len(pair[0]),
    )
)
Wenzel Jakobad06e762015-07-22 01:01:52 +020055
# One semaphore slot per CPU reported by multiprocessing.cpu_count(); the
# extraction threads take a slot each, which caps how many run at once.
job_count = cpu_count()
job_semaphore = Semaphore(value=job_count)

# Collected (name, filename, comment) tuples, appended to by extract().
output = list()
Wenzel Jakoba57e51c2016-04-27 00:35:03 +020060
def d(s):
    """Return *s* as text.

    ``str`` values are passed through unchanged; anything else is expected
    to be a byte string and is decoded as UTF-8.  Accepting both lets the
    callers work whether the clang bindings return ``bytes`` or ``str`` for
    spellings, file names and raw comments.
    """
    return s if isinstance(s, str) else s.decode('utf8')
63
Wenzel Jakoba57e51c2016-04-27 00:35:03 +020064
def sanitize_name(name):
    """Turn a (prefixed) C++ entity name into a ``__doc_*`` identifier.

    Steps, in order: ``type-parameter-0-N`` becomes ``TN``; every
    ``operator<token>`` is spelled out using CPP_OPERATORS; everything from
    the first ``<`` to the last ``>`` is dropped; each non-alphanumeric
    character becomes ``_``; runs of underscores are collapsed and a
    trailing underscore is removed; finally ``__doc_`` is prepended.
    """
    ident = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
    for symbol, mnemonic in CPP_OPERATORS.items():
        ident = ident.replace('operator' + symbol, 'operator_' + mnemonic)
    ident = re.sub('<.*>', '', ident)
    ident = ''.join(ch if ch.isalnum() else '_' for ch in ident)
    ident = re.sub('_+', '_', ident)
    ident = re.sub('_$', '', ident)
    return '__doc_' + ident
73
Wenzel Jakoba57e51c2016-04-27 00:35:03 +020074
def _strip_comment_syntax(comment):
    """Remove ``/* */``, ``///`` and leading ``*`` decoration from *comment*.

    Tabs are expanded to four spaces, each line is stripped, its comment
    markers are removed, and the indentation shared by all non-empty lines
    is cut off.  Every input line yields one ``'\\n'``-terminated output
    line.
    """
    lines = []
    for line in comment.expandtabs(tabsize=4).splitlines():
        line = line.strip()
        if line.startswith('/*'):
            line = line[2:]
            # A one-line block comment ("/** text */") also carries the
            # terminator, which must not end up in the docstring.
            if line.endswith('*/'):
                line = line[:-2].rstrip('*')
            line = line.lstrip('*')
        elif line.endswith('*/'):
            line = line[:-2].rstrip('*')
        elif line.startswith('///'):
            line = line[3:]
        if line.startswith('*'):
            line = line[1:]
        lines.append(line)

    indents = [len(line) - len(line.lstrip()) for line in lines if line]
    common = min(indents) if indents else 0
    return ''.join(line[common:] + '\n' for line in lines)


def _translate_markup(s):
    """Rewrite Doxygen commands and simple HTML/TeX tags in *s*.

    Section-like commands are turned into paragraphs of the form
    ``$Title:``; the leading ``$`` is a private marker consumed later by
    ``_reflow``.
    """
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    # Inline Doxygen markup
    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)
    s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Template parameter ``\2``:\n\n', s)

    # Section commands.  The trailing \b keeps a short tag from matching
    # the front of a longer one (e.g. "\throw" inside "\throws").
    sections = (
        ('return', 'Returns'),
        ('returns', 'Returns'),
        ('author', 'Author'),
        ('authors', 'Authors'),
        ('copyright', 'Copyright'),
        ('date', 'Date'),
        ('remark', 'Remark'),
        ('remarks', 'Remark'),
        ('sa', 'See also'),
        ('see', 'See also'),
        ('extends', 'Extends'),
        ('throw', 'Throws'),
        ('throws', 'Throws'),
    )
    for tag, title in sections:
        s = re.sub(r'\\%s\b\s*' % tag, r'\n\n$%s:\n\n' % title, s)

    s = re.sub(r'\\details\b\s*', r'\n\n', s)
    s = re.sub(r'\\brief\b\s*', r'', s)
    s = re.sub(r'\\short\b\s*', r'', s)
    s = re.sub(r'\\ref\b\s*', r'', s)

    s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
               r"```\n\1\n```\n", s, flags=re.DOTALL)

    # HTML/TeX tags
    s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
    s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
    s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
    s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
    s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
    s = re.sub(r'<li>', r'\n\n* ', s)
    s = re.sub(r'</?ul>', r'', s)
    s = re.sub(r'</li>', r'\n\n', s)

    # Use the Python spelling of boolean literals
    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')
    return s


def _reflow(s):
    """Re-wrap the paragraphs of *s* to 70 columns.

    Text between ``` fences is copied through stripped but otherwise
    untouched.  A paragraph starting with ``$`` is a heading: it is emitted
    without the marker and the paragraph that follows it is indented by
    four spaces.
    """
    wrapper = textwrap.TextWrapper(
        width=70, expand_tabs=True, replace_whitespace=True,
        drop_whitespace=True, initial_indent='', subsequent_indent='')

    pieces = []
    in_code_segment = False
    for chunk in re.split(r'(```)', s):
        if chunk == '```':
            pieces.append('\n```\n\n' if in_code_segment else '```\n')
            in_code_segment = not in_code_segment
        elif in_code_segment:
            pieces.append(chunk.strip())
        else:
            # Paragraphs are separated by two or more line breaks
            for paragraph in re.split(r'(?: *\n *){2,}', chunk):
                text = re.sub(r'\s+', ' ', paragraph).strip()
                if text.startswith('$'):
                    # Headings are never indented, even when they directly
                    # follow another heading.
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
                    pieces.append(wrapper.fill(text)[1:] + '\n')
                    wrapper.initial_indent = \
                        wrapper.subsequent_indent = ' ' * 4
                else:
                    wrapped = wrapper.fill(text)
                    if wrapped:
                        pieces.append(wrapped + '\n\n')
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
    return ''.join(pieces).rstrip().lstrip('\n')


def process_comment(comment):
    """Convert a raw C++ documentation comment into docstring text.

    The comment decoration is stripped, Doxygen commands and a few
    HTML/TeX tags are translated into lightweight markup, and the prose is
    re-wrapped to 70 columns while fenced code segments are preserved.

    Parameters:
        comment: the comment text including its ``///`` or ``/* */``
            markers; may be the empty string.

    Returns:
        The processed text without leading newlines or trailing whitespace.
    """
    return _reflow(_translate_markup(_strip_comment_syntax(comment)))
Wenzel Jakobad06e762015-07-22 01:01:52 +0200183
184
def extract(filename, node, prefix):
    """Recursively collect docstrings from the cursor *node*.

    Cursors located in a file other than *filename* are skipped (the
    function returns 0 for them).  Scope-like cursors (RECURSE_LIST) are
    descended into with their spelling appended to *prefix*; documentable
    cursors (PRINT_LIST) with a non-empty spelling add a
    ``(name, filename, comment)`` tuple to the module-level ``output``.
    """
    source = node.location.file
    if source is not None and \
            not os.path.samefile(d(source.name), filename):
        return 0

    kind = node.kind
    if kind in RECURSE_LIST:
        child_prefix = prefix
        # The translation unit itself contributes nothing to the name
        if kind != CursorKind.TRANSLATION_UNIT:
            if child_prefix:
                child_prefix += '_'
            child_prefix += d(node.spelling)
        for child in node.get_children():
            extract(filename, child, child_prefix)

    if kind in PRINT_LIST:
        raw = node.raw_comment
        comment = process_comment(d(raw) if raw is not None else '')
        spelling = node.spelling
        if len(spelling) > 0:
            qualified = (prefix + '_' if prefix else prefix) + d(spelling)
            output.append((sanitize_name(qualified), filename, comment))
Wenzel Jakobad06e762015-07-22 01:01:52 +0200207
Wenzel Jakoba57e51c2016-04-27 00:35:03 +0200208
class ExtractionThread(Thread):
    """Thread that parses one header file and extracts its docstrings.

    A slot of the module-level ``job_semaphore`` is acquired when the
    object is constructed, so construction blocks while no slot is free.
    The slot is released at the end of ``run``, whether or not parsing
    succeeded.
    """

    def __init__(self, filename, parameters):
        super().__init__()
        self.filename = filename
        self.parameters = parameters
        job_semaphore.acquire()

    def run(self):
        """Parse ``self.filename`` with libclang and walk the resulting AST."""
        print('Processing "%s" ..' % self.filename, file=sys.stderr)
        try:
            raw_index = cindex.conf.lib.clang_createIndex(False, True)
            translation_unit = cindex.Index(raw_index).parse(
                self.filename, self.parameters)
            extract(self.filename, translation_unit.cursor, '')
        finally:
            job_semaphore.release()
225
if __name__ == '__main__':
    # Base compiler flags.  Command-line arguments starting with '-' are
    # appended below and passed on to the parser; all other arguments are
    # taken as header files to process.
    parameters = ['-x', 'c++', '-std=c++11']
    filenames = []

    if platform.system() == 'Darwin':
        dev_path = '/Applications/Xcode.app/Contents/Developer/'
        lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
        sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
        libclang = lib_dir + 'libclang.dylib'

        if os.path.exists(libclang):
            cindex.Config.set_library_path(os.path.dirname(libclang))

        if os.path.exists(sdk_dir):
            # Use the first SDK sub-directory reported by os.walk; when the
            # directory has none, leave the sysroot flag out instead of
            # crashing.
            sdk_names = next(os.walk(sdk_dir), (sdk_dir, [], []))[1]
            if sdk_names:
                parameters.append('-isysroot')
                parameters.append(os.path.join(sdk_dir, sdk_names[0]))

    for item in sys.argv[1:]:
        if item.startswith('-'):
            parameters.append(item)
        else:
            filenames.append(item)

    if not filenames:
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0])
        # sys.exit rather than the site-provided exit() builtin, which is
        # not guaranteed to exist
        sys.exit(-1)

    print('''/*
  This file contains docstrings for the Python bindings.
  Do not edit! These were automatically extracted by mkdoc.py
 */

#define __EXPAND(x) x
#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...) COUNT
#define __VA_SIZE(...) __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
#define __CAT1(a, b) a ## b
#define __CAT2(a, b) __CAT1(a, b)
#define __DOC1(n1) __doc_##n1
#define __DOC2(n1, n2) __doc_##n1##_##n2
#define __DOC3(n1, n2, n3) __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4) __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5) __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define __DOC6(n1, n2, n3, n4, n5, n6) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
#define __DOC7(n1, n2, n3, n4, n5, n6, n7) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
#define DOC(...) __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
''')

    output.clear()
    for filename in filenames:
        # Constructing the thread blocks until a job slot is free
        thr = ExtractionThread(filename, parameters)
        thr.start()

    print('Waiting for jobs to finish ..', file=sys.stderr)
    # Every running job holds one slot, so once all slots have been taken
    # here no job is still running.
    for i in range(job_count):
        job_semaphore.acquire()

    # Emit the entries sorted by (name, filename); repeated names get the
    # suffixes _2, _3, ... to keep the generated identifiers unique.
    name_ctr = 1
    name_prev = None
    for name, _, comment in sorted(output, key=lambda x: (x[0], x[1])):
        if name == name_prev:
            name_ctr += 1
            name = name + "_%i" % name_ctr
        else:
            name_prev = name
            name_ctr = 1
        print('\nstatic const char *%s =%sR"doc(%s)doc";' %
              (name, '\n' if '\n' in comment else ' ', comment))

    print('''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
''')