blob: aef434c25f6ca6652bf79f0c08cba2b8cc29aafa [file] [log] [blame]
#!/usr/bin/env python3
#
#  Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..]
#
#  Extract documentation from C++ header files to use it in Python bindings
#
7
Wenzel Jakoba57e51c2016-04-27 00:35:03 +02008import os
9import sys
10import platform
11import re
12import textwrap
13
Wenzel Jakobad06e762015-07-22 01:01:52 +020014from clang import cindex
15from clang.cindex import CursorKind
16from collections import OrderedDict
Wenzel Jakobfa70d302015-07-23 14:43:34 +020017from threading import Thread, Semaphore
18from multiprocessing import cpu_count
Wenzel Jakobad06e762015-07-22 01:01:52 +020019
# Cursor kinds whose children are visited by extract(); every kind except
# TRANSLATION_UNIT also contributes its spelling to the name prefix.
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.CLASS_TEMPLATE,
]
27
# Cursor kinds for which extract() emits a docstring definition.
PRINT_LIST = [
    # Types
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.CLASS_TEMPLATE,
    # Functions
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CONVERSION_FUNCTION,
    # Members
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    CursorKind.FIELD_DECL,
]
40
# Maps the symbol of a C++ operator to the word used for it in generated
# identifiers. The entries are ordered longest symbol first so that, when
# iterated, e.g. '<<=' is seen before '<<' and '<'. Entries of equal length
# keep the order in which they are listed here (the sort is stable).
CPP_OPERATORS = OrderedDict(sorted(
    [
        ('<=', 'le'), ('>=', 'ge'), ('==', 'eq'), ('!=', 'ne'),
        ('[]', 'array'),
        ('+=', 'iadd'), ('-=', 'isub'), ('*=', 'imul'), ('/=', 'idiv'),
        ('%=', 'imod'), ('&=', 'iand'), ('|=', 'ior'), ('^=', 'ixor'),
        ('<<=', 'ilshift'), ('>>=', 'irshift'),
        ('++', 'inc'), ('--', 'dec'),
        ('<<', 'lshift'), ('>>', 'rshift'),
        ('&&', 'land'), ('||', 'lor'), ('!', 'lnot'), ('~', 'bnot'),
        ('&', 'band'), ('|', 'bor'),
        ('+', 'add'), ('-', 'sub'), ('*', 'mul'), ('/', 'div'),
        ('%', 'mod'),
        ('<', 'lt'), ('>', 'gt'), ('=', 'assign'),
    ],
    key=lambda pair: len(pair[0]),
    reverse=True,
))
Wenzel Jakobad06e762015-07-22 01:01:52 +020053
# One extraction job per reported CPU; the semaphore starts with that many
# slots and is acquired/released by the extraction threads.
job_count = cpu_count()
job_semaphore = Semaphore(job_count)

# Sanitized name -> number of times sanitize_name() has produced it so far.
registered_names = {}
58
Wenzel Jakoba57e51c2016-04-27 00:35:03 +020059
def d(s):
    """Return ``s`` as text.

    ``bytes`` input is decoded as UTF-8. A value that is already a ``str`` is
    returned unchanged, so the helper works regardless of whether the caller
    hands over raw bytes or text (calling ``.decode`` on a ``str`` would raise
    ``AttributeError``).
    """
    return s if isinstance(s, str) else s.decode('utf8')
62
Wenzel Jakoba57e51c2016-04-27 00:35:03 +020063
def sanitize_name(name):
    """Turn ``name`` into a unique identifier prefixed with ``__doc_``.

    Operator symbols are spelled out using ``CPP_OPERATORS``, anything between
    the first ``<`` and the last ``>`` is dropped, and every remaining
    non-alphanumeric character becomes an underscore. The number of times each
    resulting name has been produced is tracked in ``registered_names``; from
    the second occurrence on, that count is appended as ``_<count>``.
    """
    name = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)

    # CPP_OPERATORS is iterated in its stored order (longest symbols first).
    for symbol, word in CPP_OPERATORS.items():
        name = name.replace('operator%s' % symbol, 'operator_%s' % word)

    name = re.sub('<.*>', '', name)
    name = ''.join('_' if not ch.isalnum() else ch for ch in name)

    # Collapse runs of underscores, then drop the single trailing one (if any).
    name = re.sub('_+', '_', name)
    if name.endswith('_'):
        name = name[:-1]

    occurrences = registered_names.get(name, 0) + 1
    registered_names[name] = occurrences
    if occurrences > 1:
        name = name + '_' + str(occurrences)
    return '__doc_' + name
78
Wenzel Jakoba57e51c2016-04-27 00:35:03 +020079
def _strip_comment_syntax(comment):
    """Remove C++ comment delimiters from each line of ``comment``."""
    lines = []
    for line in comment.splitlines():
        line = line.strip()
        # A one-line block comment ('/** text */') both opens and closes on
        # the same line, so the two delimiters are handled independently.
        opens = line.startswith('/*')
        if opens:
            line = line[2:]
        closes = line.endswith('*/')
        if closes:
            line = line[:-2]
        if opens:
            line = line.lstrip('* \t')
        if closes:
            line = line.rstrip('* \t')
        if not (opens or closes) and line.startswith('///'):
            line = line[3:]
        # Leading '*' of a continuation line inside a block comment.
        if line.startswith('*'):
            line = line[1:]
        lines.append(line.strip() + '\n')
    return ''.join(lines)


def _translate_markup(s):
    """Rewrite Doxygen commands and HTML/TeX tags found in ``s``.

    Section-like commands are turned into paragraphs of their own that start
    with a ``$`` marker, which ``_reflow_paragraphs`` interprets as a heading.
    """
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    # Inline Doxygen commands
    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)

    # Section commands. The trailing \b keeps a short command such as
    # '\throw' from matching the first part of a longer one ('\throws'),
    # which would otherwise leave a stray letter behind in the text.
    for in_, out_ in (
        ('return', 'Returns'),
        ('returns', 'Returns'),
        ('author', 'Author'),
        ('authors', 'Authors'),
        ('copyright', 'Copyright'),
        ('date', 'Date'),
        ('remark', 'Remark'),
        ('remarks', 'Remark'),
        ('sa', 'See also'),
        ('see', 'See also'),
        ('extends', 'Extends'),
        ('throw', 'Throws'),
        ('throws', 'Throws'),
    ):
        s = re.sub(r'\\%s\b\s*' % in_, r'\n\n$%s:\n\n' % out_, s)

    s = re.sub(r'\\details\s*', r'\n\n', s)
    s = re.sub(r'\\brief\s*', r'', s)
    s = re.sub(r'\\short\s*', r'', s)
    s = re.sub(r'\\ref\s*', r'', s)

    # HTML/TeX tags
    s = re.sub(r'<tt>([^<]*)</tt>', r'``\1``', s)
    s = re.sub(r'<em>([^<]*)</em>', r'*\1*', s)
    s = re.sub(r'<b>([^<]*)</b>', r'**\1**', s)
    s = re.sub(r'\\f\$([^\$]*)\\f\$', r'$\1$', s)

    # Python spelling of the boolean literals
    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')
    return s


def _reflow_paragraphs(s):
    """Wrap each blank-line separated paragraph of ``s`` to 75 columns.

    A paragraph starting with ``$`` is emitted as a heading (without the
    marker) and the paragraph that follows it is indented by four spaces.
    """
    wrapper = textwrap.TextWrapper()
    wrapper.expand_tabs = True
    wrapper.replace_whitespace = True
    wrapper.width = 75
    wrapper.initial_indent = wrapper.subsequent_indent = ''

    chunks = []
    for paragraph in re.split(r'\n{2,}', s):
        wrapped = wrapper.fill(paragraph.strip())
        if wrapped.startswith('$'):
            chunks.append(wrapped[1:] + '\n')
            wrapper.initial_indent = wrapper.subsequent_indent = ' ' * 4
        else:
            chunks.append(wrapped + '\n\n')
            wrapper.initial_indent = wrapper.subsequent_indent = ''
    return ''.join(chunks).rstrip().lstrip('\n')


def process_comment(comment):
    """Convert a raw C++ documentation comment into plain docstring text.

    The comment delimiters (``/* .. */``, ``///`` and leading ``*``) are
    removed, Doxygen commands and a few HTML/TeX tags are rewritten, and the
    result is re-wrapped paragraph by paragraph.

    Parameters:
        comment: the comment text, including its C++ comment syntax; may be
            the empty string.

    Returns:
        The processed text without leading newlines or trailing whitespace.
    """
    text = _strip_comment_syntax(comment)
    text = _translate_markup(text)
    return _reflow_paragraphs(text)
Wenzel Jakobad06e762015-07-22 01:01:52 +0200156
157
def extract(filename, node, prefix, output):
    """Collect docstring definitions for ``node`` and, recursively, its children.

    Nodes that have a source file which is not ``filename`` are ignored. For
    kinds in ``RECURSE_LIST`` the children are visited first; if none of them
    yields anything, the node itself is skipped as well. For kinds in
    ``PRINT_LIST`` a ``static const char *`` definition holding the processed
    comment is appended to ``output``.

    Parameters:
        filename: path of the header being processed.
        node: the cursor to inspect.
        prefix: underscore-joined spellings of the enclosing scopes.
        output: list that receives the generated definitions.

    Returns:
        The number of definitions appended for this node and its descendants.
    """
    source_file = node.location.file
    if source_file is not None and \
            not os.path.samefile(d(source_file.name), filename):
        return 0

    extracted = 0

    if node.kind in RECURSE_LIST:
        child_prefix = prefix
        if node.kind != CursorKind.TRANSLATION_UNIT:
            if child_prefix:
                child_prefix += '_'
            child_prefix += d(node.spelling)
        for child in node.get_children():
            extracted += extract(filename, child, child_prefix, output)
        if not extracted:
            return 0

    if node.kind in PRINT_LIST:
        raw = node.raw_comment
        comment = process_comment('' if raw is None else d(raw))
        qualified = (prefix + '_' if prefix else prefix) + d(node.spelling)
        name = sanitize_name(qualified)
        # Multi-line comments start on their own line in the generated code.
        separator = '\n' if '\n' in comment else ' '
        output.append('\nstatic const char *%s =%sR"doc(%s)doc";' %
                      (name, separator, comment))
        extracted += 1

    return extracted
184
Wenzel Jakoba57e51c2016-04-27 00:35:03 +0200185
class ExtractionThread(Thread):
    """Thread that parses one header file and extracts its docstrings.

    Creating an instance acquires ``job_semaphore`` in the creating thread
    (blocking until a slot is available); ``run`` releases it again when it
    finishes, whether or not parsing succeeded.
    """

    def __init__(self, filename, parameters, output):
        super().__init__()
        self.filename, self.parameters, self.output = \
            filename, parameters, output
        job_semaphore.acquire()

    def run(self):
        """Parse ``self.filename`` and append the results to ``self.output``."""
        print('Processing "%s" ..' % self.filename, file=sys.stderr)
        try:
            raw_index = cindex.conf.lib.clang_createIndex(False, True)
            index = cindex.Index(raw_index)
            translation_unit = index.parse(self.filename, self.parameters)
            extract(self.filename, translation_unit.cursor, '', self.output)
        finally:
            job_semaphore.release()
203
if __name__ == '__main__':
    # Compiler flags handed to libclang; command line arguments starting with
    # '-' are appended below, everything else is taken as a header file name.
    parameters = ['-x', 'c++', '-std=c++11']
    filenames = []

    if platform.system() == 'Darwin':
        dev_path = '/Applications/Xcode.app/Contents/Developer/'
        lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
        sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
        libclang = lib_dir + 'libclang.dylib'

        if os.path.exists(libclang):
            cindex.Config.set_library_path(os.path.dirname(libclang))

        if os.path.exists(sdk_dir):
            # Use the first SDK listed in the directory. Nothing is added
            # when the path contains no subdirectories (previously this
            # raised an exception).
            sdk_names = next(os.walk(sdk_dir), (sdk_dir, [], []))[1]
            if sdk_names:
                sysroot_dir = os.path.join(sdk_dir, sdk_names[0])
                parameters.append('-isysroot')
                parameters.append(sysroot_dir)

    for item in sys.argv[1:]:
        if item.startswith('-'):
            parameters.append(item)
        else:
            filenames.append(item)

    if not filenames:
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0])
        # sys.exit rather than the exit() helper, which is only injected by
        # the 'site' module and is therefore not always available.
        sys.exit(-1)

    print('''/*
 This file contains docstrings for the Python bindings.
 Do not edit! These were automatically extracted by mkdoc.py
 */

#define __EXPAND(x) x
#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...) COUNT
#define __VA_SIZE(...) __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
#define __CAT1(a, b) a ## b
#define __CAT2(a, b) __CAT1(a, b)
#define __DOC1(n1) __doc_##n1
#define __DOC2(n1, n2) __doc_##n1##_##n2
#define __DOC3(n1, n2, n3) __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4) __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5) __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define __DOC6(n1, n2, n3, n4, n5, n6) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
#define __DOC7(n1, n2, n3, n4, n5, n6, n7) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
#define DOC(...) __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
''')

    # Every thread appends to the same list. Constructing a thread blocks
    # while all job slots are taken, which limits the number running at once.
    output = []
    for filename in filenames:
        thr = ExtractionThread(filename, parameters, output)
        thr.start()

    # Taking every slot of the semaphore only succeeds once each thread has
    # released the slot it was holding, i.e. once all jobs are done.
    print('Waiting for jobs to finish ..', file=sys.stderr)
    for _ in range(job_count):
        job_semaphore.acquire()

    # Sort so the generated file does not depend on thread completion order.
    output.sort()
    for line in output:
        print(line)

    print('''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
''')