Wenzel Jakob | ad06e76 | 2015-07-22 01:01:52 +0200 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
| 2 | # |
| 3 | # Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..] |
| 4 | # |
| 5 | # Extract documentation from C++ header files to use it in Python bindings |
| 6 | # |
| 7 | |
Wenzel Jakob | a57e51c | 2016-04-27 00:35:03 +0200 | [diff] [blame] | 8 | import os |
| 9 | import sys |
| 10 | import platform |
| 11 | import re |
| 12 | import textwrap |
| 13 | |
Wenzel Jakob | ad06e76 | 2015-07-22 01:01:52 +0200 | [diff] [blame] | 14 | from clang import cindex |
| 15 | from clang.cindex import CursorKind |
| 16 | from collections import OrderedDict |
Wenzel Jakob | fa70d30 | 2015-07-23 14:43:34 +0200 | [diff] [blame] | 17 | from threading import Thread, Semaphore |
| 18 | from multiprocessing import cpu_count |
Wenzel Jakob | ad06e76 | 2015-07-22 01:01:52 +0200 | [diff] [blame] | 19 | |
# Cursor kinds whose children are visited by extract(); apart from the
# translation unit itself, their spelling becomes part of the name prefix
# handed down to the children.
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL, CursorKind.STRUCT_DECL,
    CursorKind.CLASS_TEMPLATE,
]

# Cursor kinds for which extract() emits a docstring variable.
PRINT_LIST = [
    # Types
    CursorKind.CLASS_DECL, CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.CLASS_TEMPLATE,
    # Free functions
    CursorKind.FUNCTION_DECL, CursorKind.FUNCTION_TEMPLATE,
    # Members
    CursorKind.CONVERSION_FUNCTION,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    CursorKind.FIELD_DECL,
]
| 40 | |
# Maps each C++ operator token to the identifier-safe word that replaces it
# in generated names.  The table is ordered longest token first so that a
# multi-character operator (e.g. '<<=') is substituted before any shorter
# operator it contains (e.g. '<<' or '<').  The sort is stable, so tokens of
# equal length keep the order in which they are listed here.
CPP_OPERATORS = OrderedDict(sorted(
    [
        ('<=', 'le'), ('>=', 'ge'), ('==', 'eq'), ('!=', 'ne'),
        ('[]', 'array'),
        ('+=', 'iadd'), ('-=', 'isub'), ('*=', 'imul'), ('/=', 'idiv'),
        ('%=', 'imod'), ('&=', 'iand'), ('|=', 'ior'), ('^=', 'ixor'),
        ('<<=', 'ilshift'), ('>>=', 'irshift'),
        ('++', 'inc'), ('--', 'dec'),
        ('<<', 'lshift'), ('>>', 'rshift'),
        ('&&', 'land'), ('||', 'lor'),
        ('!', 'lnot'), ('~', 'bnot'),
        ('&', 'band'), ('|', 'bor'),
        ('+', 'add'), ('-', 'sub'), ('*', 'mul'), ('/', 'div'),
        ('%', 'mod'),
        ('<', 'lt'), ('>', 'gt'),
        ('=', 'assign'),
    ],
    key=lambda pair: len(pair[0]),
    reverse=True,
))
Wenzel Jakob | ad06e76 | 2015-07-22 01:01:52 +0200 | [diff] [blame] | 53 | |
# One job slot per CPU reported by multiprocessing.cpu_count(); the
# semaphore hands those slots out to the extraction threads.
job_count = cpu_count()
job_semaphore = Semaphore(job_count)

# Sanitized name -> number of times it has been handed out so far; used by
# sanitize_name() to give repeated names a numeric suffix.
registered_names = {}
| 58 | |
Wenzel Jakob | a57e51c | 2016-04-27 00:35:03 +0200 | [diff] [blame] | 59 | |
def d(s):
    """Return *s* as text.

    UTF-8 encoded ``bytes`` are decoded; a value that already is ``str`` is
    returned unchanged, so the helper works regardless of whether the clang
    bindings hand back bytes or text.
    """
    return s if isinstance(s, str) else s.decode('utf8')
| 62 | |
Wenzel Jakob | a57e51c | 2016-04-27 00:35:03 +0200 | [diff] [blame] | 63 | |
def sanitize_name(name):
    """Turn a (prefixed) C++ entity name into a unique C identifier.

    The name is rewritten in these steps:

    1. ``type-parameter-0-N`` placeholders become ``TN``.
    2. ``operator<token>`` becomes ``operator_<word>`` for every entry of
       ``CPP_OPERATORS``.
    3. Everything between the first ``<`` and the last ``>`` is dropped.
    4. Every non-alphanumeric character becomes ``_``, runs of underscores
       are collapsed and one trailing underscore is removed.

    The outcome is counted in ``registered_names``; from its second
    occurrence onwards the running count is appended as ``_<count>``.  The
    returned string always starts with ``__doc_``.
    """
    ident = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
    for token, word in CPP_OPERATORS.items():
        ident = ident.replace('operator%s' % token, 'operator_%s' % word)
    ident = re.sub('<.*>', '', ident)
    ident = ''.join('_' if not ch.isalnum() else ch for ch in ident)
    ident = re.sub('_+', '_', ident)
    ident = re.sub('_$', '', ident)

    # The counter is keyed by the un-suffixed identifier.
    occurrence = registered_names.get(ident, 0) + 1
    registered_names[ident] = occurrence
    if occurrence > 1:
        ident = ident + '_' + str(occurrence)
    return '__doc_' + ident
| 78 | |
Wenzel Jakob | a57e51c | 2016-04-27 00:35:03 +0200 | [diff] [blame] | 79 | |
def _strip_comment_syntax(comment):
    """Remove C++ comment delimiters and leading ``*`` decoration per line."""
    stripped = []
    for s in comment.splitlines():
        s = s.strip()
        opens = s.startswith('/*')
        # The length check keeps the two delimiters from overlapping ('/*/').
        closes = s.endswith('*/') and (not opens or len(s) >= 4)
        if opens:
            s = s[2:]
            if closes:
                # Single-line block comment such as '/** text */'.
                s = s[:-2].rstrip('* \t')
            s = s.lstrip('* \t')
        elif closes:
            s = s[:-2].rstrip('* \t')
        elif s.startswith('///'):
            s = s[3:]
        if s.startswith('*'):
            s = s[1:]
        stripped.append(s.strip() + '\n')
    return ''.join(stripped)


def _convert_markup(s):
    """Translate Doxygen commands and simple HTML/TeX tags to reST-like text.

    Section headings are emitted as their own paragraph prefixed with ``$``
    so that the re-flow step can recognise them.
    """
    # Doxygen tags
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)

    section_tags = (
        ('return', 'Returns'),
        ('returns', 'Returns'),
        ('author', 'Author'),
        ('authors', 'Authors'),
        ('copyright', 'Copyright'),
        ('date', 'Date'),
        ('remark', 'Remark'),
        ('remarks', 'Remark'),
        ('sa', 'See also'),
        ('see', 'See also'),
        ('extends', 'Extends'),
        ('throw', 'Throws'),
        ('throws', 'Throws'),
    )
    for in_, out_ in section_tags:
        # The word boundary stops a short tag from matching the start of a
        # longer one (e.g. '\throw' inside '\throws'), which would leave a
        # stray 's' in front of the description.
        s = re.sub(r'\\%s\b\s*' % in_, r'\n\n$%s:\n\n' % out_, s)

    s = re.sub(r'\\details\s*', r'\n\n', s)
    s = re.sub(r'\\brief\s*', r'', s)
    s = re.sub(r'\\short\s*', r'', s)
    s = re.sub(r'\\ref\s*', r'', s)

    # HTML/TeX tags
    s = re.sub(r'<tt>([^<]*)</tt>', r'``\1``', s)
    s = re.sub(r'<em>([^<]*)</em>', r'*\1*', s)
    s = re.sub(r'<b>([^<]*)</b>', r'**\1**', s)
    s = re.sub(r'\\f\$([^\$]*)\\f\$', r'$\1$', s)

    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')
    return s


def _reflow(s):
    """Wrap every paragraph to 75 columns.

    A paragraph whose wrapped text starts with ``$`` is a section heading:
    the marker is removed, the heading is followed by a single newline and
    the next paragraph is indented by four spaces.
    """
    wrapper = textwrap.TextWrapper()
    wrapper.expand_tabs = True
    wrapper.replace_whitespace = True
    wrapper.width = 75

    indent = ''
    chunks = []
    for paragraph in re.split(r'\n{2,}', s):
        wrapper.initial_indent = wrapper.subsequent_indent = indent
        wrapped = wrapper.fill(paragraph.strip())
        if wrapped.startswith('$'):
            chunks.append(wrapped[1:] + '\n')
            indent = ' ' * 4
        else:
            chunks.append(wrapped + '\n\n')
            indent = ''
    return ''.join(chunks).rstrip().lstrip('\n')


def process_comment(comment):
    """Convert a raw C++ documentation comment into docstring text.

    Parameters:
        comment: the comment as written in the header, including its
            ``/** ... */`` or ``///`` delimiters (may be empty).

    Returns:
        The comment with the C++ comment syntax removed, Doxygen commands
        and simple HTML/TeX tags rewritten, and paragraphs re-wrapped to 75
        columns.  An empty comment yields an empty string.
    """
    text = _strip_comment_syntax(comment)
    text = _convert_markup(text)
    return _reflow(text)
Wenzel Jakob | ad06e76 | 2015-07-22 01:01:52 +0200 | [diff] [blame] | 156 | |
| 157 | |
def extract(filename, node, prefix, output):
    """Collect docstring definitions for *node* and everything below it.

    Parameters:
        filename: the header being processed; nodes that report a different
            source file are ignored.
        node: the cursor to inspect.
        prefix: underscore-joined names of the enclosing scopes.
        output: list that receives one C string definition per entry.

    Returns:
        The number of entries appended for *node* and its descendants.
    """
    origin = node.location.file
    if origin is not None and \
            not os.path.samefile(d(origin.name), filename):
        return 0

    count = 0
    kind = node.kind

    if kind in RECURSE_LIST:
        # The translation unit contributes nothing to the prefix; every
        # other scope appends its own spelling.
        if kind == CursorKind.TRANSLATION_UNIT:
            child_prefix = prefix
        elif prefix:
            child_prefix = prefix + '_' + d(node.spelling)
        else:
            child_prefix = d(node.spelling)
        count = sum(extract(filename, child, child_prefix, output)
                    for child in node.get_children())
        # A scope that produced nothing is not emitted itself either.
        if count == 0:
            return 0

    if kind in PRINT_LIST:
        raw = node.raw_comment
        text = process_comment('' if raw is None else d(raw))
        qualified = (prefix + '_' if prefix else '') + d(node.spelling)
        # Multi-line docstrings start on their own line.
        separator = '\n' if '\n' in text else ' '
        output.append('\nstatic const char *%s =%sR"doc(%s)doc";' %
                      (sanitize_name(qualified), separator, text))
        count += 1

    return count
| 184 | |
Wenzel Jakob | a57e51c | 2016-04-27 00:35:03 +0200 | [diff] [blame] | 185 | |
class ExtractionThread(Thread):
    """Worker thread that parses one header and extracts its docstrings.

    A slot of ``job_semaphore`` is taken when the object is constructed
    (which blocks the constructing thread while no slot is free) and is
    given back when ``run`` ends, whether or not it succeeded.
    """

    def __init__(self, filename, parameters, output):
        super().__init__()
        self.filename, self.parameters, self.output = \
            filename, parameters, output
        job_semaphore.acquire()

    def run(self):
        print('Processing "%s" ..' % self.filename, file=sys.stderr)
        try:
            # Every worker creates its own libclang index.
            raw_index = cindex.conf.lib.clang_createIndex(False, True)
            clang_index = cindex.Index(raw_index)
            translation_unit = clang_index.parse(self.filename,
                                                 self.parameters)
            extract(self.filename, translation_unit.cursor, '', self.output)
        finally:
            job_semaphore.release()
| 203 | |
if __name__ == '__main__':
    # Base compiler flags; options given on the command line are appended.
    parameters = ['-x', 'c++', '-std=c++11']
    filenames = []

    if platform.system() == 'Darwin':
        # Use the libclang and SDK that ship with Xcode when present.
        dev_path = '/Applications/Xcode.app/Contents/Developer/'
        lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
        sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
        libclang = lib_dir + 'libclang.dylib'

        if os.path.exists(libclang):
            cindex.Config.set_library_path(os.path.dirname(libclang))

        if os.path.exists(sdk_dir):
            # Only pass -isysroot when an SDK directory actually exists;
            # an empty (or non-directory) SDKs path is skipped instead of
            # crashing.
            sdk_names = next(os.walk(sdk_dir), (sdk_dir, [], []))[1]
            if sdk_names:
                parameters.append('-isysroot')
                parameters.append(os.path.join(sdk_dir, sdk_names[0]))

    # Arguments starting with '-' are compiler options, the rest are headers.
    for item in sys.argv[1:]:
        if item.startswith('-'):
            parameters.append(item)
        else:
            filenames.append(item)

    if not filenames:
        # stdout carries the generated header, so usage goes to stderr.
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0],
              file=sys.stderr)
        sys.exit(-1)

    print('''/*
This file contains docstrings for the Python bindings.
Do not edit! These were automatically extracted by mkdoc.py
*/

#define __EXPAND(x) x
#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...) COUNT
#define __VA_SIZE(...) __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
#define __CAT1(a, b) a ## b
#define __CAT2(a, b) __CAT1(a, b)
#define __DOC1(n1) __doc_##n1
#define __DOC2(n1, n2) __doc_##n1##_##n2
#define __DOC3(n1, n2, n3) __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4) __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5) __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define __DOC6(n1, n2, n3, n4, n5, n6) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
#define __DOC7(n1, n2, n3, n4, n5, n6, n7) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
#define DOC(...) __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
''')

    output = []
    workers = []
    for filename in filenames:
        # Constructing the thread takes a job slot and therefore blocks
        # while all slots are in use.
        thr = ExtractionThread(filename, parameters, output)
        thr.start()
        workers.append(thr)

    print('Waiting for jobs to finish ..', file=sys.stderr)
    for thr in workers:
        thr.join()

    # Sort so the generated file does not depend on thread scheduling order.
    output.sort()
    for line in output:
        print(line)

    print('''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
''')