Wenzel Jakob | ad06e76 | 2015-07-22 01:01:52 +0200 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
| 2 | # |
| 3 | # Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..] |
| 4 | # |
| 5 | # Extract documentation from C++ header files to use it in Python bindings |
| 6 | # |
| 7 | |
| 8 | import os, sys, platform, re, textwrap |
| 9 | from clang import cindex |
| 10 | from clang.cindex import CursorKind |
| 11 | from collections import OrderedDict |
Wenzel Jakob | fa70d30 | 2015-07-23 14:43:34 +0200 | [diff] [blame] | 12 | from threading import Thread, Semaphore |
| 13 | from multiprocessing import cpu_count |
Wenzel Jakob | ad06e76 | 2015-07-22 01:01:52 +0200 | [diff] [blame] | 14 | |
# On macOS, point the clang bindings at the libclang build under /opt/llvm
# when that file exists; otherwise the library path is left unconfigured here.
if platform.system() == 'Darwin':
    libclang = '/opt/llvm/lib/libclang.dylib'
    if os.path.exists(libclang):
        # set_library_path() expects the containing directory, not the file
        cindex.Config.set_library_path(os.path.dirname(libclang))
| 19 | |
# Cursor kinds whose children extract() descends into
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.CLASS_TEMPLATE
]

# Cursor kinds for which extract() emits a docstring variable
PRINT_LIST = [
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.CLASS_TEMPLATE,
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    CursorKind.FIELD_DECL
]
| 38 | |
# Mnemonic names substituted for C++ operator tokens by sanitize_name(),
# e.g. 'operator+=' becomes 'operator_iadd'.  The function-call operator
# '()' is included so that 'operator()' does not leave parentheses in the
# generated identifier.
CPP_OPERATORS = {
    '<=' : 'le', '>=' : 'ge', '==' : 'eq', '!=' : 'ne', '[]' : 'array',
    '+=' : 'iadd', '-=' : 'isub', '*=' : 'imul', '/=' : 'idiv', '%=' :
    'imod', '&=' : 'iand', '|=' : 'ior', '^=' : 'ixor', '<<=' : 'ilshift',
    '>>=' : 'irshift', '++' : 'inc', '--' : 'dec', '<<' : 'lshift', '>>' :
    'rshift', '&&' : 'land', '||' : 'lor', '!' : 'lnot', '~' : 'bnot', '&'
    : 'band', '|' : 'bor', '+' : 'add', '-' : 'sub', '*' : 'mul', '/' :
    'div', '%' : 'mod', '<' : 'lt', '>' : 'gt', '=' : 'assign',
    '()' : 'call'
}
# Longest tokens first: the table is applied in iteration order, so '<<='
# has to be tried before '<<', which has to be tried before '<'.
CPP_OPERATORS = OrderedDict(sorted(CPP_OPERATORS.items(), key=lambda t: -len(t[0])))
| 49 | |
# One extraction job per CPU reported by multiprocessing; the semaphore
# holds one slot for every job that may run at the same time.
job_count = cpu_count()
job_semaphore = Semaphore(job_count)

# Sanitized name -> number of times it has been handed out so far
registered_names = {}
| 54 | |
def d(s):
    """Return *s* as text.

    ``bytes`` values are decoded as UTF-8; anything else (in particular a
    value that already is a ``str``) is returned unchanged, so the helper
    works whichever of the two types the clang bindings hand back.
    """
    return s.decode('utf8') if isinstance(s, bytes) else s
| 57 | |
def sanitize_name(name):
    """Turn a prefixed C++ entity name into a unique ``__doc_*`` identifier.

    Operator spellings are replaced by the mnemonics in ``CPP_OPERATORS``
    (``operator+=`` becomes ``operator_iadd``).  Every remaining character
    that is not a letter, digit or underscore is replaced by ``_``; besides
    the ``<``, ``>``, space and comma of template names this also covers
    characters such as ``(``, ``)``, ``*`` or ``&`` that would otherwise
    end up verbatim in the generated C++ variable name.

    A name that was handed out before (e.g. an overload) gets a numeric
    suffix: the second occurrence ends in ``_2``, the third in ``_3``, ...

    :param name: scope-prefixed name of the entity
    :return: the identifier, including the ``__doc_`` prefix
    """
    for k, v in CPP_OPERATORS.items():
        name = name.replace('operator%s' % k, 'operator_%s' % v)
    name = re.sub(r'\W', '_', name)
    # NOTE(review): extract() runs in several ExtractionThread instances and
    # this check-then-update on the shared dict is not atomic.
    if name in registered_names:
        registered_names[name] += 1
        name += '_' + str(registered_names[name])
    else:
        registered_names[name] = 1
    return '__doc_' + name
| 72 | |
def process_comment(comment):
    """Convert a raw C++ (Doxygen) comment into plain, re-flowed text.

    The comment delimiters (``/* */``, ``///`` and leading ``*``) are
    removed, a set of Doxygen commands and HTML/TeX tags is rewritten to
    reST-like markup, and the text is re-wrapped to 75 columns.  Block
    commands such as ``\\param`` or ``\\return`` become a heading line
    ("Returns:") whose first following paragraph is indented by four spaces.

    :param comment: comment text including its delimiters (may be empty)
    :return: the processed text without trailing whitespace
    """
    # Remove C++ comment syntax
    lines = []
    for line in comment.splitlines():
        line = line.strip()
        # Opening and closing markers are tested independently so that a
        # one-line block comment ("/** text */") loses both of them.
        if line.startswith('/*'):
            line = line[2:].lstrip('* \t')
        if line.endswith('*/'):
            line = line[:-2].rstrip('* \t')
        if line.startswith('///'):
            line = line[3:]
        if line.startswith('*'):
            line = line[1:]
        lines.append(line.strip())
    s = ''.join(line + '\n' for line in lines)

    # Doxygen tags
    cpp_group = r'([\w:]+)'
    # Optional direction attribute of \param: [in], [out] or [in,out]
    param_dir = r'(?:\[[^\]]*\])?'

    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    # A leading '$' marks a heading for the re-flow step below
    s = re.sub(r'\\param%s\s+%s' % (param_dir, cpp_group), r'\n\n$Parameter ``\1``:\n\n', s)

    for in_, out_ in {
        'return' : 'Returns',
        'returns' : 'Returns',
        'author' : 'Author',
        'authors' : 'Authors',
        'copyright' : 'Copyright',
        'date' : 'Date',
        'remark' : 'Remark',
        'remarks' : 'Remarks',
        'sa' : 'See also',
        'see' : 'See also',
        'extends' : 'Extends',
        'throw' : 'Throws',
        'throws' : 'Throws' }.items():
        # \b stops a command from matching the first part of a longer one
        # ("\author" inside "\authors"), which used to leave a stray 's'.
        s = re.sub(r'\\%s\b\s*' % in_, r'\n\n$%s:\n\n' % out_, s)

    s = re.sub(r'\\details\s*', r'\n\n', s)
    s = re.sub(r'\\brief\s*', r'', s)
    s = re.sub(r'\\short\s*', r'', s)
    s = re.sub(r'\\ref\s*', r'', s)

    # HTML/TeX tags
    s = re.sub(r'<tt>([^<]*)</tt>', r'``\1``', s)
    s = re.sub(r'<em>([^<]*)</em>', r'*\1*', s)
    s = re.sub(r'<b>([^<]*)</b>', r'**\1**', s)
    s = re.sub(r'\\f\$([^\$]*)\\f\$', r'$\1$', s)

    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')

    # Re-flow text
    wrapper = textwrap.TextWrapper()
    wrapper.expand_tabs = True
    wrapper.replace_whitespace = True
    wrapper.width = 75
    wrapper.initial_indent = wrapper.subsequent_indent = ''

    result = ''
    for paragraph in re.split(r'\n{2,}', s):
        paragraph = paragraph.strip()
        # The marker is looked for before wrapping: a heading that directly
        # follows another heading would otherwise be indented first and its
        # '$' would end up in the output.
        # NOTE(review): a paragraph starting with inline math converted from
        # \f$ also begins with '$' and is treated as a heading.
        if paragraph.startswith('$'):
            wrapper.initial_indent = wrapper.subsequent_indent = ''
            result += wrapper.fill(paragraph)[1:] + '\n'
            wrapper.initial_indent = wrapper.subsequent_indent = ' '*4
        else:
            result += wrapper.fill(paragraph) + '\n\n'
            wrapper.initial_indent = wrapper.subsequent_indent = ''
    return result.rstrip()
| 146 | |
| 147 | |
def extract(filename, node, prefix, output):
    """Recursively collect docstring definitions below a clang cursor.

    For every node whose kind is in ``PRINT_LIST`` a C++ definition of the
    form ``static const char *__doc_<name> = R"doc(...)doc";`` is appended
    to *output*.  Nodes whose kind is in ``RECURSE_LIST`` are descended
    into, with their spelling appended to the name prefix.

    :param filename: header being processed; nodes located in any other
                     file are ignored
    :param node: cursor to inspect
    :param prefix: '_'-joined names of the enclosing scopes ('' at the top)
    :param output: list that receives the generated definitions
    :return: number of definitions appended for *node* and its children
    """
    num_extracted = 0
    # Ignore everything that was pulled in from another file
    if not (node.location.file is None or os.path.samefile(d(node.location.file.name), filename)):
        return 0
    if node.kind in RECURSE_LIST:
        sub_prefix = prefix
        if node.kind != CursorKind.TRANSLATION_UNIT:
            if len(sub_prefix) > 0:
                sub_prefix += '_'
            sub_prefix += d(node.spelling)
        for i in node.get_children():
            num_extracted += extract(filename, i, sub_prefix, output)
        # A recursed node that produced nothing is not emitted itself
        if num_extracted == 0:
            return 0
    if node.kind in PRINT_LIST:
        comment = d(node.raw_comment) if node.raw_comment is not None else ''
        comment = process_comment(comment)
        # Only insert the separator when there is an enclosing scope;
        # otherwise a global entity 'foo' would be emitted as '__doc__foo',
        # which DOC(foo) (expanding to '__doc_foo') cannot refer to.
        spelling = d(node.spelling)
        qualified = prefix + '_' + spelling if len(prefix) > 0 else spelling
        name = sanitize_name(qualified)
        output.append('\nstatic const char *%s = %sR"doc(%s)doc";' % (name, '\n' if '\n' in comment else '', comment))
        num_extracted += 1
    return num_extracted
| 169 | |
class ExtractionThread(Thread):
    """Thread that parses one header file and extracts its docstrings.

    Creating an instance takes one slot of ``job_semaphore`` (blocking the
    creating thread until a slot is available); the slot is given back when
    ``run()`` finishes, whether or not it raised.
    """

    def __init__(self, filename, parameters, output):
        super().__init__()
        self.filename, self.parameters, self.output = filename, parameters, output
        # Taken here rather than in run() so that creating threads is
        # throttled to the number of free job slots.
        job_semaphore.acquire()

    def run(self):
        """Parse ``self.filename`` and append its docstrings to ``self.output``."""
        print('Processing "%s" ..' % self.filename, file=sys.stderr)
        try:
            handle = cindex.conf.lib.clang_createIndex(False, True)
            index = cindex.Index(handle)
            translation_unit = index.parse(self.filename, self.parameters)
            extract(self.filename, translation_unit.cursor, '', self.output)
        finally:
            job_semaphore.release()
| 186 | |
if __name__ == '__main__':
    # Arguments starting with '-' are passed on to clang, everything else
    # is taken to be a header file to process.
    parameters = ['-x', 'c++', '-std=c++11']
    filenames = []

    for item in sys.argv[1:]:
        if item.startswith('-'):
            parameters.append(item)
        else:
            filenames.append(item)

    if not filenames:
        # stdout carries the generated header, so the usage message goes to
        # stderr like the other diagnostics of this script.
        print('Syntax: %s [-I<path> ..] [.. a list of header files ..]' % sys.argv[0], file=sys.stderr)
        sys.exit(-1)

    # Fixed preamble of the generated header.  In __DOC5 the last two
    # arguments are pasted as n4##_##n5; writing 'n4_' would be a single
    # token and not the macro parameter n4.
    print('''/*
This file contains docstrings for the Python bindings.
Do not edit! These were automatically extracted by mkdoc.py
*/

#define __COUNT(_1, _2, _3, _4, _5, COUNT, ...) COUNT
#define __VA_SIZE(...) __COUNT(__VA_ARGS__, 5, 4, 3, 2, 1)
#define __CAT1(a, b) a ## b
#define __CAT2(a, b) __CAT1(a, b)
#define __DOC1(n1) __doc_##n1
#define __DOC2(n1, n2) __doc_##n1##_##n2
#define __DOC3(n1, n2, n3) __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4) __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5) __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define DOC(...) __CAT2(__DOC, __VA_SIZE(__VA_ARGS__))(__VA_ARGS__)

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
''')

    output = []
    for filename in filenames:
        # The constructor blocks until a job slot is free
        thr = ExtractionThread(filename, parameters, output)
        thr.start()

    print('Waiting for jobs to finish ..', file=sys.stderr)
    # Every running job holds one slot, so once all slots could be taken
    # here no job is running any more.
    for i in range(job_count):
        job_semaphore.acquire()

    # Sort so that the output does not depend on thread scheduling
    output.sort()
    for l in output:
        print(l)

    print('''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
''')