| #!/usr/bin/env python3 |
| # |
| # Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..] |
| # |
| # Extract documentation from C++ header files to use it in Python bindings |
| # |
| |
| import os, sys, platform, re, textwrap |
| from clang import cindex |
| from clang.cindex import CursorKind |
| from collections import OrderedDict |
| from threading import Thread, Semaphore |
| from multiprocessing import cpu_count |
| |
| if platform.system() == 'Darwin': |
| libclang = '/opt/llvm/lib/libclang.dylib' |
| if os.path.exists(libclang): |
| cindex.Config.set_library_path(os.path.dirname(libclang)) |
| |
| RECURSE_LIST = [ |
| CursorKind.TRANSLATION_UNIT, |
| CursorKind.NAMESPACE, |
| CursorKind.CLASS_DECL, |
| CursorKind.STRUCT_DECL, |
| CursorKind.CLASS_TEMPLATE |
| ] |
| |
| PRINT_LIST = [ |
| CursorKind.CLASS_DECL, |
| CursorKind.STRUCT_DECL, |
| CursorKind.CLASS_TEMPLATE, |
| CursorKind.FUNCTION_DECL, |
| CursorKind.FUNCTION_TEMPLATE, |
| CursorKind.CXX_METHOD, |
| CursorKind.CONSTRUCTOR, |
| CursorKind.FIELD_DECL |
| ] |
| |
| CPP_OPERATORS = { |
| '<=' : 'le', '>=' : 'ge', '==' : 'eq', '!=' : 'ne', '[]' : 'array', |
| '+=' : 'iadd', '-=' : 'isub', '*=' : 'imul', '/=' : 'idiv', '%=' : |
| 'imod', '&=' : 'iand', '|=' : 'ior', '^=' : 'ixor', '<<=' : 'ilshift', |
| '>>=' : 'irshift', '++' : 'inc', '--' : 'dec', '<<' : 'lshift', '>>' : |
| 'rshift', '&&' : 'land', '||' : 'lor', '!' : 'lnot', '~' : 'bnot', '&' |
| : 'band', '|' : 'bor', '+' : 'add', '-' : 'sub', '*' : 'mul', '/' : |
| 'div', '%' : 'mod', '<' : 'lt', '>' : 'gt', '=' : 'assign' |
| } |
| CPP_OPERATORS = OrderedDict(sorted(CPP_OPERATORS.items(), key=lambda t: -len(t[0]))) |
| |
| job_count = cpu_count() |
| job_semaphore = Semaphore(job_count) |
| |
| registered_names = dict() |
| |
| def d(s): |
| return s.decode('utf8') |
| |
| def sanitize_name(name): |
| global registered_names |
| for k, v in CPP_OPERATORS.items(): |
| name = name.replace('operator%s' % k, 'operator_%s' % v) |
| name = name.replace('<', '_') |
| name = name.replace('>', '_') |
| name = name.replace(' ', '_') |
| name = name.replace(',', '_') |
| if name in registered_names: |
| registered_names[name] += 1 |
| name += '_' + str(registered_names[name]) |
| else: |
| registered_names[name] = 1 |
| return '__doc_' + name |
| |
| def process_comment(comment): |
| result = '' |
| |
| # Remove C++ comment syntax |
| for s in comment.splitlines(): |
| s = s.strip() |
| if s.startswith('/*'): |
| s = s[2:].lstrip('* \t') |
| elif s.endswith('*/'): |
| s = s[:-2].rstrip('* \t') |
| elif s.startswith('///'): |
| s = s[3:] |
| if s.startswith('*'): |
| s = s[1:] |
| result += s.strip() + '\n' |
| |
| # Doxygen tags |
| cpp_group = '([\w:]+)' |
| param_group = '([\[\w:\]]+)' |
| |
| s = result |
| s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s) |
| s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s) |
| s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s) |
| s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s) |
| s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s) |
| s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group), r'\n\n$Parameter ``\2``:\n\n', s) |
| |
| for in_, out_ in { |
| 'return' : 'Returns', |
| 'author' : 'Author', |
| 'authors' : 'Authors', |
| 'copyright' : 'Copyright', |
| 'date' : 'Date', |
| 'remark' : 'Remark', |
| 'sa' : 'See also', |
| 'see' : 'See also', |
| 'extends' : 'Extends', |
| 'throw' : 'Throws', |
| 'throws' : 'Throws' }.items(): |
| s = re.sub(r'\\%s\s*' % in_, r'\n\n$%s:\n\n' % out_, s) |
| |
| s = re.sub(r'\\details\s*', r'\n\n', s) |
| s = re.sub(r'\\brief\s*', r'', s) |
| s = re.sub(r'\\short\s*', r'', s) |
| s = re.sub(r'\\ref\s*', r'', s) |
| |
| # HTML/TeX tags |
| s = re.sub(r'<tt>([^<]*)</tt>', r'``\1``', s) |
| s = re.sub(r'<em>([^<]*)</em>', r'*\1*', s) |
| s = re.sub(r'<b>([^<]*)</b>', r'**\1**', s) |
| s = re.sub(r'\\f\$([^\$]*)\\f\$', r'$\1$', s) |
| |
| s = s.replace('``true``', '``True``') |
| s = s.replace('``false``', '``False``') |
| |
| # Re-flow text |
| wrapper = textwrap.TextWrapper() |
| wrapper.expand_tabs = True |
| wrapper.replace_whitespace = True |
| wrapper.width = 75 |
| wrapper.initial_indent = wrapper.subsequent_indent = '' |
| |
| result = '' |
| for x in re.split(r'\n{2,}', s): |
| wrapped = wrapper.fill(x.strip()) |
| if len(wrapped) > 0 and wrapped[0] == '$': |
| result += wrapped[1:] + '\n' |
| wrapper.initial_indent = wrapper.subsequent_indent = ' '*4 |
| else: |
| result += wrapped + '\n\n' |
| wrapper.initial_indent = wrapper.subsequent_indent = '' |
| return result.rstrip() |
| |
| |
| def extract(filename, node, prefix, output): |
| num_extracted = 0 |
| if not (node.location.file is None or os.path.samefile(d(node.location.file.name), filename)): |
| return 0 |
| if node.kind in RECURSE_LIST: |
| sub_prefix = prefix |
| if node.kind != CursorKind.TRANSLATION_UNIT: |
| if len(sub_prefix) > 0: |
| sub_prefix += '_' |
| sub_prefix += d(node.spelling) |
| for i in node.get_children(): |
| num_extracted += extract(filename, i, sub_prefix, output) |
| if num_extracted == 0: |
| return 0 |
| if node.kind in PRINT_LIST: |
| comment = d(node.raw_comment) if node.raw_comment is not None else '' |
| comment = process_comment(comment) |
| name = sanitize_name(prefix + '_' + d(node.spelling)) |
| output.append('\nstatic const char *%s = %sR"doc(%s)doc";' % (name, '\n' if '\n' in comment else '', comment)) |
| num_extracted += 1 |
| return num_extracted |
| |
| class ExtractionThread(Thread): |
| def __init__ (self, filename, parameters, output): |
| Thread.__init__(self) |
| self.filename = filename |
| self.parameters = parameters |
| self.output = output |
| job_semaphore.acquire() |
| |
| def run(self): |
| print('Processing "%s" ..' % self.filename, file = sys.stderr) |
| try: |
| index = cindex.Index(cindex.conf.lib.clang_createIndex(False, True)) |
| tu = index.parse(self.filename, self.parameters) |
| extract(self.filename, tu.cursor, '', self.output) |
| finally: |
| job_semaphore.release() |
| |
| if __name__ == '__main__': |
| parameters = ['-x', 'c++', '-std=c++11'] |
| filenames = [] |
| |
| for item in sys.argv[1:]: |
| if item.startswith('-'): |
| parameters.append(item) |
| else: |
| filenames.append(item) |
| |
| if len(filenames) == 0: |
| print('Syntax: %s [.. a list of header files ..]' % sys.argv[0]) |
| exit(-1) |
| |
| print('''/* |
| This file contains docstrings for the Python bindings. |
| Do not edit! These were automatically extracted by mkdoc.py |
| */ |
| |
| #define __EXPAND(x) x |
| #define __COUNT(_1, _2, _3, _4, _5, COUNT, ...) COUNT |
| #define __VA_SIZE(...) __EXPAND(__COUNT(__VA_ARGS__, 5, 4, 3, 2, 1)) |
| #define __CAT1(a, b) a ## b |
| #define __CAT2(a, b) __CAT1(a, b) |
| #define __DOC1(n1) __doc_##n1 |
| #define __DOC2(n1, n2) __doc_##n1##_##n2 |
| #define __DOC3(n1, n2, n3) __doc_##n1##_##n2##_##n3 |
| #define __DOC4(n1, n2, n3, n4) __doc_##n1##_##n2##_##n3##_##n4 |
| #define __DOC5(n1, n2, n3, n4, n5) __doc_##n1##_##n2##_##n3##_##n4_##n5 |
| #define DOC(...) __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__)) |
| |
| #if defined(__GNUG__) |
| #pragma GCC diagnostic push |
| #pragma GCC diagnostic ignored "-Wunused-variable" |
| #endif |
| ''') |
| |
| output = [] |
| for filename in filenames: |
| thr = ExtractionThread(filename, parameters, output) |
| thr.start() |
| |
| print('Waiting for jobs to finish ..', file = sys.stderr) |
| for i in range(job_count): |
| job_semaphore.acquire() |
| |
| output.sort() |
| for l in output: |
| print(l) |
| |
| print(''' |
| #if defined(__GNUG__) |
| #pragma GCC diagnostic pop |
| #endif |
| ''') |