Éric Araujo | a0e92a8 | 2011-07-26 18:01:08 +0200 | [diff] [blame^] | 1 | #!/usr/bin/env python3 |
Martin v. Löwis | 4d0d471 | 2010-12-03 20:14:31 +0000 | [diff] [blame] | 2 | # This script converts a C file to use the PEP 384 type definition API |
| 3 | # Usage: abitype.py < old_code > new_code |
| 4 | import re, sys |
| 5 | |
############ Simplistic C scanner ##################################
# Token kinds, tried in this order at every position:
#   preproc - a '#' up to and including the end of its line
#   comment - a /* ... */ comment, which may span several lines
#   string  - a double-quoted string or single-quoted character literal
#   ident   - a C identifier or keyword
#   ws      - a run of spaces, tabs and newlines
#   other   - any other single character
# Literals are matched as a single token so that punctuation inside them
# (commas, braces, parentheses, '#') is never mistaken for C syntax.
tokenizer = re.compile(
    r"(?P<preproc>#.*\n)"
    # [\s\S] instead of '.', which would refuse to cross a newline
    r"|(?P<comment>/\*[\s\S]*?\*/)"
    r"""|(?P<string>"(?:\\.|[^"\\\n])*"|'(?:\\.|[^'\\\n])+')"""
    r"|(?P<ident>[a-zA-Z_][a-zA-Z0-9_]*)"
    r"|(?P<ws>[ \t\n]+)"
    r"|(?P<other>.)",
    re.MULTILINE)
| 14 | |
# Split everything read from stdin into [kind, text] tokens.  Each token
# is a list rather than a tuple because the text of a preprocessor token
# is extended in place when the line is continued with a backslash.
tokens = []
source = sys.stdin.read()
pos = 0
while pos != len(source):
    m = tokenizer.match(source, pos)
    tokens.append([m.lastgroup, m.group()])
    pos = m.end()
    if m.lastgroup == 'preproc':
        # continuation lines are considered
        # only in preprocess statements
        # (stop at end of input: a backslash-newline on the very last
        # line would otherwise keep appending '' forever)
        while tokens[-1][1].endswith('\\\n') and pos < len(source):
            nl = source.find('\n', pos)
            if nl == -1:
                line = source[pos:]
            else:
                line = source[pos:nl+1]
            tokens[-1][1] += line
            pos += len(line)
| 33 | |
###### Replacement of PyTypeObject static instances ##############

# classify each token, giving it a one-letter code:
# S: static
# T: PyTypeObject
# I: ident
# W: whitespace
# =, {, }, ; : themselves
# .: anything else
def classify():
    """Return a string with one code character per entry of ``tokens``.

    Character i of the result describes tokens[i], so an offset found in
    the string can be used directly as an index into the token list.
    """
    keyword_codes = {'PyTypeObject': 'T', 'static': 'S'}

    def code_for(kind, text):
        if kind == 'ident':
            return keyword_codes.get(text, 'I')
        if kind == 'ws':
            return 'W'
        if kind == 'other' and text in "={};":
            return text
        return '.'

    return ''.join(code_for(kind, text) for kind, text in tokens)
| 59 | |
def _matching_paren(pos):
    """Return the index of the ')' token that closes the '(' token at pos."""
    nesting = 1
    while nesting:
        pos += 1
        if tokens[pos][1] == '(':
            nesting += 1
        elif tokens[pos][1] == ')':
            nesting -= 1
    return pos


# Obtain a list of fields of a PyTypeObject, in declaration order,
# skipping ob_base
# Comments and whitespace around each field (typically just the slot
# names) are dropped, and information is discarded whether
# the original type was static.
def get_fields(start, real_end):
    """Parse one PyTypeObject definition out of the global ``tokens`` list.

    start is the index of the first token of the definition ('static' or
    'PyTypeObject').  real_end is the index just past the definition; field
    scanning must not reach it.  An object with an end() method (such as a
    regex match) is also accepted for real_end, and its end() is used.

    Return (name, fields): the variable name and a list with the source
    text of each initializer field that follows PyVarObject_HEAD_INIT(...).

    Raise Exception if the initializer does not begin with
    PyVarObject_HEAD_INIT, or if no closing '}' is found before real_end.
    """
    if hasattr(real_end, 'end'):
        # tolerate callers that hand over the match object itself
        real_end = real_end.end()
    pos = start
    # static?  (the keyword plus the single whitespace token after it)
    if tokens[pos][1] == 'static':
        pos += 2
    # PyTypeObject  (again keyword plus one whitespace token)
    pos += 2
    # name
    name = tokens[pos][1]
    pos += 1
    while tokens[pos][1] != '{':
        pos += 1
    pos += 1
    # PyVarObject_HEAD_INIT
    while tokens[pos][0] in ('ws', 'comment'):
        pos += 1
    if tokens[pos][1] != 'PyVarObject_HEAD_INIT':
        raise Exception('%s has no PyVarObject_HEAD_INIT' % name)
    # skip the macro's whole argument list; its arguments may themselves
    # contain parentheses, so the first ')' is not necessarily the end
    while tokens[pos][1] != '(':
        pos += 1
    pos = _matching_paren(pos) + 1
    # field definitions: various tokens, comma-separated
    fields = []
    while True:
        while tokens[pos][0] in ('ws', 'comment'):
            pos += 1
        end = pos
        while tokens[end][1] not in (',', '}'):
            if tokens[end][1] == '(':
                # commas inside parentheses do not separate fields
                end = _matching_paren(end)
            end += 1
            if end >= real_end:
                raise Exception('%s: initializer is not terminated' % name)
        # join field, excluding separator and trailing ws
        end1 = end-1
        while tokens[end1][0] in ('ws', 'comment'):
            end1 -= 1
        field = ''.join(t[1] for t in tokens[pos:end1+1])
        # nothing between the last ',' and '}' is a trailing comma,
        # not an additional field
        if field or tokens[end][1] != '}':
            fields.append(field)
        if tokens[end][1] == '}':
            break
        pos = end+1
    return name, fields
| 110 | |
# List of type slots as of Python 3.2, omitting ob_base
# The order must match the field order of PyTypeObject, because
# initializer fields are paired with these names by position.
typeslots = [
    'tp_name',
    'tp_basicsize',
    'tp_itemsize',
    'tp_dealloc',
    'tp_print',
    'tp_getattr',
    'tp_setattr',
    'tp_reserved',
    'tp_repr',
    'tp_as_number',
    'tp_as_sequence',
    'tp_as_mapping',
    'tp_hash',
    'tp_call',
    'tp_str',
    'tp_getattro',
    'tp_setattro',
    'tp_as_buffer',
    'tp_flags',
    'tp_doc',
    'tp_traverse',
    'tp_clear',
    'tp_richcompare',
    'tp_weaklistoffset',
    'tp_iter',
    'tp_iternext',
    'tp_methods',
    'tp_members',
    'tp_getset',
    'tp_base',
    'tp_dict',
    'tp_descr_get',
    'tp_descr_set',
    'tp_dictoffset',
    'tp_init',
    'tp_alloc',
    'tp_new',
    'tp_free',
    'tp_is_gc',
    'tp_bases',
    'tp_mro',
    'tp_cache',
    'tp_subclasses',
    'tp_weaklist',
    'tp_del',
    'tp_version_tag',
    ]
| 160 | |
# A field that leaves its slot unset: 0 or NULL, optionally behind casts
_unset_field = re.compile(r'(?:\([^()]*\)\s*)*(?:0|NULL)$')

# Generate a PyType_Spec definition
def make_slots(name, fields):
    """Return C source defining ``<name>_slots`` and ``<name>_spec``.

    name is the C variable name of the original PyTypeObject.  fields is
    the list of its initializer fields as source text, in declaration
    order; fields[i] is taken to be the value of typeslots[i].

    tp_name, tp_basicsize, tp_itemsize and tp_flags become members of the
    PyType_Spec.  Every other field that is not 0/NULL becomes a
    {Py_<slot>, <value>} entry of the slot array, which is closed with a
    {0, 0} terminator entry.

    NOTE(review): an entry is emitted for every such field, whether or not
    a matching Py_<slot> id exists in typeslots.h (e.g. tp_as_number,
    tp_dictoffset) -- presumably those need manual cleanup; confirm.

    Raise Exception if there are more fields than known slots.
    """
    if len(fields) > len(typeslots):
        raise Exception('%s has %d fields, but only %d slots are known'
                        % (name, len(fields), len(typeslots)))
    # defaults for spec: a member left out of a C initializer is zero
    spec = {'tp_name': '0', 'tp_basicsize': '0',
            'tp_itemsize': '0', 'tp_flags': '0'}
    res = []
    res.append('static PyType_Slot %s_slots[] = {' % name)
    for slot, val in zip(typeslots, fields):
        if slot in spec:
            spec[slot] = val
        elif not _unset_field.match(val):
            res.append(' {Py_%s, %s},' % (slot, val))
    # the slot array is scanned up to an entry whose slot id is 0
    res.append(' {0, 0}')
    res.append('};')
    res.append('static PyType_Spec %s_spec = {' % name)
    res.append(' %s,' % spec['tp_name'])
    res.append(' %s,' % spec['tp_basicsize'])
    res.append(' %s,' % spec['tp_itemsize'])
    res.append(' %s,' % spec['tp_flags'])
    res.append(' %s_slots,' % name)
    res.append('};\n')
    return '\n'.join(res)
| 184 | |
| 185 | |
# Main loop: replace all static PyTypeObjects until
# there are none left.
# classify() yields one character per token, so offsets of a match in
# the classification string are indices into the token list.
while 1:
    c = classify()
    m = re.search('(SW)?TWIW?=W?{.*?};', c)
    if not m:
        break
    start = m.start()
    end = m.end()
    # pass the end index, not the match object: get_fields compares
    # token indices against it
    name, fields = get_fields(start, end)
    # the whole definition collapses into one token holding the new
    # text; its empty kind is classified as '.', so it cannot match again
    tokens[start:end] = [('',make_slots(name, fields))]

# Output result to stdout
for t, v in tokens:
    sys.stdout.write(v)