| Daniel Veillard | 6100647 | 2002-01-21 17:31:47 +0000 | [diff] [blame^] | 1 | #!/usr/bin/python -u |
| 2 | # |
| 3 | # tries to parse the output of gtk-doc declaration files and make |
| 4 | # an XML reusable description from them |
| 5 | # |
| 6 | # TODO: try to extract comments from the DocBook output of gtk-doc |
| 7 | |
| 8 | import sys |
| 9 | import string |
| 10 | |
# Names collected from the declaration file, in order of appearance
# (filled by parseMacro, parseStruct and parseTypedef respectively).
macros = []
structs = []
typedefs = []
# enum name -> list of the constant names declared in it (parseEnum).
enums = {}
# function name -> (return type, [(argument type, argument name), ...])
# for <FUNCTION> records.
functions = {}
# Same layout as `functions`, for <USER_FUNCTION> records.
private_functions = {}
# normalized return type -> names of the functions declared with it
# (extractTypes).
ret_types = {}
# normalized argument type -> function names, one entry per argument
# using that type (mormalizeTypeSpaces).
types = {}

# File names of the <SECTION> records, in order of appearance.
sections = []
# file name -> list of the non-empty lines of its section.
files = {}
# identifier -> file name of the section that lists it; the constants of
# a listed enum are mapped to the same file.
identifiers_file = {}
# identifier -> kind string: "macro", "struct", "typedef", "enum",
# "const", "function" or "private_func".
identifiers_type = {}
| 24 | |
def mormalizeTypeSpaces(raw, function):
    """Normalize the spacing of an argument type and index it.

    `raw` is split on whitespace and re-joined with single spaces, so
    leading/trailing blanks disappear and inner runs collapse to one
    space.  `function` is then appended to the list stored under the
    normalized type in the module-level `types` dictionary (the list is
    created on first use).

    Returns the normalized type string; an empty or blank `raw` yields
    '' and is indexed under the '' key.
    """
    normalized = ' '.join(raw.split())
    types.setdefault(normalized, []).append(function)
    return normalized
| 40 | |
def removeComments(raw):
    """Return `raw` with every C block comment (/* ... */) removed.

    An unterminated comment swallows the rest of the string.  Text
    without any comment is returned unchanged.
    """
    # ">= 0", not "> 0": find() returns 0 for a comment that starts the
    # string, and that comment must be stripped too.
    start = raw.find('/*')
    while start >= 0:
        # Look for the closer after the two opener characters so that
        # "/*/" is not mistaken for a complete comment.
        end = raw.find('*/', start + 2)
        if end >= 0:
            raw = raw[:start] + raw[end + 2:]
        else:
            raw = raw[:start]
        start = raw.find('/*')
    return raw
| 52 | |
def extractArgs(raw, function):
    """Split a C parameter list into (type, name) pairs.

    Comments are stripped from `raw` with removeComments(), the result is
    split on commas, and for every non-blank piece the trailing run of
    identifier characters (letters, digits and '_') is taken as the
    parameter name; whatever precedes it is the type.  The type goes
    through mormalizeTypeSpaces(), which also records that `function`
    uses it.

    Returns the list of (type, name) tuples in declaration order.  A
    piece that does not end in an identifier (e.g. "...", "char buf[10]")
    gets an empty name and the whole piece as type; a lone identifier
    such as "void" gets an empty type and the identifier as name.
    """
    # '_' is part of C identifiers; leaving it out would cut a name like
    # "my_len" into type "... my_" and name "len".
    ident_chars = string.ascii_letters + string.digits + '_'
    ret = []
    for arg in removeComments(raw).split(","):
        # Strip first so trailing blanks cannot hide the name, and so
        # blank pieces are skipped instead of producing ('', '').
        arg = arg.strip()
        if not arg:
            continue
        type_part = arg.rstrip(ident_chars)
        name = arg[len(type_part):]
        arg_type = mormalizeTypeSpaces(type_part, function)
        ret.append((arg_type, name))
    return ret
| 79 | |
def extractTypes(raw, function):
    """Normalize the spacing of a return type and index it.

    `raw` is split on whitespace and re-joined with single spaces.
    `function` is then appended to the list stored under the normalized
    type in the module-level `ret_types` dictionary (the list is created
    on first use).

    Returns the normalized type string; an empty or blank `raw` yields
    '' and is indexed under the '' key.
    """
    normalized = ' '.join(raw.split())
    ret_types.setdefault(normalized, []).append(function)
    return normalized
| 95 | |
def parseMacro():
    """Read one <MACRO> record from the global `input` stream.

    Expects the opening "<MACRO>" line to have been consumed already and
    reads up to and including the closing "</MACRO>" line.  The text of
    the "<NAME>...</NAME>" line (the last one, if there are several) is
    appended to `macros` and registered in `identifiers_type` as "macro".

    End of file also ends the record.  A record without a NAME line is
    reported on stderr and not recorded.
    """
    global input
    global macros

    name = None
    while True:
        line = input.readline()
        if not line:
            # readline() keeps returning '' at end of file; without this
            # check a truncated file would never leave the loop.
            break
        # Drop only the newline, so a final line lacking one stays intact.
        line = line.rstrip('\n')
        if line == "</MACRO>":
            break
        if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
            name = line[6:-7]

    if name is None:
        sys.stderr.write("<MACRO> record without <NAME>, skipped\n")
        return
    macros.append(name)
    identifiers_type[name] = "macro"
| 108 | |
def parseStruct():
    """Read one <STRUCT> record from the global `input` stream.

    Expects the opening "<STRUCT>" line to have been consumed already and
    reads up to and including the closing "</STRUCT>" line.  The text of
    the "<NAME>...</NAME>" line (the last one, if there are several) is
    appended to `structs` and registered in `identifiers_type` as
    "struct".

    End of file also ends the record.  A record without a NAME line is
    reported on stderr and not recorded.
    """
    global input
    global structs

    name = None
    while True:
        line = input.readline()
        if not line:
            # readline() keeps returning '' at end of file; without this
            # check a truncated file would never leave the loop.
            break
        # Drop only the newline, so a final line lacking one stays intact.
        line = line.rstrip('\n')
        if line == "</STRUCT>":
            break
        if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
            name = line[6:-7]

    if name is None:
        sys.stderr.write("<STRUCT> record without <NAME>, skipped\n")
        return
    structs.append(name)
    identifiers_type[name] = "struct"
| 121 | |
def parseTypedef():
    """Read one <TYPEDEF> record from the global `input` stream.

    Expects the opening "<TYPEDEF>" line to have been consumed already
    and reads up to and including the closing "</TYPEDEF>" line.  The
    text of the "<NAME>...</NAME>" line (the last one, if there are
    several) is appended to `typedefs` and registered in
    `identifiers_type` as "typedef".

    End of file also ends the record.  A record without a NAME line is
    reported on stderr and not recorded.
    """
    global input
    global typedefs

    name = None
    while True:
        line = input.readline()
        if not line:
            # readline() keeps returning '' at end of file; without this
            # check a truncated file would never leave the loop.
            break
        # Drop only the newline, so a final line lacking one stays intact.
        line = line.rstrip('\n')
        if line == "</TYPEDEF>":
            break
        if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
            name = line[6:-7]

    if name is None:
        sys.stderr.write("<TYPEDEF> record without <NAME>, skipped\n")
        return
    typedefs.append(name)
    identifiers_type[name] = "typedef"
| 134 | |
def parseEnum():
    """Read one <ENUM> record from the global `input` stream.

    Expects the opening "<ENUM>" line to have been consumed already and
    reads up to and including the closing "</ENUM>" line.  The
    "<NAME>...</NAME>" line gives the enum name.  Lines containing
    "enum", "{", "}" or ";" are skipped.  Every other line is cut at the
    first "/*", split on commas, and the first word of each piece (the
    part before any "=") is kept as a constant when it starts with an
    ASCII letter.

    Each constant is registered in `identifiers_type` as "const" as soon
    as it is seen.  At the end of the record the constant list is stored
    in `enums` under the enum name and the name is registered as "enum".

    End of file also ends the record.  A record without a NAME line is
    reported on stderr and its constant list is not stored.
    """
    global input
    global enums

    name = None
    consts = []
    while True:
        line = input.readline()
        if not line:
            # readline() keeps returning '' at end of file; without this
            # check a truncated file would never leave the loop.
            break
        # Drop only the newline, so a final line lacking one stays intact.
        line = line.rstrip('\n')
        if line == "</ENUM>":
            break
        if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
            name = line[6:-7]
            continue
        # Lines carrying the enum keyword, the braces or the closing ';'
        # are treated as framing, not as constant declarations.
        if 'enum' in line or '{' in line or '}' in line or ';' in line:
            continue
        comment = line.find('/*')
        if comment >= 0:
            line = line[:comment]
        for decl in line.split(","):
            words = decl.split("=")[0].split()
            if words and words[0][0] in string.ascii_letters:
                consts.append(words[0])
                identifiers_type[words[0]] = "const"

    if name is None:
        sys.stderr.write("<ENUM> record without <NAME>, skipped\n")
        return
    enums[name] = consts
    identifiers_type[name] = "enum"
| 169 | |
def parseStaticFunction():
    """Read one <USER_FUNCTION> record from the global `input` stream.

    Expects the opening "<USER_FUNCTION>" line to have been consumed
    already and reads up to and including the closing "</USER_FUNCTION>"
    line.  Inside the record:

      * "<NAME>...</NAME>" gives the function name,
      * "<RETURNS>...</RETURNS>" gives the return type,
      * any other line is taken as the parameter list.

    When a kind of line occurs several times the last one wins.  The
    return type goes through extractTypes() and the parameter list
    through extractArgs(); the resulting (type, args) tuple is stored in
    `private_functions` and the name is registered in `identifiers_type`
    as "private_func".  Without a RETURNS line the type is None; without
    a parameter line the argument list is empty.

    End of file also ends the record.  A record without a NAME line is
    reported on stderr and not recorded.
    """
    global input
    global private_functions

    name = None
    returns = None
    signature = None
    while True:
        line = input.readline()
        if not line:
            # readline() keeps returning '' at end of file; without this
            # check a truncated file would never leave the loop.
            break
        # Drop only the newline, so a final line lacking one stays intact.
        line = line.rstrip('\n')
        if line == "</USER_FUNCTION>":
            break
        if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
            name = line[6:-7]
        elif line[0:9] == "<RETURNS>" and line[-10:] == "</RETURNS>":
            # Kept raw until the name is known: extractTypes() indexes
            # the type under the function name.
            returns = line[9:-10]
        else:
            signature = line

    if name is None:
        sys.stderr.write("<USER_FUNCTION> record without <NAME>, skipped\n")
        return
    ret_type = None
    if returns is not None:
        ret_type = extractTypes(returns, name)
    args = []
    if signature is not None:
        args = extractArgs(signature, name)
    private_functions[name] = (ret_type, args)
    identifiers_type[name] = "private_func"
| 189 | |
def parseFunction():
    """Read one <FUNCTION> record from the global `input` stream.

    Expects the opening "<FUNCTION>" line to have been consumed already
    and reads up to and including the closing "</FUNCTION>" line.
    Inside the record:

      * "<NAME>...</NAME>" gives the function name,
      * "<RETURNS>...</RETURNS>" gives the return type,
      * any other line is taken as the parameter list.

    When a kind of line occurs several times the last one wins.  The
    return type goes through extractTypes() and the parameter list
    through extractArgs(); the resulting (type, args) tuple is stored in
    `functions` and the name is registered in `identifiers_type` as
    "function".  Without a RETURNS line the type is None; without a
    parameter line the argument list is empty.

    End of file also ends the record.  A record without a NAME line is
    reported on stderr and not recorded.
    """
    global input
    global functions

    name = None
    returns = None
    signature = None
    while True:
        line = input.readline()
        if not line:
            # readline() keeps returning '' at end of file; without this
            # check a truncated file would never leave the loop.
            break
        # Drop only the newline, so a final line lacking one stays intact.
        line = line.rstrip('\n')
        if line == "</FUNCTION>":
            break
        if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
            name = line[6:-7]
        elif line[0:9] == "<RETURNS>" and line[-10:] == "</RETURNS>":
            # Kept raw until the name is known: extractTypes() indexes
            # the type under the function name.
            returns = line[9:-10]
        else:
            signature = line

    if name is None:
        sys.stderr.write("<FUNCTION> record without <NAME>, skipped\n")
        return
    ret_type = None
    if returns is not None:
        ret_type = extractTypes(returns, name)
    args = []
    if signature is not None:
        args = extractArgs(signature, name)
    functions[name] = (ret_type, args)
    identifiers_type[name] = "function"
| 209 | |
def parseSection():
    """Read one <SECTION> record from the global `input` stream.

    Expects the opening "<SECTION>" line to have been consumed already
    and reads up to and including the closing "</SECTION>" line.  The
    "<FILE>...</FILE>" line gives the file name; every other non-empty
    line is kept, in order, as a symbol of that file.

    The file name is appended to `sections`, the symbol list is stored in
    `files`, and each symbol is mapped to the file in `identifiers_file`.
    When a symbol is a key of `enums`, the constants of that enum are
    mapped to the same file.

    End of file also ends the record.  A record without a FILE line is
    reported on stderr and not recorded.
    """
    global input
    global sections
    global files
    global identifiers_file

    name = None
    tokens = []
    while True:
        line = input.readline()
        if not line:
            # readline() keeps returning '' at end of file; without this
            # check a truncated file would never leave the loop.
            break
        # Drop only the newline, so a final line lacking one stays intact.
        line = line.rstrip('\n')
        if line == "</SECTION>":
            break
        if line[0:6] == "<FILE>" and line[-7:] == "</FILE>":
            name = line[6:-7]
        elif line:
            tokens.append(line)

    if name is None:
        sys.stderr.write("<SECTION> record without <FILE>, skipped\n")
        return
    sections.append(name)
    files[name] = tokens
    for token in tokens:
        identifiers_file[token] = name
        #
        # Small transitivity for enum values
        #
        for const in enums.get(token, ()):
            identifiers_file[const] = name
| 235 | |
#
# Pass 1: read the declarations.  Each opening tag line is handed to the
# matching parse function, which consumes the record up to its closing
# tag from the same global `input` stream.
#
print("Parsing: libxml-decl.txt")
record_parsers = {
    "<MACRO>": parseMacro,
    "<ENUM>": parseEnum,
    "<FUNCTION>": parseFunction,
    "<STRUCT>": parseStruct,
    "<TYPEDEF>": parseTypedef,
    "<USER_FUNCTION>": parseStaticFunction,
}
input = open('libxml-decl.txt')
try:
    while True:
        line = input.readline()
        if not line:
            break
        # Drop only the newline, so a final line lacking one stays intact.
        line = line.rstrip('\n')
        if line in record_parsers:
            record_parsers[line]()
        elif line[:1] == "<":
            print("unhandled %s" % (line))
finally:
    # Close the file even when a parser raises.
    input.close()

# print(...) with a single pre-formatted string gives the same output
# under the Python 2 print statement and the Python 3 print function.
print("Parsed: %d macros. %d structs, %d typedefs, %d enums" % (
      len(macros), len(structs), len(typedefs), len(enums)))
c = 0
for consts in enums.values():
    c = c + len(consts)
print(" %d constants, %d functions and %d private functions" % (
      c, len(functions), len(private_functions)))
print("The functions manipulates %d different types" % (len(types)))
print("The functions returns %d different types" % (len(ret_types)))
| 268 | |
#
# Pass 2: read the section list, which says which file each identifier
# belongs to.  parseSection() consumes each record from the same global
# `input` stream.
#
print("Parsing: libxml-decl-list.txt")
input = open('libxml-decl-list.txt')
try:
    while True:
        line = input.readline()
        if not line:
            break
        # Drop only the newline, so a final line lacking one stays intact.
        line = line.rstrip('\n')
        if line == "<SECTION>":
            parseSection()
        elif line[:1] == "<":
            print("unhandled %s" % (line))
finally:
    # Close the file even when the parser raises.
    input.close()

print("Parsed: %d files %d identifiers" % (len(files), len(identifiers_file)))
| 282 | |
#
# Pass 3: write the XML description.  First the files with the symbols
# they export, then every known symbol in sorted order; functions also
# get their return type and arguments.
#
print("Saving XML description libxml2-api.xml")
output = open("libxml2-api.xml", "w")
try:
    output.write("<api name='libxml2'>\n")
    output.write(" <files>\n")
    for fname in files.keys():
        output.write(" <file name='%s'>\n" % fname)
        for symbol in files[fname]:
            output.write(" <exports symbol='%s'/>\n" % (symbol))
        output.write(" </file>\n")
    output.write(" </files>\n")

    output.write(" <symbols>\n")
    # Work on a copy: binding `symbols` to `macros` itself would make
    # every append below grow the macros list as well.
    symbols = list(macros)
    symbols.extend(structs)
    symbols.extend(typedefs)
    for enum in enums.keys():
        symbols.append(enum)
        symbols.extend(enums[enum])
    symbols.extend(functions.keys())
    symbols.sort()
    prev = None
    for symbol in symbols:
        # The list is sorted, so duplicates are adjacent; emit each once.
        if symbol == prev:
            # print "Symbol %s redefined" % (symbol)
            continue
        prev = symbol
        if symbol not in identifiers_type:
            print("Symbol %s not found in identifiers list" % (symbol))
            continue
        kind = identifiers_type[symbol]
        # None when no section lists the symbol.
        origin = identifiers_file.get(symbol)

        output.write(" <%s name='%s'" % (kind, symbol))
        if origin is not None:
            output.write(" file='%s'" % (origin))
        if kind == "function":
            output.write(">\n")
            (ret, args) = functions[symbol]
            output.write(" <return type='%s'/>\n" % (ret))
            for arg in args:
                output.write(" <arg name='%s' type='%s'/>\n" % (
                             arg[1], arg[0]))
            output.write(" </%s>\n" % (kind))
        else:
            output.write("/>\n")
    output.write(" </symbols>\n")
    output.write("</api>\n")
finally:
    # Flush and close the output even when writing fails part-way.
    output.close()
# The count includes duplicate entries, not only the distinct symbols
# written above.
print("generated XML for %d symbols" % (len(symbols)))