blob: 1811b4846150f1c54f90bd2c795af7744196c883 [file] [log] [blame]
#!/usr/bin/python -u
#
# tries to parse the output of gtk-doc declaration files and make
# an XML reusable description from them
#
# TODO: try to extract comments from the DocBook output

8import sys
9import string
10
# Symbol tables filled while parsing libxml-decl.txt, keyed by identifier.
macros = {}          # <MACRO> records containing a #define -> comment text
variables = {}       # <MACRO> records without a #define -> comment text
structs = {}         # struct name -> comment text
typedefs = {}        # typedef name -> comment text
enums = {}           # enum name -> [list of constant names, comment text]
functions = {}       # function name -> [return type, [(type, name), ...], comment]
user_functions = {}  # <USER_FUNCTION> name -> [return type, args, comment]
ret_types = {}       # return type -> names of the functions returning it
types = {}           # argument type -> names of the functions using it

# Layout information read from libxml-decl-list.txt.
sections = []           # section names, in file order
files = {}              # section/file name -> identifiers listed in it
identifiers_file = {}   # identifier -> section/file name
identifiers_type = {}   # identifier -> kind ("macro", "struct", "function", ...)

25
##################################################################
#
# Parsing: libxml-decl.txt
#
##################################################################
def mormalizeTypeSpaces(raw, function):
    """Normalise the spacing of an argument type and index it.

    raw is split on whitespace and re-joined with single spaces; the
    resulting type string is returned.  As a side effect `function` is
    appended to the list kept for that type in the global `types` table.
    """
    global types

    normalized = ' '.join(raw.split())
    types.setdefault(normalized, []).append(function)
    return normalized

46
def removeComments(raw):
    """Return raw with every C comment (/* ... */) removed.

    An unterminated comment swallows the rest of the string.  Unlike the
    previous version, a comment starting at column 0 is removed too, and
    the closing marker is only searched for after the opening one, so
    that "/*/" is not mistaken for a complete comment.
    """
    while 1:
        start = raw.find('/*')
        if start < 0:
            break
        # skip the two characters of the opener before looking for '*/'
        end = raw.find('*/', start + 2)
        if end < 0:
            raw = raw[:start]
            break
        raw = raw[:start] + raw[end + 2:]
    return raw

58
def extractArgs(raw, function):
    """Split a C argument list into a list of (type, name) tuples.

    raw is the text between the parentheses of a prototype; comments are
    removed first and the arguments are separated on commas.  For each
    argument the trailing identifier is taken as the name and whatever
    precedes it as the type, which is normalised and registered through
    mormalizeTypeSpaces().  `function` is the name of the owning function,
    used only for that registration.

    A raw value of None (no signature seen) yields an empty list instead
    of failing, and '_' is treated as part of an identifier so that a
    name such as "my_arg" is no longer cut after the underscore.
    """
    if raw is None:
        return []
    raw = removeComments(raw)
    ret = []
    for arg in raw.split(","):
        if len(arg) == 0:
            continue
        # walk backwards over the trailing identifier
        i = len(arg) - 1
        while i >= 0 and (arg[i].isalnum() or arg[i] == '_'):
            i = i - 1
        name = arg[i + 1:]
        # whitespace left before the name is dropped by the normalisation
        decl_type = mormalizeTypeSpaces(arg[0:i + 1], function)
        ret.append((decl_type, name))
    return ret

85
def extractTypes(raw, function):
    """Normalise the spacing of a return type and index it.

    Works like mormalizeTypeSpaces() but records `function` in the
    global `ret_types` table; returns the single-space separated type.
    """
    global ret_types

    normalized = ' '.join(raw.split())
    ret_types.setdefault(normalized, []).append(function)
    return normalized

101
def parseMacro():
    """Consume one <MACRO> record from the global `input` stream.

    Reads lines up to "</MACRO>".  The record is stored in `macros` when
    one of its lines contains "#define", otherwise in `variables`; in
    both cases the comment slot starts empty and the kind is recorded in
    `identifiers_type`.

    Reading stops at end of file (the old loop never terminated on a
    truncated record) and a record without a <NAME> line is reported and
    ignored instead of raising NameError.
    """
    global input

    name = None
    is_variable = 1
    while 1:
        line = input.readline()
        if not line:
            break                      # EOF before the closing tag
        line = line.rstrip('\n')
        if line == "</MACRO>":
            break
        if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
            name = line[6:-7]
        elif line.find("#define") >= 0:
            is_variable = 0

    if name is None:
        print("unnamed <MACRO> record ignored")
        return
    if is_variable == 1:
        variables[name] = ''
        identifiers_type[name] = "variable"
    else:
        macros[name] = ''
        identifiers_type[name] = "macro"

Daniel Veillard61006472002-01-21 17:31:47 +0000122
def parseStruct():
    """Consume one <STRUCT> record from the global `input` stream.

    Reads lines up to "</STRUCT>", keeps the value of the <NAME> line and
    registers it in `structs` (empty comment) and `identifiers_type`.
    Stops at end of file instead of looping forever, and ignores a
    record that has no <NAME> line.
    """
    global input

    name = None
    while 1:
        line = input.readline()
        if not line:
            break                      # EOF before the closing tag
        line = line.rstrip('\n')
        if line == "</STRUCT>":
            break
        if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
            name = line[6:-7]

    if name is None:
        print("unnamed <STRUCT> record ignored")
        return
    structs[name] = ''
    identifiers_type[name] = "struct"

135
def parseTypedef():
    """Consume one <TYPEDEF> record from the global `input` stream.

    Reads lines up to "</TYPEDEF>", keeps the value of the <NAME> line
    and registers it in `typedefs` (empty comment) and
    `identifiers_type`.  Stops at end of file instead of looping forever,
    and ignores a record that has no <NAME> line.
    """
    global input

    name = None
    while 1:
        line = input.readline()
        if not line:
            break                      # EOF before the closing tag
        line = line.rstrip('\n')
        if line == "</TYPEDEF>":
            break
        if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
            name = line[6:-7]

    if name is None:
        print("unnamed <TYPEDEF> record ignored")
        return
    typedefs[name] = ''
    identifiers_type[name] = "typedef"

148
def parseEnum():
    """Consume one <ENUM> record from the global `input` stream.

    Lines containing "enum", "{", "}" or ";" are treated as C syntax and
    skipped.  On the remaining lines anything from "/*" on is dropped,
    the line is split on commas and the first word before any "=" is
    taken as a constant name when it starts with a letter.  Each constant
    is recorded as "const" in `identifiers_type`; the enum itself is
    stored as [constants, ''] in `enums`.

    Stops at end of file instead of looping forever, and ignores the
    enum entry itself when no <NAME> line was seen.
    """
    global input

    name = None
    consts = []
    while 1:
        line = input.readline()
        if not line:
            break                      # EOF before the closing tag
        line = line.rstrip('\n')
        if line == "</ENUM>":
            break
        if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
            name = line[6:-7]
            continue

        # declaration scaffolding carries no constant names
        is_syntax = 0
        for marker in ('enum', '{', '}', ';'):
            if line.find(marker) >= 0:
                is_syntax = 1
                break
        if is_syntax:
            continue

        comment = line.find('/*')
        if comment >= 0:
            line = line[0:comment]
        for decl in line.split(","):
            tokens = decl.split("=")[0].split()
            if len(tokens) >= 1:
                token = tokens[0]
                if token[0].isalpha():
                    consts.append(token)
                    identifiers_type[token] = "const"

    if name is None:
        print("unnamed <ENUM> record ignored")
        return
    enums[name] = [consts, '']
    identifiers_type[name] = "enum"

183
def parseStaticFunction():
    """Consume one <USER_FUNCTION> record from the global `input` stream.

    The <NAME> line gives the identifier, the <RETURNS> line the return
    type (registered through extractTypes()), and the last other line of
    the record is taken as the argument list.  The result is stored as
    [return type, args, ''] in `user_functions` and flagged "functype"
    in `identifiers_type`.

    Stops at end of file instead of looping forever, ignores a record
    without a <NAME> line, and uses an empty argument list when the
    record has no signature line.
    """
    global input

    name = None
    ret_type = None
    signature = None
    while 1:
        line = input.readline()
        if not line:
            break                      # EOF before the closing tag
        line = line.rstrip('\n')
        if line == "</USER_FUNCTION>":
            break
        if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
            name = line[6:-7]
        elif line[0:9] == "<RETURNS>" and line[-10:] == "</RETURNS>":
            ret_type = extractTypes(line[9:-10], name)
        else:
            signature = line

    if name is None:
        print("unnamed <USER_FUNCTION> record ignored")
        return
    if signature is None:
        args = []
    else:
        args = extractArgs(signature, name)
    user_functions[name] = [ret_type, args, '']
    identifiers_type[name] = "functype"

Daniel Veillard61006472002-01-21 17:31:47 +0000203
def parseFunction():
    """Consume one <FUNCTION> record from the global `input` stream.

    The <NAME> line gives the identifier, the <RETURNS> line the return
    type (registered through extractTypes()), and the last other line of
    the record is taken as the argument list.  The result is stored as
    [return type, args, ''] in `functions` and flagged "function" in
    `identifiers_type`.

    Stops at end of file instead of looping forever, ignores a record
    without a <NAME> line, and uses an empty argument list when the
    record has no signature line.
    """
    global input

    name = None
    ret_type = None
    signature = None
    while 1:
        line = input.readline()
        if not line:
            break                      # EOF before the closing tag
        line = line.rstrip('\n')
        if line == "</FUNCTION>":
            break
        if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
            name = line[6:-7]
        elif line[0:9] == "<RETURNS>" and line[-10:] == "</RETURNS>":
            ret_type = extractTypes(line[9:-10], name)
        else:
            signature = line

    if name is None:
        print("unnamed <FUNCTION> record ignored")
        return
    if signature is None:
        args = []
    else:
        args = extractArgs(signature, name)
    functions[name] = [ret_type, args, '']
    identifiers_type[name] = "function"

223
# Read libxml-decl.txt record by record; each opening tag is handed to
# the parser for that kind of record, which consumes up to the closing tag.
print("Parsing: libxml-decl.txt")
input = open('libxml-decl.txt')
record_parsers = {
    "<MACRO>": parseMacro,
    "<ENUM>": parseEnum,
    "<FUNCTION>": parseFunction,
    "<STRUCT>": parseStruct,
    "<TYPEDEF>": parseTypedef,
    "<USER_FUNCTION>": parseStaticFunction,
}
while 1:
    line = input.readline()
    if not line:
        break
    line = line[:-1]
    if line in record_parsers:
        record_parsers[line]()
    elif len(line) >= 1 and line[0] == "<":
        print("unhandled %s" % (line))
# the handle used to be left open until `input` was rebound
input.close()

print("Parsed: %d macros. %d structs, %d typedefs, %d enums" % (
    len(macros.keys()), len(structs.keys()), len(typedefs.keys()),
    len(enums)))
c = 0
for enum in enums.keys():
    consts = enums[enum][0]
    c = c + len(consts)
print(" %d variables, %d constants, %d functions and %d functypes" % (
    len(variables.keys()), c, len(functions.keys()),
    len(user_functions.keys())))
print("The functions manipulates %d different types" % (len(types.keys())))
print("The functions returns %d different types" % (len(ret_types.keys())))

258
##################################################################
#
# Parsing: libxml-decl-list.txt
#
##################################################################
def parseSection():
    """Consume one <SECTION> record from the global `input` stream.

    The <FILE> line names the section; every other non-empty line is an
    identifier belonging to it.  The name is appended to `sections`, the
    identifiers are stored in `files` and each one is mapped back to the
    section in `identifiers_file`.  When an identifier is a known enum,
    its constants are mapped to the same section.

    Stops at end of file instead of looping forever, and ignores a
    section that has no <FILE> line.
    """
    global input

    name = None
    tokens = []
    while 1:
        line = input.readline()
        if not line:
            break                      # EOF before the closing tag
        line = line.rstrip('\n')
        if line == "</SECTION>":
            break
        if line[0:6] == "<FILE>" and line[-7:] == "</FILE>":
            name = line[6:-7]
        elif len(line) > 0:
            tokens.append(line)

    if name is None:
        print("unnamed <SECTION> record ignored")
        return
    sections.append(name)
    files[name] = tokens
    for token in tokens:
        identifiers_file[token] = name
        #
        # Small transitivity for enum values
        #
        if token in enums:
            for const in enums[token][0]:
                identifiers_file[const] = name

289
# Read libxml-decl-list.txt; only <SECTION> records are understood.
print("Parsing: libxml-decl-list.txt")
input = open('libxml-decl-list.txt')
while 1:
    line = input.readline()
    if not line:
        break
    line = line[:-1]
    if line == "<SECTION>":
        parseSection()
    elif len(line) >= 1 and line[0] == "<":
        print("unhandled %s" % (line))
# the handle used to be left open for the rest of the run
input.close()

print("Parsed: %d files %d identifiers" % (len(files), len(identifiers_file.keys())))
##################################################################
#
# Parsing: xml/*.xml
# To enrich the existing info with extracted comments
#
##################################################################
309
# number of comments attached to a known symbol so far
nbcomments = 0

def insertComment(name, title, value):
    """Attach the comment `value` to the symbol called `name`.

    The symbol tables are searched in a fixed order (functions, typedefs,
    macros, variables, structs, enums, user functions) and the first one
    holding `name` receives the comment.  `title` is accepted but unused.
    An unknown name is reported as a lost comment and not counted.
    """
    global nbcomments

    if name in functions:
        functions[name][2] = value
    else:
        # these tables map the name directly to its comment
        for table in (typedefs, macros, variables, structs):
            if name in table:
                table[name] = value
                break
        else:
            if name in enums:
                enums[name][1] = value
            elif name in user_functions:
                # NOTE(review): this replaces the whole [type, args, comment]
                # entry with the comment string, unlike `functions` above.
                user_functions[name] = value
            else:
                print("lost comment %s: %s" % (name, value))
                return
    nbcomments += 1

333
334import os
335import xmllib
336try:
337 import sgmlop
338except ImportError:
339 sgmlop = None # accelerator not available
340
# set to a true value to trace the docParser callbacks
debug = 0

if sgmlop is None:
    # accelerator missing: getparser() falls back to SlowParser
    FastParser = None
else:
    class FastParser:
        """sgmlop based XML parser.  this is typically 15x faster
        than SlowParser..."""

        def __init__(self, target):
            # callbacks are forwarded straight to the target object
            self.finish_starttag = target.start
            self.finish_endtag = target.end
            self.handle_data = target.data

            # create the sgmlop parser and expose its feed() as our own
            self.parser = sgmlop.XMLParser()
            self.parser.register(self)
            self.feed = self.parser.feed
            # the predefined XML entities
            self.entity = {
                "amp": "&", "gt": ">", "lt": "<",
                "apos": "'", "quot": '"'
            }

        def close(self):
            try:
                self.parser.close()
            finally:
                # nuke circular reference
                self.parser = self.feed = None

        def handle_entityref(self, entity):
            # <string> entity: known ones are decoded, the others are
            # passed through as written
            try:
                text = self.entity[entity]
            except KeyError:
                text = "&%s;" % entity
            self.handle_data(text)

379
380
class SlowParser(xmllib.XMLParser):
    """Fallback parser built on xmllib from Python's standard library;
    slow but safe."""

    def __init__(self, target):
        # route the xmllib callbacks to the target before the base
        # class is initialised
        self.unknown_starttag = target.start
        self.handle_data = target.data
        self.unknown_endtag = target.end
        xmllib.XMLParser.__init__(self)

390
def getparser(target = None):
    """Return a (parser, target) pair.

    The parser is a FastParser when sgmlop is available and a SlowParser
    otherwise; a fresh docParser is used when no target is supplied.
    """
    if target is None:
        target = docParser()
    if FastParser:
        parser = FastParser(target)
    else:
        parser = SlowParser(target)
    return parser, target

399
class docParser:
    """Parser target collecting one comment per <refsect2> element.

    Inside a <refsect2> the first <anchor id=...> gives the symbol name,
    the <title> gives the title and the first <para> gives the comment;
    the triple is passed to insertComment() when the element closes.
    """

    def __init__(self):
        self._methodname = None
        self._data = []
        self.id = None
        self.title = None
        self.descr = None
        self.string = None

    def close(self):
        if debug:
            print("close")

    def getmethodname(self):
        return self._methodname

    def _flatten(self):
        # Join the collected text, turn line breaks into spaces, halve
        # runs of spaces three times and drop leading spaces.
        # NOTE(review): the two-space pattern is reconstructed; the
        # literal was damaged in the copy this was restored from.
        text = ''.join(self._data)
        text = text.replace('\n', ' ')
        text = text.replace('\r', ' ')
        for _ in range(3):
            text = text.replace('  ', ' ')
        return text.lstrip(' ')

    def data(self, text):
        if debug:
            print("data %s" % text)
        self._data.append(text)

    def start(self, tag, attrs):
        if debug:
            print("start %s, %s" % (tag, attrs))
        if tag == 'refsect2':
            # a new symbol description begins: forget the previous one
            self.id = None
            self.title = None
            self.descr = None
            self.string = None
        elif tag == 'para' or tag == 'title':
            self._data = []
        elif tag == 'anchor' and self.id is None:
            if 'id' in attrs:
                # turn the anchor id back into an identifier
                anchor = attrs['id']
                anchor = anchor.replace('-CAPS', '')
                self.id = anchor.replace('-', '_')

    def end(self, tag):
        if debug:
            print("end %s" % tag)
        if tag == 'refsect2':
            insertComment(self.id, self.title, self.string)
        elif tag == 'para':
            # only the first paragraph of the section is kept
            if self.string is None:
                self.string = self._flatten()
            self._data = []
        elif tag == 'title':
            self.title = self._flatten()

469 self.title = str
470
# Feed every file of the xml/ directory to a fresh parser; the comments
# found are attached to the symbol tables through insertComment().
xmlfiles = 0
filenames = os.listdir("xml")
for filename in filenames:
    try:
        f = open("xml/" + filename, 'r')
    except IOError:
        # report the file that failed (this used to print `file`, which
        # is not the loop variable)
        print("%s : %s" % (filename, sys.exc_info()[1]))
        continue
    try:
        data = f.read()
    finally:
        f.close()
    (parser, target) = getparser()
    parser.feed(data)
    parser.close()
    xmlfiles = xmlfiles + 1

print("Parsed: %d XML files collexting %d comments" % (xmlfiles, nbcomments))

486
##################################################################
#
# Saving: libxml2-api.xml
#
##################################################################
492
def escape(raw):
    """Return raw made safe for XML text and single-quoted attributes.

    '&' is replaced first so that the entities produced for '<', '>' and
    "'" are not escaped a second time.  The apostrophe is escaped because
    the result is written inside info='...' attributes.
    """
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    return raw
Daniel Veillard61006472002-01-21 17:31:47 +0000497
# Write the collected description: first the files with the symbols
# they export, then every symbol with its kind, file and comment.
print("Saving XML description libxml2-api.xml")
output = open("libxml2-api.xml", "w")
output.write("<api name='libxml2'>\n")
output.write(" <files>\n")
for file_name in files.keys():
    output.write(" <file name='%s'>\n" % file_name)
    for symbol in files[file_name]:
        output.write(" <exports symbol='%s'/>\n" % (symbol))
    output.write(" </file>\n")
output.write(" </files>\n")

output.write(" <symbols>\n")
symbols = list(macros.keys())
symbols.extend(structs.keys())
# variables used to be appended to the `variables` dict itself, which
# failed as soon as one variable was known
symbols.extend(variables.keys())
symbols.extend(typedefs.keys())
for enum_name in enums.keys():
    symbols.append(enum_name)
    symbols.extend(enums[enum_name][0])
symbols.extend(functions.keys())
symbols.extend(user_functions.keys())
symbols.sort()

# kinds whose only payload is a comment written as an info attribute
info_tables = {
    'macro': macros,
    'struct': structs,
    'functype': user_functions,
    'variable': variables,
    'typedef': typedefs,
}

prev = None
for symbol in symbols:
    if symbol == prev:
        # sorted list: a redefined symbol shows up twice in a row
        continue
    prev = symbol
    if symbol not in identifiers_type:
        print("Symbol %s not found in identifiers list" % (symbol))
        continue

    kind = identifiers_type[symbol]
    origin = identifiers_file.get(symbol)

    output.write(" <%s name='%s'" % (kind, symbol))
    if origin != None:
        output.write(" file='%s'" % (origin))
    if kind == "function":
        output.write(">\n")
        (ret, args, doc) = functions[symbol]
        if doc != None and doc != '':
            output.write(" <info>%s</info>\n" % (escape(doc)))
        output.write(" <return type='%s'/>\n" % (ret))
        for arg in args:
            output.write(" <arg name='%s' type='%s'/>\n" % (
                         arg[1], arg[0]))
        output.write(" </%s>\n" % (kind))
    elif kind in info_tables:
        info = info_tables[kind][symbol]
        if isinstance(info, list):
            # a functype that never received a comment is still stored
            # as [type, args, comment]; only the comment slot is text
            info = info[2]
        if info != None and info != '':
            output.write(" info='%s'/>\n" % (escape(info)))
        else:
            output.write("/>\n")
    else:
        output.write("/>\n")
output.write(" </symbols>\n")
output.write("</api>\n")
# make sure the description is flushed to disk
output.close()
print("generated XML for %d symbols" % (len(symbols)))