blob: d6d954963389ad67882ba1bd9a285558971b3b38 [file] [log] [blame]
#!/usr/bin/python -u
#
# This is the API builder: it parses the C sources and builds the
# formal description of the API in XML.
#
# See Copyright for the status of this software.
#
# daniel@veillard.com
#
Daniel Veillard540a31a2003-01-21 11:21:07 +000010import os, sys
Daniel Veillarda9b66d02002-12-11 14:23:49 +000011import string
12import glob
13
# When non-zero, CParser.token() prints every token it returns to its caller.
debug=0
15
#
# C parser analysis code
#
# Maps a file name to a short human-readable reason for ignoring it.
# Presumably consulted by the file scanner, which is not visible in this
# part of the file - verify against the caller.
# NOTE(review): the "trio" key has no extension, unlike every other
# entry - confirm this is intentional.
ignored_files = {
  "trio": "too many non standard macros",
  "trio.c": "too many non standard macros",
  "trionan.c": "too many non standard macros",
  "triostr.c": "too many non standard macros",
  "acconfig.h": "generated portability layer",
  "config.h": "generated portability layer",
  "libxml.h": "internal only",
  "testOOM.c": "out of memory tester",
  "testOOMlib.h": "out of memory tester",
  "testOOMlib.c": "out of memory tester",
  "pattern.c": "not integrated yet",
  "pattern.h": "not integrated yet",
}
33
# Names that CParser.token() drops from the token stream.  Each value is
# a (count, reason) pair: when the name is met, the name itself and the
# `count` lexer tokens that follow it are discarded.  `reason` is only
# informative.
ignored_words = {
  "WINAPI": (0, "Windows keyword"),
  "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
  "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
  "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XMLCALL": (0, "Special macro for win32 calls"),
  "XSLTCALL": (0, "Special macro for win32 calls"),
  "EXSLTCALL": (0, "Special macro for win32 calls"),
  "__declspec": (3, "Windows keyword"),
  "ATTRIBUTE_UNUSED": (0, "macro keyword"),
  "LIBEXSLT_PUBLIC": (0, "macro keyword"),
  "X_IN_Y": (5, "macro function builder"),
}
51
def escape(raw):
    """Return raw with the five XML special characters replaced by
    their predefined entities (&amp; &lt; &gt; &apos; &quot;)."""
    # The ampersand has to go first: handled later, it would re-escape
    # the '&' introduced by the other four entities.
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
59
def uniq(items):
    """Return the distinct values of items, in dictionary key order.

    The values must be hashable since they are used as dictionary keys.
    """
    seen = dict.fromkeys(items, 1)
    return seen.keys()
65
class identifier:
    """Record describing one named entity found in the C sources.

    It stores the name, the module it comes from, its type, the line
    number where it was seen, two free-form payloads (info and extra)
    and a flag telling whether the entity is static.
    """
    def __init__(self, name, module=None, type=None, lineno = 0,
                 info=None, extra=None):
        self.name = name
        self.module = module
        self.type = type
        self.lineno = lineno
        self.info = info
        self.extra = extra
        self.static = 0

    def __repr__(self):
        parts = ["%s %s:" % (self.type, self.name)]
        if self.static:
            parts.append(" static")
        if self.module is not None:
            parts.append(" from %s" % (self.module))
        if self.info is not None:
            parts.append(" " + repr(self.info))
        if self.extra is not None:
            parts.append(" " + repr(self.extra))
        return "".join(parts)

    # Plain setters.
    def set_module(self, module):
        self.module = module
    def set_type(self, type):
        self.type = type
    def set_info(self, info):
        self.info = info
    def set_extra(self, extra):
        self.extra = extra
    def set_lineno(self, lineno):
        self.lineno = lineno
    def set_static(self, static):
        self.static = static

    # Plain getters.
    def get_name(self):
        return self.name
    def get_module(self):
        return self.module
    def get_type(self):
        return self.type
    def get_info(self):
        return self.info
    def get_lineno(self):
        return self.lineno
    def get_extra(self):
        return self.extra
    def get_static(self):
        return self.static

    def update(self, module, type = None, info = None, extra=None):
        """Complete the record with newly found data.

        module and type are only filled in when still unset, while a
        non-None info or extra always overrides the stored value.
        """
        if self.module is None and module is not None:
            self.set_module(module)
        if self.type is None and type is not None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
127
128
class index:
    """Registry of the identifiers collected while parsing C sources.

    Every record is stored in self.identifiers and, depending on its
    type, in one of the per-kind dictionaries (functions, variables,
    includes, structs, enums, typedefs, macros).  Records created by
    add_ref() are stored in self.references instead.
    """
    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}

    def _record(self, name, module, static, type, lineno, info, extra):
        """Create the record for name, or update the existing one."""
        d = self.identifiers.get(name)
        if d is None:
            d = identifier(name, module, type, lineno, info, extra)
            self.identifiers[name] = d
        else:
            # identifier.update() takes no line number.  The former call
            # passed one, so it always raised a TypeError which a bare
            # "except:" turned into a silent replacement of the record.
            d.update(module, type, info, extra)
        if static == 1:
            d.set_static(1)
        return d

    def add_ref(self, name, module, static, type, lineno, info=None, extra=None):
        """Register a reference to name; names starting with '__' are
        ignored.  Always returns None."""
        if name[0:2] == '__':
            return None
        d = self._record(name, module, static, type, lineno, info, extra)
        if type is not None:
            self.references[name] = d

    def add(self, name, module, static, type, lineno, info=None, extra=None):
        """Register a definition of name and return its record.

        Names starting with '__' are ignored and yield None.  A type
        which is not one of the known kinds is reported on stdout; the
        record is then only kept in self.identifiers.
        """
        if name[0:2] == '__':
            return None
        d = self._record(name, module, static, type, lineno, info, extra)

        if type is not None:
            containers = {
                "function": self.functions,
                "functype": self.functions,
                "variable": self.variables,
                "include": self.includes,
                "struct": self.structs,
                "enum": self.enums,
                "typedef": self.typedefs,
                "macro": self.macros,
            }
            if type in containers:
                containers[type][name] = d
            else:
                print("Unable to register type  %s" % (type,))
        return d

    def _absorb(self, kind, mine, theirs, overrides_macro = 0):
        """Copy the records of theirs missing from mine, reporting the
        ones which are already there."""
        for id in list(theirs.keys()):
            #
            # macro might be used to override functions or variables
            # definitions
            #
            if overrides_macro and id in self.macros:
                del self.macros[id]
            if id in mine:
                print("%s %s from %s redeclared in %s" % (
                    kind, id, mine[id].module, theirs[id].module))
            else:
                mine[id] = theirs[id]
                self.identifiers[id] = theirs[id]

    def merge(self, idx):
        """Merge the definitions of the index idx into this one.

        Existing records are never overridden, clashes are reported on
        stdout.  The order of the steps matters: macros are filtered
        against the functions and variables merged before them, and
        against the enums known before the enums of idx are merged.
        """
        self._absorb("function", self.functions, idx.functions, 1)
        self._absorb("variable", self.variables, idx.variables, 1)
        self._absorb("struct", self.structs, idx.structs)
        self._absorb("typedef", self.typedefs, idx.typedefs)
        for id in list(idx.macros.keys()):
            #
            # macro might be used to override functions or variables
            # definitions
            #
            if id in self.variables or id in self.functions or \
               id in self.enums:
                continue
            if id in self.macros:
                print("macro %s from %s redeclared in %s" % (
                    id, self.macros[id].module, idx.macros[id].module))
            else:
                self.macros[id] = idx.macros[id]
                self.identifiers[id] = idx.macros[id]
        self._absorb("enum", self.enums, idx.enums)

    def merge_public(self, idx):
        """Update the functions already known here with the type, info
        and extra data of their counterpart in idx."""
        for id in list(idx.functions.keys()):
            if id in self.functions:
                up = idx.functions[id]
                self.functions[id].update(None, up.type, up.info, up.extra)
        # TODO: do the same for variables.

    def analyze_dict(self, type, dict):
        """Print how many records of dict are public (not static)."""
        count = len(dict)
        public = 0
        for name in dict.keys():
            if dict[name].static == 0:
                public = public + 1
        if count != public:
            print(" %d %s , %d public" % (count, type, public))
        elif count != 0:
            print(" %d public %s" % (count, type))

    def analyze(self):
        """Print the public/total statistics of the main kinds."""
        self.analyze_dict("functions", self.functions)
        self.analyze_dict("variables", self.variables)
        self.analyze_dict("structs", self.structs)
        self.analyze_dict("typedefs", self.typedefs)
        self.analyze_dict("macros", self.macros)
290
class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line.

       Tokens are (kind, text) pairs where kind is one of 'preproc',
       'string', 'comment', 'name', 'sep' or 'op'."""

    # Characters ending a name.
    _DELIMITERS = " \t(){}:;,+-*/%&!|[]=><"
    # Characters which are a 'sep' token on their own.
    _SEPARATORS = "(){}:;,[]"
    # Characters starting an 'op' token, and those able to extend it.
    _OP_START = "+-*><=/%&!|."
    _OP_NEXT = "+-*><=/%&!|"

    def __init__(self, input):
        self.input = input
        self.tokens = []
        self.line = ""
        self.lineno = 0
        # Last token returned; set here so debug() is usable at any time.
        self.last = None

    def getline(self):
        """Return the next non blank line, stripped, with backslash
        continuations glued together; None at the end of the input."""
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = line.strip()
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = n.strip()
                if not n:
                    break
                line = line + n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        """Push a token back, it will be the next one returned."""
        self.tokens.insert(0, token)

    def debug(self):
        print("Last token:  %s" % (self.last,))
        print("Token queue:  %s" % (self.tokens,))
        print("Line %d end:  %s" % (self.lineno, self.line))

    def token(self):
        """Return the next token, or None at the end of the input (also
        when the input ends inside a string or a comment)."""
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line is None:
                return None

            if line[0] == '#':
                # every word of a preprocessor line is a 'preproc' token
                self.tokens = [('preproc', word) for word in line.split()]
                break
            l = len(line)
            if line[0] == '"' or line[0] == "'":
                end = line[0]
                line = line[1:]
                found = 0
                tok = ""
                while found == 0:
                    i = 0
                    l = len(line)
                    while i < l:
                        if line[i] == end:
                            self.line = line[i+1:]
                            line = line[:i]
                            l = i
                            found = 1
                            break
                        if line[i] == '\\':
                            # skip the escaped character
                            i = i + 1
                        i = i + 1
                    tok = tok + line
                    if found == 0:
                        line = self.getline()
                        if line is None:
                            return None
                self.last = ('string', tok)
                return self.last

            if l >= 2 and line[0] == '/' and line[1] == '*':
                line = line[2:]
                found = 0
                tok = ""
                while found == 0:
                    i = 0
                    l = len(line)
                    while i < l:
                        if line[i] == '*' and i+1 < l and line[i+1] == '/':
                            self.line = line[i+2:]
                            # Keep the text up to the terminator, less
                            # the single blank usually preceding it.
                            # Slicing with [:i-1] ate a real character
                            # in "foo*/" and went wrong when i was 0.
                            line = line[:i]
                            if line[-1:] == ' ' or line[-1:] == '\t':
                                line = line[:-1]
                            l = i
                            found = 1
                            break
                        i = i + 1
                    if tok != "":
                        tok = tok + "\n"
                    tok = tok + line
                    if found == 0:
                        line = self.getline()
                        if line is None:
                            return None
                self.last = ('comment', tok)
                return self.last
            if l >= 2 and line[0] == '/' and line[1] == '/':
                line = line[2:]
                self.last = ('comment', line)
                return self.last

            # Only tokenize up to the next comment or string; the rest
            # is kept in self.line for the following call.
            i = 0
            while i < l:
                c = line[i]
                if c == '"' or c == "'" or (
                   c == '/' and i+1 < l and line[i+1] in '/*'):
                    self.line = line[i:]
                    line = line[:i]
                    break
                i = i + 1
            l = len(line)
            i = 0
            while i < l:
                c = line[i]
                if c == ' ' or c == '\t':
                    i = i + 1
                    continue
                if c in self._SEPARATORS:
                    self.tokens.append(('sep', c))
                    i = i + 1
                    continue
                if c in self._OP_START:
                    if c == '.' and i + 2 < l and \
                       line[i+1] == '.' and line[i+2] == '.':
                        self.tokens.append(('name', '...'))
                        i = i + 3
                        continue
                    j = i + 1
                    if j < l and line[j] in self._OP_NEXT:
                        self.tokens.append(('op', line[i:j+1]))
                        i = j + 1
                    else:
                        self.tokens.append(('op', c))
                        i = i + 1
                    continue
                # anything else starts a name running up to the next
                # delimiter
                s = i
                while i < l and line[i] not in self._DELIMITERS:
                    i = i + 1
                self.tokens.append(('name', line[s:i]))

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok
496
497class CParser:
498 """The C module parser"""
499 def __init__(self, filename, idx = None):
500 self.filename = filename
501 if len(filename) > 2 and filename[-2:] == '.h':
502 self.is_header = 1
503 else:
504 self.is_header = 0
505 self.input = open(filename)
506 self.lexer = CLexer(self.input)
507 if idx == None:
508 self.index = index()
509 else:
510 self.index = idx
511 self.top_comment = ""
512 self.last_comment = ""
513 self.comment = None
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000514 self.collect_ref = 0
515
516 def collect_references(self):
517 self.collect_ref = 1
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000518
519 def lineno(self):
520 return self.lexer.getlineno()
521
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000522 def index_add(self, name, module, static, type, info=None, extra = None):
523 self.index.add(name, module, static, type, self.lineno(),
524 info, extra)
525
526 def index_add_ref(self, name, module, static, type, info=None,
527 extra = None):
528 self.index.add_ref(name, module, static, type, self.lineno(),
529 info, extra)
530
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000531 def error(self, msg, token=-1):
532 print "Parse Error: " + msg
533 if token != -1:
534 print "Got token ", token
535 self.lexer.debug()
536 sys.exit(1)
537
538 def debug(self, msg, token=-1):
539 print "Debug: " + msg
540 if token != -1:
541 print "Got token ", token
542 self.lexer.debug()
543
544 def parseComment(self, token):
545 if self.top_comment == "":
546 self.top_comment = token[1]
547 if self.comment == None or token[1][0] == '*':
548 self.comment = token[1];
549 else:
550 self.comment = self.comment + token[1]
551 token = self.lexer.token()
552 return token
553
    #
    # Parse the comment block associated with a macro
    #
557 def parseMacroComment(self, name, quiet = 0):
558 if name[0:2] == '__':
559 quiet = 1
560
561 args = []
562 desc = ""
563
564 if self.comment == None:
565 if not quiet:
566 print "Missing comment for macro %s" % (name)
567 return((args, desc))
568 if self.comment[0] != '*':
569 if not quiet:
570 print "Missing * in macro comment for %s" % (name)
571 return((args, desc))
572 lines = string.split(self.comment, '\n')
573 if lines[0] == '*':
574 del lines[0]
575 if lines[0] != "* %s:" % (name):
576 if not quiet:
577 print "Misformatted macro comment for %s" % (name)
578 print " Expecting '* %s:' got '%s'" % (name, lines[0])
579 return((args, desc))
580 del lines[0]
581 while lines[0] == '*':
582 del lines[0]
583 while len(lines) > 0 and lines[0][0:3] == '* @':
584 l = lines[0][3:]
585 try:
586 (arg, desc) = string.split(l, ':', 1)
587 desc=string.strip(desc)
588 arg=string.strip(arg)
589 except:
590 if not quiet:
591 print "Misformatted macro comment for %s" % (name)
592 print " problem with '%s'" % (lines[0])
593 del lines[0]
594 continue
595 del lines[0]
596 l = string.strip(lines[0])
597 while len(l) > 2 and l[0:3] != '* @':
598 while l[0] == '*':
599 l = l[1:]
600 desc = desc + ' ' + string.strip(l)
601 del lines[0]
602 if len(lines) == 0:
603 break
604 l = lines[0]
605 args.append((arg, desc))
606 while len(lines) > 0 and lines[0] == '*':
607 del lines[0]
608 desc = ""
609 while len(lines) > 0:
610 l = lines[0]
611 while len(l) > 0 and l[0] == '*':
612 l = l[1:]
613 l = string.strip(l)
614 desc = desc + " " + l
615 del lines[0]
616
617 desc = string.strip(desc)
618
619 if quiet == 0:
620 if desc == "":
621 print "Macro comment for %s lack description of the macro" % (name)
622
623 return((args, desc))
624
    #
    # Parse a comment block and merge the information found in the
    # parameter descriptions; finally return a block as complete
    # as possible
    #
630 def mergeFunctionComment(self, name, description, quiet = 0):
631 if name == 'main':
632 quiet = 1
633 if name[0:2] == '__':
634 quiet = 1
635
636 (ret, args) = description
637 desc = ""
638 retdesc = ""
639
640 if self.comment == None:
641 if not quiet:
642 print "Missing comment for function %s" % (name)
643 return(((ret[0], retdesc), args, desc))
644 if self.comment[0] != '*':
645 if not quiet:
646 print "Missing * in function comment for %s" % (name)
647 return(((ret[0], retdesc), args, desc))
648 lines = string.split(self.comment, '\n')
649 if lines[0] == '*':
650 del lines[0]
651 if lines[0] != "* %s:" % (name):
652 if not quiet:
653 print "Misformatted function comment for %s" % (name)
654 print " Expecting '* %s:' got '%s'" % (name, lines[0])
655 return(((ret[0], retdesc), args, desc))
656 del lines[0]
657 while lines[0] == '*':
658 del lines[0]
659 nbargs = len(args)
660 while len(lines) > 0 and lines[0][0:3] == '* @':
661 l = lines[0][3:]
662 try:
663 (arg, desc) = string.split(l, ':', 1)
664 desc=string.strip(desc)
665 arg=string.strip(arg)
666 except:
667 if not quiet:
668 print "Misformatted function comment for %s" % (name)
669 print " problem with '%s'" % (lines[0])
670 del lines[0]
671 continue
672 del lines[0]
673 l = string.strip(lines[0])
674 while len(l) > 2 and l[0:3] != '* @':
675 while l[0] == '*':
676 l = l[1:]
677 desc = desc + ' ' + string.strip(l)
678 del lines[0]
679 if len(lines) == 0:
680 break
681 l = lines[0]
682 i = 0
683 while i < nbargs:
684 if args[i][1] == arg:
685 args[i] = (args[i][0], arg, desc)
686 break;
687 i = i + 1
688 if i >= nbargs:
689 if not quiet:
William M. Brackb1d53162003-11-18 06:54:40 +0000690 print "Unable to find arg %s from function comment for %s" % (
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000691 arg, name)
692 while len(lines) > 0 and lines[0] == '*':
693 del lines[0]
694 desc = ""
695 while len(lines) > 0:
696 l = lines[0]
697 while len(l) > 0 and l[0] == '*':
698 l = l[1:]
699 l = string.strip(l)
700 if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return":
701 try:
702 l = string.split(l, ' ', 1)[1]
703 except:
704 l = ""
705 retdesc = string.strip(l)
706 del lines[0]
707 while len(lines) > 0:
708 l = lines[0]
709 while len(l) > 0 and l[0] == '*':
710 l = l[1:]
711 l = string.strip(l)
712 retdesc = retdesc + " " + l
713 del lines[0]
714 else:
715 desc = desc + " " + l
716 del lines[0]
717
718 retdesc = string.strip(retdesc)
719 desc = string.strip(desc)
720
721 if quiet == 0:
722 #
723 # report missing comments
724 #
725 i = 0
726 while i < nbargs:
727 if args[i][2] == None and args[i][0] != "void" and args[i][1] != None:
728 print "Function comment for %s lack description of arg %s" % (name, args[i][1])
729 i = i + 1
730 if retdesc == "" and ret[0] != "void":
731 print "Function comment for %s lack description of return value" % (name)
732 if desc == "":
733 print "Function comment for %s lack description of the function" % (name)
734
735
736 return(((ret[0], retdesc), args, desc))
737
738 def parsePreproc(self, token):
739 name = token[1]
740 if name == "#include":
741 token = self.lexer.token()
742 if token == None:
743 return None
744 if token[0] == 'preproc':
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000745 self.index_add(token[1], self.filename, not self.is_header,
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000746 "include")
747 return self.lexer.token()
748 return token
749 if name == "#define":
750 token = self.lexer.token()
751 if token == None:
752 return None
753 if token[0] == 'preproc':
754 # TODO macros with arguments
755 name = token[1]
756 lst = []
757 token = self.lexer.token()
758 while token != None and token[0] == 'preproc' and \
759 token[1][0] != '#':
760 lst.append(token[1])
761 token = self.lexer.token()
762 try:
763 name = string.split(name, '(') [0]
764 except:
765 pass
766 info = self.parseMacroComment(name, not self.is_header)
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000767 self.index_add(name, self.filename, not self.is_header,
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000768 "macro", info)
769 return token
770 token = self.lexer.token()
771 while token != None and token[0] == 'preproc' and \
772 token[1][0] != '#':
773 token = self.lexer.token()
774 return token
775
    #
    # Token acquisition on top of the lexer; it handles the
    # preprocessor and comments internally since they are logically
    # not part of the program structure.
    #
781 def token(self):
782 global ignored_words
783
784 token = self.lexer.token()
785 while token != None:
786 if token[0] == 'comment':
787 token = self.parseComment(token)
788 continue
789 elif token[0] == 'preproc':
790 token = self.parsePreproc(token)
791 continue
792 elif token[0] == "name" and ignored_words.has_key(token[1]):
793 (n, info) = ignored_words[token[1]]
794 i = 0
795 while i < n:
796 token = self.lexer.token()
797 i = i + 1
798 token = self.lexer.token()
799 continue
800 else:
Daniel Veillarde8ba84e2003-11-18 13:54:15 +0000801 if debug:
802 print "=> ", token
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000803 return token
804 return None
805
    #
    # Parse a typedef; it records the type and its name.
    #
809 def parseTypedef(self, token):
810 if token == None:
811 return None
812 token = self.parseType(token)
813 if token == None:
814 self.error("parsing typedef")
815 return None
816 base_type = self.type
817 type = base_type
818 #self.debug("end typedef type", token)
819 while token != None:
820 if token[0] == "name":
821 name = token[1]
822 signature = self.signature
823 if signature != None:
Daniel Veillard000eafb2002-12-12 10:04:22 +0000824 type = string.split(type, '(')[0]
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000825 d = self.mergeFunctionComment(name,
826 ((type, None), signature), 1)
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000827 self.index_add(name, self.filename, not self.is_header,
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000828 "functype", d)
829 else:
830 if base_type == "struct":
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000831 self.index_add(name, self.filename, not self.is_header,
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000832 "struct", type)
833 base_type = "struct " + name
834 else:
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000835 self.index_add(name, self.filename, not self.is_header,
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000836 "typedef", type)
837 token = self.token()
838 else:
839 self.error("parsing typedef: expecting a name")
840 return token
841 #self.debug("end typedef", token)
842 if token != None and token[0] == 'sep' and token[1] == ',':
843 type = base_type
844 token = self.token()
845 while token != None and token[0] == "op":
846 type = type + token[1]
847 token = self.token()
848 elif token != None and token[0] == 'sep' and token[1] == ';':
849 break;
850 elif token != None and token[0] == 'name':
851 type = base_type
852 continue;
853 else:
854 self.error("parsing typedef: expecting ';'", token)
855 return token
856 token = self.token()
857 return token
858
    #
    # Parse a C code block, used for functions; it parses up to and
    # including the balancing }
    #
863 def parseBlock(self, token):
864 while token != None:
865 if token[0] == "sep" and token[1] == "{":
866 token = self.token()
867 token = self.parseBlock(token)
868 elif token[0] == "sep" and token[1] == "}":
869 self.comment = None
870 token = self.token()
871 return token
872 else:
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000873 if self.collect_ref == 1:
874 oldtok = token
875 token = self.token()
876 if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
877 if token[0] == "sep" and token[1] == "(":
878 self.index_add_ref(oldtok[1], self.filename,
879 0, "function")
880 token = self.token()
881 elif token[0] == "name":
882 token = self.token()
883 if token[0] == "sep" and (token[1] == ";" or
884 token[1] == "," or token[1] == "="):
885 self.index_add_ref(oldtok[1], self.filename,
886 0, "type")
887 elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
888 self.index_add_ref(oldtok[1], self.filename,
889 0, "typedef")
890 elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
891 self.index_add_ref(oldtok[1], self.filename,
892 0, "typedef")
893
894 else:
895 token = self.token()
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000896 return token
897
    #
    # Parse a C struct definition up to the balancing }
    #
901 def parseStruct(self, token):
902 fields = []
903 #self.debug("start parseStruct", token)
904 while token != None:
905 if token[0] == "sep" and token[1] == "{":
906 token = self.token()
907 token = self.parseTypeBlock(token)
908 elif token[0] == "sep" and token[1] == "}":
909 self.struct_fields = fields
910 #self.debug("end parseStruct", token)
911 #print fields
912 token = self.token()
913 return token
914 else:
915 base_type = self.type
916 #self.debug("before parseType", token)
917 token = self.parseType(token)
918 #self.debug("after parseType", token)
919 if token != None and token[0] == "name":
920 fname = token[1]
921 token = self.token()
922 if token[0] == "sep" and token[1] == ";":
923 self.comment = None
924 token = self.token()
925 fields.append((self.type, fname, self.comment))
926 self.comment = None
927 else:
928 self.error("parseStruct: expecting ;", token)
929 elif token != None and token[0] == "sep" and token[1] == "{":
930 token = self.token()
931 token = self.parseTypeBlock(token)
932 if token != None and token[0] == "name":
933 token = self.token()
934 if token != None and token[0] == "sep" and token[1] == ";":
935 token = self.token()
936 else:
937 self.error("parseStruct: expecting ;", token)
938 else:
939 self.error("parseStruct: name", token)
940 token = self.token()
941 self.type = base_type;
942 self.struct_fields = fields
943 #self.debug("end parseStruct", token)
944 #print fields
945 return token
946
    #
    # Parse a C enum block, up to the balancing }
    #
950 def parseEnumBlock(self, token):
951 self.enums = []
952 name = None
953 self.comment = None
954 comment = ""
Daniel Veillard000eafb2002-12-12 10:04:22 +0000955 value = "0"
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000956 while token != None:
957 if token[0] == "sep" and token[1] == "{":
958 token = self.token()
959 token = self.parseTypeBlock(token)
960 elif token[0] == "sep" and token[1] == "}":
961 if name != None:
962 if self.comment != None:
963 comment = self.comment
964 self.comment = None
965 self.enums.append((name, value, comment))
966 token = self.token()
967 return token
968 elif token[0] == "name":
969 if name != None:
970 if self.comment != None:
971 comment = string.strip(self.comment)
972 self.comment = None
973 self.enums.append((name, value, comment))
974 name = token[1]
975 comment = ""
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000976 token = self.token()
977 if token[0] == "op" and token[1][0] == "=":
Daniel Veillard000eafb2002-12-12 10:04:22 +0000978 value = ""
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000979 if len(token[1]) > 1:
980 value = token[1][1:]
981 token = self.token()
982 while token[0] != "sep" or (token[1] != ',' and
983 token[1] != '}'):
984 value = value + token[1]
985 token = self.token()
Daniel Veillard000eafb2002-12-12 10:04:22 +0000986 else:
987 try:
988 value = "%d" % (int(value) + 1)
989 except:
990 print "Failed to compute value of enum %s" % (name)
991 value=""
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000992 if token[0] == "sep" and token[1] == ",":
993 token = self.token()
994 else:
995 token = self.token()
996 return token
997
    #
    # Parse a C definition block, used for structs; it parses up to
    # the balancing }
    #
1002 def parseTypeBlock(self, token):
1003 while token != None:
1004 if token[0] == "sep" and token[1] == "{":
1005 token = self.token()
1006 token = self.parseTypeBlock(token)
1007 elif token[0] == "sep" and token[1] == "}":
1008 token = self.token()
1009 return token
1010 else:
1011 token = self.token()
1012 return token
1013
1014 #
1015 # Parse a type: the fact that the type name can either occur after
1016 # the definition or within the definition makes it a little harder
1017 # if inside, the name token is pushed back before returning
1018 #
def parseType(self, token):
    """Parse a C type starting at token and record its text in self.type.

    self.type, self.struct_fields and self.signature are reset on entry.
    Struct bodies, enum bodies and function-pointer signatures are
    delegated to self.parseStruct, self.parseEnumBlock and
    self.parseSignature; enum values collected in self.enums are
    registered through self.index_add.

    The declared name can appear after the type or inside it (function
    pointers, arrays, bit-fields).  When the name had to be read as
    lookahead, the token following it is pushed back on the lexer and
    the name token is returned as the current token.

    Returns the current token after the type, or None at end of input.
    """
    def add_word(word):
        # append one word to the type text, space separated
        if self.type == "":
            self.type = word
        else:
            self.type = self.type + " " + word

    self.type = ""
    self.struct_fields = []
    self.signature = None
    if token is None:
        return token

    # leading qualifiers
    while token is not None and token[0] == "name" and (
            token[1] == "const" or token[1] == "unsigned"):
        add_word(token[1])
        token = self.token()
    if token is None:
        # input ended right after the qualifiers
        return token

    if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
        add_word(token[1])
        # "long int" / "short int": absorb the optional "int" so that it
        # is not mistaken for the declared name; anything else is pushed
        # back untouched.
        lookahead = self.token()
        if lookahead is not None and lookahead[0] == "name" and \
           lookahead[1] == "int":
            add_word(lookahead[1])
        elif lookahead is not None:
            self.lexer.push(lookahead)

    elif token[0] == "name" and token[1] == "struct":
        add_word(token[1])
        token = self.token()
        nametok = None
        if token is not None and token[0] == "name":
            nametok = token
            token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseStruct(token)
        elif token is not None and token[0] == "op" and token[1] == "*":
            if nametok is None:
                # "struct *" without a tag name cannot be described
                self.error("struct : expecting name", token)
                return token
            self.type = self.type + " " + nametok[1] + " *"
            token = self.token()
            while token is not None and token[0] == "op" and \
                  token[1] == "*":
                self.type = self.type + " *"
                token = self.token()
            if token is not None and token[0] == "name":
                nametok = token
                token = self.token()
            else:
                self.error("struct : expecting name", token)
                return token
        elif token is not None and token[0] == "name" and \
             nametok is not None:
            self.type = self.type + " " + nametok[1]
            return token

        if nametok is not None:
            # the name was read as lookahead: give back what follows it
            self.lexer.push(token)
            token = nametok
        return token

    elif token[0] == "name" and token[1] == "enum":
        add_word(token[1])
        self.enums = []
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseEnumBlock(token)
        else:
            self.error("parsing enum: expecting '{'", token)
        enum_type = None
        if token is not None:
            if token[0] != "name":
                # anonymous enum: report the keyword itself as the name
                self.lexer.push(token)
                token = ("name", "enum")
            else:
                enum_type = token[1]
        for enum in self.enums:
            self.index_add(enum[0], self.filename,
                           not self.is_header, "enum",
                           (enum[1], enum[2], enum_type))
        return token

    elif token[0] == "name":
        add_word(token[1])
    else:
        self.error("parsing type %s: expecting a name" % (self.type),
                   token)
        return token

    # pointer operators and trailing const
    token = self.token()
    while token is not None and (token[0] == "op" or
          token[0] == "name" and token[1] == "const"):
        self.type = self.type + " " + token[1]
        token = self.token()

    #
    # if there is a parenthesis here, this means a function type
    #
    if token is not None and token[0] == "sep" and token[1] == '(':
        self.type = self.type + token[1]
        token = self.token()
        while token is not None and token[0] == "op" and token[1] == '*':
            self.type = self.type + token[1]
            token = self.token()
        if token is None or token[0] != "name":
            self.error("parsing function type, name expected", token)
            return token
        self.type = self.type + token[1]
        nametok = token
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == ')':
            self.type = self.type + token[1]
            token = self.token()
            if token is not None and token[0] == "sep" and \
               token[1] == '(':
                token = self.token()
                # parseSignature parses types itself; keep ours intact
                saved_type = self.type
                token = self.parseSignature(token)
                self.type = saved_type
            else:
                self.error("parsing function type, '(' expected", token)
                return token
        else:
            self.error("parsing function type, ')' expected", token)
            return token
        self.lexer.push(token)
        token = nametok
        return token

    #
    # do some lookahead for arrays
    #
    if token is not None and token[0] == "name":
        nametok = token
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == '[':
            self.type = self.type + nametok[1]
            while token is not None and token[0] == "sep" and \
                  token[1] == '[':
                self.type = self.type + token[1]
                token = self.token()
                while token is not None and token[0] != 'sep' and \
                      token[1] != ']' and token[1] != ';':
                    self.type = self.type + token[1]
                    token = self.token()
            if token is not None and token[0] == 'sep' and token[1] == ']':
                self.type = self.type + token[1]
                token = self.token()
            else:
                self.error("parsing array type, ']' expected", token)
                return token
        elif token is not None and token[0] == "sep" and token[1] == ':':
            # remove :12 in case it's a limited int size
            token = self.token()
            token = self.token()
        self.lexer.push(token)
        token = nametok

    return token
1180
1181 #
1182 # Parse a signature: '(' has been parsed and we scan the type definition
1183 # up to the ')' included
def parseSignature(self, token):
    """Parse a parameter list whose opening '(' was already consumed.

    Each parameter is parsed with self.parseType and collected as a
    (type, name, None) tuple; name is None when only a type was given
    and "..." for a variadic marker.  The resulting list is stored in
    self.signature.

    Returns the token following the closing ')', or None when the
    input ends first.
    """
    if token is not None and token[0] == "sep" and token[1] == ')':
        # empty parameter list
        self.signature = []
        return self.token()

    params = []
    while token is not None:
        token = self.parseType(token)
        if token is None:
            break
        if token[0] == "name":
            params.append((self.type, token[1], None))
            token = self.token()
        elif token[0] == "sep" and token[1] == ',':
            token = self.token()
            continue
        elif token[0] == "sep" and token[1] == ')':
            # only the type was provided
            if self.type == "...":
                pname = "..."
            else:
                pname = None
            params.append((self.type, pname, None))

        # separator after the parameter just recorded
        if token is not None and token[0] == "sep":
            if token[1] == ',':
                token = self.token()
                continue
            if token[1] == ')':
                token = self.token()
                break
    self.signature = params
    return token
1213
1214 #
1215 # Parse a global definition, be it a type, variable or function
1216 # the extern "C" blocks are a bit nasty and require it to recurse.
1217 #
def parseGlobal(self, token):
    """Parse one top-level definition: a typedef, variable(s) or function.

    token is the first token of the definition.  An 'extern "C" { ... }'
    block is handled by recursing on each definition it contains; a
    leading 'static' is remembered and passed on to the index.  Typedefs
    are delegated to self.parseTypedef, types to self.parseType,
    parameter lists to self.parseSignature and initializer/function
    bodies to self.parseBlock.  Every symbol found is registered through
    self.index_add.

    Returns the first token following the definition, or None at end of
    input.  A token that cannot be part of the definition is returned
    as-is so the caller can report it.
    """
    static = 0
    if token[1] == 'extern':
        token = self.token()
        if token is None:
            return token
        if token[0] == 'string':
            if token[1] == 'C':
                token = self.token()
                if token is None:
                    return token
                if token[0] == 'sep' and token[1] == "{":
                    token = self.token()
                    while token is not None and (token[0] != 'sep' or
                          token[1] != "}"):
                        if token[0] == 'name':
                            token = self.parseGlobal(token)
                        else:
                            self.error(
                             "token %s %s unexpected at the top level" % (
                                    token[0], token[1]))
                            token = self.parseGlobal(token)
                    # skip the closing '}' of the extern block
                    token = self.token()
                    return token
            else:
                return token
    elif token[1] == 'static':
        static = 1
        token = self.token()
        if token is None or token[0] != 'name':
            return token

    if token[1] == 'typedef':
        token = self.token()
        return self.parseTypedef(token)

    token = self.parseType(token)
    type_orig = self.type
    if token is None or token[0] != "name":
        return token
    decl_type = type_orig
    self.name = token[1]
    token = self.token()
    while token is not None and (token[0] == "sep" or token[0] == "op"):
        if token[0] == "sep" and token[1] == "[":
            # array declarator: fold everything up to ';' into the type
            decl_type = decl_type + token[1]
            token = self.token()
            while token is not None and (token[0] != "sep" or
                  token[1] != ";"):
                decl_type = decl_type + token[1]
                token = self.token()

        if token is not None and token[0] == "op":
            if token[1] != "=":
                # No branch below consumes any other operator; leaving
                # the loop avoids spinning forever on the same token.
                break
            #
            # Skip the initialization of the variable
            #
            token = self.token()
            if token is not None and token[0] == 'sep' and token[1] == '{':
                token = self.token()
                token = self.parseBlock(token)
            else:
                self.comment = None
                while token is not None and (token[0] != "sep" or
                      (token[1] != ';' and token[1] != ',')):
                    token = self.token()
            self.comment = None
            if token is None or token[0] != "sep" or (token[1] != ';' and
               token[1] != ','):
                self.error("missing ';' or ',' after value")

        if token is not None and token[0] == "sep":
            if token[1] == ";":
                self.comment = None
                token = self.token()
                if decl_type == "struct":
                    self.index_add(self.name, self.filename,
                         not self.is_header, "struct", self.struct_fields)
                else:
                    self.index_add(self.name, self.filename,
                         not self.is_header, "variable", decl_type)
                break
            elif token[1] == "(":
                token = self.token()
                token = self.parseSignature(token)
                if token is None:
                    return None
                if token[0] == "sep" and token[1] == ";":
                    # prototype
                    d = self.mergeFunctionComment(self.name,
                            ((decl_type, None), self.signature), 1)
                    self.index_add(self.name, self.filename, static,
                                   "function", d)
                    token = self.token()
                elif token[0] == "sep" and token[1] == "{":
                    # definition: index it, then skip the body
                    d = self.mergeFunctionComment(self.name,
                            ((decl_type, None), self.signature), static)
                    self.index_add(self.name, self.filename, static,
                                   "function", d)
                    token = self.token()
                    token = self.parseBlock(token)
            elif token[1] == ',':
                # several variables declared at once: restart from the
                # base type for the next declarator
                self.comment = None
                self.index_add(self.name, self.filename, static,
                               "variable", decl_type)
                decl_type = type_orig
                token = self.token()
                while token is not None and token[0] == "sep":
                    decl_type = decl_type + token[1]
                    token = self.token()
                if token is not None and token[0] == "name":
                    self.name = token[1]
                    token = self.token()
            else:
                break

    return token
1336
def parse(self):
    """Parse the whole file, one top-level definition at a time.

    Returns self.index once the input is exhausted.  When a token that
    cannot start a definition is met, self.error is invoked and, should
    it return, parsing stops and None is returned.
    """
    print("Parsing %s" % (self.filename))
    token = self.token()
    while token is not None:
        if token[0] != 'name':
            self.error("token %s %s unexpected at the top level" % (
                       token[0], token[1]))
            token = self.parseGlobal(token)
            return
        token = self.parseGlobal(token)
    return self.index
1349
1350
class docBuilder:
    """A documentation builder

    Scans directories for C modules and headers, parses them with
    CParser, merges the resulting indexes and serializes the symbols to
    <name>-api.xml and the cross references to <name>-refs.xml.
    """
    def __init__(self, name, directories=['.'], excludes=[]):
        """name is the project name, directories the places scanned for
        *.c / *.h files, excludes extra file name fragments to skip in
        addition to the global ignored_files table."""
        self.name = name
        self.directories = directories
        self.excludes = excludes + list(ignored_files.keys())
        self.modules = {}
        self.headers = {}
        self.idx = index()
        self.xref = {}
        self.index = {}
        if name == 'libxml2':
            self.basename = 'libxml'
        else:
            self.basename = name

    def _sorted_keys(self, mapping):
        """Return the keys of mapping as a sorted list."""
        keys = list(mapping.keys())
        keys.sort()
        return keys

    def _is_excluded(self, file):
        """Tell whether file contains one of the excluded fragments."""
        for excl in self.excludes:
            if file.find(excl) != -1:
                return 1
        return 0

    def _exported_ids(self, idx):
        """Return the sorted names of all symbols held by a file index."""
        ids = list(idx.functions.keys()) + list(idx.variables.keys()) + \
              list(idx.macros.keys()) + list(idx.typedefs.keys()) + \
              list(idx.structs.keys()) + list(idx.enums.keys())
        ids.sort()
        return ids

    def _write_type_refs(self, output, funcs):
        """Write one <type> element per interesting type of funcs, a
        dictionary mapping a type to the function names using it."""
        for type_name in self._sorted_keys(funcs):
            # types too common to be worth cross-referencing
            if type_name == '' or type_name == 'void' or \
               type_name == "int" or type_name == "char *" or \
               type_name == "const char *":
                continue
            output.write(" <type name='%s'>\n" % (type_name))
            ids = funcs[type_name]
            ids.sort()
            for id in ids:
                output.write(" <ref name='%s'/>\n" % (id))
            output.write(" </type>\n")

    def indexString(self, id, str):
        """Register in self.xref every significant word of str as
        referring to the symbol id.  Words shorter than 3 characters,
        not starting with a letter, or equal to 'and'/'the' are
        ignored."""
        if str == None:
            return
        for char in "'\"/*[]()<>&#,.;":
            str = str.replace(char, ' ')
        for token in str.split():
            if token[0] not in string.ascii_letters:
                continue
            if len(token) < 3:
                continue
            lower = token.lower()
            # TODO: generalize this a bit
            if lower == 'and' or lower == 'the':
                continue
            if token in self.xref:
                self.xref[token].append(id)
            else:
                self.xref[token] = [id]

    def analyze(self):
        """Print a summary of what was scanned and analyze the index."""
        print("Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys())))
        self.idx.analyze()

    def scanHeaders(self):
        """Parse every registered header and merge its symbols."""
        for header in list(self.headers.keys()):
            parser = CParser(header)
            idx = parser.parse()
            self.headers[header] = idx
            self.idx.merge(idx)

    def scanModules(self):
        """Parse every registered C module and merge its public symbols."""
        for module in list(self.modules.keys()):
            parser = CParser(module)
            idx = parser.parse()
            # idx.analyze()
            self.modules[module] = idx
            self.idx.merge_public(idx)

    def scan(self):
        """Collect the non excluded *.c and *.h files of all the
        directories, then parse headers first and modules second."""
        for directory in self.directories:
            for file in glob.glob(directory + "/*.c"):
                if not self._is_excluded(file):
                    self.modules[file] = None
            for file in glob.glob(directory + "/*.h"):
                if not self._is_excluded(file):
                    self.headers[file] = None
        self.scanHeaders()
        self.scanModules()

    def modulename_file(self, file):
        """Return the base name of file, without a trailing '.h'."""
        module = os.path.basename(file)
        if module[-2:] == '.h':
            module = module[:-2]
        return module

    def serialize_enum(self, output, name):
        """Write the <enum> element describing the enum value name."""
        id = self.idx.enums[name]
        output.write(" <enum name='%s' file='%s'" % (name,
                     self.modulename_file(id.module)))
        if id.info != None:
            info = id.info
            if info[0] != None and info[0] != '':
                # NOTE(review): eval() runs text taken from the parsed C
                # sources; only use this tool on trusted source trees.
                try:
                    val = eval(info[0])
                except Exception:
                    # not a Python expression: keep the raw text
                    val = info[0]
                output.write(" value='%s'" % (val))
            if info[2] != None and info[2] != '':
                output.write(" type='%s'" % info[2])
            if info[1] != None and info[1] != '':
                output.write(" info='%s'" % escape(info[1]))
        output.write("/>\n")

    def serialize_macro(self, output, name):
        """Write the <macro> element for name and index the words of
        its description and of its arguments descriptions."""
        id = self.idx.macros[name]
        output.write(" <macro name='%s' file='%s'>\n" % (name,
                     self.modulename_file(id.module)))
        if id.info != None:
            # best effort: a malformed info tuple yields an empty macro
            try:
                (args, desc) = id.info
                if desc != None and desc != "":
                    output.write(" <info>%s</info>\n" % (escape(desc)))
                    self.indexString(name, desc)
                for arg in args:
                    (arg_name, arg_desc) = arg
                    if arg_desc != None and arg_desc != "":
                        output.write(" <arg name='%s' info='%s'/>\n" % (
                                     arg_name, escape(arg_desc)))
                        # cross references must point to the macro, not
                        # to the name of one of its arguments
                        self.indexString(name, arg_desc)
                    else:
                        output.write(" <arg name='%s'/>\n" % (arg_name))
            except Exception:
                pass
        output.write(" </macro>\n")

    def serialize_typedef(self, output, name):
        """Write a <struct> element (with its fields when known) for a
        typedef of a struct, a <typedef> element otherwise."""
        id = self.idx.typedefs[name]
        if id.info[0:7] == 'struct ':
            output.write(" <struct name='%s' file='%s' type='%s'" % (
                         name, self.modulename_file(id.module), id.info))
            name = id.info[7:]
            if name in self.idx.structs and \
               isinstance(self.idx.structs[name].info, (tuple, list)):
                output.write(">\n")
                try:
                    for field in self.idx.structs[name].info:
                        desc = field[2]
                        self.indexString(name, desc)
                        if desc == None:
                            desc = ''
                        else:
                            desc = escape(desc)
                        output.write(" <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc))
                except Exception:
                    print("Failed to serialize struct %s" % (name))
                output.write(" </struct>\n")
            else:
                output.write("/>\n")
        else:
            output.write(" <typedef name='%s' file='%s' type='%s'/>\n" % (
                         name, self.modulename_file(id.module), id.info))

    def serialize_variable(self, output, name):
        """Write the <variable> element for name."""
        id = self.idx.variables[name]
        if id.info != None:
            output.write(" <variable name='%s' file='%s' type='%s'/>\n" % (
                         name, self.modulename_file(id.module), id.info))
        else:
            output.write(" <variable name='%s' file='%s'/>\n" % (
                         name, self.modulename_file(id.module)))

    def serialize_function(self, output, name):
        """Write the element describing the function name (its tag is
        the type recorded in the index) and index its descriptions."""
        id = self.idx.functions[name]
        output.write(" <%s name='%s' file='%s'>\n" % (id.type, name,
                     self.modulename_file(id.module)))
        try:
            (ret, params, desc) = id.info
            output.write(" <info>%s</info>\n" % (escape(desc)))
            self.indexString(name, desc)
            if ret[0] != None:
                if ret[0] == "void":
                    output.write(" <return type='void'/>\n")
                else:
                    output.write(" <return type='%s' info='%s'/>\n" % (
                                 ret[0], escape(ret[1])))
                    self.indexString(name, ret[1])
            for param in params:
                if param[0] == 'void':
                    continue
                if param[2] == None:
                    output.write(" <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
                else:
                    output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
                    self.indexString(name, param[2])
        except Exception:
            print("Failed to save function %s info: " % name + " " + repr(id.info))
        output.write(" </%s>\n" % (id.type))

    def serialize_exports(self, output, file):
        """Write the <file> element listing the symbols exported by the
        header file."""
        module = self.modulename_file(file)
        output.write(" <file name='%s'>\n" % (module))
        for id in uniq(self._exported_ids(self.headers[file])):
            output.write(" <exports symbol='%s'/>\n" % (id))
        output.write(" </file>\n")

    def serialize_xrefs_files(self, output):
        """Write, for each header, the references to its symbols."""
        for file in self._sorted_keys(self.headers):
            module = self.modulename_file(file)
            output.write(" <file name='%s'>\n" % (module))
            for id in uniq(self._exported_ids(self.headers[file])):
                output.write(" <ref name='%s'/>\n" % (id))
            output.write(" </file>\n")

    def serialize_xrefs_functions(self, output):
        """Write, for each parameter type, the functions accepting it."""
        funcs = {}
        for name in self.idx.functions.keys():
            id = self.idx.functions[name]
            # best effort: functions with unusable info are skipped
            try:
                (ret, params, desc) = id.info
                for param in params:
                    if param[0] == 'void':
                        continue
                    if param[0] in funcs:
                        funcs[param[0]].append(name)
                    else:
                        funcs[param[0]] = [name]
            except Exception:
                pass
        self._write_type_refs(output, funcs)

    def serialize_xrefs_constructors(self, output):
        """Write, for each return type, the functions producing it."""
        funcs = {}
        for name in self.idx.functions.keys():
            id = self.idx.functions[name]
            # best effort: functions with unusable info are skipped
            try:
                (ret, params, desc) = id.info
                if ret[0] == "void":
                    continue
                if ret[0] in funcs:
                    funcs[ret[0]].append(name)
                else:
                    funcs[ret[0]] = [name]
            except Exception:
                pass
        # references are sorted, as done for the parameter types
        self._write_type_refs(output, funcs)

    def serialize_xrefs_alpha(self, output):
        """Write all identifiers grouped by their first character."""
        letter = None
        for id in self._sorted_keys(self.idx.identifiers):
            if id[0] != letter:
                if letter != None:
                    output.write(" </letter>\n")
                letter = id[0]
                output.write(" <letter name='%s'>\n" % (letter))
            output.write(" <ref name='%s'/>\n" % (id))
        if letter != None:
            output.write(" </letter>\n")

    def serialize_xrefs_references(self, output):
        """Write, for each identifier, the link to its HTML anchor."""
        for id in self._sorted_keys(self.idx.identifiers):
            idf = self.idx.identifiers[id]
            module = idf.module
            output.write(" <reference name='%s' href='%s'/>\n" % (id,
                         'html/' + self.basename + '-' +
                         self.modulename_file(module) + '.html#' +
                         id))

    def serialize_xrefs_index(self, output):
        """Write the word index built by indexString, split in chunks.

        Words referenced by more than 30 symbols are left out.  A new
        chunk is started on a change of first character once more than
        200 references were written in the current one.  A <chunks>
        summary giving the first and last letter of every chunk closes
        the index.
        """
        index = self.xref
        letter = None
        first_letter = None
        count = 0
        chunk = 0
        chunks = []
        for id in self._sorted_keys(index):
            if len(index[id]) > 30:
                continue
            if id[0] != letter:
                if letter == None or count > 200:
                    if letter != None:
                        output.write(" </letter>\n")
                        output.write(" </chunk>\n")
                        count = 0
                        chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
                    output.write(" <chunk name='chunk%s'>\n" % (chunk))
                    first_letter = id[0]
                    chunk = chunk + 1
                elif letter != None:
                    output.write(" </letter>\n")
                letter = id[0]
                output.write(" <letter name='%s'>\n" % (letter))
            output.write(" <word name='%s'>\n" % (id))
            tokens = index[id]
            tokens.sort()
            tok = None
            for token in tokens:
                # the list is sorted: skip consecutive duplicates
                if tok == token:
                    continue
                tok = token
                output.write(" <ref name='%s'/>\n" % (token))
                count = count + 1
            output.write(" </word>\n")
        if letter != None:
            output.write(" </letter>\n")
            output.write(" </chunk>\n")
            # the chunk still open at the end of the loop must be listed
            # in the summary like the previous ones
            chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
            output.write(" <chunks>\n")
            for ch in chunks:
                output.write(" <chunk name='%s' start='%s' end='%s'/>\n" % (
                             ch[0], ch[1], ch[2]))
            output.write(" </chunks>\n")

    def serialize_xrefs(self, output):
        """Write all the cross reference sections, in a fixed order."""
        sections = [
            ("references", self.serialize_xrefs_references),
            ("alpha", self.serialize_xrefs_alpha),
            ("constructors", self.serialize_xrefs_constructors),
            ("functions", self.serialize_xrefs_functions),
            ("files", self.serialize_xrefs_files),
            ("index", self.serialize_xrefs_index),
        ]
        for (tag, writer) in sections:
            output.write(" <%s>\n" % (tag))
            writer(output)
            output.write(" </%s>\n" % (tag))

    def serialize(self):
        """Save the API description in <name>-api.xml, then the cross
        references in <name>-refs.xml, both in the current directory.

        The descriptions must be written first since the word index
        saved in the second file is filled while writing them.
        """
        filename = "%s-api.xml" % self.name
        print("Saving XML description %s" % (filename))
        output = open(filename, "w")
        # make sure the file is closed even if a serializer raises
        try:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<api name='%s'>\n" % self.name)
            output.write(" <files>\n")
            for file in self._sorted_keys(self.headers):
                self.serialize_exports(output, file)
            output.write(" </files>\n")
            output.write(" <symbols>\n")
            for macro in self._sorted_keys(self.idx.macros):
                self.serialize_macro(output, macro)
            for enum in self._sorted_keys(self.idx.enums):
                self.serialize_enum(output, enum)
            for typedef in self._sorted_keys(self.idx.typedefs):
                self.serialize_typedef(output, typedef)
            for variable in self._sorted_keys(self.idx.variables):
                self.serialize_variable(output, variable)
            for function in self._sorted_keys(self.idx.functions):
                self.serialize_function(output, function)
            output.write(" </symbols>\n")
            output.write("</api>\n")
        finally:
            output.close()

        filename = "%s-refs.xml" % self.name
        print("Saving XML Cross References %s" % (filename))
        output = open(filename, "w")
        try:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<apirefs name='%s'>\n" % self.name)
            self.serialize_xrefs(output)
            output.write("</apirefs>\n")
        finally:
            output.close()
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001774
1775
def rebuild():
    """Guess which project the script runs in and rebuild its API files.

    The project is guessed from the presence of a well known source
    file relative to the current directory.  The matching docBuilder is
    scanned, analyzed and serialized; when the libexslt sources are
    found too, its description is rebuilt as well.

    Returns the main docBuilder, or None if no project was recognized.
    """
    # (probe file, message, project name, directories, excluded files),
    # tried in order; the first probe found wins
    candidates = [
        ("parser.c", "Rebuilding API description for libxml2",
         "libxml2", [".", "."], ["xmlwin32version.h", "tst.c"]),
        ("../parser.c", "Rebuilding API description for libxml2",
         "libxml2", ["..", "../include/libxml"],
         ["xmlwin32version.h", "tst.c"]),
        ("../libxslt/transform.c", "Rebuilding API description for libxslt",
         "libxslt", ["../libxslt"],
         ["win32config.h", "libxslt.h", "tst.c"]),
    ]
    builder = None
    for (probe, message, project, directories, excludes) in candidates:
        if glob.glob(probe):
            print(message)
            builder = docBuilder(project, directories, excludes)
            break
    if builder is None:
        print("rebuild() failed, unable to guess the module")
        return None

    builder.scan()
    builder.analyze()
    builder.serialize()
    if glob.glob("../libexslt/exslt.c"):
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize()
    return builder
1802
1803#
1804# for debugging the parser
1805#
def parse(filename):
    """Debugging helper: parse a single C file and return the result
    of CParser.parse() for it."""
    return CParser(filename).parse()
1810
# Script entry point: rebuild the API description of the project found
# around the current directory.
if __name__ == '__main__':
    rebuild()