blob: 4b12d09a2844d89582834da3755e1f798646eef0 [file] [log] [blame]
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001#!/usr/bin/python -u
2#
3# This is the API builder, it parses the C sources and build the
4# API formal description in XML.
5#
6# See Copyright for the status of this software.
7#
8# daniel@veillard.com
9#
Daniel Veillard540a31a2003-01-21 11:21:07 +000010import os, sys
Daniel Veillarda9b66d02002-12-11 14:23:49 +000011import string
12import glob
13
Daniel Veillardde0a0a52003-04-24 17:12:57 +000014#
15# C parser analysis code
16#
# Files that must not be scanned, keyed by file name.  The value is a short
# human-readable reason explaining why the file is skipped.
ignored_files = {
  "trio": "too many non standard macros",
  "trio.c": "too many non standard macros",
  "trionan.c": "too many non standard macros",
  "triostr.c": "too many non standard macros",
  "acconfig.h": "generated portability layer",
  "config.h": "generated portability layer",
  "libxml.h": "internal only",
  "testOOM.c": "out of memory tester",
  "testOOMlib.h": "out of memory tester",
  "testOOMlib.c": "out of memory tester",
}
29
# Words that CParser.token() silently drops from the token stream.
# Each value is a pair (n, reason): n is the number of tokens following the
# word that are discarded along with it, reason is documentation only.
ignored_words = {
  "WINAPI": (0, "Windows keyword"),
  "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
  "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
  "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XMLCALL": (0, "Special macro for win32 calls"),
  "__declspec": (3, "Windows keyword"),
  "ATTRIBUTE_UNUSED": (0, "macro keyword"),
  "LIBEXSLT_PUBLIC": (0, "macro keyword"),
  "X_IN_Y": (5, "macro function builder"),
}
41
def escape(raw):
    """Return raw with the five XML special characters turned into entities.

    The ampersand is replaced first so that the '&' introduced by the
    other entities is not escaped a second time.
    """
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
49
class identifier:
    """Record describing one named symbol found in the sources.

    Attributes:
      name   -- the symbol name
      module -- where it was found (None when unknown)
      type   -- kind of symbol as a string (None when unknown)
      info   -- kind-specific payload
      extra  -- additional kind-specific payload
      static -- 0 until set_static() is called with another value
    """
    def __init__(self, name, module=None, type=None, info=None, extra=None):
        self.name = name
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.static = 0

    def __repr__(self):
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module is not None:
            r = r + " from %s" % (self.module)
        # repr() replaces the back-tick syntax, which means the same thing
        # but is not accepted by newer interpreters.
        if self.info is not None:
            r = r + " " + repr(self.info)
        if self.extra is not None:
            r = r + " " + repr(self.extra)
        return r

    def set_module(self, module):
        self.module = module

    def set_type(self, type):
        self.type = type

    def set_info(self, info):
        self.info = info

    def set_extra(self, extra):
        self.extra = extra

    def set_static(self, static):
        self.static = static

    def update(self, module, type = None, info = None, extra=None):
        """Merge new knowledge into this record.

        module and type are only filled in when still unknown; a non-None
        info or extra always replaces the stored value.
        """
        if module is not None and self.module is None:
            self.set_module(module)
        if type is not None and self.type is None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
92
93
class index:
    """Symbol index for one module, or for several modules once merged.

    identifiers maps every registered name to its record; the per-kind
    dictionaries (functions, variables, includes, structs, enums, typedefs,
    macros) map names to the same records.  references starts out empty and
    is not modified by this class.
    """
    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}

    def add(self, name, module, static, type, info=None, extra=None):
        """Register name, or update its existing record, and return the record.

        Names starting with '__' are ignored and None is returned.  When
        type is one of the known kinds the record is also stored in the
        matching per-kind dictionary; an unknown kind is reported on stdout.
        """
        if name[0:2] == '__':
            return None
        # Only a missing entry means "create a new record"; any error raised
        # by update() itself must surface instead of being swallowed.
        try:
            d = self.identifiers[name]
        except KeyError:
            d = identifier(name, module, type, info, extra)
            self.identifiers[name] = d
        else:
            d.update(module, type, info, extra)

        if static == 1:
            d.set_static(1)

        if type is not None:
            tables = {
                "function": self.functions,
                "functype": self.functions,
                "variable": self.variables,
                "include": self.includes,
                "struct": self.structs,
                "enum": self.enums,
                "typedef": self.typedefs,
                "macro": self.macros,
            }
            table = tables.get(type)
            if table is None:
                print("Unable to register type  %s" % (type,))
            else:
                table[name] = d
        return d

    def _adopt(self, kind, table, other, name):
        """Copy other[name] into table and identifiers, or report a clash."""
        if name in table:
            print("%s %s from %s redeclared in %s" % (
                  kind, name, table[name].module, other[name].module))
        else:
            table[name] = other[name]
            self.identifiers[name] = other[name]

    def merge(self, idx):
        """Fold the symbols of idx into this index.

        Redeclarations are reported on stdout and the existing record is
        kept.  The order of the loops matters: macros are checked against
        the functions and variables merged before them, and against the
        enums already present before idx's own enums are merged.
        """
        for name in idx.functions.keys():
            # a macro might be used to override a function definition
            if name in self.macros:
                del self.macros[name]
            self._adopt("function", self.functions, idx.functions, name)
        for name in idx.variables.keys():
            # a macro might be used to override a variable definition
            if name in self.macros:
                del self.macros[name]
            self._adopt("variable", self.variables, idx.variables, name)
        for name in idx.structs.keys():
            self._adopt("struct", self.structs, idx.structs, name)
        for name in idx.typedefs.keys():
            self._adopt("typedef", self.typedefs, idx.typedefs, name)
        for name in idx.macros.keys():
            # a real definition of the same name wins over the macro
            if name in self.variables:
                continue
            if name in self.functions:
                continue
            if name in self.enums:
                continue
            self._adopt("macro", self.macros, idx.macros, name)
        for name in idx.enums.keys():
            self._adopt("enum", self.enums, idx.enums, name)

    def merge_public(self, idx):
        """Update the functions already known here with the data from idx.

        Functions of idx that are not declared in this index are ignored.
        """
        for name in idx.functions.keys():
            if name in self.functions:
                up = idx.functions[name]
                self.functions[name].update(None, up.type, up.info, up.extra)
        # TODO: do the same for variables.

    def analyze_dict(self, type, dict):
        """Print how many entries dict holds and how many are not static."""
        count = 0
        public = 0
        for entry in dict.values():
            count = count + 1
            if entry.static == 0:
                public = public + 1
        if count != public:
            print(" %d %s , %d public" % (count, type, public))
        elif count != 0:
            print(" %d public %s" % (count, type))

    def analyze(self):
        """Print the statistics for each kind of symbol."""
        self.analyze_dict("functions", self.functions)
        self.analyze_dict("variables", self.variables)
        self.analyze_dict("structs", self.structs)
        self.analyze_dict("typedefs", self.typedefs)
        self.analyze_dict("macros", self.macros)
237 self.analyze_dict("macros", self.macros)
238
class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line.

       Tokens are (kind, text) pairs where kind is one of 'preproc',
       'string', 'comment', 'name', 'sep' or 'op'."""

    # characters ending a 'name' token
    _NAME_STOP = " \t(){}:;,+-*/%&!|[]=><"
    _SEPARATORS = "(){}:;,[]"
    _OPERATORS = "+-*><=/%&!|."
    _OPERATOR_TAIL = "+-*><=/%&!|"

    def __init__(self, input):
        self.input = input
        self.tokens = []
        self.line = ""
        self.lineno = 0
        # defined up front so that debug() works before the first token
        self.last = None

    def getline(self):
        """Return the next non-blank line, stripped, or None at end of input.

        Lines ending with a backslash are joined with the following ones.
        """
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = line.strip()
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = n.strip()
                if not n:
                    break
                else:
                    line = line + n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        """Put token back at the front of the queue."""
        self.tokens.insert(0, token)

    def debug(self):
        print("Last token:  %s" % (self.last,))
        print("Token queue:  %s" % (self.tokens,))
        print("Line %d end:  %s" % (self.lineno, self.line))

    def _tokenize(self, line):
        """Append the name, sep and op tokens found in line to the queue.

        line must not contain any comment or string literal.
        """
        l = len(line)
        i = 0
        while i < l:
            c = line[i]
            if c == ' ' or c == '\t':
                i = i + 1
                continue
            o = ord(c)
            if (97 <= o <= 122) or (65 <= o <= 90) or (48 <= o <= 57):
                s = i
                while i < l and line[i] not in self._NAME_STOP:
                    i = i + 1
                self.tokens.append(('name', line[s:i]))
                continue
            if c in self._SEPARATORS:
                self.tokens.append(('sep', c))
                i = i + 1
                continue
            if c in self._OPERATORS:
                if c == '.' and i + 2 < l and \
                   line[i+1] == '.' and line[i+2] == '.':
                    self.tokens.append(('name', '...'))
                    i = i + 3
                    continue

                # two consecutive operator characters form a single token
                j = i + 1
                if j < l and line[j] in self._OPERATOR_TAIL:
                    self.tokens.append(('op', line[i:j+1]))
                    i = j + 1
                else:
                    self.tokens.append(('op', c))
                    i = i + 1
                continue
            # anything else (e.g. '_') starts a name as well
            s = i
            while i < l and line[i] not in self._NAME_STOP:
                i = i + 1
            self.tokens.append(('name', line[s:i]))

    def token(self):
        """Return the next token, or None once the input is exhausted.

        None is also returned when the input ends inside a string or a
        comment.
        """
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line is None:
                return None

            if line[0] == '#':
                # a preprocessor line is split on whitespace only
                self.tokens = [('preproc', word) for word in line.split()]
                break
            l = len(line)
            if line[0] == '"' or line[0] == "'":
                end = line[0]
                line = line[1:]
                found = 0
                tok = ""
                while found == 0:
                    i = 0
                    l = len(line)
                    while i < l:
                        if line[i] == end:
                            self.line = line[i+1:]
                            line = line[:i]
                            found = 1
                            break
                        if line[i] == '\\':
                            # skip the escaped character
                            i = i + 1
                        i = i + 1
                    tok = tok + line
                    if found == 0:
                        line = self.getline()
                        if line is None:
                            return None
                self.last = ('string', tok)
                return self.last

            if l >= 2 and line[0] == '/' and line[1] == '*':
                line = line[2:]
                found = 0
                tok = ""
                while found == 0:
                    end = line.find('*/')
                    if end != -1:
                        self.line = line[end+2:]
                        # NOTE: the character in front of '*/' (normally a
                        # space) is dropped too; when '*/' starts the line
                        # the slice is [:-1] and keeps the '*'.
                        line = line[:end-1]
                        found = 1
                    if tok != "":
                        tok = tok + "\n"
                    tok = tok + line
                    if found == 0:
                        line = self.getline()
                        if line is None:
                            return None
                self.last = ('comment', tok)
                return self.last
            if l >= 2 and line[0] == '/' and line[1] == '/':
                line = line[2:]
                self.last = ('comment', line)
                return self.last

            # keep whatever starts a comment or a string for the next round
            i = 0
            while i < l:
                c = line[i]
                if (c == '/' and i+1 < l and
                    (line[i+1] == '/' or line[i+1] == '*')) or \
                   c == '"' or c == "'":
                    self.line = line[i:]
                    line = line[:i]
                    break
                i = i + 1
            self._tokenize(line)

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok
444
445class CParser:
446 """The C module parser"""
447 def __init__(self, filename, idx = None):
448 self.filename = filename
449 if len(filename) > 2 and filename[-2:] == '.h':
450 self.is_header = 1
451 else:
452 self.is_header = 0
453 self.input = open(filename)
454 self.lexer = CLexer(self.input)
455 if idx == None:
456 self.index = index()
457 else:
458 self.index = idx
459 self.top_comment = ""
460 self.last_comment = ""
461 self.comment = None
462
463 def lineno(self):
464 return self.lexer.getlineno()
465
466 def error(self, msg, token=-1):
467 print "Parse Error: " + msg
468 if token != -1:
469 print "Got token ", token
470 self.lexer.debug()
471 sys.exit(1)
472
473 def debug(self, msg, token=-1):
474 print "Debug: " + msg
475 if token != -1:
476 print "Got token ", token
477 self.lexer.debug()
478
479 def parseComment(self, token):
480 if self.top_comment == "":
481 self.top_comment = token[1]
482 if self.comment == None or token[1][0] == '*':
483 self.comment = token[1];
484 else:
485 self.comment = self.comment + token[1]
486 token = self.lexer.token()
487 return token
488
489 #
490 # Parse a comment block associate to a macro
491 #
492 def parseMacroComment(self, name, quiet = 0):
493 if name[0:2] == '__':
494 quiet = 1
495
496 args = []
497 desc = ""
498
499 if self.comment == None:
500 if not quiet:
501 print "Missing comment for macro %s" % (name)
502 return((args, desc))
503 if self.comment[0] != '*':
504 if not quiet:
505 print "Missing * in macro comment for %s" % (name)
506 return((args, desc))
507 lines = string.split(self.comment, '\n')
508 if lines[0] == '*':
509 del lines[0]
510 if lines[0] != "* %s:" % (name):
511 if not quiet:
512 print "Misformatted macro comment for %s" % (name)
513 print " Expecting '* %s:' got '%s'" % (name, lines[0])
514 return((args, desc))
515 del lines[0]
516 while lines[0] == '*':
517 del lines[0]
518 while len(lines) > 0 and lines[0][0:3] == '* @':
519 l = lines[0][3:]
520 try:
521 (arg, desc) = string.split(l, ':', 1)
522 desc=string.strip(desc)
523 arg=string.strip(arg)
524 except:
525 if not quiet:
526 print "Misformatted macro comment for %s" % (name)
527 print " problem with '%s'" % (lines[0])
528 del lines[0]
529 continue
530 del lines[0]
531 l = string.strip(lines[0])
532 while len(l) > 2 and l[0:3] != '* @':
533 while l[0] == '*':
534 l = l[1:]
535 desc = desc + ' ' + string.strip(l)
536 del lines[0]
537 if len(lines) == 0:
538 break
539 l = lines[0]
540 args.append((arg, desc))
541 while len(lines) > 0 and lines[0] == '*':
542 del lines[0]
543 desc = ""
544 while len(lines) > 0:
545 l = lines[0]
546 while len(l) > 0 and l[0] == '*':
547 l = l[1:]
548 l = string.strip(l)
549 desc = desc + " " + l
550 del lines[0]
551
552 desc = string.strip(desc)
553
554 if quiet == 0:
555 if desc == "":
556 print "Macro comment for %s lack description of the macro" % (name)
557
558 return((args, desc))
559
560 #
561 # Parse a comment block and merge the informations found in the
562 # parameters descriptions, finally returns a block as complete
563 # as possible
564 #
565 def mergeFunctionComment(self, name, description, quiet = 0):
566 if name == 'main':
567 quiet = 1
568 if name[0:2] == '__':
569 quiet = 1
570
571 (ret, args) = description
572 desc = ""
573 retdesc = ""
574
575 if self.comment == None:
576 if not quiet:
577 print "Missing comment for function %s" % (name)
578 return(((ret[0], retdesc), args, desc))
579 if self.comment[0] != '*':
580 if not quiet:
581 print "Missing * in function comment for %s" % (name)
582 return(((ret[0], retdesc), args, desc))
583 lines = string.split(self.comment, '\n')
584 if lines[0] == '*':
585 del lines[0]
586 if lines[0] != "* %s:" % (name):
587 if not quiet:
588 print "Misformatted function comment for %s" % (name)
589 print " Expecting '* %s:' got '%s'" % (name, lines[0])
590 return(((ret[0], retdesc), args, desc))
591 del lines[0]
592 while lines[0] == '*':
593 del lines[0]
594 nbargs = len(args)
595 while len(lines) > 0 and lines[0][0:3] == '* @':
596 l = lines[0][3:]
597 try:
598 (arg, desc) = string.split(l, ':', 1)
599 desc=string.strip(desc)
600 arg=string.strip(arg)
601 except:
602 if not quiet:
603 print "Misformatted function comment for %s" % (name)
604 print " problem with '%s'" % (lines[0])
605 del lines[0]
606 continue
607 del lines[0]
608 l = string.strip(lines[0])
609 while len(l) > 2 and l[0:3] != '* @':
610 while l[0] == '*':
611 l = l[1:]
612 desc = desc + ' ' + string.strip(l)
613 del lines[0]
614 if len(lines) == 0:
615 break
616 l = lines[0]
617 i = 0
618 while i < nbargs:
619 if args[i][1] == arg:
620 args[i] = (args[i][0], arg, desc)
621 break;
622 i = i + 1
623 if i >= nbargs:
624 if not quiet:
625 print "Uname to find arg %s from function comment for %s" % (
626 arg, name)
627 while len(lines) > 0 and lines[0] == '*':
628 del lines[0]
629 desc = ""
630 while len(lines) > 0:
631 l = lines[0]
632 while len(l) > 0 and l[0] == '*':
633 l = l[1:]
634 l = string.strip(l)
635 if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return":
636 try:
637 l = string.split(l, ' ', 1)[1]
638 except:
639 l = ""
640 retdesc = string.strip(l)
641 del lines[0]
642 while len(lines) > 0:
643 l = lines[0]
644 while len(l) > 0 and l[0] == '*':
645 l = l[1:]
646 l = string.strip(l)
647 retdesc = retdesc + " " + l
648 del lines[0]
649 else:
650 desc = desc + " " + l
651 del lines[0]
652
653 retdesc = string.strip(retdesc)
654 desc = string.strip(desc)
655
656 if quiet == 0:
657 #
658 # report missing comments
659 #
660 i = 0
661 while i < nbargs:
662 if args[i][2] == None and args[i][0] != "void" and args[i][1] != None:
663 print "Function comment for %s lack description of arg %s" % (name, args[i][1])
664 i = i + 1
665 if retdesc == "" and ret[0] != "void":
666 print "Function comment for %s lack description of return value" % (name)
667 if desc == "":
668 print "Function comment for %s lack description of the function" % (name)
669
670
671 return(((ret[0], retdesc), args, desc))
672
673 def parsePreproc(self, token):
674 name = token[1]
675 if name == "#include":
676 token = self.lexer.token()
677 if token == None:
678 return None
679 if token[0] == 'preproc':
680 self.index.add(token[1], self.filename, not self.is_header,
681 "include")
682 return self.lexer.token()
683 return token
684 if name == "#define":
685 token = self.lexer.token()
686 if token == None:
687 return None
688 if token[0] == 'preproc':
689 # TODO macros with arguments
690 name = token[1]
691 lst = []
692 token = self.lexer.token()
693 while token != None and token[0] == 'preproc' and \
694 token[1][0] != '#':
695 lst.append(token[1])
696 token = self.lexer.token()
697 try:
698 name = string.split(name, '(') [0]
699 except:
700 pass
701 info = self.parseMacroComment(name, not self.is_header)
702 self.index.add(name, self.filename, not self.is_header,
703 "macro", info)
704 return token
705 token = self.lexer.token()
706 while token != None and token[0] == 'preproc' and \
707 token[1][0] != '#':
708 token = self.lexer.token()
709 return token
710
711 #
712 # token acquisition on top of the lexer, it handle internally
713 # preprocessor and comments since they are logically not part of
714 # the program structure.
715 #
716 def token(self):
717 global ignored_words
718
719 token = self.lexer.token()
720 while token != None:
721 if token[0] == 'comment':
722 token = self.parseComment(token)
723 continue
724 elif token[0] == 'preproc':
725 token = self.parsePreproc(token)
726 continue
727 elif token[0] == "name" and ignored_words.has_key(token[1]):
728 (n, info) = ignored_words[token[1]]
729 i = 0
730 while i < n:
731 token = self.lexer.token()
732 i = i + 1
733 token = self.lexer.token()
734 continue
735 else:
736 #print "=> ", token
737 return token
738 return None
739
740 #
741 # Parse a typedef, it records the type and its name.
742 #
    def parseTypedef(self, token):
        """Parse a typedef and register every name it declares.

        token is the first token after the 'typedef' keyword.  The type is
        parsed with parseType(); each declared name is then added to the
        index as a "functype" (when parseType() found a signature), a
        "struct" (when the base type is exactly "struct") or a "typedef".
        Returns the token following the terminating ';', or None.
        """
        if token == None:
            return None
        token = self.parseType(token)
        if token == None:
            self.error("parsing typedef")
            return None
        base_type = self.type
        type = base_type
        while token != None:
            if token[0] == "name":
                name = token[1]
                signature = self.signature
                if signature != None:
                    # keep only the return type, i.e. the text before '('
                    type = string.split(type, '(')[0]
                    d = self.mergeFunctionComment(name,
                            ((type, None), signature), 1)
                    self.index.add(name, self.filename, not self.is_header,
                                   "functype", d)
                else:
                    if base_type == "struct":
                        self.index.add(name, self.filename, not self.is_header,
                                       "struct", type)
                        # names declared after this one refer to the struct
                        base_type = "struct " + name
                    else:
                        self.index.add(name, self.filename, not self.is_header,
                                       "typedef", type)
                token = self.token()
            else:
                self.error("parsing typedef: expecting a name")
                return token
            if token != None and token[0] == 'sep' and token[1] == ',':
                # another declarator: restart from the base type and
                # collect its operators (e.g. '*')
                type = base_type
                token = self.token()
                while token != None and token[0] == "op":
                    type = type + token[1]
                    token = self.token()
            elif token != None and token[0] == 'sep' and token[1] == ';':
                break;
            elif token != None and token[0] == 'name':
                type = base_type
                continue;
            else:
                self.error("parsing typedef: expecting ';'", token)
                return token
        token = self.token()
        return token
792
793 #
794 # Parse a C code block, used for functions it parse till
795 # the balancing } included
796 #
797 def parseBlock(self, token):
798 while token != None:
799 if token[0] == "sep" and token[1] == "{":
800 token = self.token()
801 token = self.parseBlock(token)
802 elif token[0] == "sep" and token[1] == "}":
803 self.comment = None
804 token = self.token()
805 return token
806 else:
807 token = self.token()
808 return token
809
810 #
811 # Parse a C struct definition till the balancing }
812 #
    def parseStruct(self, token):
        """Parse the fields of a struct up to and including the closing '}'.

        token is the first token after the opening '{'.  The fields are
        collected as (type, name, comment) tuples and stored in
        self.struct_fields.  Returns the token following the '}', or None
        when the input ends first.
        """
        fields = []
        while token != None:
            if token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseTypeBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                self.struct_fields = fields
                token = self.token()
                return token
            else:
                # parseType() overwrites self.type; the value seen on entry
                # of this branch is put back once the field is handled
                base_type = self.type
                token = self.parseType(token)
                if token != None and token[0] == "name":
                    fname = token[1]
                    token = self.token()
                    if token[0] == "sep" and token[1] == ";":
                        # the comment is cleared before fetching the next
                        # token, so only a comment met while fetching it
                        # (i.e. after the ';') is attached to the field
                        self.comment = None
                        token = self.token()
                        fields.append((self.type, fname, self.comment))
                        self.comment = None
                    else:
                        self.error("parseStruct: expecting ;", token)
                elif token != None and token[0] == "sep" and token[1] == "{":
                    # nested block: skipped, its content is not recorded
                    token = self.token()
                    token = self.parseTypeBlock(token)
                    if token != None and token[0] == "name":
                        token = self.token()
                    if token != None and token[0] == "sep" and token[1] == ";":
                        token = self.token()
                    else:
                        self.error("parseStruct: expecting ;", token)
                else:
                    self.error("parseStruct: name", token)
                    token = self.token()
                self.type = base_type;
        self.struct_fields = fields
        return token
858
859 #
860 # Parse a C enum block, parse till the balancing }
861 #
862 def parseEnumBlock(self, token):
863 self.enums = []
864 name = None
865 self.comment = None
866 comment = ""
Daniel Veillard000eafb2002-12-12 10:04:22 +0000867 value = "0"
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000868 while token != None:
869 if token[0] == "sep" and token[1] == "{":
870 token = self.token()
871 token = self.parseTypeBlock(token)
872 elif token[0] == "sep" and token[1] == "}":
873 if name != None:
874 if self.comment != None:
875 comment = self.comment
876 self.comment = None
877 self.enums.append((name, value, comment))
878 token = self.token()
879 return token
880 elif token[0] == "name":
881 if name != None:
882 if self.comment != None:
883 comment = string.strip(self.comment)
884 self.comment = None
885 self.enums.append((name, value, comment))
886 name = token[1]
887 comment = ""
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000888 token = self.token()
889 if token[0] == "op" and token[1][0] == "=":
Daniel Veillard000eafb2002-12-12 10:04:22 +0000890 value = ""
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000891 if len(token[1]) > 1:
892 value = token[1][1:]
893 token = self.token()
894 while token[0] != "sep" or (token[1] != ',' and
895 token[1] != '}'):
896 value = value + token[1]
897 token = self.token()
Daniel Veillard000eafb2002-12-12 10:04:22 +0000898 else:
899 try:
900 value = "%d" % (int(value) + 1)
901 except:
902 print "Failed to compute value of enum %s" % (name)
903 value=""
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000904 if token[0] == "sep" and token[1] == ",":
905 token = self.token()
906 else:
907 token = self.token()
908 return token
909
910 #
911 # Parse a C definition block, used for structs it parse till
912 # the balancing }
913 #
914 def parseTypeBlock(self, token):
915 while token != None:
916 if token[0] == "sep" and token[1] == "{":
917 token = self.token()
918 token = self.parseTypeBlock(token)
919 elif token[0] == "sep" and token[1] == "}":
920 token = self.token()
921 return token
922 else:
923 token = self.token()
924 return token
925
926 #
927 # Parse a type: the fact that the type name can either occur after
928 # the definition or within the definition makes it a little harder
929 # if inside, the name token is pushed back before returning
930 #
931 def parseType(self, token):
932 self.type = ""
933 self.struct_fields = []
934 self.signature = None
935 if token == None:
936 return token
937
938 while token[0] == "name" and (
939 token[1] == "const" or token[1] == "unsigned"):
940 if self.type == "":
941 self.type = token[1]
942 else:
943 self.type = self.type + " " + token[1]
944 token = self.token()
945
946 if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
947 if self.type == "":
948 self.type = token[1]
949 else:
950 self.type = self.type + " " + token[1]
951 if token[0] == "name" and token[1] == "int":
952 if self.type == "":
953 self.type = tmp[1]
954 else:
955 self.type = self.type + " " + tmp[1]
956
957 elif token[0] == "name" and token[1] == "struct":
958 if self.type == "":
959 self.type = token[1]
960 else:
961 self.type = self.type + " " + token[1]
962 token = self.token()
963 nametok = None
964 if token[0] == "name":
965 nametok = token
966 token = self.token()
967 if token != None and token[0] == "sep" and token[1] == "{":
968 token = self.token()
969 token = self.parseStruct(token)
970 elif token != None and token[0] == "op" and token[1] == "*":
971 self.type = self.type + " " + nametok[1] + " *"
972 token = self.token()
973 while token != None and token[0] == "op" and token[1] == "*":
974 self.type = self.type + " *"
975 token = self.token()
976 if token[0] == "name":
977 nametok = token
978 token = self.token()
979 else:
980 self.error("struct : expecting name", token)
981 return token
982 elif token != None and token[0] == "name" and nametok != None:
983 self.type = self.type + " " + nametok[1]
984 return token
985
986 if nametok != None:
987 self.lexer.push(token)
988 token = nametok
989 return token
990
991 elif token[0] == "name" and token[1] == "enum":
992 if self.type == "":
993 self.type = token[1]
994 else:
995 self.type = self.type + " " + token[1]
996 self.enums = []
997 token = self.token()
998 if token != None and token[0] == "sep" and token[1] == "{":
999 token = self.token()
1000 token = self.parseEnumBlock(token)
1001 else:
1002 self.error("parsing enum: expecting '{'", token)
1003 enum_type = None
1004 if token != None and token[0] != "name":
1005 self.lexer.push(token)
1006 token = ("name", "enum")
1007 else:
1008 enum_type = token[1]
1009 for enum in self.enums:
1010 self.index.add(enum[0], self.filename,
1011 not self.is_header, "enum",
1012 (enum[1], enum[2], enum_type))
1013 return token
1014
1015 elif token[0] == "name":
1016 if self.type == "":
1017 self.type = token[1]
1018 else:
1019 self.type = self.type + " " + token[1]
1020 else:
1021 self.error("parsing type %s: expecting a name" % (self.type),
1022 token)
1023 return token
1024 token = self.token()
1025 while token != None and (token[0] == "op" or
1026 token[0] == "name" and token[1] == "const"):
1027 self.type = self.type + " " + token[1]
1028 token = self.token()
1029
1030 #
1031 # if there is a parenthesis here, this means a function type
1032 #
1033 if token != None and token[0] == "sep" and token[1] == '(':
1034 self.type = self.type + token[1]
1035 token = self.token()
1036 while token != None and token[0] == "op" and token[1] == '*':
1037 self.type = self.type + token[1]
1038 token = self.token()
1039 if token == None or token[0] != "name" :
1040 self.error("parsing function type, name expected", token);
1041 return token
1042 self.type = self.type + token[1]
1043 nametok = token
1044 token = self.token()
1045 if token != None and token[0] == "sep" and token[1] == ')':
1046 self.type = self.type + token[1]
1047 token = self.token()
1048 if token != None and token[0] == "sep" and token[1] == '(':
1049 token = self.token()
1050 type = self.type;
1051 token = self.parseSignature(token);
1052 self.type = type;
1053 else:
1054 self.error("parsing function type, '(' expected", token);
1055 return token
1056 else:
1057 self.error("parsing function type, ')' expected", token);
1058 return token
1059 self.lexer.push(token)
1060 token = nametok
1061 return token
1062
1063 #
1064 # do some lookahead for arrays
1065 #
1066 if token != None and token[0] == "name":
1067 nametok = token
1068 token = self.token()
1069 if token != None and token[0] == "sep" and token[1] == '[':
1070 self.type = self.type + nametok[1]
1071 while token != None and token[0] == "sep" and token[1] == '[':
1072 self.type = self.type + token[1]
1073 token = self.token()
1074 while token != None and token[0] != 'sep' and \
1075 token[1] != ']' and token[1] != ';':
1076 self.type = self.type + token[1]
1077 token = self.token()
1078 if token != None and token[0] == 'sep' and token[1] == ']':
1079 self.type = self.type + token[1]
1080 token = self.token()
1081 else:
1082 self.error("parsing array type, ']' expected", token);
1083 return token
1084 elif token != None and token[0] == "sep" and token[1] == ':':
1085 # remove :12 in case it's a limited int size
1086 token = self.token()
1087 token = self.token()
1088 self.lexer.push(token)
1089 token = nametok
1090
1091 return token
1092
1093 #
1094 # Parse a signature: '(' has been parsed and we scan the type definition
1095 # up to the ')' included
    def parseSignature(self, token):
        """Parse the parameters of a function up to and including the ')'.

        token is the first token after the opening '('.  The parameters are
        stored in self.signature as (type, name, None) tuples; name is None
        when only the type was given, and "..." for a variadic marker.
        Returns the token following the ')', or None when the input ends
        first.
        """
        signature = []
        if token != None and token[0] == "sep" and token[1] == ')':
            # empty parameter list
            self.signature = []
            token = self.token()
            return token
        while token != None:
            token = self.parseType(token)
            if token != None and token[0] == "name":
                signature.append((self.type, token[1], None))
                token = self.token()
            elif token != None and token[0] == "sep" and token[1] == ',':
                # NOTE(review): a type directly followed by ',' is not
                # appended to the signature, unlike the ')' case below;
                # confirm this is intended.
                token = self.token()
                continue
            elif token != None and token[0] == "sep" and token[1] == ')':
                # only the type was provided
                if self.type == "...":
                    signature.append((self.type, "...", None))
                else:
                    signature.append((self.type, None, None))
            if token != None and token[0] == "sep":
                if token[1] == ',':
                    token = self.token()
                    continue
                elif token[1] == ')':
                    token = self.token()
                    break
        # assigned last: parseType() resets self.signature to None on every
        # call made above
        self.signature = signature
        return token
1125
1126 #
1127 # Parse a global definition, be it a type, variable or function
1128 # the extern "C" blocks are a bit nasty and require it to recurse.
1129 #
def parseGlobal(self, token):
    """Parse one global definition: a type, a variable or a function.

    `token` is the first token of the definition and must not be None.
    The token following the definition is returned; None means the
    input ran out.

    - extern "C" { ... } blocks are handled by recursing on every
      definition found inside the braces.
    - A leading `static` is remembered and passed on when functions
      and comma separated variables are added to self.index.
    - typedefs are delegated to parseTypedef.
    - Anything else is parsed as a type followed by a name, then
      registered in self.index as a struct, a variable or a function.
    """
    static = 0
    if token[1] == 'extern':
        token = self.token()
        if token is None:
            return token
        if token[0] == 'string':
            if token[1] == 'C':
                token = self.token()
                if token is None:
                    return token
                if token[0] == 'sep' and token[1] == "{":
                    token = self.token()
                    # Recurse on each definition until the closing '}'.
                    while token is not None and (token[0] != 'sep' or
                                                 token[1] != "}"):
                        if token[0] != 'name':
                            self.error(
                                "token %s %s unexpected at the top level" % (
                                    token[0], token[1]))
                        token = self.parseGlobal(token)
                    token = self.token()
                    return token
            else:
                return token
    elif token[1] == 'static':
        static = 1
        token = self.token()
        if token is None or token[0] != 'name':
            return token

    if token[1] == 'typedef':
        token = self.token()
        return self.parseTypedef(token)

    token = self.parseType(token)
    type_orig = self.type
    if token is None or token[0] != "name":
        return token
    decl_type = type_orig
    self.name = token[1]
    token = self.token()
    while token is not None and (token[0] == "sep" or token[0] == "op"):
        if token[0] == "sep" and token[1] == "[":
            # Array declarator: append everything up to the ';' to the type.
            decl_type = decl_type + token[1]
            token = self.token()
            while token is not None and (token[0] != "sep" or
                                         token[1] != ";"):
                decl_type = decl_type + token[1]
                token = self.token()

        if token is not None and token[0] == "op" and token[1] == "=":
            #
            # Skip the initialization of the variable
            #
            token = self.token()
            # The input may end right after '='; guard before indexing
            # so the problem is reported below instead of raising.
            if token is not None and token[0] == 'sep' and token[1] == '{':
                token = self.token()
                token = self.parseBlock(token)
            else:
                self.comment = None
                while token is not None and (token[0] != "sep" or
                                             (token[1] != ';' and
                                              token[1] != ',')):
                    token = self.token()
            self.comment = None
            if token is None or token[0] != "sep" or (token[1] != ';' and
                                                      token[1] != ','):
                self.error("missing ';' or ',' after value")

        if token is not None and token[0] == "sep":
            if token[1] == ";":
                # End of a struct or variable declaration.
                self.comment = None
                token = self.token()
                if decl_type == "struct":
                    self.index.add(self.name, self.filename,
                                   not self.is_header, "struct",
                                   self.struct_fields)
                else:
                    self.index.add(self.name, self.filename,
                                   not self.is_header, "variable",
                                   decl_type)
                break
            elif token[1] == "(":
                token = self.token()
                token = self.parseSignature(token)
                if token is None:
                    return None
                if token[0] == "sep" and token[1] == ";":
                    # Prototype only.
                    d = self.mergeFunctionComment(
                        self.name, ((decl_type, None), self.signature), 1)
                    self.index.add(self.name, self.filename, static,
                                   "function", d)
                    token = self.token()
                elif token[0] == "sep" and token[1] == "{":
                    # Definition: register it, then hand the body to
                    # parseBlock.
                    d = self.mergeFunctionComment(
                        self.name, ((decl_type, None), self.signature),
                        static)
                    self.index.add(self.name, self.filename, static,
                                   "function", d)
                    token = self.token()
                    token = self.parseBlock(token)
            elif token[1] == ',':
                # Several variables declared at once: register the
                # current one and restart from the base type.
                self.comment = None
                self.index.add(self.name, self.filename, static,
                               "variable", decl_type)
                decl_type = type_orig
                token = self.token()
                while token is not None and token[0] == "sep":
                    decl_type = decl_type + token[1]
                    token = self.token()
                if token is not None and token[0] == "name":
                    self.name = token[1]
                    token = self.token()
            else:
                break

    return token
1248
def parse(self):
    """Parse the whole file and return the index of collected symbols.

    Top-level tokens are fed to parseGlobal until the input runs out.
    When a token that is not a name shows up at the top level, the
    problem is reported through self.error, parseGlobal is given one
    attempt on it and parsing stops.  self.index is returned in every
    case so callers can always merge the result.
    """
    print("Parsing %s" % (self.filename))
    token = self.token()
    while token is not None:
        if token[0] == 'name':
            token = self.parseGlobal(token)
        else:
            self.error("token %s %s unexpected at the top level" % (
                token[0], token[1]))
            self.parseGlobal(token)
            # Stop here, but still hand back what was indexed so far
            # rather than None.
            break
    return self.index
1261
1262
class docBuilder:
    """A documentation builder

    Collects the C modules (*.c) and headers (*.h) found in a list of
    directories, parses each one with CParser, merges the per-file
    indexes into self.idx and serializes the result as an XML API
    description.
    """
    def __init__(self, name, directories=None, excludes=None):
        """Create a builder for project `name`.

        directories: directories to scan, defaults to ['.'].
        excludes: substrings of file paths to skip; the keys of the
            module level ignored_files table are always added.
        """
        if directories is None:
            directories = ['.']
        if excludes is None:
            excludes = []
        self.name = name
        self.directories = directories
        self.excludes = list(excludes) + list(ignored_files.keys())
        self.modules = {}
        self.headers = {}
        self.idx = index()

    def _sorted_keys(self, table):
        """Return the keys of `table` as a sorted list."""
        keys = list(table.keys())
        keys.sort()
        return keys

    def _is_excluded(self, path):
        """Return 1 when `path` contains one of the excluded names."""
        for excl in self.excludes:
            if excl in path:
                return 1
        return 0

    def _collect(self, pattern, table):
        """Register in `table` every non excluded file matching `pattern`."""
        for path in glob.glob(pattern):
            if not self._is_excluded(path):
                table[path] = None

    def analyze(self):
        """Print the header and module counts, then analyze the index."""
        print("Project %s : %d headers, %d modules" % (
            self.name, len(self.headers), len(self.modules)))
        self.idx.analyze()

    def scanHeaders(self):
        """Parse every registered header and merge its index."""
        for header in list(self.headers.keys()):
            parser = CParser(header)
            idx = parser.parse()
            self.headers[header] = idx
            self.idx.merge(idx)

    def scanModules(self):
        """Parse every registered module and merge its index with
        merge_public."""
        for module in list(self.modules.keys()):
            parser = CParser(module)
            idx = parser.parse()
            # idx.analyze()
            self.modules[module] = idx
            self.idx.merge_public(idx)

    def scan(self):
        """Find the modules and headers of all directories, then parse
        the headers followed by the modules."""
        for directory in self.directories:
            self._collect(directory + "/*.c", self.modules)
            self._collect(directory + "/*.h", self.headers)
        self.scanHeaders()
        self.scanModules()

    def modulename_file(self, file):
        """Return the base name of `file`, without a trailing '.h'."""
        module = os.path.basename(file)
        if module[-2:] == '.h':
            module = module[:-2]
        return module

    def serialize_enum(self, output, name):
        """Write the <enum> element describing `name` to `output`."""
        entry = self.idx.enums[name]
        output.write(" <enum name='%s' file='%s'" % (
            name, self.modulename_file(entry.module)))
        if entry.info is not None:
            info = entry.info
            if info[0] != None and info[0] != '':
                output.write(" value='%s'" % info[0])
            if info[2] != None and info[2] != '':
                output.write(" type='%s'" % info[2])
            if info[1] != None and info[1] != '':
                output.write(" info='%s'" % escape(info[1]))
        output.write("/>\n")

    def serialize_macro(self, output, name):
        """Write the <macro> element describing `name` to `output`."""
        entry = self.idx.macros[name]
        output.write(" <macro name='%s' file='%s'>\n" % (
            name, self.modulename_file(entry.module)))
        if entry.info is not None:
            try:
                (args, desc) = entry.info
                if desc is not None and desc != "":
                    output.write(" <info>%s</info>\n" % (escape(desc)))
                for arg in args:
                    (arg_name, arg_desc) = arg
                    if arg_desc is not None and arg_desc != "":
                        output.write(" <arg name='%s' info='%s'/>\n" % (
                            arg_name, escape(arg_desc)))
                    else:
                        output.write(" <arg name='%s'/>\n" % (arg_name))
            except Exception:
                # Best effort: info that does not have the
                # (args, description) shape is skipped silently and the
                # element is still closed below.
                pass
        output.write(" </macro>\n")

    def serialize_typedef(self, output, name):
        """Write the element describing typedef `name` to `output`.

        A typedef whose type starts with 'struct ' is written as a
        <struct> element, with one <field> per known field when the
        structure is indexed; any other typedef becomes <typedef>.
        """
        entry = self.idx.typedefs[name]
        if entry.info[0:7] != 'struct ':
            output.write(" <typedef name='%s' file='%s' type='%s'/>\n" % (
                name, self.modulename_file(entry.module), entry.info))
            return

        output.write(" <struct name='%s' file='%s' type='%s'" % (
            name, self.modulename_file(entry.module), entry.info))
        name = entry.info[7:]
        if name in self.idx.structs and \
           isinstance(self.idx.structs[name].info, (tuple, list)):
            output.write(">\n")
            try:
                for field in self.idx.structs[name].info:
                    desc = field[2]
                    if desc is None:
                        desc = ''
                    else:
                        desc = escape(desc)
                    output.write(
                        " <field name='%s' type='%s' info='%s'/>\n" % (
                            field[1], field[0], desc))
            except Exception:
                print("Failed to serialize struct %s" % (name))
            output.write(" </struct>\n")
        else:
            output.write("/>\n")

    def serialize_variable(self, output, name):
        """Write the <variable> element describing `name` to `output`."""
        entry = self.idx.variables[name]
        if entry.info is not None:
            output.write(" <variable name='%s' file='%s' type='%s'/>\n" % (
                name, self.modulename_file(entry.module), entry.info))
        else:
            output.write(" <variable name='%s' file='%s'/>\n" % (
                name, self.modulename_file(entry.module)))

    def serialize_function(self, output, name):
        """Write the element describing function `name` to `output`.

        The element tag is the type recorded for the entry.  When the
        recorded info cannot be written, a message is printed and the
        element is still closed.
        """
        entry = self.idx.functions[name]
        output.write(" <%s name='%s' file='%s'>\n" % (
            entry.type, name, self.modulename_file(entry.module)))
        try:
            (ret, params, desc) = entry.info
            output.write(" <info>%s</info>\n" % (escape(desc)))
            if ret[0] is not None:
                if ret[0] == "void":
                    output.write(" <return type='void'/>\n")
                else:
                    output.write(" <return type='%s' info='%s'/>\n" % (
                        ret[0], escape(ret[1])))
            for param in params:
                if param[0] == 'void':
                    continue
                if param[2] is None:
                    output.write(" <arg name='%s' type='%s' info=''/>\n" % (
                        param[1], param[0]))
                else:
                    output.write(
                        " <arg name='%s' type='%s' info='%s'/>\n" % (
                            param[1], param[0], escape(param[2])))
        except Exception:
            print("Failed to save function %s info:  %s" % (
                name, repr(entry.info)))
        output.write(" </%s>\n" % (entry.type))

    def serialize_exports(self, output, file):
        """Write the <file> element listing, in sorted order, every
        symbol indexed for header `file`."""
        module = self.modulename_file(file)
        output.write(" <file name='%s'>\n" % (module))
        exported = self.headers[file]
        symbols = []
        for table in (exported.functions, exported.variables,
                      exported.macros, exported.typedefs,
                      exported.structs, exported.enums):
            symbols.extend(table.keys())
        symbols.sort()
        for symbol in symbols:
            output.write(" <exports symbol='%s'/>\n" % (symbol))
        output.write(" </file>\n")

    def serialize(self, filename = None):
        """Save the XML description of the API.

        filename: output path, defaults to "<name>-api.xml".
        Headers and symbols are written in sorted order so the output
        does not depend on dictionary ordering.
        """
        if filename is None:
            filename = "%s-api.xml" % self.name
        print("Saving XML description %s" % (filename))
        output = open(filename, "w")
        try:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<api name='%s'>\n" % self.name)
            output.write(" <files>\n")
            for file in self._sorted_keys(self.headers):
                self.serialize_exports(output, file)
            output.write(" </files>\n")
            output.write(" <symbols>\n")
            for macro in self._sorted_keys(self.idx.macros):
                self.serialize_macro(output, macro)
            for enum in self._sorted_keys(self.idx.enums):
                self.serialize_enum(output, enum)
            for typedef in self._sorted_keys(self.idx.typedefs):
                self.serialize_typedef(output, typedef)
            for variable in self._sorted_keys(self.idx.variables):
                self.serialize_variable(output, variable)
            for function in self._sorted_keys(self.idx.functions):
                self.serialize_function(output, function)
            output.write(" </symbols>\n")
            output.write("</api>\n")
        finally:
            # Release the file even when a serializer raises.
            output.close()
1463
1464
def rebuild():
    """Rebuild the API description of the project found next to this script.

    The project is guessed from the presence of a well known source
    file: ../parser.c selects libxml2, ../libxslt/transform.c selects
    libxslt.  The selected project is scanned, analyzed and serialized;
    when ../libexslt/exslt.c exists libexslt is processed as well.

    Returns the builder of the main project, or None when no project
    could be identified.
    """
    # (probe file, message, project name, directories, excluded files)
    candidates = (
        ("../parser.c",
         "Rebuilding API description for libxml2",
         "libxml2", ["..", "../include/libxml"],
         ["xmlwin32version.h", "tst.c"]),
        ("../libxslt/transform.c",
         "Rebuilding API description for libxslt",
         "libxslt", ["../libxslt"],
         ["win32config.h", "libxslt.h", "tst.c"]),
    )

    builder = None
    for probe, message, project, dirs, skipped in candidates:
        if glob.glob(probe):
            print(message)
            builder = docBuilder(project, dirs, skipped)
            break

    if builder is None:
        print("rebuild() failed, unable to guess the module")
        return None

    builder.scan()
    builder.analyze()
    builder.serialize()

    if glob.glob("../libexslt/exslt.c"):
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize()
    return builder
1487
1488#
1489# for debugging the parser
1490#
def parse(filename):
    """Parse the single file `filename` with CParser and return whatever
    its parse() method returns (debugging helper)."""
    return CParser(filename).parse()
1495
# Script entry point: rebuild the API description when run directly.
if __name__ == "__main__":
    rebuild()