blob: f0308bc4ab25f1b43ea2d949f8d081d98c9a3132 [file] [log] [blame]
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001#!/usr/bin/python -u
2#
# This is the API builder: it parses the C sources and builds the
# formal API description in XML.
5#
6# See Copyright for the status of this software.
7#
8# daniel@veillard.com
9#
Daniel Veillard540a31a2003-01-21 11:21:07 +000010import os, sys
Daniel Veillarda9b66d02002-12-11 14:23:49 +000011import string
12import glob
13
Daniel Veillardde0a0a52003-04-24 17:12:57 +000014#
15# C parser analysis code
16#
# Source files that are not analysed, mapped to the reason for skipping them.
# NOTE(review): the code consulting this table is outside this view.
# Fix: the "testOOM.c" and "testOOMlib.h" entries were missing their trailing
# commas, which made the whole literal a syntax error.
ignored_files = {
    "trio": "too many non standard macros",
    "trio.c": "too many non standard macros",
    "trionan.c": "too many non standard macros",
    "triostr.c": "too many non standard macros",
    "acconfig.h": "generated portability layer",
    "config.h": "generated portability layer",
    "libxml.h": "internal only",
    "testOOM.c": "out of memory tester",
    "testOOMlib.h": "out of memory tester",
    "testOOMlib.c": "out of memory tester",
}

# Names dropped by CParser.token().  Each value is a pair
# (number of extra tokens to swallow after the name, reason).
ignored_words = {
    "WINAPI": (0, "Windows keyword"),
    "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
    "__declspec": (3, "Windows keyword"),
    "ATTRIBUTE_UNUSED": (0, "macro keyword"),
    "LIBEXSLT_PUBLIC": (0, "macro keyword"),
    "X_IN_Y": (5, "macro function builder"),
}
38
def escape(raw):
    """Return raw with the five XML special characters turned into entities.

    The ampersand is replaced first so that the '&' introduced by the
    other entities is not escaped a second time.
    """
    # Fix: '&' used to be replaced by itself, leaving ampersands unescaped.
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
46
class identifier:
    """Record describing one named symbol: its name, the module it was
    seen in, its kind (type), two free-form payloads (info, extra) and a
    static flag that starts at 0."""

    def __init__(self, name, module=None, type=None, info=None, extra=None):
        self.name = name
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.static = 0

    def __repr__(self):
        # Assemble "<type> <name>:" followed by the optional parts.
        pieces = ["%s %s:" % (self.type, self.name)]
        if self.static:
            pieces.append(" static")
        if self.module is not None:
            pieces.append(" from %s" % (self.module))
        if self.info is not None:
            pieces.append(" " + repr(self.info))
        if self.extra is not None:
            pieces.append(" " + repr(self.extra))
        return "".join(pieces)

    def set_module(self, module):
        self.module = module

    def set_type(self, type):
        self.type = type

    def set_info(self, info):
        self.info = info

    def set_extra(self, extra):
        self.extra = extra

    def set_static(self, static):
        self.static = static

    def update(self, module, type=None, info=None, extra=None):
        """Merge new data into the record.

        module and type are only filled in when still unset; info and
        extra overwrite the stored value whenever a non-None value is given.
        """
        if self.module is None and module is not None:
            self.set_module(module)
        if self.type is None and type is not None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
89
90
class index:
    """Collection of identifier records, kept both in one global table
    (identifiers) and in per-kind tables (functions, variables, ...)."""

    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}

    def add(self, name, module, static, type, info=None, extra=None):
        """Register (or update) the record for name and file it under the
        table matching type.

        Names starting with '__' are ignored and None is returned;
        otherwise the identifier record is returned.
        """
        if name[0:2] == '__':
            return None

        entry = self.identifiers.get(name)
        if entry is None:
            entry = identifier(name, module, type, info, extra)
            self.identifiers[name] = entry
        else:
            entry.update(module, type, info, extra)

        if static == 1:
            entry.set_static(1)

        if name is not None and type is not None:
            # "functype" entries share the functions table.
            tables = {
                "function": self.functions,
                "functype": self.functions,
                "variable": self.variables,
                "include": self.includes,
                "struct": self.structs,
                "enum": self.enums,
                "typedef": self.typedefs,
                "macro": self.macros,
            }
            if type in tables:
                tables[type][name] = entry
            else:
                print("Unable to register type  %s" % (type,))
        return entry

    def _absorb(self, message, mine, theirs, overrides_macro):
        # Copy entries of theirs missing from mine (and into the global
        # table); report the ones already present using message.
        for key in theirs.keys():
            # a macro might be used to override functions or variables
            # definitions
            if overrides_macro and key in self.macros:
                del self.macros[key]
            if key in mine:
                print(message % (key, mine[key].module, theirs[key].module))
            else:
                mine[key] = theirs[key]
                self.identifiers[key] = theirs[key]

    def merge(self, idx):
        """Fold the functions, variables, structs, typedefs, macros and
        enums of idx into this index, reporting redeclarations."""
        self._absorb("function %s from %s redeclared in %s",
                     self.functions, idx.functions, 1)
        self._absorb("variable %s from %s redeclared in %s",
                     self.variables, idx.variables, 1)
        self._absorb("struct %s from %s redeclared in %s",
                     self.structs, idx.structs, 0)
        self._absorb("typedef %s from %s redeclared in %s",
                     self.typedefs, idx.typedefs, 0)
        for key in idx.macros.keys():
            # a macro shadowed by a variable, function or enum of the
            # same name is dropped silently
            if key in self.variables or key in self.functions or \
               key in self.enums:
                continue
            if key in self.macros:
                print("macro %s from %s redeclared in %s" % (
                    key, self.macros[key].module, idx.macros[key].module))
            else:
                self.macros[key] = idx.macros[key]
                self.identifiers[key] = idx.macros[key]
        self._absorb("enum %s from %s redeclared in %s",
                     self.enums, idx.enums, 0)

    def merge_public(self, idx):
        """Update the functions already known here with the type, info
        and extra data of the same-named functions of idx."""
        for key in idx.functions.keys():
            if key in self.functions:
                newer = idx.functions[key]
                self.functions[key].update(None, newer.type, newer.info,
                                           newer.extra)
        # TODO: do the same for variables.

    def analyze_dict(self, type, dict):
        """Print how many entries dict holds and how many are non-static."""
        total = len(dict)
        public = len([1 for entry in dict.values() if entry.static == 0])
        if total != public:
            print(" %d %s , %d public" % (total, type, public))
        elif total != 0:
            print(" %d public %s" % (total, type))

    def analyze(self):
        """Print the statistics of the main per-kind tables."""
        for label, table in (("functions", self.functions),
                             ("variables", self.variables),
                             ("structs", self.structs),
                             ("typedefs", self.typedefs),
                             ("macros", self.macros)):
            self.analyze_dict(label, table)
235
class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line.

       Tokens are (kind, text) pairs where kind is one of 'preproc',
       'string', 'comment', 'name', 'sep' or 'op'."""

    # characters which end a 'name' token
    _NAME_STOP = " \t(){}:;,+-*/%&!|[]=><"
    _SEPARATORS = "(){}:;,[]"
    _OP_START = "+-*><=/%&!|."
    _OP_NEXT = "+-*><=/%&!|"

    def __init__(self, input):
        self.input = input
        self.tokens = []
        self.line = ""
        self.lineno = 0
        # Fix: debug() reads self.last, which previously only existed
        # once token() had returned something.
        self.last = None

    def getline(self):
        """Return the next non-empty line, stripped of surrounding
        whitespace, with backslash continuations joined; None at end of
        input."""
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = line.strip()
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = n.strip()
                if not n:
                    break
                line = line + n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        """Put token back at the front of the pending token queue."""
        self.tokens.insert(0, token)

    def debug(self):
        print("Last token:  %s" % (self.last,))
        print("Token queue:  %s" % (self.tokens,))
        print("Line %d end:  %s" % (self.lineno, self.line))

    def _scan_name(self, line, i, l):
        # Return the index of the first name-ending character at or after i.
        while i < l and line[i] not in self._NAME_STOP:
            i = i + 1
        return i

    def token(self):
        """Return the next token, or None when the input is exhausted."""
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line is None:
                return None

            if line[0] == '#':
                # a preprocessor line is split on whitespace as a whole
                self.tokens = [('preproc', word) for word in line.split()]
                break
            l = len(line)
            if line[0] == '"' or line[0] == "'":
                end = line[0]
                line = line[1:]
                found = 0
                tok = ""
                while found == 0:
                    i = 0
                    l = len(line)
                    while i < l:
                        if line[i] == end:
                            self.line = line[i+1:]
                            line = line[:i]
                            l = i
                            found = 1
                            break
                        if line[i] == '\\':
                            # skip the escaped character
                            i = i + 1
                        i = i + 1
                    tok = tok + line
                    if found == 0:
                        line = self.getline()
                        if line is None:
                            return None
                self.last = ('string', tok)
                return self.last

            if l >= 2 and line[0] == '/' and line[1] == '*':
                line = line[2:]
                found = 0
                tok = ""
                while found == 0:
                    i = 0
                    l = len(line)
                    while i < l:
                        if line[i] == '*' and i+1 < l and line[i+1] == '/':
                            self.line = line[i+2:]
                            # NOTE: also drops the character before the
                            # closing "*/"; when the line starts with "*/"
                            # (i == 0) this keeps the leading '*'.  Kept
                            # as is: the comment parsers test for a
                            # leading '*' in the comment text.
                            line = line[:i-1]
                            l = i
                            found = 1
                            break
                        i = i + 1
                    if tok != "":
                        tok = tok + "\n"
                    tok = tok + line
                    if found == 0:
                        line = self.getline()
                        if line is None:
                            return None
                self.last = ('comment', tok)
                return self.last
            if l >= 2 and line[0] == '/' and line[1] == '/':
                line = line[2:]
                self.last = ('comment', line)
                return self.last

            # cut the line at the first comment or string start, the
            # remainder is kept for the next call
            i = 0
            while i < l:
                if line[i] == '/' and i+1 < l and \
                   (line[i+1] == '/' or line[i+1] == '*'):
                    self.line = line[i:]
                    line = line[:i]
                    break
                if line[i] == '"' or line[i] == "'":
                    self.line = line[i:]
                    line = line[:i]
                    break
                i = i + 1

            l = len(line)
            i = 0
            while i < l:
                c = line[i]
                if c == ' ' or c == '\t':
                    i = i + 1
                    continue
                if ('a' <= c <= 'z') or ('A' <= c <= 'Z') or \
                   ('0' <= c <= '9'):
                    s = i
                    i = self._scan_name(line, i, l)
                    self.tokens.append(('name', line[s:i]))
                    continue
                if c in self._SEPARATORS:
                    self.tokens.append(('sep', c))
                    i = i + 1
                    continue
                if c in self._OP_START:
                    if c == '.' and i + 2 < l and \
                       line[i+1] == '.' and line[i+2] == '.':
                        self.tokens.append(('name', '...'))
                        i = i + 3
                        continue

                    j = i + 1
                    if j < l and line[j] in self._OP_NEXT:
                        # two character operator
                        self.tokens.append(('op', line[i:j+1]))
                        i = j + 1
                    else:
                        self.tokens.append(('op', c))
                        i = i + 1
                    continue
                # anything else (e.g. a leading '_') also starts a name
                s = i
                i = self._scan_name(line, i, l)
                self.tokens.append(('name', line[s:i]))

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok
441
442class CParser:
443 """The C module parser"""
def __init__(self, filename, idx = None):
    """Open filename and prepare a lexer over it.

    is_header is 1 when the name is longer than two characters and ends
    with '.h', 0 otherwise.  Results go to idx when given, else to a
    fresh index.
    """
    self.filename = filename
    self.is_header = 0
    if len(filename) > 2 and filename[-2:] == '.h':
        self.is_header = 1
    self.input = open(filename)
    self.lexer = CLexer(self.input)
    if idx is not None:
        self.index = idx
    else:
        self.index = index()
    self.top_comment = ""
    self.last_comment = ""
    self.comment = None
459
def lineno(self):
    """Return the line number currently reported by the lexer."""
    current = self.lexer.getlineno()
    return current
462
def error(self, msg, token=-1):
    """Report a parse error, dump the lexer state and exit with status 1.

    token is only printed when the caller passed something other than
    the -1 default.
    """
    print("Parse Error: " + msg)
    if not (token == -1):
        print("Got token  %s" % (token,))
    self.lexer.debug()
    sys.exit(1)
469
def debug(self, msg, token=-1):
    """Print a debug message, the optional token, and the lexer state."""
    print("Debug: " + msg)
    if not (token == -1):
        print("Got token  %s" % (token,))
    self.lexer.debug()
475
def parseComment(self, token):
    """Record the text of a comment token and return the next lexer token.

    The first comment seen becomes top_comment.  A comment whose text
    starts with '*' replaces the pending comment, any other one is
    appended to it.
    """
    text = token[1]
    if self.top_comment == "":
        self.top_comment = text
    # Fix: text[:1] instead of text[0], an empty comment (a bare "//")
    # used to raise IndexError once a comment was already pending.
    if self.comment is None or text[:1] == '*':
        self.comment = text
    else:
        self.comment = self.comment + text
    return self.lexer.token()
485
486 #
# Parse the comment block associated with a macro
488 #
def parseMacroComment(self, name, quiet = 0):
    """Extract the documentation of macro name from the pending comment.

    The comment is expected to look like:
        *
        * name:
        * @arg: description
        *
        * free text description
    Returns (args, desc) where args is a list of (arg, description)
    pairs and desc the free text.  Problems are reported on stdout
    unless quiet is true or name starts with '__'; ([], "") is returned
    when the comment is missing or does not start as expected.
    """
    if name[0:2] == '__':
        quiet = 1

    args = []
    desc = ""

    if self.comment is None:
        if not quiet:
            print("Missing comment for macro %s" % (name))
        return((args, desc))
    # Fix: slicing instead of indexing, an empty comment used to raise.
    if self.comment[:1] != '*':
        if not quiet:
            print("Missing * in macro comment for %s" % (name))
        return((args, desc))
    lines = self.comment.split('\n')
    if lines[0] == '*':
        del lines[0]
    # Fix: the comment may hold nothing more than the leading '*'.
    if not lines or lines[0] != "* %s:" % (name):
        if not quiet:
            got = ""
            if lines:
                got = lines[0]
            print("Misformatted macro comment for %s" % (name))
            print(" Expecting '* %s:' got '%s'" % (name, got))
        return((args, desc))
    del lines[0]
    # Fix: every loop below checks that lines remain before reading
    # lines[0]; truncated comments used to raise IndexError.
    while lines and lines[0] == '*':
        del lines[0]
    while lines and lines[0][0:3] == '* @':
        l = lines[0][3:]
        try:
            (arg, desc) = l.split(':', 1)
        except ValueError:
            # no ':' separating the argument name from its description
            if not quiet:
                print("Misformatted macro comment for %s" % (name))
                print(" problem with '%s'" % (lines[0]))
            del lines[0]
            continue
        desc = desc.strip()
        arg = arg.strip()
        del lines[0]
        l = ""
        if lines:
            l = lines[0].strip()
        # continuation lines of the argument description
        while len(l) > 2 and l[0:3] != '* @':
            while l[:1] == '*':
                l = l[1:]
            desc = desc + ' ' + l.strip()
            del lines[0]
            if len(lines) == 0:
                break
            l = lines[0]
        args.append((arg, desc))
    while lines and lines[0] == '*':
        del lines[0]
    # whatever remains is the description of the macro itself
    desc = ""
    while lines:
        l = lines[0]
        while len(l) > 0 and l[0] == '*':
            l = l[1:]
        desc = desc + " " + l.strip()
        del lines[0]

    desc = desc.strip()

    if quiet == 0:
        if desc == "":
            print("Macro comment for %s lack description of the macro" % (name))

    return((args, desc))
556
557 #
# Parse a comment block and merge the information found in the
# parameter descriptions; finally return a block as complete
# as possible
561 #
def mergeFunctionComment(self, name, description, quiet = 0):
    """Merge the pending comment into the description of function name.

    description is (ret, args): ret[0] is the return type and args a
    list of (type, name, description) tuples, updated in place with the
    text of the matching '* @name: ...' comment lines.
    Returns ((ret[0], return description), args, function description).
    Problems are reported on stdout unless quiet is true, name is
    'main' or name starts with '__'.
    """
    if name == 'main':
        quiet = 1
    if name[0:2] == '__':
        quiet = 1

    (ret, args) = description
    desc = ""
    retdesc = ""

    if self.comment is None:
        if not quiet:
            print("Missing comment for function %s" % (name))
        return(((ret[0], retdesc), args, desc))
    # Fix: slicing instead of indexing, an empty comment used to raise.
    if self.comment[:1] != '*':
        if not quiet:
            print("Missing * in function comment for %s" % (name))
        return(((ret[0], retdesc), args, desc))
    lines = self.comment.split('\n')
    if lines[0] == '*':
        del lines[0]
    # Fix: the comment may hold nothing more than the leading '*'.
    if not lines or lines[0] != "* %s:" % (name):
        if not quiet:
            got = ""
            if lines:
                got = lines[0]
            print("Misformatted function comment for %s" % (name))
            print(" Expecting '* %s:' got '%s'" % (name, got))
        return(((ret[0], retdesc), args, desc))
    del lines[0]
    # Fix: every loop below checks that lines remain before reading
    # lines[0]; truncated comments used to raise IndexError.
    while lines and lines[0] == '*':
        del lines[0]
    nbargs = len(args)
    while lines and lines[0][0:3] == '* @':
        l = lines[0][3:]
        try:
            (arg, desc) = l.split(':', 1)
        except ValueError:
            # no ':' separating the argument name from its description
            if not quiet:
                print("Misformatted function comment for %s" % (name))
                print(" problem with '%s'" % (lines[0]))
            del lines[0]
            continue
        desc = desc.strip()
        arg = arg.strip()
        del lines[0]
        l = ""
        if lines:
            l = lines[0].strip()
        # continuation lines of the argument description
        while len(l) > 2 and l[0:3] != '* @':
            while l[:1] == '*':
                l = l[1:]
            desc = desc + ' ' + l.strip()
            del lines[0]
            if len(lines) == 0:
                break
            l = lines[0]
        # attach the description to the argument of the same name
        i = 0
        while i < nbargs:
            if args[i][1] == arg:
                args[i] = (args[i][0], arg, desc)
                break
            i = i + 1
        if i >= nbargs:
            if not quiet:
                print("Uname to find arg %s from function comment for %s" % (
                    arg, name))
    while lines and lines[0] == '*':
        del lines[0]
    desc = ""
    while lines:
        l = lines[0]
        while len(l) > 0 and l[0] == '*':
            l = l[1:]
        l = l.strip()
        if l[0:6] == "return" or l[0:6] == "Return":
            # everything from here on describes the return value; the
            # first word ("Returns", "return", ...) is dropped
            try:
                l = l.split(' ', 1)[1]
            except IndexError:
                l = ""
            retdesc = l.strip()
            del lines[0]
            while lines:
                l = lines[0]
                while len(l) > 0 and l[0] == '*':
                    l = l[1:]
                l = l.strip()
                retdesc = retdesc + " " + l
                del lines[0]
        else:
            desc = desc + " " + l
            del lines[0]

    retdesc = retdesc.strip()
    desc = desc.strip()

    if quiet == 0:
        #
        # report missing comments
        #
        i = 0
        while i < nbargs:
            if args[i][2] is None and args[i][0] != "void" and \
               args[i][1] is not None:
                print("Function comment for %s lack description of arg %s" % (name, args[i][1]))
            i = i + 1
        if retdesc == "" and ret[0] != "void":
            print("Function comment for %s lack description of return value" % (name))
        if desc == "":
            print("Function comment for %s lack description of the function" % (name))

    return(((ret[0], retdesc), args, desc))
669
def parsePreproc(self, token):
    """Handle a preprocessor directive starting at token and return the
    first lexer token following it.

    '#include' registers the included name in the index and '#define'
    registers the macro (name cut at the first '(') together with its
    parsed comment; the remaining words of any directive are skipped.
    """
    directive = token[1]
    static = not self.is_header
    if directive == "#include":
        token = self.lexer.token()
        if token is None:
            return None
        if token[0] != 'preproc':
            return token
        self.index.add(token[1], self.filename, static, "include")
        return self.lexer.token()
    if directive == "#define":
        token = self.lexer.token()
        if token is None:
            return None
        if token[0] == 'preproc':
            # TODO macros with arguments
            macro = token[1]
            # skip the words of the macro body
            token = self.lexer.token()
            while token is not None and token[0] == 'preproc' and \
                  token[1][0] != '#':
                token = self.lexer.token()
            macro = macro.split('(')[0]
            info = self.parseMacroComment(macro, static)
            self.index.add(macro, self.filename, static, "macro", info)
            return token
    # any other directive: drop its words up to the next directive or
    # the first non preprocessor token
    token = self.lexer.token()
    while token is not None and token[0] == 'preproc' and \
          token[1][0] != '#':
        token = self.lexer.token()
    return token
707
708 #
# Token acquisition on top of the lexer: it handles preprocessor
# directives and comments internally, since they are logically not
# part of the program structure.
712 #
def token(self):
    """Return the next significant token, or None at end of input.

    Comments and preprocessor directives are consumed through
    parseComment() and parsePreproc(); names listed in ignored_words are
    dropped together with the number of following tokens given there.
    """
    tok = self.lexer.token()
    while tok is not None:
        kind = tok[0]
        if kind == 'comment':
            tok = self.parseComment(tok)
        elif kind == 'preproc':
            tok = self.parsePreproc(tok)
        elif kind == "name" and tok[1] in ignored_words:
            (skip, info) = ignored_words[tok[1]]
            # swallow the extra tokens attached to the ignored word ...
            for _ in range(skip):
                tok = self.lexer.token()
            # ... then move on to the token after them
            tok = self.lexer.token()
        else:
            return tok
    return None
736
737 #
738 # Parse a typedef, it records the type and its name.
739 #
def parseTypedef(self, token):
    """Parse the declaration following a 'typedef' keyword and register
    every declared name in the index.

    A name is recorded as "functype" when parseType() produced a
    signature, as "struct" when the base type is exactly "struct", and
    as "typedef" otherwise.  Returns the token following the closing ';'.
    """
    if token is None:
        return None
    token = self.parseType(token)
    if token is None:
        self.error("parsing typedef")
        return None
    static = not self.is_header
    base_type = self.type
    cur_type = base_type
    while token is not None:
        if token[0] != "name":
            self.error("parsing typedef: expecting a name")
            return token
        name = token[1]
        signature = self.signature
        if signature is not None:
            # keep only the return type of the function type
            cur_type = cur_type.split('(')[0]
            d = self.mergeFunctionComment(name,
                                          ((cur_type, None), signature), 1)
            self.index.add(name, self.filename, static, "functype", d)
        elif base_type == "struct":
            self.index.add(name, self.filename, static, "struct", cur_type)
            base_type = "struct " + name
        else:
            self.index.add(name, self.filename, static, "typedef", cur_type)
        token = self.token()

        if token is not None and token[0] == 'sep' and token[1] == ',':
            # another declarator, collect its pointer operators
            cur_type = base_type
            token = self.token()
            while token is not None and token[0] == "op":
                cur_type = cur_type + token[1]
                token = self.token()
        elif token is not None and token[0] == 'sep' and token[1] == ';':
            break
        elif token is not None and token[0] == 'name':
            cur_type = base_type
            continue
        else:
            self.error("parsing typedef: expecting ';'", token)
            return token
    token = self.token()
    return token
789
790 #
791 # Parse a C code block, used for functions it parse till
792 # the balancing } included
793 #
def parseBlock(self, token):
    """Skip a C code block up to and including its balancing '}'.

    Nested '{' blocks are skipped recursively.  The pending comment is
    reset when the closing '}' is met.  Returns the token after the
    block, or None when the input ends first.
    """
    while token is not None:
        if token[0] != "sep":
            token = self.token()
        elif token[1] == "{":
            token = self.parseBlock(self.token())
        elif token[1] == "}":
            self.comment = None
            return self.token()
        else:
            token = self.token()
    return token
806
807 #
808 # Parse a C struct definition till the balancing }
809 #
def parseStruct(self, token):
    """Parse the body of a struct up to its balancing '}'.

    The (type, name, comment) tuples of the fields are stored in
    self.struct_fields and self.type is restored after each field.
    Returns the token following the '}'.
    """
    members = []
    while token is not None:
        is_sep = token[0] == "sep"
        if is_sep and token[1] == "{":
            token = self.parseTypeBlock(self.token())
            continue
        if is_sep and token[1] == "}":
            self.struct_fields = members
            return self.token()

        saved_type = self.type
        token = self.parseType(token)
        if token is not None and token[0] == "name":
            member = token[1]
            token = self.token()
            if token[0] == "sep" and token[1] == ";":
                # the comment is reset before fetching the next token so
                # only a comment following the field gets attached to it
                self.comment = None
                token = self.token()
                members.append((self.type, member, self.comment))
                self.comment = None
            else:
                self.error("parseStruct: expecting ;", token)
        elif token is not None and token[0] == "sep" and token[1] == "{":
            # nested block: skipped, with its optional name
            token = self.parseTypeBlock(self.token())
            if token is not None and token[0] == "name":
                token = self.token()
            if token is not None and token[0] == "sep" and token[1] == ";":
                token = self.token()
            else:
                self.error("parseStruct: expecting ;", token)
        else:
            self.error("parseStruct: name", token)
            token = self.token()
        self.type = saved_type
    self.struct_fields = members
    return token
855
856 #
857 # Parse a C enum block, parse till the balancing }
858 #
def parseEnumBlock(self, token):
    """Parse the body of an enum up to its balancing '}'.

    self.enums receives one (name, value, comment) tuple per
    enumerator.  value is the text following '=' when present,
    otherwise the previous value plus one ("" when that cannot be
    computed).  Returns the token following the '}'.
    """
    self.enums = []
    name = None
    self.comment = None
    comment = ""
    # Fix: start at -1 so that the first enumerator without an explicit
    # value gets 0 as in C; it used to get 1.
    value = "-1"
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseTypeBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            if name is not None:
                if self.comment is not None:
                    comment = self.comment
                    self.comment = None
                self.enums.append((name, value, comment))
            token = self.token()
            return token
        elif token[0] == "name":
            # a new name closes the previous enumerator
            if name is not None:
                if self.comment is not None:
                    comment = self.comment.strip()
                    self.comment = None
                self.enums.append((name, value, comment))
            name = token[1]
            comment = ""
            token = self.token()
            # Fix: token is checked against None below, an input ending
            # inside the enum used to raise TypeError.
            if token is not None and token[0] == "op" and \
               token[1][0] == "=":
                value = ""
                # the lexer may have glued another operator to '='
                if len(token[1]) > 1:
                    value = token[1][1:]
                token = self.token()
                while token is not None and (
                      token[0] != "sep" or (token[1] != ',' and
                                            token[1] != '}')):
                    value = value + token[1]
                    token = self.token()
            else:
                try:
                    value = "%d" % (int(value) + 1)
                except ValueError:
                    print("Failed to compute value of enum %s" % (name))
                    value = ""
            if token is not None and token[0] == "sep" and \
               token[1] == ",":
                token = self.token()
        else:
            token = self.token()
    return token
906
907 #
908 # Parse a C definition block, used for structs it parse till
909 # the balancing }
910 #
def parseTypeBlock(self, token):
    """Skip a definition block up to and including its balancing '}'.

    Nested '{' blocks are skipped recursively.  Returns the token after
    the block, or None when the input ends first.
    """
    while token is not None:
        if token[0] != "sep":
            token = self.token()
        elif token[1] == "{":
            token = self.parseTypeBlock(self.token())
        elif token[1] == "}":
            return self.token()
        else:
            token = self.token()
    return token
922
923 #
924 # Parse a type: the fact that the type name can either occur after
925 # the definition or within the definition makes it a little harder
926 # if inside, the name token is pushed back before returning
927 #
def parseType(self, token):
    """Parse a type starting at token.

    The textual type is left in self.type, the fields of a parsed
    struct body in self.struct_fields, the enumerators of a parsed enum
    body in self.enums (also added to the index) and the parameters of
    a function type in self.signature (None otherwise).  When the
    declared name is met while parsing, the token following it is pushed
    back to the lexer and the name token is returned.
    """
    self.type = ""
    self.struct_fields = []
    self.signature = None
    if token is None:
        return token

    def add_word(word):
        # append word to self.type, space separated
        if self.type == "":
            self.type = word
        else:
            self.type = self.type + " " + word

    # Fix: token is checked against None after each fetch, an input
    # ending inside the qualifiers used to raise TypeError.
    while token is not None and token[0] == "name" and (
          token[1] == "const" or token[1] == "unsigned"):
        add_word(token[1])
        token = self.token()
    if token is None:
        return token

    if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
        add_word(token[1])
        # Fix: the check for a following "int" tested the token already
        # consumed and referenced an undefined 'tmp', so it never
        # matched and "long int x" yielded type "long", name "int".
        # Look ahead instead and give back whatever is not part of the
        # type; it is fetched again right after this if/elif chain.
        nxt = self.token()
        while nxt is not None and nxt[0] == "name" and \
              nxt[1] in ("int", "long", "double"):
            add_word(nxt[1])
            nxt = self.token()
        if nxt is not None:
            self.lexer.push(nxt)

    elif token[0] == "name" and token[1] == "struct":
        add_word(token[1])
        token = self.token()
        nametok = None
        if token is not None and token[0] == "name":
            nametok = token
            token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseStruct(token)
        elif token is not None and token[0] == "op" and token[1] == "*":
            self.type = self.type + " " + nametok[1] + " *"
            token = self.token()
            while token is not None and token[0] == "op" and token[1] == "*":
                self.type = self.type + " *"
                token = self.token()
            if token is not None and token[0] == "name":
                nametok = token
                token = self.token()
            else:
                self.error("struct : expecting name", token)
                return token
        elif token is not None and token[0] == "name" and nametok is not None:
            self.type = self.type + " " + nametok[1]
            return token

        if nametok is not None:
            self.lexer.push(token)
            token = nametok
        return token

    elif token[0] == "name" and token[1] == "enum":
        add_word(token[1])
        self.enums = []
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseEnumBlock(token)
        else:
            self.error("parsing enum: expecting '{'", token)
        enum_type = None
        if token is not None and token[0] != "name":
            self.lexer.push(token)
            token = ("name", "enum")
        elif token is not None:
            # Fix: an input ending right after the enum body used to
            # raise TypeError here.
            enum_type = token[1]
        for enum in self.enums:
            self.index.add(enum[0], self.filename,
                           not self.is_header, "enum",
                           (enum[1], enum[2], enum_type))
        return token

    elif token[0] == "name":
        add_word(token[1])
    else:
        self.error("parsing type %s: expecting a name" % (self.type),
                   token)
        return token
    token = self.token()
    while token is not None and (token[0] == "op" or
          token[0] == "name" and token[1] == "const"):
        self.type = self.type + " " + token[1]
        token = self.token()

    #
    # if there is a parenthesis here, this means a function type
    #
    if token is not None and token[0] == "sep" and token[1] == '(':
        self.type = self.type + token[1]
        token = self.token()
        while token is not None and token[0] == "op" and token[1] == '*':
            self.type = self.type + token[1]
            token = self.token()
        if token is None or token[0] != "name":
            self.error("parsing function type, name expected", token)
            return token
        self.type = self.type + token[1]
        nametok = token
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == ')':
            self.type = self.type + token[1]
            token = self.token()
            if token is not None and token[0] == "sep" and token[1] == '(':
                token = self.token()
                # parseSignature() goes through parseType() again, keep
                # the type computed so far
                type = self.type
                token = self.parseSignature(token)
                self.type = type
            else:
                self.error("parsing function type, '(' expected", token)
                return token
        else:
            self.error("parsing function type, ')' expected", token)
            return token
        self.lexer.push(token)
        token = nametok
        return token

    #
    # do some lookahead for arrays
    #
    if token is not None and token[0] == "name":
        nametok = token
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == '[':
            self.type = self.type + nametok[1]
            while token is not None and token[0] == "sep" and token[1] == '[':
                self.type = self.type + token[1]
                token = self.token()
                while token is not None and token[0] != 'sep' and \
                      token[1] != ']' and token[1] != ';':
                    self.type = self.type + token[1]
                    token = self.token()
            if token is not None and token[0] == 'sep' and token[1] == ']':
                self.type = self.type + token[1]
                token = self.token()
            else:
                self.error("parsing array type, ']' expected", token)
                return token
        elif token is not None and token[0] == "sep" and token[1] == ':':
            # remove :12 in case it's a limited int size
            token = self.token()
            token = self.token()
        self.lexer.push(token)
        token = nametok

    return token
1089
1090 #
1091 # Parse a signature: '(' has been parsed and we scan the type definition
1092 # up to the ')' included
def parseSignature(self, token):
    """Parse a parameter list whose '(' has already been consumed, up
    to and including the closing ')'.

    self.signature receives a list of (type, name, None) tuples; name
    is None when only a type was given and "..." for a "..." parameter.
    Returns the token following the ')'.
    """
    params = []
    if token is not None and token[0] == "sep" and token[1] == ')':
        # empty parameter list
        self.signature = []
        return self.token()
    while token is not None:
        token = self.parseType(token)
        if token is None:
            break
        kind, text = token[0], token[1]
        if kind == "name":
            params.append((self.type, text, None))
            token = self.token()
        elif kind == "sep" and text == ',':
            token = self.token()
            continue
        elif kind == "sep" and text == ')':
            # only the type was provided
            if self.type == "...":
                params.append((self.type, "...", None))
            else:
                params.append((self.type, None, None))
        if token is not None and token[0] == "sep":
            if token[1] == ',':
                token = self.token()
                continue
            elif token[1] == ')':
                token = self.token()
                break
    self.signature = params
    return token
1122
#
# Parse a global definition, be it a type, a variable or a function.
# The extern "C" blocks are a bit nasty and require it to recurse.
#
def parseGlobal(self, token):
    """Parse one top-level C declaration and record it in self.index.

    `token` is the first token of the declaration and must not be None.
    Forms handled here:
      - extern "C" { ... } : each enclosed declaration is parsed by
        recursing into parseGlobal()
      - typedef ...        : delegated to parseTypedef()
      - variables          : registered as "variable", or as "struct"
        when the parsed type is exactly "struct"
      - function prototypes and definitions : registered as "function";
        a function body is skipped with parseBlock()

    Returns the first token that was not consumed, or None when the
    token stream is exhausted.
    """
    static = 0
    if token[1] == 'extern':
        token = self.token()
        if token is None:
            return token
        if token[0] == 'string':
            if token[1] != 'C':
                return token
            token = self.token()
            if token is None:
                return token
            if token[0] == 'sep' and token[1] == "{":
                token = self.token()
                # Parse every declaration until the matching '}'.
                while token is not None and (token[0] != 'sep' or
                                             token[1] != "}"):
                    if token[0] != 'name':
                        self.error(
                            "token %s %s unexpected at the top level" % (
                                token[0], token[1]))
                    token = self.parseGlobal(token)
                # Step over the closing '}'.
                token = self.token()
                return token
            # extern "C" followed by a single declaration: fall through
            # and parse it like any other global.
    elif token[1] == 'static':
        static = 1
        token = self.token()
        if token is None or token[0] != 'name':
            return token

    if token[1] == 'typedef':
        token = self.token()
        return self.parseTypedef(token)

    # parseType() leaves the parsed type in self.type.
    token = self.parseType(token)
    base_type = self.type
    if token is None or token[0] != "name":
        return token

    decl_type = base_type
    self.name = token[1]
    token = self.token()
    while token is not None and (token[0] == "sep" or token[0] == "op"):
        if token[0] == "sep" and token[1] == "[":
            # Array declarator: fold everything up to the ';' into the
            # type string.
            decl_type = decl_type + token[1]
            token = self.token()
            while token is not None and (token[0] != "sep" or
                                         token[1] != ";"):
                decl_type = decl_type + token[1]
                token = self.token()

        if token is not None and token[0] == "op" and token[1] == "=":
            #
            # Skip the initialization of the variable
            #
            token = self.token()
            # The input may end right after '='; guard before indexing
            # so that the error below is reported instead of a TypeError.
            if token is not None and token[0] == 'sep' and token[1] == '{':
                token = self.token()
                token = self.parseBlock(token)
            else:
                self.comment = None
                while token is not None and (token[0] != "sep" or
                        (token[1] != ';' and token[1] != ',')):
                    token = self.token()
            self.comment = None
            if token is None or token[0] != "sep" or (token[1] != ';' and
                                                      token[1] != ','):
                self.error("missing ';' or ',' after value")

        if token is None or token[0] != "sep":
            # Any other operator ends the declaration.
            break

        if token[1] == ";":
            self.comment = None
            token = self.token()
            if decl_type == "struct":
                self.index.add(self.name, self.filename,
                               not self.is_header, "struct",
                               self.struct_fields)
            else:
                self.index.add(self.name, self.filename,
                               not self.is_header, "variable", decl_type)
            break
        elif token[1] == "(":
            token = self.token()
            token = self.parseSignature(token)
            if token is None:
                return None
            if token[0] == "sep" and token[1] == ";":
                # Prototype only.
                d = self.mergeFunctionComment(self.name,
                        ((decl_type, None), self.signature), 1)
                self.index.add(self.name, self.filename, static,
                               "function", d)
                token = self.token()
            elif token[0] == "sep" and token[1] == "{":
                # Definition: register it, then skip the body.
                d = self.mergeFunctionComment(self.name,
                        ((decl_type, None), self.signature), static)
                self.index.add(self.name, self.filename, static,
                               "function", d)
                token = self.token()
                token = self.parseBlock(token)
        elif token[1] == ',':
            # Several variables declared in one statement: register the
            # current one and restart from the base type for the next.
            self.comment = None
            self.index.add(self.name, self.filename, static,
                           "variable", decl_type)
            decl_type = base_type
            token = self.token()
            while token is not None and token[0] == "sep":
                decl_type = decl_type + token[1]
                token = self.token()
            if token is not None and token[0] == "name":
                self.name = token[1]
                token = self.token()

    return token
1245
def parse(self):
    """Parse the whole file, one global definition at a time.

    Returns self.index once the token stream is exhausted.  If a token
    other than a name shows up at the top level, self.error() is called,
    parseGlobal() is attempted once on that token and the method then
    returns None.
    """
    print("Parsing %s" % (self.filename))
    current = self.token()
    while current is not None:
        if current[0] != 'name':
            self.error("token %s %s unexpected at the top level" % (
                       current[0], current[1]))
            self.parseGlobal(current)
            return None
        current = self.parseGlobal(current)
    return self.index
1258
1259
class docBuilder:
    """A documentation builder.

    Scans a set of directories for C modules (*.c) and headers (*.h),
    parses each of them with CParser, merges the resulting indexes into
    self.idx and serializes the merged index as an XML API description.
    """

    def __init__(self, name, directories=None, excludes=None):
        """Create a builder for project `name`.

        directories -- list of directories to scan (default: ['.'])
        excludes    -- substrings; any file whose path contains one of
                       them is skipped.  The keys of the module-level
                       ignored_files table are always appended.
        """
        # None sentinels avoid sharing one mutable default list between
        # all instances.
        if directories is None:
            directories = ['.']
        if excludes is None:
            excludes = []
        self.name = name
        self.directories = directories
        self.excludes = list(excludes) + list(ignored_files.keys())
        self.modules = {}    # module path -> index produced by its parse
        self.headers = {}    # header path -> index produced by its parse
        self.idx = index()   # merged index for the whole project

    def analyze(self):
        """Print a short summary and run the analysis of the merged index."""
        print("Project %s : %d headers, %d modules" % (
            self.name, len(self.headers), len(self.modules)))
        self.idx.analyze()

    def scanHeaders(self):
        """Parse every registered header and merge its index."""
        for header in list(self.headers.keys()):
            parser = CParser(header)
            idx = parser.parse()
            self.headers[header] = idx
            self.idx.merge(idx)

    def scanModules(self):
        """Parse every registered module and merge its public part."""
        for module in list(self.modules.keys()):
            parser = CParser(module)
            idx = parser.parse()
            # idx.analyze()
            self.modules[module] = idx
            self.idx.merge_public(idx)

    def _excluded(self, path):
        """Return 1 if `path` contains one of the exclusion substrings."""
        for excl in self.excludes:
            if path.find(excl) != -1:
                return 1
        return 0

    def scan(self):
        """Collect the *.c and *.h files of each directory, then parse them."""
        for directory in self.directories:
            for path in glob.glob(directory + "/*.c"):
                if not self._excluded(path):
                    self.modules[path] = None
            for path in glob.glob(directory + "/*.h"):
                if not self._excluded(path):
                    self.headers[path] = None
        self.scanHeaders()
        self.scanModules()

    def modulename_file(self, file):
        """Return the base name of `file`, without a trailing '.h'."""
        module = os.path.basename(file)
        if module[-2:] == '.h':
            module = module[:-2]
        return module

    def serialize_enum(self, output, name):
        """Write the <enum> element for enum value `name` to `output`."""
        ident = self.idx.enums[name]
        output.write(" <enum name='%s' file='%s'" % (name,
                     self.modulename_file(ident.module)))
        if ident.info is not None:
            # info is indexed as (value, description, enum type)
            info = ident.info
            if info[0] is not None and info[0] != '':
                output.write(" value='%s'" % info[0])
            if info[2] is not None and info[2] != '':
                output.write(" type='%s'" % info[2])
            if info[1] is not None and info[1] != '':
                output.write(" info='%s'" % escape(info[1]))
        output.write("/>\n")

    def serialize_macro(self, output, name):
        """Write the <macro> element for macro `name` to `output`.

        A macro whose info cannot be unpacked as (args, description) is
        still emitted, without details; the failure is reported on stdout
        like the other serializers do.
        """
        ident = self.idx.macros[name]
        output.write(" <macro name='%s' file='%s'>\n" % (name,
                     self.modulename_file(ident.module)))
        if ident.info is not None:
            try:
                (args, desc) = ident.info
                if desc is not None and desc != "":
                    output.write(" <info>%s</info>\n" % (escape(desc)))
                for arg in args:
                    # distinct names: do not clobber the `name` parameter
                    (arg_name, arg_desc) = arg
                    if arg_desc is not None and arg_desc != "":
                        output.write(" <arg name='%s' info='%s'/>\n" % (
                                     arg_name, escape(arg_desc)))
                    else:
                        output.write(" <arg name='%s'/>\n" % (arg_name))
            except Exception:
                print("Failed to serialize macro %s" % (name))
        output.write(" </macro>\n")

    def serialize_typedef(self, output, name):
        """Write the <struct> or <typedef> element for typedef `name`.

        A typedef whose info starts with 'struct ' is emitted as <struct>;
        when the named struct is known with a tuple or list of fields,
        each field becomes a <field> child.
        """
        ident = self.idx.typedefs[name]
        if ident.info[0:7] == 'struct ':
            output.write(" <struct name='%s' file='%s' type='%s'" % (
                         name, self.modulename_file(ident.module),
                         ident.info))
            struct_name = ident.info[7:]
            if struct_name in self.idx.structs and isinstance(
                    self.idx.structs[struct_name].info, (tuple, list)):
                output.write(">\n")
                try:
                    for field in self.idx.structs[struct_name].info:
                        desc = field[2]
                        if desc is None:
                            desc = ''
                        else:
                            desc = escape(desc)
                        output.write(" <field name='%s' type='%s' info='%s'/>\n" % (field[1], field[0], desc))
                except Exception:
                    print("Failed to serialize struct %s" % (struct_name))
                output.write(" </struct>\n")
            else:
                output.write("/>\n")
        else:
            output.write(" <typedef name='%s' file='%s' type='%s'/>\n" % (
                         name, self.modulename_file(ident.module),
                         ident.info))

    def serialize_variable(self, output, name):
        """Write the <variable> element for variable `name` to `output`."""
        ident = self.idx.variables[name]
        if ident.info is not None:
            output.write(" <variable name='%s' file='%s' type='%s'/>\n" % (
                         name, self.modulename_file(ident.module),
                         ident.info))
        else:
            output.write(" <variable name='%s' file='%s'/>\n" % (
                         name, self.modulename_file(ident.module)))

    def serialize_function(self, output, name):
        """Write the element describing function `name` to `output`.

        The element tag is the identifier's own type.  Its info is
        unpacked as (return, parameters, description); if that fails the
        element is closed anyway and the failure is reported on stdout.
        """
        ident = self.idx.functions[name]
        output.write(" <%s name='%s' file='%s'>\n" % (ident.type, name,
                     self.modulename_file(ident.module)))
        try:
            (ret, params, desc) = ident.info
            output.write(" <info>%s</info>\n" % (escape(desc)))
            if ret[0] is not None:
                if ret[0] == "void":
                    output.write(" <return type='void'/>\n")
                else:
                    output.write(" <return type='%s' info='%s'/>\n" % (
                                 ret[0], escape(ret[1])))
            for param in params:
                if param[0] == 'void':
                    continue
                if param[2] is None:
                    output.write(" <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
                else:
                    output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
        except Exception:
            # Two spaces after "info:" match what the former
            # comma-separated print statement produced.
            print("Failed to save function %s info:  %s" % (
                  name, repr(ident.info)))
        output.write(" </%s>\n" % (ident.type))

    def serialize_exports(self, output, file):
        """Write the <file> element listing every symbol of header `file`."""
        module = self.modulename_file(file)
        output.write(" <file name='%s'>\n" % (module))
        exported = self.headers[file]
        ids = list(exported.functions.keys()) + \
              list(exported.variables.keys()) + \
              list(exported.macros.keys()) + \
              list(exported.typedefs.keys()) + \
              list(exported.structs.keys()) + \
              list(exported.enums.keys())
        ids.sort()
        for symbol in ids:
            output.write(" <exports symbol='%s'/>\n" % (symbol))
        output.write(" </file>\n")

    def _sorted_names(self, table):
        """Return the keys of `table` as a sorted list."""
        names = list(table.keys())
        names.sort()
        return names

    def serialize(self, filename = None):
        """Write the XML API description to `filename`.

        filename -- output path; defaults to "<name>-api.xml".

        The output file is closed even if one of the serializers raises.
        """
        if filename is None:
            filename = "%s-api.xml" % self.name
        print("Saving XML description %s" % (filename))
        output = open(filename, "w")
        try:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<api name='%s'>\n" % self.name)
            output.write(" <files>\n")
            for header in self.headers.keys():
                self.serialize_exports(output, header)
            output.write(" </files>\n")
            output.write(" <symbols>\n")
            for macro in self._sorted_names(self.idx.macros):
                self.serialize_macro(output, macro)
            for enum in self._sorted_names(self.idx.enums):
                self.serialize_enum(output, enum)
            for typedef in self._sorted_names(self.idx.typedefs):
                self.serialize_typedef(output, typedef)
            for variable in self._sorted_names(self.idx.variables):
                self.serialize_variable(output, variable)
            for function in self._sorted_names(self.idx.functions):
                self.serialize_function(output, function)
            output.write(" </symbols>\n")
            output.write("</api>\n")
        finally:
            output.close()
1460
1461
def rebuild():
    """Rebuild the API description of the project found one level up.

    The project is guessed from a marker source file: ../parser.c for
    libxml2, otherwise ../libxslt/transform.c for libxslt.  When
    ../libexslt/exslt.c exists as well, a separate description is built
    for libexslt.

    Returns the docBuilder of the main project, or None when no marker
    file is found.
    """
    def run(docs):
        # Full pipeline for one builder.
        docs.scan()
        docs.analyze()
        docs.serialize()

    # (marker file, banner, docBuilder arguments), tried in order.
    candidates = (
        ("../parser.c",
         "Rebuilding API description for libxml2",
         ("libxml2", ["..", "../include/libxml"],
          ["xmlwin32version.h", "tst.c",
           "schemasInternals.h", "xmlschemas"])),
        ("../libxslt/transform.c",
         "Rebuilding API description for libxslt",
         ("libxslt", ["../libxslt"],
          ["win32config.h", "libxslt.h", "tst.c"])),
    )

    for marker, banner, arguments in candidates:
        if glob.glob(marker):
            print(banner)
            builder = docBuilder(*arguments)
            break
    else:
        print("rebuild() failed, unable to guess the module")
        return None

    run(builder)
    if glob.glob("../libexslt/exslt.c"):
        run(docBuilder("libexslt", ["../libexslt"], ["libexslt.h"]))
    return builder
1485
1486#
1487# for debugging the parser
1488#
def parse(filename):
    """Debugging helper: parse `filename` with a fresh CParser.

    Returns whatever CParser.parse() returns for that file.
    """
    return CParser(filename).parse()
1493
# Executed as a script: rebuild the API description right away.
if __name__ == "__main__":
    rebuild()