blob: 8d1b204a9e18974a7c3751942e4e9af6c08ee6b9 [file] [log] [blame]
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001#!/usr/bin/python -u
2#
# This is the API builder: it parses the C sources and builds the
# formal description of the API in XML.
5#
6# See Copyright for the status of this software.
7#
8# daniel@veillard.com
9#
10import sys
11import string
12import glob
13
def escape(raw):
    """Return raw with the XML special characters replaced by entities.

    The five characters & < > ' " are mapped to their predefined XML
    entities.  The ampersand is handled first so that the '&' introduced
    by the other entities is not escaped a second time.
    """
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
21
class identifier:
    """One named entity recorded in an index.

    Attributes:
        name:   the identifier itself.
        module: where it was declared, or None while unknown.
        type:   kind of entity, or None while unknown.
        info:   kind specific description, or None.
        extra:  additional data, or None.
        static: 0 until changed through set_static().
    """
    def __init__(self, name, module=None, type=None, info=None, extra=None):
        self.name = name
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.static = 0

    def __repr__(self):
        """Return 'type name:' followed by the optional attributes set."""
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module is not None:
            r = r + " from %s" % (self.module)
        if self.info is not None:
            r = r + " " + repr(self.info)
        if self.extra is not None:
            r = r + " " + repr(self.extra)
        return r

    def set_module(self, module):
        self.module = module

    def set_type(self, type):
        self.type = type

    def set_info(self, info):
        self.info = info

    def set_extra(self, extra):
        self.extra = extra

    def set_static(self, static):
        self.static = static

    def update(self, module, type = None, info = None, extra=None):
        """Merge new knowledge about the identifier.

        module and type are only recorded when they were still unknown,
        while a non-None info or extra always replaces the stored value.
        """
        if module is not None and self.module is None:
            self.set_module(module)
        if type is not None and self.type is None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
64
65
class index:
    """Collection of the identifiers found in one or several modules.

    Every identifier is stored in self.identifiers and, depending on its
    type, in one of the per-kind dictionaries (functions, variables,
    includes, structs, enums, typedefs, macros), all keyed by name.
    """
    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}

    def add(self, name, module, static, type, info=None, extra=None):
        """Register or update an identifier and return it.

        Names starting with '__' are ignored and None is returned.  An
        already known identifier is updated in place, otherwise a new one
        is created.  static == 1 flags the identifier as static.  The
        identifier is then filed under the dictionary matching type; an
        unknown type is only reported on stdout.
        """
        if name[0:2] == '__':
            return None
        d = self.identifiers.get(name)
        if d is None:
            d = identifier(name, module, type, info, extra)
            self.identifiers[name] = d
        else:
            d.update(module, type, info, extra)

        if static == 1:
            d.set_static(1)

        if name is not None and type is not None:
            by_type = {
                "function": self.functions,
                "functype": self.functions,
                "variable": self.variables,
                "include": self.includes,
                "struct": self.structs,
                "enum": self.enums,
                "typedef": self.typedefs,
                "macro": self.macros,
            }
            target = by_type.get(type)
            if target is None:
                print("Unable to register type  %s" % (type,))
            else:
                target[name] = d
        return d

    def _merge_kind(self, label, mine, theirs, overrides_macro = 0):
        # Copy the entries of theirs missing from mine (and record them in
        # self.identifiers), reporting the ones already present.  When
        # overrides_macro is set a macro of the same name is dropped first.
        for id in theirs.keys():
            if overrides_macro and id in self.macros:
                del self.macros[id]
            if id in mine:
                print("%s %s from %s redeclared in %s" % (
                      label, id, mine[id].module, theirs[id].module))
            else:
                mine[id] = theirs[id]
                self.identifiers[id] = theirs[id]

    def merge(self, idx):
        """Merge the content of the index idx into this one.

        Redeclarations are reported on stdout and the existing entry is
        kept.  A macro might be used to override a function or variable
        definition: merged functions and variables remove a macro of the
        same name, and macros of idx named like a known variable,
        function or enum are skipped.
        """
        self._merge_kind("function", self.functions, idx.functions, 1)
        self._merge_kind("variable", self.variables, idx.variables, 1)
        self._merge_kind("struct", self.structs, idx.structs)
        self._merge_kind("typedef", self.typedefs, idx.typedefs)
        for id in idx.macros.keys():
            if id in self.variables:
                continue
            if id in self.functions:
                continue
            if id in self.enums:
                continue
            if id in self.macros:
                print("macro %s from %s redeclared in %s" % (
                      id, self.macros[id].module, idx.macros[id].module))
            else:
                self.macros[id] = idx.macros[id]
                self.identifiers[id] = idx.macros[id]
        # enums are merged last, after the macro filtering above
        self._merge_kind("enum", self.enums, idx.enums)

    def merge_public(self, idx):
        """Complete the functions of this index with those of idx.

        Known functions get their type, info and extra updated; unknown
        ones are only added when they are not static.
        """
        for id in idx.functions.keys():
            up = idx.functions[id]
            if id in self.functions:
                self.functions[id].update(None, up.type, up.info, up.extra)
            elif up.static == 0:
                self.functions[id] = up

    def analyze_dict(self, type, dict):
        """Print how many entries of dict exist and how many are public."""
        count = 0
        public = 0
        for name in dict.keys():
            count = count + 1
            if dict[name].static == 0:
                public = public + 1
        if count != public:
            print(" %d %s , %d public" % (count, type, public))
        elif count != 0:
            print(" %d public %s" % (count, type))

    def analyze(self):
        """Print the statistics of the main identifier dictionaries."""
        self.analyze_dict("functions", self.functions)
        self.analyze_dict("variables", self.variables)
        self.analyze_dict("structs", self.structs)
        self.analyze_dict("typedefs", self.typedefs)
        self.analyze_dict("macros", self.macros)
208 self.analyze_dict("macros", self.macros)
209
210#
211# C parser analysis code
212#
# Reasons shared by several entries of the tables below.
_NON_STANDARD_MACROS = "too many non standard macros"
_PORTABILITY_LAYER = "generated portability layer"

# File names that are left out, mapped to the reason why.
ignored_files = {
    "trio": _NON_STANDARD_MACROS,
    "trio.c": _NON_STANDARD_MACROS,
    "trionan.c": _NON_STANDARD_MACROS,
    "triostr.c": _NON_STANDARD_MACROS,
    "acconfig.h": _PORTABILITY_LAYER,
    "config.h": _PORTABILITY_LAYER,
    "libxml.h": "internal only",
}

# Names dropped from the token stream by CParser.token().  Each maps to
# (number of following tokens dropped along with the name, reason).
ignored_words = {
    "WINAPI": (0, "Windows keyword"),
    "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
    "__declspec": (3, "Windows keyword"),
    "ATTRIBUTE_UNUSED": (0, "macro keyword"),
}
229
class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line.

    Tokens are (kind, text) tuples where kind is one of 'preproc',
    'string', 'comment', 'name', 'sep' or 'op'.
    """
    # characters ending a 'name' token
    _NAME_STOPPERS = " \t(){}:;,+-*/%&!|[]=><"
    _SEPARATORS = "(){}:;,[]"
    # characters starting an operator, and those allowed as its second char
    _OPERATOR_START = "+-*><=/%&!|."
    _OPERATOR_NEXT = "+-*><=/%&!|"

    def __init__(self, input):
        self.input = input      # object providing readline()
        self.tokens = []        # tokens already lexed but not yet returned
        self.line = ""          # unprocessed remainder of the current line
        self.lineno = 0
        self.last = None        # last token returned, reported by debug()

    def getline(self):
        """Return the next non blank line, stripped, or None at end of input.

        Lines ending with a backslash are joined with the following ones.
        """
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = line.strip()
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = n.strip()
                if not n:
                    break
                line = line + n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        """Put token back so that it is the next one returned by token()."""
        self.tokens.insert(0, token)

    def debug(self):
        """Print the state of the lexer."""
        print("Last token:  %s" % (self.last,))
        print("Token queue:  %s" % (self.tokens,))
        print("Line %d end:  %s" % (self.lineno, self.line))

    def _read_string(self, line):
        # line starts with a quote: collect the text up to the matching
        # quote, reading more lines if needed.  A backslash protects the
        # next character.  Returns the 'string' token, None at end of input.
        end = line[0]
        line = line[1:]
        tok = ""
        while 1:
            found = 0
            i = 0
            l = len(line)
            while i < l:
                if line[i] == end:
                    self.line = line[i+1:]
                    line = line[:i]
                    found = 1
                    break
                if line[i] == '\\':
                    i = i + 1
                i = i + 1
            tok = tok + line
            if found:
                break
            line = self.getline()
            if line is None:
                return None
        self.last = ('string', tok)
        return self.last

    def _read_comment(self, line):
        # line starts with '/*': collect the text up to '*/', the lines
        # being joined with '\n'.  Returns the 'comment' token, or None
        # at end of input.
        line = line[2:]
        tok = ""
        while 1:
            found = 0
            i = 0
            l = len(line)
            while i < l:
                if line[i] == '*' and i+1 < l and line[i+1] == '/':
                    self.line = line[i+2:]
                    # NOTE(review): the character preceding '*/' is
                    # dropped too; when '*/' starts the line the slice
                    # is [:-1] and leaves '*'.  Kept as is because the
                    # comment parsers may rely on it -- confirm.
                    line = line[:i-1]
                    found = 1
                    break
                i = i + 1
            if tok != "":
                tok = tok + "\n"
            tok = tok + line
            if found:
                break
            line = self.getline()
            if line is None:
                return None
        self.last = ('comment', tok)
        return self.last

    def _split_code(self, line):
        # Return the part of line preceding the first comment or string,
        # the rest being saved in self.line for the next round.
        i = 0
        l = len(line)
        while i < l:
            c = line[i]
            if c == '"' or c == "'" or line[i:i+2] == '//' or \
               line[i:i+2] == '/*':
                self.line = line[i:]
                return line[:i]
            i = i + 1
        return line

    def _tokenize(self, line):
        # Append to self.tokens the 'name', 'sep' and 'op' tokens of a
        # piece of code free of comments and strings.
        l = len(line)
        i = 0
        while i < l:
            c = line[i]
            if c == ' ' or c == '\t':
                i = i + 1
            elif c in self._SEPARATORS:
                self.tokens.append(('sep', c))
                i = i + 1
            elif c in self._OPERATOR_START:
                if line[i:i+3] == '...':
                    self.tokens.append(('name', '...'))
                    i = i + 3
                elif i + 1 < l and line[i+1] in self._OPERATOR_NEXT:
                    self.tokens.append(('op', line[i:i+2]))
                    i = i + 2
                else:
                    self.tokens.append(('op', c))
                    i = i + 1
            else:
                # anything else starts a name running up to the next
                # blank, separator or operator ('.' does not end a name)
                s = i
                while i < l and line[i] not in self._NAME_STOPPERS:
                    i = i + 1
                self.tokens.append(('name', line[s:i]))

    def token(self):
        """Return the next token, or None at end of input."""
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line is None:
                return None

            if line[0] == '#':
                # a preprocessor line is split on blanks, a real list is
                # needed since the queue is compared to [] and indexed
                self.tokens = [('preproc', word) for word in line.split()]
                break
            if line[0] == '"' or line[0] == "'":
                return self._read_string(line)
            if line[0:2] == '/*':
                return self._read_comment(line)
            if line[0:2] == '//':
                self.last = ('comment', line[2:])
                return self.last
            self._tokenize(self._split_code(line))

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok
435
436class CParser:
437 """The C module parser"""
438 def __init__(self, filename, idx = None):
439 self.filename = filename
440 if len(filename) > 2 and filename[-2:] == '.h':
441 self.is_header = 1
442 else:
443 self.is_header = 0
444 self.input = open(filename)
445 self.lexer = CLexer(self.input)
446 if idx == None:
447 self.index = index()
448 else:
449 self.index = idx
450 self.top_comment = ""
451 self.last_comment = ""
452 self.comment = None
453
454 def lineno(self):
455 return self.lexer.getlineno()
456
457 def error(self, msg, token=-1):
458 print "Parse Error: " + msg
459 if token != -1:
460 print "Got token ", token
461 self.lexer.debug()
462 sys.exit(1)
463
464 def debug(self, msg, token=-1):
465 print "Debug: " + msg
466 if token != -1:
467 print "Got token ", token
468 self.lexer.debug()
469
470 def parseComment(self, token):
471 if self.top_comment == "":
472 self.top_comment = token[1]
473 if self.comment == None or token[1][0] == '*':
474 self.comment = token[1];
475 else:
476 self.comment = self.comment + token[1]
477 token = self.lexer.token()
478 return token
479
    #
    # Parse the comment block associated with a macro
    #
483 def parseMacroComment(self, name, quiet = 0):
484 if name[0:2] == '__':
485 quiet = 1
486
487 args = []
488 desc = ""
489
490 if self.comment == None:
491 if not quiet:
492 print "Missing comment for macro %s" % (name)
493 return((args, desc))
494 if self.comment[0] != '*':
495 if not quiet:
496 print "Missing * in macro comment for %s" % (name)
497 return((args, desc))
498 lines = string.split(self.comment, '\n')
499 if lines[0] == '*':
500 del lines[0]
501 if lines[0] != "* %s:" % (name):
502 if not quiet:
503 print "Misformatted macro comment for %s" % (name)
504 print " Expecting '* %s:' got '%s'" % (name, lines[0])
505 return((args, desc))
506 del lines[0]
507 while lines[0] == '*':
508 del lines[0]
509 while len(lines) > 0 and lines[0][0:3] == '* @':
510 l = lines[0][3:]
511 try:
512 (arg, desc) = string.split(l, ':', 1)
513 desc=string.strip(desc)
514 arg=string.strip(arg)
515 except:
516 if not quiet:
517 print "Misformatted macro comment for %s" % (name)
518 print " problem with '%s'" % (lines[0])
519 del lines[0]
520 continue
521 del lines[0]
522 l = string.strip(lines[0])
523 while len(l) > 2 and l[0:3] != '* @':
524 while l[0] == '*':
525 l = l[1:]
526 desc = desc + ' ' + string.strip(l)
527 del lines[0]
528 if len(lines) == 0:
529 break
530 l = lines[0]
531 args.append((arg, desc))
532 while len(lines) > 0 and lines[0] == '*':
533 del lines[0]
534 desc = ""
535 while len(lines) > 0:
536 l = lines[0]
537 while len(l) > 0 and l[0] == '*':
538 l = l[1:]
539 l = string.strip(l)
540 desc = desc + " " + l
541 del lines[0]
542
543 desc = string.strip(desc)
544
545 if quiet == 0:
546 if desc == "":
547 print "Macro comment for %s lack description of the macro" % (name)
548
549 return((args, desc))
550
    #
    # Parse a comment block and merge the information found into the
    # parameter descriptions; finally return a description block that
    # is as complete as possible
    #
556 def mergeFunctionComment(self, name, description, quiet = 0):
557 if name == 'main':
558 quiet = 1
559 if name[0:2] == '__':
560 quiet = 1
561
562 (ret, args) = description
563 desc = ""
564 retdesc = ""
565
566 if self.comment == None:
567 if not quiet:
568 print "Missing comment for function %s" % (name)
569 return(((ret[0], retdesc), args, desc))
570 if self.comment[0] != '*':
571 if not quiet:
572 print "Missing * in function comment for %s" % (name)
573 return(((ret[0], retdesc), args, desc))
574 lines = string.split(self.comment, '\n')
575 if lines[0] == '*':
576 del lines[0]
577 if lines[0] != "* %s:" % (name):
578 if not quiet:
579 print "Misformatted function comment for %s" % (name)
580 print " Expecting '* %s:' got '%s'" % (name, lines[0])
581 return(((ret[0], retdesc), args, desc))
582 del lines[0]
583 while lines[0] == '*':
584 del lines[0]
585 nbargs = len(args)
586 while len(lines) > 0 and lines[0][0:3] == '* @':
587 l = lines[0][3:]
588 try:
589 (arg, desc) = string.split(l, ':', 1)
590 desc=string.strip(desc)
591 arg=string.strip(arg)
592 except:
593 if not quiet:
594 print "Misformatted function comment for %s" % (name)
595 print " problem with '%s'" % (lines[0])
596 del lines[0]
597 continue
598 del lines[0]
599 l = string.strip(lines[0])
600 while len(l) > 2 and l[0:3] != '* @':
601 while l[0] == '*':
602 l = l[1:]
603 desc = desc + ' ' + string.strip(l)
604 del lines[0]
605 if len(lines) == 0:
606 break
607 l = lines[0]
608 i = 0
609 while i < nbargs:
610 if args[i][1] == arg:
611 args[i] = (args[i][0], arg, desc)
612 break;
613 i = i + 1
614 if i >= nbargs:
615 if not quiet:
616 print "Uname to find arg %s from function comment for %s" % (
617 arg, name)
618 while len(lines) > 0 and lines[0] == '*':
619 del lines[0]
620 desc = ""
621 while len(lines) > 0:
622 l = lines[0]
623 while len(l) > 0 and l[0] == '*':
624 l = l[1:]
625 l = string.strip(l)
626 if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return":
627 try:
628 l = string.split(l, ' ', 1)[1]
629 except:
630 l = ""
631 retdesc = string.strip(l)
632 del lines[0]
633 while len(lines) > 0:
634 l = lines[0]
635 while len(l) > 0 and l[0] == '*':
636 l = l[1:]
637 l = string.strip(l)
638 retdesc = retdesc + " " + l
639 del lines[0]
640 else:
641 desc = desc + " " + l
642 del lines[0]
643
644 retdesc = string.strip(retdesc)
645 desc = string.strip(desc)
646
647 if quiet == 0:
648 #
649 # report missing comments
650 #
651 i = 0
652 while i < nbargs:
653 if args[i][2] == None and args[i][0] != "void" and args[i][1] != None:
654 print "Function comment for %s lack description of arg %s" % (name, args[i][1])
655 i = i + 1
656 if retdesc == "" and ret[0] != "void":
657 print "Function comment for %s lack description of return value" % (name)
658 if desc == "":
659 print "Function comment for %s lack description of the function" % (name)
660
661
662 return(((ret[0], retdesc), args, desc))
663
664 def parsePreproc(self, token):
665 name = token[1]
666 if name == "#include":
667 token = self.lexer.token()
668 if token == None:
669 return None
670 if token[0] == 'preproc':
671 self.index.add(token[1], self.filename, not self.is_header,
672 "include")
673 return self.lexer.token()
674 return token
675 if name == "#define":
676 token = self.lexer.token()
677 if token == None:
678 return None
679 if token[0] == 'preproc':
680 # TODO macros with arguments
681 name = token[1]
682 lst = []
683 token = self.lexer.token()
684 while token != None and token[0] == 'preproc' and \
685 token[1][0] != '#':
686 lst.append(token[1])
687 token = self.lexer.token()
688 try:
689 name = string.split(name, '(') [0]
690 except:
691 pass
692 info = self.parseMacroComment(name, not self.is_header)
693 self.index.add(name, self.filename, not self.is_header,
694 "macro", info)
695 return token
696 token = self.lexer.token()
697 while token != None and token[0] == 'preproc' and \
698 token[1][0] != '#':
699 token = self.lexer.token()
700 return token
701
    #
    # Token acquisition on top of the lexer: it handles the
    # preprocessor directives and the comments internally, since
    # they are logically not part of the program structure.
    #
707 def token(self):
708 global ignored_words
709
710 token = self.lexer.token()
711 while token != None:
712 if token[0] == 'comment':
713 token = self.parseComment(token)
714 continue
715 elif token[0] == 'preproc':
716 token = self.parsePreproc(token)
717 continue
718 elif token[0] == "name" and ignored_words.has_key(token[1]):
719 (n, info) = ignored_words[token[1]]
720 i = 0
721 while i < n:
722 token = self.lexer.token()
723 i = i + 1
724 token = self.lexer.token()
725 continue
726 else:
727 #print "=> ", token
728 return token
729 return None
730
731 #
732 # Parse a typedef, it records the type and its name.
733 #
734 def parseTypedef(self, token):
735 if token == None:
736 return None
737 token = self.parseType(token)
738 if token == None:
739 self.error("parsing typedef")
740 return None
741 base_type = self.type
742 type = base_type
743 #self.debug("end typedef type", token)
744 while token != None:
745 if token[0] == "name":
746 name = token[1]
747 signature = self.signature
748 if signature != None:
749 d = self.mergeFunctionComment(name,
750 ((type, None), signature), 1)
751 self.index.add(name, self.filename, not self.is_header,
752 "functype", d)
753 else:
754 if base_type == "struct":
755 self.index.add(name, self.filename, not self.is_header,
756 "struct", type)
757 base_type = "struct " + name
758 else:
759 self.index.add(name, self.filename, not self.is_header,
760 "typedef", type)
761 token = self.token()
762 else:
763 self.error("parsing typedef: expecting a name")
764 return token
765 #self.debug("end typedef", token)
766 if token != None and token[0] == 'sep' and token[1] == ',':
767 type = base_type
768 token = self.token()
769 while token != None and token[0] == "op":
770 type = type + token[1]
771 token = self.token()
772 elif token != None and token[0] == 'sep' and token[1] == ';':
773 break;
774 elif token != None and token[0] == 'name':
775 type = base_type
776 continue;
777 else:
778 self.error("parsing typedef: expecting ';'", token)
779 return token
780 token = self.token()
781 return token
782
    #
    # Parse a C code block, used for functions: it parses up to and
    # including the balancing }
    #
787 def parseBlock(self, token):
788 while token != None:
789 if token[0] == "sep" and token[1] == "{":
790 token = self.token()
791 token = self.parseBlock(token)
792 elif token[0] == "sep" and token[1] == "}":
793 self.comment = None
794 token = self.token()
795 return token
796 else:
797 token = self.token()
798 return token
799
800 #
801 # Parse a C struct definition till the balancing }
802 #
803 def parseStruct(self, token):
804 fields = []
805 #self.debug("start parseStruct", token)
806 while token != None:
807 if token[0] == "sep" and token[1] == "{":
808 token = self.token()
809 token = self.parseTypeBlock(token)
810 elif token[0] == "sep" and token[1] == "}":
811 self.struct_fields = fields
812 #self.debug("end parseStruct", token)
813 #print fields
814 token = self.token()
815 return token
816 else:
817 base_type = self.type
818 #self.debug("before parseType", token)
819 token = self.parseType(token)
820 #self.debug("after parseType", token)
821 if token != None and token[0] == "name":
822 fname = token[1]
823 token = self.token()
824 if token[0] == "sep" and token[1] == ";":
825 self.comment = None
826 token = self.token()
827 fields.append((self.type, fname, self.comment))
828 self.comment = None
829 else:
830 self.error("parseStruct: expecting ;", token)
831 elif token != None and token[0] == "sep" and token[1] == "{":
832 token = self.token()
833 token = self.parseTypeBlock(token)
834 if token != None and token[0] == "name":
835 token = self.token()
836 if token != None and token[0] == "sep" and token[1] == ";":
837 token = self.token()
838 else:
839 self.error("parseStruct: expecting ;", token)
840 else:
841 self.error("parseStruct: name", token)
842 token = self.token()
843 self.type = base_type;
844 self.struct_fields = fields
845 #self.debug("end parseStruct", token)
846 #print fields
847 return token
848
849 #
850 # Parse a C enum block, parse till the balancing }
851 #
852 def parseEnumBlock(self, token):
853 self.enums = []
854 name = None
855 self.comment = None
856 comment = ""
857 value = ""
858 while token != None:
859 if token[0] == "sep" and token[1] == "{":
860 token = self.token()
861 token = self.parseTypeBlock(token)
862 elif token[0] == "sep" and token[1] == "}":
863 if name != None:
864 if self.comment != None:
865 comment = self.comment
866 self.comment = None
867 self.enums.append((name, value, comment))
868 token = self.token()
869 return token
870 elif token[0] == "name":
871 if name != None:
872 if self.comment != None:
873 comment = string.strip(self.comment)
874 self.comment = None
875 self.enums.append((name, value, comment))
876 name = token[1]
877 comment = ""
878 value = ""
879 token = self.token()
880 if token[0] == "op" and token[1][0] == "=":
881 if len(token[1]) > 1:
882 value = token[1][1:]
883 token = self.token()
884 while token[0] != "sep" or (token[1] != ',' and
885 token[1] != '}'):
886 value = value + token[1]
887 token = self.token()
888 if token[0] == "sep" and token[1] == ",":
889 token = self.token()
890 else:
891 token = self.token()
892 return token
893
    #
    # Parse a C definition block, used for structs: it parses up to
    # the balancing }
    #
898 def parseTypeBlock(self, token):
899 while token != None:
900 if token[0] == "sep" and token[1] == "{":
901 token = self.token()
902 token = self.parseTypeBlock(token)
903 elif token[0] == "sep" and token[1] == "}":
904 token = self.token()
905 return token
906 else:
907 token = self.token()
908 return token
909
    #
    # Parse a type: the fact that the type name can occur either after
    # the definition or within the definition makes it a little harder;
    # if it is inside, the name token is pushed back before returning
    #
915 def parseType(self, token):
916 self.type = ""
917 self.struct_fields = []
918 self.signature = None
919 if token == None:
920 return token
921
922 while token[0] == "name" and (
923 token[1] == "const" or token[1] == "unsigned"):
924 if self.type == "":
925 self.type = token[1]
926 else:
927 self.type = self.type + " " + token[1]
928 token = self.token()
929
930 if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
931 if self.type == "":
932 self.type = token[1]
933 else:
934 self.type = self.type + " " + token[1]
935 if token[0] == "name" and token[1] == "int":
936 if self.type == "":
937 self.type = tmp[1]
938 else:
939 self.type = self.type + " " + tmp[1]
940
941 elif token[0] == "name" and token[1] == "struct":
942 if self.type == "":
943 self.type = token[1]
944 else:
945 self.type = self.type + " " + token[1]
946 token = self.token()
947 nametok = None
948 if token[0] == "name":
949 nametok = token
950 token = self.token()
951 if token != None and token[0] == "sep" and token[1] == "{":
952 token = self.token()
953 token = self.parseStruct(token)
954 elif token != None and token[0] == "op" and token[1] == "*":
955 self.type = self.type + " " + nametok[1] + " *"
956 token = self.token()
957 while token != None and token[0] == "op" and token[1] == "*":
958 self.type = self.type + " *"
959 token = self.token()
960 if token[0] == "name":
961 nametok = token
962 token = self.token()
963 else:
964 self.error("struct : expecting name", token)
965 return token
966 elif token != None and token[0] == "name" and nametok != None:
967 self.type = self.type + " " + nametok[1]
968 return token
969
970 if nametok != None:
971 self.lexer.push(token)
972 token = nametok
973 return token
974
975 elif token[0] == "name" and token[1] == "enum":
976 if self.type == "":
977 self.type = token[1]
978 else:
979 self.type = self.type + " " + token[1]
980 self.enums = []
981 token = self.token()
982 if token != None and token[0] == "sep" and token[1] == "{":
983 token = self.token()
984 token = self.parseEnumBlock(token)
985 else:
986 self.error("parsing enum: expecting '{'", token)
987 enum_type = None
988 if token != None and token[0] != "name":
989 self.lexer.push(token)
990 token = ("name", "enum")
991 else:
992 enum_type = token[1]
993 for enum in self.enums:
994 self.index.add(enum[0], self.filename,
995 not self.is_header, "enum",
996 (enum[1], enum[2], enum_type))
997 return token
998
999 elif token[0] == "name":
1000 if self.type == "":
1001 self.type = token[1]
1002 else:
1003 self.type = self.type + " " + token[1]
1004 else:
1005 self.error("parsing type %s: expecting a name" % (self.type),
1006 token)
1007 return token
1008 token = self.token()
1009 while token != None and (token[0] == "op" or
1010 token[0] == "name" and token[1] == "const"):
1011 self.type = self.type + " " + token[1]
1012 token = self.token()
1013
1014 #
1015 # if there is a parenthesis here, this means a function type
1016 #
1017 if token != None and token[0] == "sep" and token[1] == '(':
1018 self.type = self.type + token[1]
1019 token = self.token()
1020 while token != None and token[0] == "op" and token[1] == '*':
1021 self.type = self.type + token[1]
1022 token = self.token()
1023 if token == None or token[0] != "name" :
1024 self.error("parsing function type, name expected", token);
1025 return token
1026 self.type = self.type + token[1]
1027 nametok = token
1028 token = self.token()
1029 if token != None and token[0] == "sep" and token[1] == ')':
1030 self.type = self.type + token[1]
1031 token = self.token()
1032 if token != None and token[0] == "sep" and token[1] == '(':
1033 token = self.token()
1034 type = self.type;
1035 token = self.parseSignature(token);
1036 self.type = type;
1037 else:
1038 self.error("parsing function type, '(' expected", token);
1039 return token
1040 else:
1041 self.error("parsing function type, ')' expected", token);
1042 return token
1043 self.lexer.push(token)
1044 token = nametok
1045 return token
1046
1047 #
1048 # do some lookahead for arrays
1049 #
1050 if token != None and token[0] == "name":
1051 nametok = token
1052 token = self.token()
1053 if token != None and token[0] == "sep" and token[1] == '[':
1054 self.type = self.type + nametok[1]
1055 while token != None and token[0] == "sep" and token[1] == '[':
1056 self.type = self.type + token[1]
1057 token = self.token()
1058 while token != None and token[0] != 'sep' and \
1059 token[1] != ']' and token[1] != ';':
1060 self.type = self.type + token[1]
1061 token = self.token()
1062 if token != None and token[0] == 'sep' and token[1] == ']':
1063 self.type = self.type + token[1]
1064 token = self.token()
1065 else:
1066 self.error("parsing array type, ']' expected", token);
1067 return token
1068 elif token != None and token[0] == "sep" and token[1] == ':':
1069 # remove :12 in case it's a limited int size
1070 token = self.token()
1071 token = self.token()
1072 self.lexer.push(token)
1073 token = nametok
1074
1075 return token
1076
    #
    # Parse a signature: '(' has already been parsed and we scan the
    # type definitions up to and including the ')'
1080 def parseSignature(self, token):
1081 signature = []
1082 if token != None and token[0] == "sep" and token[1] == ')':
1083 self.signature = []
1084 token = self.token()
1085 return token
1086 while token != None:
1087 token = self.parseType(token)
1088 if token != None and token[0] == "name":
1089 signature.append((self.type, token[1], None))
1090 token = self.token()
1091 elif token != None and token[0] == "sep" and token[1] == ',':
1092 token = self.token()
1093 continue
1094 elif token != None and token[0] == "sep" and token[1] == ')':
1095 # only the type was provided
1096 if self.type == "...":
1097 signature.append((self.type, "...", None))
1098 else:
1099 signature.append((self.type, None, None))
1100 if token != None and token[0] == "sep":
1101 if token[1] == ',':
1102 token = self.token()
1103 continue
1104 elif token[1] == ')':
1105 token = self.token()
1106 break
1107 self.signature = signature
1108 return token
1109
1110 #
1111 # Parse a global definition, be it a type, variable or function
1112 # the extern "C" blocks are a bit nasty and require it to recurse.
1113 #
def parseGlobal(self, token):
    """Parse one top-level definition starting at `token`.

    Handles, in order:
      - extern "C" { ... } blocks, by recursing on each contained
        definition until the closing '}';
      - a leading 'static' qualifier;
      - typedefs, delegated to self.parseTypedef();
      - variable declarations (optionally with array suffix and
        initializer, possibly several separated by ','), registered in
        self.index as "variable" or "struct";
      - function prototypes and definitions, registered in self.index
        as "function".

    `token` must not be None.  Returns the first token that was not
    consumed, or None when no token is left.
    """
    static = 0
    if token[1] == 'extern':
        token = self.token()
        if token is None:
            return token
        if token[0] == 'string':
            if token[1] == 'C':
                token = self.token()
                if token is None:
                    return token
                if token[0] == 'sep' and token[1] == "{":
                    # extern "C" { ... }: parse every definition inside
                    # the block, then swallow the closing '}'.
                    token = self.token()
                    while token is not None and (token[0] != 'sep' or
                                                 token[1] != "}"):
                        if token[0] != 'name':
                            self.error(
                                "token %s %s unexpected at the top level" % (
                                    token[0], token[1]))
                        token = self.parseGlobal(token)
                    token = self.token()
                    return token
            else:
                # extern followed by a string other than "C".
                return token
    elif token[1] == 'static':
        static = 1
        token = self.token()
        if token is None or token[0] != 'name':
            return token

    if token[1] == 'typedef':
        token = self.token()
        return self.parseTypedef(token)

    token = self.parseType(token)
    type_orig = self.type
    if token is None or token[0] != "name":
        return token
    decl_type = type_orig
    self.name = token[1]
    token = self.token()
    while token is not None and (token[0] == "sep" or token[0] == "op"):
        if token[0] == "sep" and token[1] == "[":
            # Array declaration: append everything up to the ';' to the
            # type string.
            decl_type = decl_type + token[1]
            token = self.token()
            while token is not None and (token[0] != "sep" or
                                         token[1] != ";"):
                decl_type = decl_type + token[1]
                token = self.token()

        if token is not None and token[0] == "op" and token[1] == "=":
            #
            # Skip the initialization of the variable
            #
            token = self.token()
            # The None check avoids indexing a missing token when the
            # input ends right after '='; the error below reports it.
            if (token is not None and token[0] == 'sep'
                    and token[1] == '{'):
                token = self.token()
                token = self.parseBlock(token)
            else:
                self.comment = None
                while token is not None and (token[0] != "sep" or
                        (token[1] != ';' and token[1] != ',')):
                    token = self.token()
            self.comment = None
            if token is None or token[0] != "sep" or (token[1] != ';' and
                                                      token[1] != ','):
                self.error("missing ';' or ',' after value")

        if token is not None and token[0] == "sep":
            if token[1] == ";":
                # End of a variable (or struct variable) declaration.
                # NOTE(review): this branch registers with
                # "not self.is_header" while the ',' branch below uses
                # the parsed `static` flag -- confirm this is intended.
                self.comment = None
                token = self.token()
                if decl_type == "struct":
                    self.index.add(self.name, self.filename,
                                   not self.is_header, "struct",
                                   self.struct_fields)
                else:
                    self.index.add(self.name, self.filename,
                                   not self.is_header, "variable",
                                   decl_type)
                break
            elif token[1] == "(":
                token = self.token()
                token = self.parseSignature(token)
                if token is None:
                    return None
                if token[0] == "sep" and token[1] == ";":
                    # Function prototype.
                    d = self.mergeFunctionComment(
                        self.name, ((decl_type, None), self.signature), 1)
                    self.index.add(self.name, self.filename, static,
                                   "function", d)
                    token = self.token()
                # elif, not if: the token fetched just above may be None
                # when the prototype is the last thing in the input.
                elif token[0] == "sep" and token[1] == "{":
                    # Function definition: register it, then skip the
                    # body.
                    d = self.mergeFunctionComment(
                        self.name, ((decl_type, None), self.signature),
                        static)
                    self.index.add(self.name, self.filename, static,
                                   "function", d)
                    token = self.token()
                    token = self.parseBlock(token)
            elif token[1] == ',':
                # Several variables declared at once: register the
                # current one and restart from the base type.
                self.comment = None
                self.index.add(self.name, self.filename, static,
                               "variable", decl_type)
                decl_type = type_orig
                token = self.token()
                while token is not None and token[0] == "sep":
                    decl_type = decl_type + token[1]
                    token = self.token()
                if token is not None and token[0] == "name":
                    self.name = token[1]
                    token = self.token()
            else:
                break

    return token
1232
def parse(self):
    """Parse the whole file and return the resulting index.

    Every top-level definition is handed to parseGlobal().  When a
    token that is not a name shows up at the top level, the error is
    reported through self.error(), one more parseGlobal() call is made,
    and the method returns None instead of the index.
    """
    print("Parsing %s" % (self.filename))
    token = self.token()
    while token is not None:
        if token[0] != 'name':
            self.error("token %s %s unexpected at the top level" % (
                token[0], token[1]))
            token = self.parseGlobal(token)
            return
        token = self.parseGlobal(token)
    return self.index
1245
1246
class docBuilder:
    """A documentation builder.

    Collects the C modules (*.c) and headers (*.h) found in a list of
    directories, parses each of them with CParser, merges the results
    into a single index and serializes that index as an XML API
    description.
    """
    def __init__(self, name, directories=None, excludes=None):
        """Create a builder for the project called `name`.

        directories -- directories to scan, defaults to ['.']
        excludes    -- substrings; any file whose path contains one of
                       them is skipped.  The keys of the module-level
                       ignored_files table are always added.
        """
        # None defaults avoid sharing one mutable list between instances.
        if directories is None:
            directories = ['.']
        if excludes is None:
            excludes = []
        self.name = name
        self.directories = directories
        self.excludes = list(excludes) + list(ignored_files.keys())
        self.modules = {}   # module path -> index from CParser.parse()
        self.headers = {}   # header path -> index from CParser.parse()
        self.idx = index()

    def analyze(self):
        """Print a short summary and run the analysis of the merged index."""
        print("Project %s : %d headers, %d modules" % (
            self.name, len(self.headers), len(self.modules)))
        self.idx.analyze()

    def scanHeaders(self):
        """Parse every registered header and merge it into self.idx."""
        for header in list(self.headers.keys()):
            parser = CParser(header)
            idx = parser.parse()
            self.headers[header] = idx
            self.idx.merge(idx)

    def scanModules(self):
        """Parse every registered module and merge its public part."""
        for module in list(self.modules.keys()):
            parser = CParser(module)
            idx = parser.parse()
            self.modules[module] = idx
            self.idx.merge_public(idx)

    def _collect(self, pattern, registry):
        """Register in `registry` each file matching the glob `pattern`
        whose path contains none of the excluded substrings."""
        for path in glob.glob(pattern):
            for excl in self.excludes:
                if path.find(excl) != -1:
                    break
            else:
                registry[path] = None

    def scan(self):
        """Locate the modules and headers, then parse all of them."""
        for directory in self.directories:
            self._collect(directory + "/*.c", self.modules)
            self._collect(directory + "/*.h", self.headers)
        self.scanHeaders()
        self.scanModules()

    def modulename_file(self, file):
        """Return the last '/'-separated component of `file`, without a
        trailing '.h' if there is one."""
        module = file.split('/')[-1]
        if module[-2:] == '.h':
            module = module[:-2]
        return module

    def _sorted_keys(self, table):
        """Return the keys of `table` as a sorted list."""
        keys = list(table.keys())
        keys.sort()
        return keys

    def serialize_enum(self, output, name):
        """Write the <enum> element describing the enum value `name`."""
        entry = self.idx.enums[name]
        output.write(" <enum name='%s' file='%s'" % (
            name, self.modulename_file(entry.module)))
        if entry.info is not None:
            info = entry.info
            if info[0] is not None and info[0] != '':
                # The value may be a C expression such as "1 << 0"; a raw
                # '<' or '&' is not allowed in an XML attribute value.
                output.write(" value='%s'" % escape(str(info[0])))
            if info[2] is not None and info[2] != '':
                output.write(" type='%s'" % info[2])
            if info[1] is not None and info[1] != '':
                output.write(" info='%s'" % escape(info[1]))
        output.write("/>\n")

    def serialize_macro(self, output, name):
        """Write the <macro> element describing the macro `name`."""
        entry = self.idx.macros[name]
        output.write(" <macro name='%s' file='%s'>\n" % (
            name, self.modulename_file(entry.module)))
        if entry.info is not None:
            try:
                (args, desc) = entry.info
                if desc is not None and desc != "":
                    output.write(" <info>%s</info>\n" % (escape(desc)))
                for arg in args:
                    (arg_name, arg_desc) = arg
                    if arg_desc is not None and arg_desc != "":
                        output.write(" <arg name='%s' info='%s'/>\n" % (
                            arg_name, escape(arg_desc)))
                    else:
                        output.write(" <arg name='%s'/>\n" % (arg_name))
            except (TypeError, ValueError, AttributeError):
                # Best effort: info that is not shaped as
                # (args, description) is left out of the output.
                pass
        output.write(" </macro>\n")

    def serialize_typedef(self, output, name):
        """Write the element describing the typedef `name`.

        A typedef whose type starts with 'struct ' becomes a <struct>
        element, listing the fields when the struct is known to the
        index; anything else becomes a <typedef> element.
        """
        entry = self.idx.typedefs[name]
        if entry.info[0:7] == 'struct ':
            output.write(" <struct name='%s' file='%s' type='%s'" % (
                name, self.modulename_file(entry.module), entry.info))
            struct_name = entry.info[7:]
            if struct_name in self.idx.structs:
                output.write(">\n")
                for field in self.idx.structs[struct_name].info:
                    desc = field[2]
                    if desc is None:
                        desc = ''
                    else:
                        desc = escape(desc)
                    output.write(
                        " <field name='%s' type='%s' info='%s'/>\n" % (
                            field[1], field[0], desc))
                output.write(" </struct>\n")
            else:
                output.write("/>\n")
        else:
            output.write(" <typedef name='%s' file='%s' type='%s'/>\n" % (
                name, self.modulename_file(entry.module), entry.info))

    def serialize_function(self, output, name):
        """Write the element describing the function `name`.

        The element tag is the type recorded for the identifier.  If the
        recorded info cannot be written, a message is printed and the
        element is closed with whatever was written so far.
        """
        entry = self.idx.functions[name]
        output.write(" <%s name='%s' file='%s'>\n" % (
            entry.type, name, self.modulename_file(entry.module)))
        try:
            (ret, params, desc) = entry.info
            output.write(" <info>%s</info>\n" % (escape(desc)))
            if ret[0] is not None:
                if ret[0] == "void":
                    output.write(" <return type='void'/>\n")
                else:
                    output.write(" <return type='%s' info='%s'/>\n" % (
                        ret[0], escape(ret[1])))
            for param in params:
                if param[0] == 'void':
                    continue
                if param[2] is None:
                    output.write(" <arg name='%s' type='%s' info=''/>\n" % (
                        param[1], param[0]))
                else:
                    output.write(
                        " <arg name='%s' type='%s' info='%s'/>\n" % (
                            param[1], param[0], escape(param[2])))
        except Exception:
            print("Failed to save function %s info:  %s" % (
                name, repr(entry.info)))
        output.write(" </%s>\n" % (entry.type))

    def serialize_exports(self, output, file):
        """Write the <file> element listing, in sorted order, every
        symbol found in the header `file`."""
        module = self.modulename_file(file)
        output.write(" <file name='%s'>\n" % (module))
        exported = self.headers[file]
        symbols = []
        for table in (exported.functions, exported.variables,
                      exported.macros, exported.typedefs,
                      exported.structs, exported.enums):
            symbols.extend(table.keys())
        symbols.sort()
        for symbol in symbols:
            output.write(" <exports symbol='%s'/>\n" % (symbol))
        output.write(" </file>\n")

    def serialize(self, filename=None):
        """Save the XML API description.

        filename -- output path, defaults to "<name>-api.xml"
        """
        if filename is None:
            filename = "%s-api.xml" % self.name
        print("Saving XML description %s" % (filename))
        output = open(filename, "w")
        # try/finally so the file is closed even if a serializer raises.
        try:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<api name='%s'>\n" % self.name)
            output.write(" <files>\n")
            for header in list(self.headers.keys()):
                self.serialize_exports(output, header)
            output.write(" </files>\n")
            output.write(" <symbols>\n")
            for macro in self._sorted_keys(self.idx.macros):
                self.serialize_macro(output, macro)
            for enum in self._sorted_keys(self.idx.enums):
                self.serialize_enum(output, enum)
            for typedef in self._sorted_keys(self.idx.typedefs):
                self.serialize_typedef(output, typedef)
            for function in self._sorted_keys(self.idx.functions):
                self.serialize_function(output, function)
            output.write(" </symbols>\n")
            output.write("</api>\n")
        finally:
            output.close()
1429
1430
def rebuild():
    """Regenerate the XML API description for the surrounding project.

    The project is guessed from a marker source file looked up relative
    to the current directory: ../parser.c selects libxml2, otherwise
    ../libxslt/transform.c selects libxslt.  When neither is found a
    message is printed and None is returned.  Otherwise the sources are
    scanned, analyzed and serialized, and the docBuilder is returned.
    """
    # (marker file, message, project name, directories, excludes),
    # probed in order; the first marker that exists wins.
    candidates = (
        ("../parser.c",
         "Rebuilding API description for libxml2",
         "libxml2", ["..", "../include/libxml"],
         ["xmlwin32version.h", "tst.c"]),
        ("../libxslt/transform.c",
         "Rebuilding API description for libxslt",
         "libxslt", ["../libxslt"],
         ["win32config.h", "tst.c"]),
    )
    builder = None
    for marker, message, project, directories, excludes in candidates:
        if glob.glob(marker):
            print(message)
            builder = docBuilder(project, directories, excludes)
            break
    if builder is None:
        print("rebuild() failed, unable to guess the module")
        return None
    builder.scan()
    builder.analyze()
    builder.serialize()
    return builder
1448
1449#
1450# for debugging the parser
1451#
def parse(filename):
    """Parse the single file `filename` with a CParser and return
    whatever CParser.parse() returns (debugging helper)."""
    return CParser(filename).parse()
1456
# Script entry point: rebuild the API description when this file is run
# directly rather than imported.
if __name__ == "__main__":
    rebuild()