blob: 3c4824607762d86936ac1588c79926727afa451c [file] [log] [blame]
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001#!/usr/bin/python -u
2#
3# This is the API builder, it parses the C sources and build the
4# API formal description in XML.
5#
6# See Copyright for the status of this software.
7#
8# daniel@veillard.com
9#
10import sys
11import string
12import glob
13
def escape(raw):
    """Return raw with the five XML special characters escaped.

    The ampersand is replaced first so that the '&' introduced by the
    other entities is not escaped a second time.
    """
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
21
class identifier:
    """Description of one symbol: its name, the module it was seen in,
       its kind (type), and two free-form payloads (info and extra)."""

    def __init__(self, name, module=None, type=None, info=None, extra=None):
        # symbols are considered public until set_static() says otherwise
        self.static = 0
        self.name = name
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra

    def __repr__(self):
        pieces = ["%s %s:" % (self.type, self.name)]
        if self.static:
            pieces.append(" static")
        if self.module is not None:
            pieces.append(" from %s" % (self.module))
        # info and extra are shown through repr(), in that order
        for detail in (self.info, self.extra):
            if detail is not None:
                pieces.append(" " + repr(detail))
        return "".join(pieces)

    def set_module(self, module):
        self.module = module

    def set_type(self, type):
        self.type = type

    def set_info(self, info):
        self.info = info

    def set_extra(self, extra):
        self.extra = extra

    def set_static(self, static):
        self.static = static

    def update(self, module, type = None, info = None, extra=None):
        """Merge new knowledge into the record: module and type are only
           filled in when still unknown, info and extra are overwritten
           whenever a value is supplied."""
        if self.module is None and module is not None:
            self.set_module(module)
        if self.type is None and type is not None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
64
65
class index:
    """Symbol table for the identifiers found while scanning sources.

    Every identifier is stored in self.identifiers and, when its kind is
    known, a second time in the dictionary for that kind (functions,
    variables, includes, structs, enums, typedefs, macros).  All the
    dictionaries are keyed by identifier name.
    """
    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}

    def add(self, name, module, static, type, info=None, extra=None):
        """Register (or update) the identifier called name.

        Names starting with a double underscore are ignored and None is
        returned.  Otherwise the new or updated record is returned, after
        being filed under the dictionary matching type; an unknown type
        is reported on stdout and the record is only kept in
        self.identifiers.
        """
        if name[0:2] == '__':
            return None
        # explicit lookup instead of a bare except around the update, so
        # that an error raised while updating is not silently turned into
        # a brand new record
        d = self.identifiers.get(name)
        if d is None:
            d = identifier(name, module, type, info, extra)
            self.identifiers[name] = d
        else:
            d.update(module, type, info, extra)

        if static == 1:
            d.set_static(1)

        if type is not None:
            by_type = {
                "function": self.functions,
                "functype": self.functions,
                "variable": self.variables,
                "include": self.includes,
                "struct": self.structs,
                "enum": self.enums,
                "typedef": self.typedefs,
                "macro": self.macros,
            }
            target = by_type.get(type)
            if target is None:
                print("Unable to register type  %s" % (type,))
            else:
                target[name] = d
        return d

    def _merge_kind(self, label, mine, theirs, shadows_macros = 0):
        """Copy the entries of theirs missing from mine (and record them
           in self.identifiers), reporting the names present in both.
           When shadows_macros is set, a macro of the same name is
           dropped first."""
        for name in theirs.keys():
            if shadows_macros and name in self.macros:
                del self.macros[name]
            if name in mine:
                print("%s %s from %s redeclared in %s" % (
                    label, name, mine[name].module, theirs[name].module))
            else:
                mine[name] = theirs[name]
                self.identifiers[name] = theirs[name]

    def merge(self, idx):
        """Merge the symbols of idx into this index.

        The kinds are processed in a fixed order: functions, variables,
        structs, typedefs, macros, enums.  Includes are not merged.
        """
        #
        # macro might be used to override functions or variables
        # definitions
        #
        self._merge_kind("function", self.functions, idx.functions, 1)
        self._merge_kind("variable", self.variables, idx.variables, 1)
        self._merge_kind("struct", self.structs, idx.structs)
        self._merge_kind("typedef", self.typedefs, idx.typedefs)
        for name in idx.macros.keys():
            # a macro never replaces a variable, function or enum value
            # already known under the same name
            if name in self.variables:
                continue
            if name in self.functions:
                continue
            if name in self.enums:
                continue
            if name in self.macros:
                print("macro %s from %s redeclared in %s" % (
                    name, self.macros[name].module, idx.macros[name].module))
            else:
                self.macros[name] = idx.macros[name]
                self.identifiers[name] = idx.macros[name]
        self._merge_kind("enum", self.enums, idx.enums)

    def merge_public(self, idx):
        """Refresh the functions already known here with the type, info
           and extra found for the same names in idx."""
        for name in idx.functions.keys():
            if name in self.functions:
                up = idx.functions[name]
                self.functions[name].update(None, up.type, up.info, up.extra)

    def analyze_dict(self, type, dict):
        """Print how many entries dict holds and how many are public
           (not static); nothing is printed for an empty dictionary."""
        count = 0
        public = 0
        for name in dict.keys():
            count = count + 1
            if dict[name].static == 0:
                public = public + 1
        if count != public:
            print(" %d %s , %d public" % (count, type, public))
        elif count != 0:
            print(" %d public %s" % (count, type))

    def analyze(self):
        """Print the statistics for functions, variables, structs,
           typedefs and macros."""
        self.analyze_dict("functions", self.functions)
        self.analyze_dict("variables", self.variables)
        self.analyze_dict("structs", self.structs)
        self.analyze_dict("typedefs", self.typedefs)
        self.analyze_dict("macros", self.macros)
206
207#
208# C parser analysis code
209#
# Files that are never scanned, each mapped to the reason it is skipped.
ignored_files = {
    "trio": "too many non standard macros",
    "trio.c": "too many non standard macros",
    "trionan.c": "too many non standard macros",
    "triostr.c": "too many non standard macros",
    "acconfig.h": "generated portability layer",
    "config.h": "generated portability layer",
    "libxml.h": "internal only",
}

# Words dropped from the token stream.  Each maps to a tuple holding the
# number of tokens following the word that are dropped with it, and a
# short explanation.
ignored_words = {
    "WINAPI": (0, "Windows keyword"),
    "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
    "__declspec": (3, "Windows keyword"),
    "ATTRIBUTE_UNUSED": (0, "macro keyword"),
    "LIBEXSLT_PUBLIC": (0, "macro keyword"),
    "X_IN_Y": (5, "macro function builder"),
}
228
class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line.

       Tokens are (kind, text) tuples where kind is one of 'preproc',
       'string', 'comment', 'name', 'sep' or 'op'."""
    def __init__(self, input):
        self.input = input
        self.tokens = []
        self.line = ""
        self.lineno = 0
        # last token handed out; initialised so that debug() can be
        # called before the first token has been read
        self.last = None

    def getline(self):
        """Return the next non blank line, stripped, with the lines
           continued by a trailing backslash joined to it, or None at
           the end of the input."""
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = line.strip()
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = n.strip()
                if not n:
                    break
                line = line + n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        """Put token back at the front of the queue."""
        self.tokens.insert(0, token)

    def debug(self):
        print("Last token:  %s" % (self.last,))
        print("Token queue:  %s" % (self.tokens,))
        print("Line %d end:  %s" % (self.lineno, self.line))

    def _scan_string(self, line):
        """Read a string or character literal whose opening quote is
           line[0]; the text left after the closing quote is kept in
           self.line.  Returns None if the input ends first."""
        end = line[0]
        line = line[1:]
        tok = ""
        found = 0
        while not found:
            i = 0
            l = len(line)
            while i < l:
                if line[i] == end:
                    self.line = line[i+1:]
                    line = line[:i]
                    found = 1
                    break
                if line[i] == '\\':
                    # skip the escaped character
                    i = i + 1
                i = i + 1
            tok = tok + line
            if not found:
                line = self.getline()
                if line is None:
                    return None
        self.last = ('string', tok)
        return self.last

    def _scan_block_comment(self, line):
        """Read a /* ... */ comment starting at the beginning of line,
           the pieces coming from successive lines are joined with a
           newline.  Returns None if the input ends first."""
        line = line[2:]
        tok = ""
        found = 0
        while not found:
            i = line.find('*/')
            if i != -1:
                self.line = line[i+2:]
                # the character in front of the terminator is dropped
                # too.  NOTE(review): when the terminator starts the
                # line this is line[:-1], i.e. a lone '*' is kept;
                # left as is since the comment parsers accept that shape
                line = line[:i-1]
                found = 1
            if tok != "":
                tok = tok + "\n"
            tok = tok + line
            if not found:
                line = self.getline()
                if line is None:
                    return None
        self.last = ('comment', tok)
        return self.last

    def _scan_code(self, line):
        """Split a line of code into name, sep and op tokens appended
           to self.tokens.  Anything from the first comment or quote on
           is saved in self.line for the next call of token()."""
        l = len(line)
        i = 0
        while i < l:
            if line[i:i+2] in ('//', '/*') or line[i] in '"\'':
                self.line = line[i:]
                line = line[:i]
                break
            i += 1

        seps = "(){}:;,[]"
        ops = "+-*><=/%&!|."
        pair_ops = "+-*><=/%&!|"
        delims = " \t(){}:;,+-*/%&!|[]=><"
        l = len(line)
        i = 0
        while i < l:
            c = line[i]
            if c == ' ' or c == '\t':
                i += 1
            elif c in seps:
                self.tokens.append(('sep', c))
                i += 1
            elif c in ops:
                if line[i:i+3] == '...':
                    self.tokens.append(('name', '...'))
                    i += 3
                elif i + 1 < l and line[i+1] in pair_ops:
                    # two operator characters in a row form one token
                    self.tokens.append(('op', line[i:i+2]))
                    i += 2
                else:
                    self.tokens.append(('op', c))
                    i += 1
            else:
                # a name runs up to the next blank, separator or
                # operator character (a '.' does not end it)
                s = i
                while i < l and line[i] not in delims:
                    i += 1
                self.tokens.append(('name', line[s:i]))

    def token(self):
        """Return the next token, or None at the end of the input."""
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line is None:
                return None

            if line[0] == '#':
                # a preprocessor line is split on blanks only
                self.tokens = [('preproc', word) for word in line.split()]
                break
            if line[0] == '"' or line[0] == "'":
                return self._scan_string(line)
            if line[:2] == '/*':
                return self._scan_block_comment(line)
            if line[:2] == '//':
                self.last = ('comment', line[2:])
                return self.last
            self._scan_code(line)

        tok = self.tokens.pop(0)
        self.last = tok
        return tok
434
435class CParser:
436 """The C module parser"""
437 def __init__(self, filename, idx = None):
438 self.filename = filename
439 if len(filename) > 2 and filename[-2:] == '.h':
440 self.is_header = 1
441 else:
442 self.is_header = 0
443 self.input = open(filename)
444 self.lexer = CLexer(self.input)
445 if idx == None:
446 self.index = index()
447 else:
448 self.index = idx
449 self.top_comment = ""
450 self.last_comment = ""
451 self.comment = None
452
453 def lineno(self):
454 return self.lexer.getlineno()
455
456 def error(self, msg, token=-1):
457 print "Parse Error: " + msg
458 if token != -1:
459 print "Got token ", token
460 self.lexer.debug()
461 sys.exit(1)
462
463 def debug(self, msg, token=-1):
464 print "Debug: " + msg
465 if token != -1:
466 print "Got token ", token
467 self.lexer.debug()
468
469 def parseComment(self, token):
470 if self.top_comment == "":
471 self.top_comment = token[1]
472 if self.comment == None or token[1][0] == '*':
473 self.comment = token[1];
474 else:
475 self.comment = self.comment + token[1]
476 token = self.lexer.token()
477 return token
478
479 #
480 # Parse a comment block associate to a macro
481 #
482 def parseMacroComment(self, name, quiet = 0):
483 if name[0:2] == '__':
484 quiet = 1
485
486 args = []
487 desc = ""
488
489 if self.comment == None:
490 if not quiet:
491 print "Missing comment for macro %s" % (name)
492 return((args, desc))
493 if self.comment[0] != '*':
494 if not quiet:
495 print "Missing * in macro comment for %s" % (name)
496 return((args, desc))
497 lines = string.split(self.comment, '\n')
498 if lines[0] == '*':
499 del lines[0]
500 if lines[0] != "* %s:" % (name):
501 if not quiet:
502 print "Misformatted macro comment for %s" % (name)
503 print " Expecting '* %s:' got '%s'" % (name, lines[0])
504 return((args, desc))
505 del lines[0]
506 while lines[0] == '*':
507 del lines[0]
508 while len(lines) > 0 and lines[0][0:3] == '* @':
509 l = lines[0][3:]
510 try:
511 (arg, desc) = string.split(l, ':', 1)
512 desc=string.strip(desc)
513 arg=string.strip(arg)
514 except:
515 if not quiet:
516 print "Misformatted macro comment for %s" % (name)
517 print " problem with '%s'" % (lines[0])
518 del lines[0]
519 continue
520 del lines[0]
521 l = string.strip(lines[0])
522 while len(l) > 2 and l[0:3] != '* @':
523 while l[0] == '*':
524 l = l[1:]
525 desc = desc + ' ' + string.strip(l)
526 del lines[0]
527 if len(lines) == 0:
528 break
529 l = lines[0]
530 args.append((arg, desc))
531 while len(lines) > 0 and lines[0] == '*':
532 del lines[0]
533 desc = ""
534 while len(lines) > 0:
535 l = lines[0]
536 while len(l) > 0 and l[0] == '*':
537 l = l[1:]
538 l = string.strip(l)
539 desc = desc + " " + l
540 del lines[0]
541
542 desc = string.strip(desc)
543
544 if quiet == 0:
545 if desc == "":
546 print "Macro comment for %s lack description of the macro" % (name)
547
548 return((args, desc))
549
550 #
551 # Parse a comment block and merge the informations found in the
552 # parameters descriptions, finally returns a block as complete
553 # as possible
554 #
555 def mergeFunctionComment(self, name, description, quiet = 0):
556 if name == 'main':
557 quiet = 1
558 if name[0:2] == '__':
559 quiet = 1
560
561 (ret, args) = description
562 desc = ""
563 retdesc = ""
564
565 if self.comment == None:
566 if not quiet:
567 print "Missing comment for function %s" % (name)
568 return(((ret[0], retdesc), args, desc))
569 if self.comment[0] != '*':
570 if not quiet:
571 print "Missing * in function comment for %s" % (name)
572 return(((ret[0], retdesc), args, desc))
573 lines = string.split(self.comment, '\n')
574 if lines[0] == '*':
575 del lines[0]
576 if lines[0] != "* %s:" % (name):
577 if not quiet:
578 print "Misformatted function comment for %s" % (name)
579 print " Expecting '* %s:' got '%s'" % (name, lines[0])
580 return(((ret[0], retdesc), args, desc))
581 del lines[0]
582 while lines[0] == '*':
583 del lines[0]
584 nbargs = len(args)
585 while len(lines) > 0 and lines[0][0:3] == '* @':
586 l = lines[0][3:]
587 try:
588 (arg, desc) = string.split(l, ':', 1)
589 desc=string.strip(desc)
590 arg=string.strip(arg)
591 except:
592 if not quiet:
593 print "Misformatted function comment for %s" % (name)
594 print " problem with '%s'" % (lines[0])
595 del lines[0]
596 continue
597 del lines[0]
598 l = string.strip(lines[0])
599 while len(l) > 2 and l[0:3] != '* @':
600 while l[0] == '*':
601 l = l[1:]
602 desc = desc + ' ' + string.strip(l)
603 del lines[0]
604 if len(lines) == 0:
605 break
606 l = lines[0]
607 i = 0
608 while i < nbargs:
609 if args[i][1] == arg:
610 args[i] = (args[i][0], arg, desc)
611 break;
612 i = i + 1
613 if i >= nbargs:
614 if not quiet:
615 print "Uname to find arg %s from function comment for %s" % (
616 arg, name)
617 while len(lines) > 0 and lines[0] == '*':
618 del lines[0]
619 desc = ""
620 while len(lines) > 0:
621 l = lines[0]
622 while len(l) > 0 and l[0] == '*':
623 l = l[1:]
624 l = string.strip(l)
625 if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return":
626 try:
627 l = string.split(l, ' ', 1)[1]
628 except:
629 l = ""
630 retdesc = string.strip(l)
631 del lines[0]
632 while len(lines) > 0:
633 l = lines[0]
634 while len(l) > 0 and l[0] == '*':
635 l = l[1:]
636 l = string.strip(l)
637 retdesc = retdesc + " " + l
638 del lines[0]
639 else:
640 desc = desc + " " + l
641 del lines[0]
642
643 retdesc = string.strip(retdesc)
644 desc = string.strip(desc)
645
646 if quiet == 0:
647 #
648 # report missing comments
649 #
650 i = 0
651 while i < nbargs:
652 if args[i][2] == None and args[i][0] != "void" and args[i][1] != None:
653 print "Function comment for %s lack description of arg %s" % (name, args[i][1])
654 i = i + 1
655 if retdesc == "" and ret[0] != "void":
656 print "Function comment for %s lack description of return value" % (name)
657 if desc == "":
658 print "Function comment for %s lack description of the function" % (name)
659
660
661 return(((ret[0], retdesc), args, desc))
662
663 def parsePreproc(self, token):
664 name = token[1]
665 if name == "#include":
666 token = self.lexer.token()
667 if token == None:
668 return None
669 if token[0] == 'preproc':
670 self.index.add(token[1], self.filename, not self.is_header,
671 "include")
672 return self.lexer.token()
673 return token
674 if name == "#define":
675 token = self.lexer.token()
676 if token == None:
677 return None
678 if token[0] == 'preproc':
679 # TODO macros with arguments
680 name = token[1]
681 lst = []
682 token = self.lexer.token()
683 while token != None and token[0] == 'preproc' and \
684 token[1][0] != '#':
685 lst.append(token[1])
686 token = self.lexer.token()
687 try:
688 name = string.split(name, '(') [0]
689 except:
690 pass
691 info = self.parseMacroComment(name, not self.is_header)
692 self.index.add(name, self.filename, not self.is_header,
693 "macro", info)
694 return token
695 token = self.lexer.token()
696 while token != None and token[0] == 'preproc' and \
697 token[1][0] != '#':
698 token = self.lexer.token()
699 return token
700
701 #
702 # token acquisition on top of the lexer, it handle internally
703 # preprocessor and comments since they are logically not part of
704 # the program structure.
705 #
706 def token(self):
707 global ignored_words
708
709 token = self.lexer.token()
710 while token != None:
711 if token[0] == 'comment':
712 token = self.parseComment(token)
713 continue
714 elif token[0] == 'preproc':
715 token = self.parsePreproc(token)
716 continue
717 elif token[0] == "name" and ignored_words.has_key(token[1]):
718 (n, info) = ignored_words[token[1]]
719 i = 0
720 while i < n:
721 token = self.lexer.token()
722 i = i + 1
723 token = self.lexer.token()
724 continue
725 else:
726 #print "=> ", token
727 return token
728 return None
729
730 #
731 # Parse a typedef, it records the type and its name.
732 #
733 def parseTypedef(self, token):
734 if token == None:
735 return None
736 token = self.parseType(token)
737 if token == None:
738 self.error("parsing typedef")
739 return None
740 base_type = self.type
741 type = base_type
742 #self.debug("end typedef type", token)
743 while token != None:
744 if token[0] == "name":
745 name = token[1]
746 signature = self.signature
747 if signature != None:
748 d = self.mergeFunctionComment(name,
749 ((type, None), signature), 1)
750 self.index.add(name, self.filename, not self.is_header,
751 "functype", d)
752 else:
753 if base_type == "struct":
754 self.index.add(name, self.filename, not self.is_header,
755 "struct", type)
756 base_type = "struct " + name
757 else:
758 self.index.add(name, self.filename, not self.is_header,
759 "typedef", type)
760 token = self.token()
761 else:
762 self.error("parsing typedef: expecting a name")
763 return token
764 #self.debug("end typedef", token)
765 if token != None and token[0] == 'sep' and token[1] == ',':
766 type = base_type
767 token = self.token()
768 while token != None and token[0] == "op":
769 type = type + token[1]
770 token = self.token()
771 elif token != None and token[0] == 'sep' and token[1] == ';':
772 break;
773 elif token != None and token[0] == 'name':
774 type = base_type
775 continue;
776 else:
777 self.error("parsing typedef: expecting ';'", token)
778 return token
779 token = self.token()
780 return token
781
782 #
783 # Parse a C code block, used for functions it parse till
784 # the balancing } included
785 #
786 def parseBlock(self, token):
787 while token != None:
788 if token[0] == "sep" and token[1] == "{":
789 token = self.token()
790 token = self.parseBlock(token)
791 elif token[0] == "sep" and token[1] == "}":
792 self.comment = None
793 token = self.token()
794 return token
795 else:
796 token = self.token()
797 return token
798
799 #
800 # Parse a C struct definition till the balancing }
801 #
802 def parseStruct(self, token):
803 fields = []
804 #self.debug("start parseStruct", token)
805 while token != None:
806 if token[0] == "sep" and token[1] == "{":
807 token = self.token()
808 token = self.parseTypeBlock(token)
809 elif token[0] == "sep" and token[1] == "}":
810 self.struct_fields = fields
811 #self.debug("end parseStruct", token)
812 #print fields
813 token = self.token()
814 return token
815 else:
816 base_type = self.type
817 #self.debug("before parseType", token)
818 token = self.parseType(token)
819 #self.debug("after parseType", token)
820 if token != None and token[0] == "name":
821 fname = token[1]
822 token = self.token()
823 if token[0] == "sep" and token[1] == ";":
824 self.comment = None
825 token = self.token()
826 fields.append((self.type, fname, self.comment))
827 self.comment = None
828 else:
829 self.error("parseStruct: expecting ;", token)
830 elif token != None and token[0] == "sep" and token[1] == "{":
831 token = self.token()
832 token = self.parseTypeBlock(token)
833 if token != None and token[0] == "name":
834 token = self.token()
835 if token != None and token[0] == "sep" and token[1] == ";":
836 token = self.token()
837 else:
838 self.error("parseStruct: expecting ;", token)
839 else:
840 self.error("parseStruct: name", token)
841 token = self.token()
842 self.type = base_type;
843 self.struct_fields = fields
844 #self.debug("end parseStruct", token)
845 #print fields
846 return token
847
848 #
849 # Parse a C enum block, parse till the balancing }
850 #
851 def parseEnumBlock(self, token):
852 self.enums = []
853 name = None
854 self.comment = None
855 comment = ""
856 value = ""
857 while token != None:
858 if token[0] == "sep" and token[1] == "{":
859 token = self.token()
860 token = self.parseTypeBlock(token)
861 elif token[0] == "sep" and token[1] == "}":
862 if name != None:
863 if self.comment != None:
864 comment = self.comment
865 self.comment = None
866 self.enums.append((name, value, comment))
867 token = self.token()
868 return token
869 elif token[0] == "name":
870 if name != None:
871 if self.comment != None:
872 comment = string.strip(self.comment)
873 self.comment = None
874 self.enums.append((name, value, comment))
875 name = token[1]
876 comment = ""
877 value = ""
878 token = self.token()
879 if token[0] == "op" and token[1][0] == "=":
880 if len(token[1]) > 1:
881 value = token[1][1:]
882 token = self.token()
883 while token[0] != "sep" or (token[1] != ',' and
884 token[1] != '}'):
885 value = value + token[1]
886 token = self.token()
887 if token[0] == "sep" and token[1] == ",":
888 token = self.token()
889 else:
890 token = self.token()
891 return token
892
893 #
894 # Parse a C definition block, used for structs it parse till
895 # the balancing }
896 #
897 def parseTypeBlock(self, token):
898 while token != None:
899 if token[0] == "sep" and token[1] == "{":
900 token = self.token()
901 token = self.parseTypeBlock(token)
902 elif token[0] == "sep" and token[1] == "}":
903 token = self.token()
904 return token
905 else:
906 token = self.token()
907 return token
908
909 #
910 # Parse a type: the fact that the type name can either occur after
911 # the definition or within the definition makes it a little harder
912 # if inside, the name token is pushed back before returning
913 #
914 def parseType(self, token):
915 self.type = ""
916 self.struct_fields = []
917 self.signature = None
918 if token == None:
919 return token
920
921 while token[0] == "name" and (
922 token[1] == "const" or token[1] == "unsigned"):
923 if self.type == "":
924 self.type = token[1]
925 else:
926 self.type = self.type + " " + token[1]
927 token = self.token()
928
929 if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
930 if self.type == "":
931 self.type = token[1]
932 else:
933 self.type = self.type + " " + token[1]
934 if token[0] == "name" and token[1] == "int":
935 if self.type == "":
936 self.type = tmp[1]
937 else:
938 self.type = self.type + " " + tmp[1]
939
940 elif token[0] == "name" and token[1] == "struct":
941 if self.type == "":
942 self.type = token[1]
943 else:
944 self.type = self.type + " " + token[1]
945 token = self.token()
946 nametok = None
947 if token[0] == "name":
948 nametok = token
949 token = self.token()
950 if token != None and token[0] == "sep" and token[1] == "{":
951 token = self.token()
952 token = self.parseStruct(token)
953 elif token != None and token[0] == "op" and token[1] == "*":
954 self.type = self.type + " " + nametok[1] + " *"
955 token = self.token()
956 while token != None and token[0] == "op" and token[1] == "*":
957 self.type = self.type + " *"
958 token = self.token()
959 if token[0] == "name":
960 nametok = token
961 token = self.token()
962 else:
963 self.error("struct : expecting name", token)
964 return token
965 elif token != None and token[0] == "name" and nametok != None:
966 self.type = self.type + " " + nametok[1]
967 return token
968
969 if nametok != None:
970 self.lexer.push(token)
971 token = nametok
972 return token
973
974 elif token[0] == "name" and token[1] == "enum":
975 if self.type == "":
976 self.type = token[1]
977 else:
978 self.type = self.type + " " + token[1]
979 self.enums = []
980 token = self.token()
981 if token != None and token[0] == "sep" and token[1] == "{":
982 token = self.token()
983 token = self.parseEnumBlock(token)
984 else:
985 self.error("parsing enum: expecting '{'", token)
986 enum_type = None
987 if token != None and token[0] != "name":
988 self.lexer.push(token)
989 token = ("name", "enum")
990 else:
991 enum_type = token[1]
992 for enum in self.enums:
993 self.index.add(enum[0], self.filename,
994 not self.is_header, "enum",
995 (enum[1], enum[2], enum_type))
996 return token
997
998 elif token[0] == "name":
999 if self.type == "":
1000 self.type = token[1]
1001 else:
1002 self.type = self.type + " " + token[1]
1003 else:
1004 self.error("parsing type %s: expecting a name" % (self.type),
1005 token)
1006 return token
1007 token = self.token()
1008 while token != None and (token[0] == "op" or
1009 token[0] == "name" and token[1] == "const"):
1010 self.type = self.type + " " + token[1]
1011 token = self.token()
1012
1013 #
1014 # if there is a parenthesis here, this means a function type
1015 #
1016 if token != None and token[0] == "sep" and token[1] == '(':
1017 self.type = self.type + token[1]
1018 token = self.token()
1019 while token != None and token[0] == "op" and token[1] == '*':
1020 self.type = self.type + token[1]
1021 token = self.token()
1022 if token == None or token[0] != "name" :
1023 self.error("parsing function type, name expected", token);
1024 return token
1025 self.type = self.type + token[1]
1026 nametok = token
1027 token = self.token()
1028 if token != None and token[0] == "sep" and token[1] == ')':
1029 self.type = self.type + token[1]
1030 token = self.token()
1031 if token != None and token[0] == "sep" and token[1] == '(':
1032 token = self.token()
1033 type = self.type;
1034 token = self.parseSignature(token);
1035 self.type = type;
1036 else:
1037 self.error("parsing function type, '(' expected", token);
1038 return token
1039 else:
1040 self.error("parsing function type, ')' expected", token);
1041 return token
1042 self.lexer.push(token)
1043 token = nametok
1044 return token
1045
1046 #
1047 # do some lookahead for arrays
1048 #
1049 if token != None and token[0] == "name":
1050 nametok = token
1051 token = self.token()
1052 if token != None and token[0] == "sep" and token[1] == '[':
1053 self.type = self.type + nametok[1]
1054 while token != None and token[0] == "sep" and token[1] == '[':
1055 self.type = self.type + token[1]
1056 token = self.token()
1057 while token != None and token[0] != 'sep' and \
1058 token[1] != ']' and token[1] != ';':
1059 self.type = self.type + token[1]
1060 token = self.token()
1061 if token != None and token[0] == 'sep' and token[1] == ']':
1062 self.type = self.type + token[1]
1063 token = self.token()
1064 else:
1065 self.error("parsing array type, ']' expected", token);
1066 return token
1067 elif token != None and token[0] == "sep" and token[1] == ':':
1068 # remove :12 in case it's a limited int size
1069 token = self.token()
1070 token = self.token()
1071 self.lexer.push(token)
1072 token = nametok
1073
1074 return token
1075
1076 #
1077 # Parse a signature: '(' has been parsed and we scan the type definition
1078 # up to the ')' included
1079 def parseSignature(self, token):
1080 signature = []
1081 if token != None and token[0] == "sep" and token[1] == ')':
1082 self.signature = []
1083 token = self.token()
1084 return token
1085 while token != None:
1086 token = self.parseType(token)
1087 if token != None and token[0] == "name":
1088 signature.append((self.type, token[1], None))
1089 token = self.token()
1090 elif token != None and token[0] == "sep" and token[1] == ',':
1091 token = self.token()
1092 continue
1093 elif token != None and token[0] == "sep" and token[1] == ')':
1094 # only the type was provided
1095 if self.type == "...":
1096 signature.append((self.type, "...", None))
1097 else:
1098 signature.append((self.type, None, None))
1099 if token != None and token[0] == "sep":
1100 if token[1] == ',':
1101 token = self.token()
1102 continue
1103 elif token[1] == ')':
1104 token = self.token()
1105 break
1106 self.signature = signature
1107 return token
1108
1109 #
1110 # Parse a global definition, be it a type, variable or function
1111 # the extern "C" blocks are a bit nasty and require it to recurse.
1112 #
def parseGlobal(self, token):
    """Parse one global definition: a typedef, a variable or a function.

    token is the first token of the definition.  An 'extern "C" {'
    block is handled by recursing on every definition it contains.
    Typedefs are delegated to self.parseTypedef(); variables and
    functions are registered through self.index.add().

    Returns the first token after what was consumed, or None when the
    token stream is exhausted.
    """
    static = 0
    if token[1] == 'extern':
        token = self.token()
        if token == None:
            return token
        if token[0] == 'string':
            if token[1] == 'C':
                token = self.token()
                if token == None:
                    return token
                if token[0] == 'sep' and token[1] == "{":
                    token = self.token()
                    while token != None and (token[0] != 'sep' or
                                             token[1] != "}"):
                        if token[0] != 'name':
                            self.error(
                                "token %s %s unexpected at the top level" % (
                                    token[0], token[1]))
                        token = self.parseGlobal(token)
                    # Skip the closing '}' of the extern "C" block.
                    token = self.token()
                    return token
            else:
                # NOTE(review): a linkage string other than "C" is handed
                # back to the caller untouched - confirm this nesting.
                return token
    elif token[1] == 'static':
        static = 1
        token = self.token()
        if token == None or token[0] != 'name':
            return token

    if token[1] == 'typedef':
        token = self.token()
        return self.parseTypedef(token)

    token = self.parseType(token)
    type_orig = self.type
    if token == None or token[0] != "name":
        return token
    vartype = type_orig
    self.name = token[1]
    token = self.token()

    while token != None and (token[0] == "sep" or token[0] == "op"):
        if token[0] == "op" and token[1] != "=":
            # No branch below consumes such a token; stop instead of
            # looping forever on it.
            break

        if token[0] == "sep" and token[1] == "[":
            # Array declarator: fold everything up to the ';' into the type.
            vartype = vartype + token[1]
            token = self.token()
            while token != None and (token[0] != "sep" or
                                     token[1] != ";"):
                vartype = vartype + token[1]
                token = self.token()

        if token != None and token[0] == "op" and token[1] == "=":
            #
            # Skip the initialization of the variable
            #
            token = self.token()
            # token may be None here when the input stops right after '='.
            if token != None and token[0] == 'sep' and token[1] == '{':
                token = self.token()
                token = self.parseBlock(token)
            else:
                self.comment = None
                while token != None and (token[0] != "sep" or
                                         (token[1] != ';' and
                                          token[1] != ',')):
                    token = self.token()
            self.comment = None
            if token == None or token[0] != "sep" or (token[1] != ';' and
                                                      token[1] != ','):
                self.error("missing ';' or ',' after value")

        if token != None and token[0] == "sep":
            if token[1] == ";":
                # End of a variable (or struct) declaration.
                self.comment = None
                token = self.token()
                if vartype == "struct":
                    self.index.add(self.name, self.filename,
                                   not self.is_header, "struct",
                                   self.struct_fields)
                else:
                    self.index.add(self.name, self.filename,
                                   not self.is_header, "variable", vartype)
                break
            elif token[1] == "(":
                # Function prototype or definition.
                token = self.token()
                token = self.parseSignature(token)
                if token == None:
                    return None
                if token[0] == "sep" and token[1] == ";":
                    d = self.mergeFunctionComment(
                        self.name, ((vartype, None), self.signature), 1)
                    self.index.add(self.name, self.filename, static,
                                   "function", d)
                    token = self.token()
                # 'elif', not 'if': the token read after the ';' may be
                # None (a prototype ending the file) and must not be
                # dereferenced here.
                elif token[0] == "sep" and token[1] == "{":
                    d = self.mergeFunctionComment(
                        self.name, ((vartype, None), self.signature),
                        static)
                    self.index.add(self.name, self.filename, static,
                                   "function", d)
                    token = self.token()
                    token = self.parseBlock(token)
            elif token[1] == ',':
                # Several variables declared in one statement: register
                # the current one and restart from the base type.
                self.comment = None
                self.index.add(self.name, self.filename, static,
                               "variable", vartype)
                vartype = type_orig
                token = self.token()
                while token != None and token[0] == "sep":
                    vartype = vartype + token[1]
                    token = self.token()
                if token != None and token[0] == "name":
                    self.name = token[1]
                    token = self.token()
            else:
                break

    return token
1231
def parse(self):
    """Parse the whole file, one global definition at a time.

    Returns self.index once the token stream is exhausted.  When a
    token that is not a name shows up at the top level, the error is
    reported, one more parseGlobal() attempt is made and the method
    returns None.
    """
    print("Parsing %s" % (self.filename))
    current = self.token()
    while current is not None:
        if current[0] != 'name':
            self.error("token %s %s unexpected at the top level" % (
                current[0], current[1]))
            current = self.parseGlobal(current)
            return
        current = self.parseGlobal(current)
    return self.index
1244
1245
class docBuilder:
    """A documentation builder.

    Scans a set of directories for C headers (*.h) and modules (*.c),
    parses each of them with CParser, merges the resulting indexes into
    self.idx and serializes the outcome as an XML API description.
    """

    def __init__(self, name, directories=['.'], excludes=[]):
        """Create a builder.

        name        -- project name; also used to build the default
                       output file name in serialize()
        directories -- directories searched for *.c and *.h files
        excludes    -- substrings; any path containing one is skipped
        """
        self.name = name
        # Copy the lists so an instance never aliases the shared defaults.
        self.directories = list(directories)
        self.excludes = list(excludes) + list(ignored_files.keys())
        self.modules = {}   # path of a .c file -> its index (None until parsed)
        self.headers = {}   # path of a .h file -> its index (None until parsed)
        self.idx = index()

    def analyze(self):
        """Print a short summary and run the analysis of the merged index."""
        print("Project %s : %d headers, %d modules" % (
            self.name, len(self.headers.keys()), len(self.modules.keys())))
        self.idx.analyze()

    def scanHeaders(self):
        """Parse every registered header and merge its index."""
        for header in list(self.headers.keys()):
            parser = CParser(header)
            idx = parser.parse()
            self.headers[header] = idx
            self.idx.merge(idx)

    def scanModules(self):
        """Parse every registered module and merge its public part."""
        for module in list(self.modules.keys()):
            parser = CParser(module)
            idx = parser.parse()
            self.modules[module] = idx
            self.idx.merge_public(idx)

    def _is_excluded(self, path):
        """Return 1 when path contains one of the exclusion substrings."""
        for excl in self.excludes:
            if path.find(excl) != -1:
                return 1
        return 0

    def scan(self):
        """Collect the *.c and *.h files of all directories, then parse them."""
        for directory in self.directories:
            for path in glob.glob(directory + "/*.c"):
                if not self._is_excluded(path):
                    self.modules[path] = None
            for path in glob.glob(directory + "/*.h"):
                if not self._is_excluded(path):
                    self.headers[path] = None
        self.scanHeaders()
        self.scanModules()

    def modulename_file(self, file):
        """Return the base name of file, without a trailing '.h' if any."""
        module = file.split('/')[-1]
        if module[-2:] == '.h':
            module = module[:-2]
        return module

    def serialize_enum(self, output, name):
        """Write the <enum> element describing the enum value name."""
        entry = self.idx.enums[name]
        output.write(" <enum name='%s' file='%s'" % (
            name, self.modulename_file(entry.module)))
        if entry.info != None:
            info = entry.info
            # info is indexed as (value, description, type).
            if info[0] != None and info[0] != '':
                output.write(" value='%s'" % info[0])
            if info[2] != None and info[2] != '':
                output.write(" type='%s'" % info[2])
            if info[1] != None and info[1] != '':
                output.write(" info='%s'" % escape(info[1]))
        output.write("/>\n")

    def serialize_macro(self, output, name):
        """Write the <macro> element describing the macro name."""
        entry = self.idx.macros[name]
        output.write(" <macro name='%s' file='%s'>\n" % (
            name, self.modulename_file(entry.module)))
        if entry.info != None:
            try:
                (args, desc) = entry.info
                if desc != None and desc != "":
                    output.write(" <info>%s</info>\n" % (escape(desc)))
                for arg in args:
                    (argname, argdesc) = arg
                    if argdesc != None and argdesc != "":
                        output.write(" <arg name='%s' info='%s'/>\n" % (
                            argname, escape(argdesc)))
                    else:
                        output.write(" <arg name='%s'/>\n" % (argname))
            except Exception:
                # Best effort: macro info that does not have the expected
                # (args, description) shape is left out of the output.
                pass
        output.write(" </macro>\n")

    def serialize_typedef(self, output, name):
        """Write the <struct> or <typedef> element for the typedef name.

        A typedef whose type starts with 'struct ' becomes a <struct>
        element; when the structure is known and its info is a sequence
        of fields, one <field> child is written per field.
        """
        entry = self.idx.typedefs[name]
        if entry.info[0:7] != 'struct ':
            output.write(" <typedef name='%s' file='%s' type='%s'/>\n" % (
                name, self.modulename_file(entry.module), entry.info))
            return

        output.write(" <struct name='%s' file='%s' type='%s'" % (
            name, self.modulename_file(entry.module), entry.info))
        struct_name = entry.info[7:]
        fields = None
        if struct_name in self.idx.structs:
            # The field list lives in the identifier's info.  Testing the
            # type of the identifier itself never matched, so fields
            # were never written.
            fields = self.idx.structs[struct_name].info
        if isinstance(fields, (tuple, list)):
            output.write(">\n")
            try:
                for field in fields:
                    # Each field is indexed as (type, name, description).
                    desc = field[2]
                    if desc == None:
                        desc = ''
                    else:
                        desc = escape(desc)
                    output.write(
                        " <field name='%s' type='%s' info='%s'/>\n" % (
                            field[1], field[0], desc))
            except Exception:
                print("Failed to serialize struct %s" % (struct_name))
            output.write(" </struct>\n")
        else:
            output.write("/>\n")

    def serialize_function(self, output, name):
        """Write the element describing the function name.

        The element tag is the identifier's own type.
        """
        entry = self.idx.functions[name]
        output.write(" <%s name='%s' file='%s'>\n" % (
            entry.type, name, self.modulename_file(entry.module)))
        try:
            (ret, params, desc) = entry.info
            output.write(" <info>%s</info>\n" % (escape(desc)))
            if ret[0] != None:
                if ret[0] == "void":
                    output.write(" <return type='void'/>\n")
                else:
                    output.write(" <return type='%s' info='%s'/>\n" % (
                        ret[0], escape(ret[1])))
            for param in params:
                # A lone 'void' parameter means "no argument".
                if param[0] == 'void':
                    continue
                if param[2] == None:
                    output.write(" <arg name='%s' type='%s' info=''/>\n" % (
                        param[1], param[0]))
                else:
                    output.write(
                        " <arg name='%s' type='%s' info='%s'/>\n" % (
                            param[1], param[0], escape(param[2])))
        except Exception:
            message = "Failed to save function %s info: " % name
            print(message + " " + repr(entry.info))
        output.write(" </%s>\n" % (entry.type))

    def serialize_exports(self, output, file):
        """Write the <file> element listing the symbols of one header."""
        module = self.modulename_file(file)
        output.write(" <file name='%s'>\n" % (module))
        header_idx = self.headers[file]
        ids = []
        for table in (header_idx.functions, header_idx.variables,
                      header_idx.macros, header_idx.typedefs,
                      header_idx.structs, header_idx.enums):
            ids = ids + list(table.keys())
        ids.sort()
        for symbol in ids:
            output.write(" <exports symbol='%s'/>\n" % (symbol))
        output.write(" </file>\n")

    def _sorted_keys(self, table):
        """Return the keys of the dictionary table as a sorted list."""
        keys = list(table.keys())
        keys.sort()
        return keys

    def serialize(self, filename = None):
        """Save the XML description to filename.

        filename defaults to '<name>-api.xml'.
        """
        if filename == None:
            filename = "%s-api.xml" % self.name
        print("Saving XML description %s" % (filename))
        output = open(filename, "w")
        try:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<api name='%s'>\n" % self.name)
            output.write(" <files>\n")
            for file in self.headers.keys():
                self.serialize_exports(output, file)
            output.write(" </files>\n")
            output.write(" <symbols>\n")
            for macro in self._sorted_keys(self.idx.macros):
                self.serialize_macro(output, macro)
            for enum in self._sorted_keys(self.idx.enums):
                self.serialize_enum(output, enum)
            for typedef in self._sorted_keys(self.idx.typedefs):
                self.serialize_typedef(output, typedef)
            for function in self._sorted_keys(self.idx.functions):
                self.serialize_function(output, function)
            output.write(" </symbols>\n")
            output.write("</api>\n")
        finally:
            # Release the file even when a serializer raises.
            output.close()
1433
1434
def rebuild():
    """Regenerate the API description of the project found around us.

    The project is guessed from marker files looked up relative to the
    current directory: ../parser.c selects libxml2 and
    ../libxslt/transform.c selects libxslt.  When ../libexslt/exslt.c
    is present as well, a description of libexslt is also produced.

    Returns the main docBuilder, or None when no project was recognised.
    """
    if glob.glob("../parser.c"):
        print("Rebuilding API description for libxml2")
        builder = docBuilder("libxml2", ["..", "../include/libxml"],
                             ["xmlwin32version.h", "tst.c",
                              "schemasInternals.h", "xmlschemas"])
    elif glob.glob("../libxslt/transform.c"):
        print("Rebuilding API description for libxslt")
        builder = docBuilder("libxslt", ["../libxslt"],
                             ["win32config.h", "libxslt.h", "tst.c"])
    else:
        print("rebuild() failed, unable to guess the module")
        return None

    builders = [builder]
    if glob.glob("../libexslt/exslt.c"):
        builders.append(docBuilder("libexslt", ["../libexslt"],
                                   ["libexslt.h"]))
    # Main project first, then the optional libexslt companion.
    for current in builders:
        current.scan()
        current.analyze()
        current.serialize()
    return builder
1458
1459#
1460# for debugging the parser
1461#
def parse(filename):
    """Debugging helper: parse a single file and return what CParser.parse() yields."""
    return CParser(filename).parse()
1466
# Script entry point: rebuild the API description when run directly.
if __name__ == "__main__":
    rebuild()