blob: 241ceaeb088e1c715f44c11b2000b5abd373ba6d [file] [log] [blame]
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001#!/usr/bin/python -u
2#
# This is the API builder: it parses the C sources and builds the
# formal API description in XML.
5#
6# See Copyright for the status of this software.
7#
8# daniel@veillard.com
9#
Daniel Veillard540a31a2003-01-21 11:21:07 +000010import os, sys
Daniel Veillarda9b66d02002-12-11 14:23:49 +000011import string
12import glob
13
def escape(raw):
    """Return raw with the XML special characters replaced by entities.

    The ampersand is replaced first so that the '&' introduced by the
    other entities is not escaped a second time.
    """
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
21
class identifier:
    """A named symbol together with what is known about it: the module
       it comes from, its kind, free-form info/extra data and a static
       flag."""

    def __init__(self, name, module=None, type=None, info=None, extra=None):
        self.name = name
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        # 0 until set_static() says otherwise
        self.static = 0

    def __repr__(self):
        """Build a one line description, skipping the unset fields."""
        pieces = ["%s %s:" % (self.type, self.name)]
        if self.static:
            pieces.append(" static")
        if self.module is not None:
            pieces.append(" from %s" % (self.module))
        if self.info is not None:
            pieces.append(" " + repr(self.info))
        if self.extra is not None:
            pieces.append(" " + repr(self.extra))
        return "".join(pieces)

    def set_module(self, module):
        self.module = module

    def set_type(self, type):
        self.type = type

    def set_info(self, info):
        self.info = info

    def set_extra(self, extra):
        self.extra = extra

    def set_static(self, static):
        self.static = static

    def update(self, module, type = None, info = None, extra=None):
        """Complete the record with newly found data.

        module and type are only filled in when still unset, while a
        non-None info or extra always replaces the stored value.
        """
        if self.module is None and module is not None:
            self.set_module(module)
        if self.type is None and type is not None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
64
65
class index:
    """Registry of the identifiers collected while scanning modules.

       Every identifier is stored in self.identifiers and, depending on
       its kind, in one of the per-kind dictionaries; all of them are
       keyed by the identifier name."""

    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}

    def add(self, name, module, static, type, info=None, extra=None):
        """Register name, or update the record already stored for it.

        Names starting with '__' are ignored and yield None; otherwise
        the (new or updated) identifier is returned.  The lookup is
        explicit so that an error raised by update() is reported instead
        of being swallowed and the stored record silently replaced.
        """
        if name[0:2] == '__':
            return None
        d = self.identifiers.get(name)
        if d is None:
            d = identifier(name, module, type, info, extra)
            self.identifiers[name] = d
        else:
            d.update(module, type, info, extra)

        if static == 1:
            d.set_static(1)

        if name is not None and type is not None:
            # "functype" entries share the functions table
            tables = {
                "function": self.functions,
                "functype": self.functions,
                "variable": self.variables,
                "include": self.includes,
                "struct": self.structs,
                "enum": self.enums,
                "typedef": self.typedefs,
                "macro": self.macros,
            }
            if type in tables:
                tables[type][name] = d
            else:
                print("Unable to register type  %s" % (type,))
        return d

    def _merge_kind(self, kind, mine, theirs, overrides_macro = 0):
        """Copy the entries of theirs missing from mine, reporting the
        ones declared on both sides; when overrides_macro is set a macro
        of the same name is dropped first."""
        for ident in theirs.keys():
            if overrides_macro and ident in self.macros:
                del self.macros[ident]
            if ident in mine:
                print("%s %s from %s redeclared in %s" % (
                    kind, ident, mine[ident].module, theirs[ident].module))
            else:
                mine[ident] = theirs[ident]
                self.identifiers[ident] = theirs[ident]

    def merge(self, idx):
        """Merge the content of the index idx into this one."""
        #
        # macro might be used to override functions or variables
        # definitions
        #
        self._merge_kind("function", self.functions, idx.functions, 1)
        self._merge_kind("variable", self.variables, idx.variables, 1)
        self._merge_kind("struct", self.structs, idx.structs)
        self._merge_kind("typedef", self.typedefs, idx.typedefs)
        for ident in idx.macros.keys():
            # a variable, function or enum of that name wins over the macro
            if ident in self.variables:
                continue
            if ident in self.functions:
                continue
            if ident in self.enums:
                continue
            if ident in self.macros:
                print("macro %s from %s redeclared in %s" % (
                    ident, self.macros[ident].module,
                    idx.macros[ident].module))
            else:
                self.macros[ident] = idx.macros[ident]
                self.identifiers[ident] = idx.macros[ident]
        # enums are merged last, after the macro pass above consulted them
        self._merge_kind("enum", self.enums, idx.enums)

    def merge_public(self, idx):
        """Update the functions already known here with the type, info
        and extra data recorded for them in idx."""
        for ident in idx.functions.keys():
            if ident in self.functions:
                up = idx.functions[ident]
                self.functions[ident].update(None, up.type, up.info, up.extra)
        # TODO: do the same for variables.

    def analyze_dict(self, type, dict):
        """Print how many entries dict holds and how many are not static."""
        count = 0
        public = 0
        for name in dict.keys():
            count = count + 1
            if dict[name].static == 0:
                public = public + 1
        if count != public:
            print(" %d %s , %d public" % (count, type, public))
        elif count != 0:
            print(" %d public %s" % (count, type))

    def analyze(self):
        """Print the statistics of the main identifier tables."""
        self.analyze_dict("functions", self.functions)
        self.analyze_dict("variables", self.variables)
        self.analyze_dict("structs", self.structs)
        self.analyze_dict("typedefs", self.typedefs)
        self.analyze_dict("macros", self.macros)
210
211#
212# C parser analysis code
213#
# File names mapped to the reason why they are left out.
# NOTE(review): this table is not used in the part of the file reviewed
# here; presumably the file scanning code consults it - confirm.
ignored_files = {
    "acconfig.h": "generated portability layer",
    "config.h": "generated portability layer",
    "libxml.h": "internal only",
    "trio": "too many non standard macros",
    "trio.c": "too many non standard macros",
    "trionan.c": "too many non standard macros",
    "triostr.c": "too many non standard macros",
}

# Words dropped from the token stream by CParser.token(): each maps to
# (number of following lexer tokens dropped along with it, description).
ignored_words = {
    "ATTRIBUTE_UNUSED": (0, "macro keyword"),
    "LIBEXSLT_PUBLIC": (0, "macro keyword"),
    "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
    "WINAPI": (0, "Windows keyword"),
    "X_IN_Y": (5, "macro function builder"),
    "__declspec": (3, "Windows keyword"),
}
232
class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line.

       Tokens are (kind, text) tuples, kind being one of 'preproc',
       'string', 'comment', 'name', 'sep' or 'op'."""

    # a 'name' token extends up to the first of these characters
    _name_stop = " \t(){}:;,+-*/%&!|[]=><"
    _separators = "(){}:;,[]"
    _operators = "+-*><=/%&!|."
    # characters accepted as second half of a two character operator
    _operator_tail = "+-*><=/%&!|"

    def __init__(self, input):
        self.input = input
        self.tokens = []
        self.line = ""
        self.lineno = 0
        # set up front so that debug() is usable before the first token
        self.last = None

    def getline(self):
        """Return the next non blank input line, stripped, with the
        lines continued by a trailing backslash joined; None at the end
        of the input."""
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = line.strip()
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = n.strip()
                if not n:
                    break
                line = line + n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        """Put token back so that it is the next one returned."""
        self.tokens.insert(0, token)

    def debug(self):
        print("Last token:  %s" % (self.last,))
        print("Token queue:  %s" % (self.tokens,))
        print("Line %d end:  %s" % (self.lineno, self.line))

    def _scan_string(self, line):
        """Read the string or character constant opening line, fetching
        more lines as needed; the text following the closing quote is
        left in self.line.  Returns the token, None if the input ends."""
        end = line[0]
        line = line[1:]
        tok = ""
        found = 0
        while found == 0:
            i = 0
            l = len(line)
            while i < l:
                if line[i] == end:
                    self.line = line[i+1:]
                    line = line[:i]
                    found = 1
                    break
                if line[i] == '\\':
                    # the escaped character can not close the string
                    i = i + 1
                i = i + 1
            tok = tok + line
            if found == 0:
                line = self.getline()
                if line is None:
                    return None
        return ('string', tok)

    def _scan_comment(self, line):
        """Read the /* comment opening line, its lines being joined with
        newlines; the text following the closing */ is left in
        self.line.  Returns the token, None if the input ends."""
        line = line[2:]
        tok = ""
        found = 0
        while found == 0:
            i = 0
            l = len(line)
            while i < l:
                if line[i] == '*' and i+1 < l and line[i+1] == '/':
                    self.line = line[i+2:]
                    # NOTE: the slice stops one character before the '*',
                    # so the character preceding the closing '*/'
                    # (usually a blank) is dropped too; kept unchanged so
                    # that the comment tokens produced stay the same.
                    line = line[:i-1]
                    found = 1
                    break
                i = i + 1
            if tok != "":
                tok = tok + "\n"
            tok = tok + line
            if found == 0:
                line = self.getline()
                if line is None:
                    return None
        return ('comment', tok)

    def _cut_code(self, line):
        """Return the part of line preceding the first comment or quote,
        the remainder being saved in self.line for the next round."""
        l = len(line)
        i = 0
        while i < l:
            c = line[i]
            if c == '"' or c == "'" or (
               c == '/' and i+1 < l and line[i+1] in '/*'):
                self.line = line[i:]
                return line[:i]
            i = i + 1
        return line

    def _tokenize(self, line):
        """Append the name, separator and operator tokens of line to the
        token queue."""
        l = len(line)
        i = 0
        while i < l:
            c = line[i]
            if c == ' ' or c == '\t':
                i = i + 1
                continue
            if c in self._separators:
                self.tokens.append(('sep', c))
                i = i + 1
                continue
            if c in self._operators:
                if c == '.' and i + 2 < l and \
                   line[i+1] == '.' and line[i+2] == '.':
                    self.tokens.append(('name', '...'))
                    i = i + 3
                    continue
                j = i + 1
                if j < l and line[j] in self._operator_tail:
                    self.tokens.append(('op', line[i:j+1]))
                    i = j + 1
                else:
                    self.tokens.append(('op', c))
                    i = i + 1
                continue
            # anything else starts a name running up to the next blank,
            # separator or operator character
            s = i
            while i < l and line[i] not in self._name_stop:
                i = i + 1
            self.tokens.append(('name', line[s:i]))

    def token(self):
        """Return the next token, None once the input is exhausted.

        A line starting with '#' is split on blanks into 'preproc'
        tokens; strings and comments are returned as single tokens.
        """
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line is None:
                return None

            if line[0] == '#':
                self.tokens = [('preproc', word) for word in line.split()]
                break
            if line[0] == '"' or line[0] == "'":
                tok = self._scan_string(line)
                if tok is None:
                    return None
                self.last = tok
                return tok
            if line[:2] == '/*':
                tok = self._scan_comment(line)
                if tok is None:
                    return None
                self.last = tok
                return tok
            if line[:2] == '//':
                self.last = ('comment', line[2:])
                return self.last
            self._tokenize(self._cut_code(line))

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok
438
439class CParser:
440 """The C module parser"""
def __init__(self, filename, idx = None):
    """Open filename and set up a lexer on it; the identifiers found
    are recorded in idx, or in a fresh index when none is given."""
    self.filename = filename
    # a file is a header when its name ends with '.h' and is longer
    # than that suffix alone
    self.is_header = 0
    if len(filename) > 2 and filename.endswith('.h'):
        self.is_header = 1
    self.input = open(filename)
    self.lexer = CLexer(self.input)
    if idx is None:
        idx = index()
    self.index = idx
    self.top_comment = ""
    self.last_comment = ""
    self.comment = None
456
def lineno(self):
    """Return the line number reported by the lexer."""
    lexer = self.lexer
    return lexer.getlineno()
459
def error(self, msg, token=-1):
    """Report a parse error, dump the lexer state and exit with
    status 1; token is shown unless left to its -1 default."""
    print("Parse Error: " + msg)
    if token != -1:
        print("Got token  %s" % (token,))
    self.lexer.debug()
    sys.exit(1)
466
def debug(self, msg, token=-1):
    """Print a debug message and the lexer state; token is shown
    unless left to its -1 default."""
    print("Debug: " + msg)
    if token != -1:
        print("Got token  %s" % (token,))
    self.lexer.debug()
472
def parseComment(self, token):
    """Record the comment token and return the next lexer token.

    The first comment seen is kept as top_comment.  A comment whose
    text starts with '*' replaces the pending comment, any other one is
    appended to it.  The first character is read with a slice so that
    an empty comment does not raise IndexError.
    """
    text = token[1]
    if self.top_comment == "":
        self.top_comment = text
    if self.comment is None or text[:1] == '*':
        self.comment = text
    else:
        self.comment = self.comment + text
    return self.lexer.token()
482
483 #
484 # Parse a comment block associate to a macro
485 #
def parseMacroComment(self, name, quiet = 0):
    """Extract the documentation of macro name from the pending comment.

    The comment is expected to hold a '* name:' line, then '* @arg:
    text' entries which may continue on the following lines, then the
    description.  Returns (args, desc), args being a list of (argument,
    description) tuples; both are empty when the comment is missing or
    malformed.  Problems are printed unless quiet is set, which is
    forced for names starting with '__'.  Every access to the line list
    is guarded so that a truncated comment is handled rather than
    raising IndexError.
    """
    if name[0:2] == '__':
        quiet = 1

    args = []
    desc = ""

    if self.comment is None:
        if not quiet:
            print("Missing comment for macro %s" % (name))
        return((args, desc))
    if self.comment[:1] != '*':
        if not quiet:
            print("Missing * in macro comment for %s" % (name))
        return((args, desc))
    lines = self.comment.split('\n')
    if lines[0] == '*':
        del lines[0]
    if len(lines) == 0 or lines[0] != "* %s:" % (name):
        if not quiet:
            got = ""
            if len(lines) > 0:
                got = lines[0]
            print("Misformatted macro comment for %s" % (name))
            print(" Expecting '* %s:' got '%s'" % (name, got))
        return((args, desc))
    del lines[0]
    while len(lines) > 0 and lines[0] == '*':
        del lines[0]
    while len(lines) > 0 and lines[0][0:3] == '* @':
        pieces = lines[0][3:].split(':', 1)
        if len(pieces) != 2:
            if not quiet:
                print("Misformatted macro comment for %s" % (name))
                print(" problem with '%s'" % (lines[0]))
            del lines[0]
            continue
        arg = pieces[0].strip()
        desc = pieces[1].strip()
        del lines[0]
        # gather the continuation lines of this argument description
        while len(lines) > 0:
            l = lines[0].strip()
            if len(l) <= 2 or l[0:3] == '* @':
                break
            while len(l) > 0 and l[0] == '*':
                l = l[1:]
            desc = desc + ' ' + l.strip()
            del lines[0]
        args.append((arg, desc))
        while len(lines) > 0 and lines[0] == '*':
            del lines[0]
    # whatever is left is the description of the macro itself
    desc = ""
    while len(lines) > 0:
        l = lines[0]
        while len(l) > 0 and l[0] == '*':
            l = l[1:]
        desc = desc + " " + l.strip()
        del lines[0]

    desc = desc.strip()

    if quiet == 0:
        if desc == "":
            print("Macro comment for %s lack description of the macro" % (name))

    return((args, desc))
553
554 #
555 # Parse a comment block and merge the informations found in the
556 # parameters descriptions, finally returns a block as complete
557 # as possible
558 #
def mergeFunctionComment(self, name, description, quiet = 0):
    """Merge the pending comment into the description of function name.

    description is (ret, args): ret[0] is the return type and args a
    list of (type, name, info) tuples which is updated in place with
    the text of the matching '* @arg:' entries.  Returns
    ((return type, return description), args, description).  Problems
    are printed unless quiet is set, which is forced for 'main' and for
    names starting with '__'.  Every access to the line list is guarded
    so that a truncated comment is handled rather than raising
    IndexError.
    """
    if name == 'main':
        quiet = 1
    if name[0:2] == '__':
        quiet = 1

    (ret, args) = description
    desc = ""
    retdesc = ""

    if self.comment is None:
        if not quiet:
            print("Missing comment for function %s" % (name))
        return(((ret[0], retdesc), args, desc))
    if self.comment[:1] != '*':
        if not quiet:
            print("Missing * in function comment for %s" % (name))
        return(((ret[0], retdesc), args, desc))
    lines = self.comment.split('\n')
    if lines[0] == '*':
        del lines[0]
    if len(lines) == 0 or lines[0] != "* %s:" % (name):
        if not quiet:
            got = ""
            if len(lines) > 0:
                got = lines[0]
            print("Misformatted function comment for %s" % (name))
            print(" Expecting '* %s:' got '%s'" % (name, got))
        return(((ret[0], retdesc), args, desc))
    del lines[0]
    while len(lines) > 0 and lines[0] == '*':
        del lines[0]
    nbargs = len(args)
    while len(lines) > 0 and lines[0][0:3] == '* @':
        pieces = lines[0][3:].split(':', 1)
        if len(pieces) != 2:
            if not quiet:
                print("Misformatted function comment for %s" % (name))
                print(" problem with '%s'" % (lines[0]))
            del lines[0]
            continue
        arg = pieces[0].strip()
        desc = pieces[1].strip()
        del lines[0]
        # gather the continuation lines of this argument description
        while len(lines) > 0:
            l = lines[0].strip()
            if len(l) <= 2 or l[0:3] == '* @':
                break
            while len(l) > 0 and l[0] == '*':
                l = l[1:]
            desc = desc + ' ' + l.strip()
            del lines[0]
        # attach the text to the argument of the same name
        i = 0
        while i < nbargs:
            if args[i][1] == arg:
                args[i] = (args[i][0], arg, desc)
                break
            i = i + 1
        if i >= nbargs:
            if not quiet:
                print("Unable to find arg %s from function comment for %s" % (
                    arg, name))
        while len(lines) > 0 and lines[0] == '*':
            del lines[0]
    desc = ""
    while len(lines) > 0:
        l = lines[0]
        while len(l) > 0 and l[0] == '*':
            l = l[1:]
        l = l.strip()
        if l[0:6] == "return" or l[0:6] == "Return":
            # drop the first word, the remaining lines all belong to
            # the description of the return value
            pieces = l.split(' ', 1)
            if len(pieces) > 1:
                l = pieces[1]
            else:
                l = ""
            retdesc = l.strip()
            del lines[0]
            while len(lines) > 0:
                l = lines[0]
                while len(l) > 0 and l[0] == '*':
                    l = l[1:]
                retdesc = retdesc + " " + l.strip()
                del lines[0]
        else:
            desc = desc + " " + l
            del lines[0]

    retdesc = retdesc.strip()
    desc = desc.strip()

    if quiet == 0:
        #
        # report missing comments
        #
        for entry in args[:nbargs]:
            if entry[2] is None and entry[0] != "void" and entry[1] is not None:
                print("Function comment for %s lack description of arg %s" % (name, entry[1]))
        if retdesc == "" and ret[0] != "void":
            print("Function comment for %s lack description of return value" % (name))
        if desc == "":
            print("Function comment for %s lack description of the function" % (name))

    return(((ret[0], retdesc), args, desc))
666
def parsePreproc(self, token):
    """Handle the preprocessor directive starting at token and return
    the first token following it.

    The target of an #include and the name of a #define, cut at the
    first '(', are registered in the index; the tokens of any other
    directive are skipped.
    """
    directive = token[1]
    if directive == "#include":
        token = self.lexer.token()
        if token is None:
            return None
        if token[0] != 'preproc':
            return token
        self.index.add(token[1], self.filename, not self.is_header,
                       "include")
        return self.lexer.token()
    if directive == "#define":
        token = self.lexer.token()
        if token is None:
            return None
        if token[0] == 'preproc':
            # TODO macros with arguments
            macro = token[1].split('(')[0]
            # skip the replacement text, i.e. up to the next directive
            token = self.lexer.token()
            while token is not None and token[0] == 'preproc' and \
                  token[1][0] != '#':
                token = self.lexer.token()
            info = self.parseMacroComment(macro, not self.is_header)
            self.index.add(macro, self.filename, not self.is_header,
                           "macro", info)
            return token
    # any other directive: drop its tokens
    token = self.lexer.token()
    while token is not None and token[0] == 'preproc' and \
          token[1][0] != '#':
        token = self.lexer.token()
    return token
704
705 #
706 # token acquisition on top of the lexer, it handle internally
707 # preprocessor and comments since they are logically not part of
708 # the program structure.
709 #
def token(self):
    """Return the next token of the program proper, None at the end of
    the input; comments and preprocessor directives are processed on
    the way and the words listed in ignored_words are dropped."""
    global ignored_words

    token = self.lexer.token()
    while token is not None:
        kind = token[0]
        if kind == 'comment':
            token = self.parseComment(token)
        elif kind == 'preproc':
            token = self.parsePreproc(token)
        elif kind == "name" and token[1] in ignored_words:
            (n, info) = ignored_words[token[1]]
            # drop the n tokens going with the word, then fetch the
            # token following them
            fetched = 0
            while fetched <= n:
                token = self.lexer.token()
                fetched = fetched + 1
        else:
            #print "=> ", token
            return token
    return None
733
734 #
735 # Parse a typedef, it records the type and its name.
736 #
def parseTypedef(self, token):
    """Parse the typedef whose type starts at token and register each
    name it declares as a "functype", "struct" or "typedef"; returns
    the token following the closing ';'."""
    if token is None:
        return None
    token = self.parseType(token)
    if token is None:
        self.error("parsing typedef")
        return None
    base_type = self.type
    current = base_type
    #self.debug("end typedef type", token)
    while token is not None:
        if token[0] != "name":
            self.error("parsing typedef: expecting a name")
            return token
        name = token[1]
        signature = self.signature
        if signature is not None:
            # function type: keep what precedes the '(' as return type
            current = current.split('(')[0]
            d = self.mergeFunctionComment(name,
                    ((current, None), signature), 1)
            self.index.add(name, self.filename, not self.is_header,
                           "functype", d)
        elif base_type == "struct":
            self.index.add(name, self.filename, not self.is_header,
                           "struct", current)
            # the names that follow are defined on top of this struct
            base_type = "struct " + name
        else:
            self.index.add(name, self.filename, not self.is_header,
                           "typedef", current)
        token = self.token()
        #self.debug("end typedef", token)
        is_sep = token is not None and token[0] == 'sep'
        if is_sep and token[1] == ',':
            current = base_type
            token = self.token()
            while token is not None and token[0] == "op":
                current = current + token[1]
                token = self.token()
        elif is_sep and token[1] == ';':
            break
        elif token is not None and token[0] == 'name':
            current = base_type
            continue
        else:
            self.error("parsing typedef: expecting ';'", token)
            return token
    token = self.token()
    return token
786
787 #
788 # Parse a C code block, used for functions it parse till
789 # the balancing } included
790 #
def parseBlock(self, token):
    """Skip a code block up to and including its balancing '}',
    recursing on nested blocks; the pending comment is dropped when the
    block closes.  Returns the token following the block."""
    while token is not None:
        if token[0] == "sep":
            if token[1] == "{":
                token = self.parseBlock(self.token())
                continue
            if token[1] == "}":
                self.comment = None
                return self.token()
        token = self.token()
    return token
803
804 #
805 # Parse a C struct definition till the balancing }
806 #
def parseStruct(self, token):
    """Parse the fields of a struct up to and including the balancing
    '}' and return the token following it.

    The (type, name, comment) tuples of the fields are stored in
    self.struct_fields.  Reaching the end of the input after a field
    name is reported through self.error() instead of failing on the
    missing token.
    """
    fields = []
    #self.debug("start parseStruct", token)
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseTypeBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            self.struct_fields = fields
            #self.debug("end parseStruct", token)
            token = self.token()
            return token
        else:
            # parseType() overwrites self.type, restored below
            base_type = self.type
            token = self.parseType(token)
            if token is not None and token[0] == "name":
                fname = token[1]
                token = self.token()
                if token is not None and token[0] == "sep" and \
                   token[1] == ";":
                    # fetching the next token may pick up a comment,
                    # which is then attached to the field
                    self.comment = None
                    token = self.token()
                    fields.append((self.type, fname, self.comment))
                    self.comment = None
                else:
                    self.error("parseStruct: expecting ;", token)
            elif token is not None and token[0] == "sep" and \
                 token[1] == "{":
                token = self.token()
                token = self.parseTypeBlock(token)
                if token is not None and token[0] == "name":
                    token = self.token()
                if token is not None and token[0] == "sep" and \
                   token[1] == ";":
                    token = self.token()
                else:
                    self.error("parseStruct: expecting ;", token)
            else:
                self.error("parseStruct: name", token)
                token = self.token()
            self.type = base_type
    self.struct_fields = fields
    #self.debug("end parseStruct", token)
    return token
852
853 #
854 # Parse a C enum block, parse till the balancing }
855 #
def parseEnumBlock(self, token):
    """Parse the body of an enum up to and including the balancing '}'
    and return the token following it.

    The (name, value, comment) tuples of the enumerators are stored in
    self.enums, value being a string.  An enumerator without explicit
    value gets the previous value plus one, the count starting so that
    the first one is "0" as in C.
    """
    self.enums = []
    name = None
    self.comment = None
    comment = ""
    # one below the value of the first implicit enumerator
    value = "-1"
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseTypeBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            if name is not None:
                if self.comment is not None:
                    comment = self.comment
                    self.comment = None
                self.enums.append((name, value, comment))
            token = self.token()
            return token
        elif token[0] == "name":
            # a new name completes the previous enumerator
            if name is not None:
                if self.comment is not None:
                    comment = self.comment.strip()
                    self.comment = None
                self.enums.append((name, value, comment))
            name = token[1]
            comment = ""
            token = self.token()
            if token is not None and token[0] == "op" and \
               token[1][0] == "=":
                # the lexer may have glued an operator to the '='
                value = ""
                if len(token[1]) > 1:
                    value = token[1][1:]
                token = self.token()
                while token is not None and (token[0] != "sep" or
                      (token[1] != ',' and token[1] != '}')):
                    value = value + token[1]
                    token = self.token()
            else:
                try:
                    value = "%d" % (int(value) + 1)
                except ValueError:
                    print("Failed to compute value of enum %s" % (name))
                    value = ""
            if token is not None and token[0] == "sep" and token[1] == ",":
                token = self.token()
        else:
            token = self.token()
    return token
903
904 #
905 # Parse a C definition block, used for structs it parse till
906 # the balancing }
907 #
def parseTypeBlock(self, token):
    """Skip a definition block up to and including its balancing '}',
    recursing on nested blocks; returns the token following it."""
    while token is not None:
        if token[0] == "sep":
            if token[1] == "{":
                token = self.parseTypeBlock(self.token())
                continue
            if token[1] == "}":
                return self.token()
        token = self.token()
    return token
919
920 #
921 # Parse a type: the fact that the type name can either occur after
922 # the definition or within the definition makes it a little harder
923 # if inside, the name token is pushed back before returning
924 #
def parseType(self, token):
    """Parse the type starting at token.

    The textual type is left in self.type, the fields of a struct body
    in self.struct_fields and the arguments of a function type in
    self.signature.  The token returned is normally the name being
    declared: when it had to be read ahead, the token following it is
    pushed back on the lexer first.  'long int' and 'short int' are
    recognized as a single type.
    """
    self.type = ""
    self.struct_fields = []
    self.signature = None
    if token is None:
        return token

    def add_word(word):
        # append word to the type, blank separated
        if self.type == "":
            self.type = word
        else:
            self.type = self.type + " " + word

    while token is not None and token[0] == "name" and (
          token[1] == "const" or token[1] == "unsigned"):
        add_word(token[1])
        token = self.token()

    if token is None:
        self.error("parsing type %s: expecting a name" % (self.type),
                   token)
        return token

    if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
        add_word(token[1])
        # an 'int' may follow; any other token is handed back to the
        # lexer and read again below
        following = self.token()
        if following is not None and following[0] == "name" and \
           following[1] == "int":
            add_word(following[1])
        elif following is not None:
            self.lexer.push(following)

    elif token[0] == "name" and token[1] == "struct":
        add_word(token[1])
        token = self.token()
        nametok = None
        if token is not None and token[0] == "name":
            nametok = token
            token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseStruct(token)
        elif token is not None and token[0] == "op" and token[1] == "*":
            self.type = self.type + " " + nametok[1] + " *"
            token = self.token()
            while token is not None and token[0] == "op" and \
                  token[1] == "*":
                self.type = self.type + " *"
                token = self.token()
            if token is not None and token[0] == "name":
                nametok = token
                token = self.token()
            else:
                self.error("struct : expecting name", token)
                return token
        elif token is not None and token[0] == "name" and \
             nametok is not None:
            self.type = self.type + " " + nametok[1]
            return token

        if nametok is not None:
            self.lexer.push(token)
            token = nametok
        return token

    elif token[0] == "name" and token[1] == "enum":
        add_word(token[1])
        self.enums = []
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseEnumBlock(token)
        else:
            self.error("parsing enum: expecting '{'", token)
        enum_type = None
        if token is not None and token[0] != "name":
            self.lexer.push(token)
            token = ("name", "enum")
        elif token is not None:
            enum_type = token[1]
        for enum in self.enums:
            self.index.add(enum[0], self.filename,
                           not self.is_header, "enum",
                           (enum[1], enum[2], enum_type))
        return token

    elif token[0] == "name":
        add_word(token[1])
    else:
        self.error("parsing type %s: expecting a name" % (self.type),
                   token)
        return token
    token = self.token()
    while token is not None and (token[0] == "op" or
          token[0] == "name" and token[1] == "const"):
        self.type = self.type + " " + token[1]
        token = self.token()

    #
    # if there is a parenthesis here, this means a function type
    #
    if token is not None and token[0] == "sep" and token[1] == '(':
        self.type = self.type + token[1]
        token = self.token()
        while token is not None and token[0] == "op" and token[1] == '*':
            self.type = self.type + token[1]
            token = self.token()
        if token is None or token[0] != "name":
            self.error("parsing function type, name expected", token)
            return token
        self.type = self.type + token[1]
        nametok = token
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == ')':
            self.type = self.type + token[1]
            token = self.token()
            if token is not None and token[0] == "sep" and token[1] == '(':
                token = self.token()
                # parsing the arguments overwrites self.type
                saved = self.type
                token = self.parseSignature(token)
                self.type = saved
            else:
                self.error("parsing function type, '(' expected", token)
                return token
        else:
            self.error("parsing function type, ')' expected", token)
            return token
        self.lexer.push(token)
        token = nametok
        return token

    #
    # do some lookahead for arrays
    #
    if token is not None and token[0] == "name":
        nametok = token
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == '[':
            self.type = self.type + nametok[1]
            while token is not None and token[0] == "sep" and \
                  token[1] == '[':
                self.type = self.type + token[1]
                token = self.token()
                while token is not None and token[0] != 'sep' and \
                      token[1] != ']' and token[1] != ';':
                    self.type = self.type + token[1]
                    token = self.token()
            if token is not None and token[0] == 'sep' and token[1] == ']':
                self.type = self.type + token[1]
                token = self.token()
            else:
                self.error("parsing array type, ']' expected", token)
                return token
        elif token is not None and token[0] == "sep" and token[1] == ':':
            # remove :12 in case it's a limited int size
            token = self.token()
            token = self.token()
        self.lexer.push(token)
        token = nametok

    return token
1086
1087 #
1088 # Parse a signature: '(' has been parsed and we scan the type definition
1089 # up to the ')' included
def parseSignature(self, token):
    """Parse the arguments of a function, the opening '(' being
    already consumed, up to and including the closing ')'.

    The (type, name, None) tuples of the arguments are stored in
    self.signature; returns the token following the ')'.
    """
    params = []
    if token is not None and token[0] == "sep" and token[1] == ')':
        self.signature = []
        return self.token()
    while token is not None:
        token = self.parseType(token)
        is_sep = token is not None and token[0] == "sep"
        if token is not None and token[0] == "name":
            params.append((self.type, token[1], None))
            token = self.token()
        elif is_sep and token[1] == ',':
            token = self.token()
            continue
        elif is_sep and token[1] == ')':
            # only the type was provided
            if self.type == "...":
                params.append((self.type, "...", None))
            else:
                params.append((self.type, None, None))
        if token is not None and token[0] == "sep":
            if token[1] == ',':
                token = self.token()
                continue
            elif token[1] == ')':
                token = self.token()
                break
    self.signature = params
    return token
1119
1120 #
1121 # Parse a global definition, be it a type, variable or function
1122 # the extern "C" blocks are a bit nasty and require it to recurse.
1123 #
def parseGlobal(self, token):
    """Parse one global definition: a typedef, a variable or a function.

    `token` is the first token of the definition and must not be None.
    An `extern "C" { ... }` block is handled by recursing on every
    definition found inside the braces.  Variables and functions are
    registered through self.index.add(); typedefs are delegated to
    self.parseTypedef().

    Returns the first token that is not part of the definition, or None
    when the input is exhausted.
    """
    static = 0
    if token[1] == 'extern':
        token = self.token()
        if token is None:
            return token
        if token[0] == 'string':
            if token[1] == 'C':
                token = self.token()
                if token is None:
                    return token
                if token[0] == 'sep' and token[1] == "{":
                    token = self.token()
                    while token is not None and (token[0] != 'sep' or
                                                 token[1] != "}"):
                        if token[0] != 'name':
                            self.error(
                                "token %s %s unexpected at the top level" % (
                                    token[0], token[1]))
                        token = self.parseGlobal(token)
                    # step over the closing '}'
                    token = self.token()
                    return token
            else:
                return token
    elif token[1] == 'static':
        static = 1
        token = self.token()
        if token is None or token[0] != 'name':
            return token

    if token[1] == 'typedef':
        token = self.token()
        return self.parseTypedef(token)

    token = self.parseType(token)
    type_orig = self.type
    if token is None or token[0] != "name":
        return token
    decl_type = type_orig
    self.name = token[1]
    token = self.token()
    while token is not None and (token[0] == "sep" or token[0] == "op"):
        if token[0] == "sep" and token[1] == "[":
            # array declarator: fold everything up to the ';' into the type
            decl_type = decl_type + token[1]
            token = self.token()
            while token is not None and (token[0] != "sep" or
                                         token[1] != ";"):
                decl_type = decl_type + token[1]
                token = self.token()

        if token is not None and token[0] == "op" and token[1] == "=":
            #
            # Skip the initialization of the variable
            #
            token = self.token()
            # the input may end right after '=': check for None before
            # indexing so the missing ';' is reported below
            if token is not None and token[0] == 'sep' and token[1] == '{':
                token = self.token()
                token = self.parseBlock(token)
            else:
                self.comment = None
                while token is not None and (token[0] != "sep" or
                        (token[1] != ';' and token[1] != ',')):
                    token = self.token()
            self.comment = None
            if token is None or token[0] != "sep" or (token[1] != ';' and
                                                      token[1] != ','):
                self.error("missing ';' or ',' after value")

        if token is not None and token[0] == "sep":
            if token[1] == ";":
                self.comment = None
                token = self.token()
                if decl_type == "struct":
                    self.index.add(self.name, self.filename,
                                   not self.is_header, "struct",
                                   self.struct_fields)
                else:
                    self.index.add(self.name, self.filename,
                                   not self.is_header, "variable",
                                   decl_type)
                break
            elif token[1] == "(":
                token = self.token()
                token = self.parseSignature(token)
                if token is None:
                    return None
                if token[0] == "sep" and token[1] == ";":
                    # prototype only
                    d = self.mergeFunctionComment(
                        self.name, ((decl_type, None), self.signature), 1)
                    self.index.add(self.name, self.filename, static,
                                   "function", d)
                    token = self.token()
                elif token[0] == "sep" and token[1] == "{":
                    # definition: register it, then skip the body
                    d = self.mergeFunctionComment(
                        self.name, ((decl_type, None), self.signature),
                        static)
                    self.index.add(self.name, self.filename, static,
                                   "function", d)
                    token = self.token()
                    token = self.parseBlock(token)
            elif token[1] == ',':
                # several variables declared in one statement
                self.comment = None
                self.index.add(self.name, self.filename, static,
                               "variable", decl_type)
                decl_type = type_orig
                token = self.token()
                while token is not None and token[0] == "sep":
                    decl_type = decl_type + token[1]
                    token = self.token()
                if token is not None and token[0] == "name":
                    self.name = token[1]
                    token = self.token()
            else:
                break
        elif token is not None and token[0] == "op" and token[1] != "=":
            # no branch above consumes such an operator; hand it back to
            # the caller instead of looping on it forever
            break

    return token
1242
def parse(self):
    """Parse the whole file and return the index that was filled in.

    Every top-level definition is handed to self.parseGlobal().  When a
    token other than a name shows up at the top level, the problem is
    reported through self.error(), parseGlobal() is tried once on that
    token and parsing stops.

    The index is returned in every case, so callers that merge the
    result never receive None.
    """
    print("Parsing %s" % (self.filename))
    token = self.token()
    while token is not None:
        if token[0] != 'name':
            self.error("token %s %s unexpected at the top level" % (
                token[0], token[1]))
            token = self.parseGlobal(token)
            break
        token = self.parseGlobal(token)
    return self.index
1255
1256
class docBuilder:
    """A documentation builder.

    Collects the .c modules and .h headers of a project, parses each of
    them with CParser, merges the resulting indexes and serializes the
    merged API description as XML.
    """

    def __init__(self, name, directories=None, excludes=None):
        """Set up a builder for project `name`.

        directories -- directories to scan, ['.'] when omitted
        excludes    -- substrings of file paths to ignore; the keys of
                       the module-level ignored_files are always added
        """
        # None sentinels: list defaults would be shared between calls
        if directories is None:
            directories = ['.']
        if excludes is None:
            excludes = []
        self.name = name
        self.directories = directories
        self.excludes = list(excludes) + list(ignored_files.keys())
        self.modules = {}
        self.headers = {}
        self.idx = index()

    def analyze(self):
        """Print a short summary and run the analysis of the merged index."""
        print("Project %s : %d headers, %d modules" % (
            self.name, len(self.headers), len(self.modules)))
        self.idx.analyze()

    def scanHeaders(self):
        """Parse every collected header and merge it into the index."""
        for header in list(self.headers.keys()):
            idx = CParser(header).parse()
            self.headers[header] = idx
            self.idx.merge(idx)

    def scanModules(self):
        """Parse every collected module and merge its public part."""
        for module in list(self.modules.keys()):
            idx = CParser(module).parse()
            self.modules[module] = idx
            self.idx.merge_public(idx)

    def _is_excluded(self, path):
        # true when any exclusion pattern is a substring of path
        for excl in self.excludes:
            if path.find(excl) != -1:
                return 1
        return 0

    def _collect(self, pattern, found):
        # record in `found` every file matching pattern that is not excluded
        for path in glob.glob(pattern):
            if not self._is_excluded(path):
                found[path] = None

    def scan(self):
        """Collect the .c and .h files of all directories, then parse them."""
        for directory in self.directories:
            self._collect(directory + "/*.c", self.modules)
            self._collect(directory + "/*.h", self.headers)
        self.scanHeaders()
        self.scanModules()

    def modulename_file(self, file):
        """Return the base name of `file`, without a trailing '.h'."""
        module = os.path.basename(file)
        if module[-2:] == '.h':
            module = module[:-2]
        return module

    def _sorted_names(self, table):
        # keys of table as a sorted list
        names = list(table.keys())
        names.sort()
        return names

    def serialize_enum(self, output, name):
        """Write the <enum> element describing enum value `name`."""
        entry = self.idx.enums[name]
        output.write(" <enum name='%s' file='%s'" % (
            name, self.modulename_file(entry.module)))
        info = entry.info
        if info is not None:
            # info is indexed as (value, description, type)
            if info[0] is not None and info[0] != '':
                output.write(" value='%s'" % (info[0],))
            if info[2] is not None and info[2] != '':
                output.write(" type='%s'" % (info[2],))
            if info[1] is not None and info[1] != '':
                output.write(" info='%s'" % escape(info[1]))
        output.write("/>\n")

    def serialize_macro(self, output, name):
        """Write the <macro> element describing macro `name`."""
        entry = self.idx.macros[name]
        output.write(" <macro name='%s' file='%s'>\n" % (
            name, self.modulename_file(entry.module)))
        if entry.info is not None:
            try:
                (args, desc) = entry.info
                if desc is not None and desc != "":
                    output.write(" <info>%s</info>\n" % (escape(desc)))
                for arg in args:
                    (arg_name, arg_desc) = arg
                    if arg_desc is not None and arg_desc != "":
                        output.write(" <arg name='%s' info='%s'/>\n" % (
                            arg_name, escape(arg_desc)))
                    else:
                        output.write(" <arg name='%s'/>\n" % (arg_name,))
            except Exception:
                # best effort: keep the element well formed but say so
                print("Failed to serialize macro %s" % (name))
        output.write(" </macro>\n")

    def serialize_typedef(self, output, name):
        """Write the <struct> or <typedef> element for typedef `name`."""
        entry = self.idx.typedefs[name]
        if entry.info[0:7] == 'struct ':
            output.write(" <struct name='%s' file='%s' type='%s'" % (
                name, self.modulename_file(entry.module), entry.info))
            struct_name = entry.info[7:]
            fields = None
            if struct_name in self.idx.structs:
                fields = self.idx.structs[struct_name].info
            if isinstance(fields, (tuple, list)):
                output.write(">\n")
                try:
                    for field in fields:
                        # field is indexed as (type, name, description)
                        desc = field[2]
                        if desc is None:
                            desc = ''
                        else:
                            desc = escape(desc)
                        output.write(" <field name='%s' type='%s' info='%s'/>\n" % (field[1], field[0], desc))
                except Exception:
                    print("Failed to serialize struct %s" % (struct_name))
                output.write(" </struct>\n")
            else:
                output.write("/>\n")
        else:
            output.write(" <typedef name='%s' file='%s' type='%s'/>\n" % (
                name, self.modulename_file(entry.module), entry.info))

    def serialize_variable(self, output, name):
        """Write the <variable> element describing variable `name`."""
        entry = self.idx.variables[name]
        if entry.info is not None:
            output.write(" <variable name='%s' file='%s' type='%s'/>\n" % (
                name, self.modulename_file(entry.module), entry.info))
        else:
            output.write(" <variable name='%s' file='%s'/>\n" % (
                name, self.modulename_file(entry.module)))

    def serialize_function(self, output, name):
        """Write the element describing function `name`.

        The element tag is taken from the type recorded for the entry.
        """
        entry = self.idx.functions[name]
        output.write(" <%s name='%s' file='%s'>\n" % (
            entry.type, name, self.modulename_file(entry.module)))
        try:
            (ret, params, desc) = entry.info
            output.write(" <info>%s</info>\n" % (escape(desc)))
            if ret[0] is not None:
                if ret[0] == "void":
                    output.write(" <return type='void'/>\n")
                else:
                    output.write(" <return type='%s' info='%s'/>\n" % (
                        ret[0], escape(ret[1])))
            for param in params:
                if param[0] == 'void':
                    continue
                if param[2] is None:
                    output.write(" <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
                else:
                    output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
        except Exception:
            print("Failed to save function %s info:  %s" % (
                name, repr(entry.info)))
        output.write(" </%s>\n" % (entry.type,))

    def serialize_exports(self, output, file):
        """Write the <file> element listing the symbols of header `file`."""
        module = self.modulename_file(file)
        output.write(" <file name='%s'>\n" % (module))
        header_idx = self.headers[file]
        symbols = []
        for table in (header_idx.functions, header_idx.variables,
                      header_idx.macros, header_idx.typedefs,
                      header_idx.structs, header_idx.enums):
            symbols.extend(table.keys())
        symbols.sort()
        for symbol in symbols:
            output.write(" <exports symbol='%s'/>\n" % (symbol,))
        output.write(" </file>\n")

    def serialize(self, filename=None):
        """Save the XML API description.

        filename -- output path, "<name>-api.xml" when omitted
        """
        if filename is None:
            filename = "%s-api.xml" % self.name
        print("Saving XML description %s" % (filename))
        output = open(filename, "w")
        try:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<api name='%s'>\n" % self.name)
            output.write(" <files>\n")
            for file in self.headers.keys():
                self.serialize_exports(output, file)
            output.write(" </files>\n")
            output.write(" <symbols>\n")
            for macro in self._sorted_names(self.idx.macros):
                self.serialize_macro(output, macro)
            for enum in self._sorted_names(self.idx.enums):
                self.serialize_enum(output, enum)
            for typedef in self._sorted_names(self.idx.typedefs):
                self.serialize_typedef(output, typedef)
            for variable in self._sorted_names(self.idx.variables):
                self.serialize_variable(output, variable)
            for function in self._sorted_names(self.idx.functions):
                self.serialize_function(output, function)
            output.write(" </symbols>\n")
            output.write("</api>\n")
        finally:
            # close the file even when a serializer raises
            output.close()
1457
1458
def rebuild():
    """Rebuild the API description of the project found next to this script.

    The project is guessed from the presence of ../parser.c (libxml2) or
    ../libxslt/transform.c (libxslt).  When ../libexslt/exslt.c exists,
    the libexslt description is rebuilt as well.

    Returns the main docBuilder, or None when no project was recognized.
    """
    if glob.glob("../parser.c"):
        print("Rebuilding API description for libxml2")
        builder = docBuilder("libxml2", ["..", "../include/libxml"],
                             ["xmlwin32version.h", "tst.c",
                              "schemasInternals.h", "xmlschemas"])
    elif glob.glob("../libxslt/transform.c"):
        print("Rebuilding API description for libxslt")
        builder = docBuilder("libxslt", ["../libxslt"],
                             ["win32config.h", "libxslt.h", "tst.c"])
    else:
        print("rebuild() failed, unable to guess the module")
        return None

    pending = [builder]
    if glob.glob("../libexslt/exslt.c"):
        # built only after the main description has been saved
        pending.append(None)
    for current in pending:
        if current is None:
            current = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        current.scan()
        current.analyze()
        current.serialize()
    return builder
1482
1483#
1484# for debugging the parser
1485#
def parse(filename):
    """Parse a single file and return what CParser.parse() returns.

    Helper for debugging the parser.
    """
    return CParser(filename).parse()
1490
# Rebuild the API description when run as a script.
if __name__ == "__main__":
    rebuild()