#!/usr/bin/python -u
#
# This is the API builder, it parses the C sources and builds the
# API formal description in XML.
#
# See Copyright for the status of this software.
#
# daniel@veillard.com
#
import os
import sys
import string
import glob

# When non-zero, the preprocessor handling prints each directive it sees.
debug = 0
# Name of one symbol to trace: a line is printed whenever an identifier with
# this name is defined, updated or registered.  None disables tracing.
#debugsym='ignorableWhitespaceSAXFunc'
debugsym = None
Daniel Veillarde8ba84e2003-11-18 13:54:15 +000017
#
# C parser analysis code
#

# File name -> human readable reason.
# NOTE(review): presumably these files are skipped by the scanner; the
# consumer is outside this section, confirm there.
ignored_files = {
    "trio": "too many non standard macros",
    "trio.c": "too many non standard macros",
    "trionan.c": "too many non standard macros",
    "triostr.c": "too many non standard macros",
    "acconfig.h": "generated portability layer",
    "config.h": "generated portability layer",
    "libxml.h": "internal only",
    "testOOM.c": "out of memory tester",
    "testOOMlib.h": "out of memory tester",
    "testOOMlib.c": "out of memory tester",
    "rngparser.c": "not yet integrated",
    "rngparser.h": "not yet integrated",
    "elfgcchack.h": "not a normal header",
    "testapi.c": "generated regression tests",
}
37
# Token -> (count, description).
# NOTE(review): the integer presumably tells the parser how many of the
# following tokens to drop together with the word itself; the consumer is
# outside this section, confirm there.
ignored_words = {
    "WINAPI": (0, "Windows keyword"),
    "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
    "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
    "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
    "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
    "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
    "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
    "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
    "XMLCALL": (0, "Special macro for win32 calls"),
    "XSLTCALL": (0, "Special macro for win32 calls"),
    "EXSLTCALL": (0, "Special macro for win32 calls"),
    "__declspec": (3, "Windows keyword"),
    "__stdcall": (0, "Windows keyword"),
    "ATTRIBUTE_UNUSED": (0, "macro keyword"),
    "LIBEXSLT_PUBLIC": (0, "macro keyword"),
    "X_IN_Y": (5, "macro function builder"),
}
56
def escape(raw):
    """Return raw with the five XML special characters replaced by their
    predefined entity references.

    The ampersand is handled first so that the '&' introduced by the other
    replacements is not escaped a second time.
    """
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
64
def uniq(items):
    """Return a list holding each distinct element of items once.

    Elements must be hashable.  The order of the result is whatever order
    the intermediate dictionary yields.
    """
    seen = {}
    for item in items:
        seen[item] = 1
    # list() so that callers always get a real list, also where dict.keys()
    # is only a view.
    return list(seen)
70
class identifier:
    """Record describing one named symbol found while scanning the sources.

    Attributes: name, module, type, info, extra, lineno, static (0 or 1) and
    conditionals (a private copy of a non-empty list, or None).
    """
    def __init__(self, name, module=None, type=None, lineno = 0,
                 info=None, extra=None, conditionals = None):
        self.name = name
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.lineno = lineno
        self.static = 0
        self.set_conditionals(conditionals)
        if self.name == debugsym:
            print("=> define %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))

    def __repr__(self):
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module is not None:
            r = r + " from %s" % (self.module)
        if self.info is not None:
            r = r + " " + repr(self.info)
        if self.extra is not None:
            r = r + " " + repr(self.extra)
        if self.conditionals is not None:
            r = r + " " + repr(self.conditionals)
        return r

    def set_module(self, module):
        self.module = module
    def set_type(self, type):
        self.type = type
    def set_info(self, info):
        self.info = info
    def set_extra(self, extra):
        self.extra = extra
    def set_lineno(self, lineno):
        self.lineno = lineno
    def set_static(self, static):
        self.static = static
    def set_conditionals(self, conditionals):
        # An empty list is normalised to None; anything else is copied so
        # later changes to the caller's list do not leak into this record.
        if conditionals is None or len(conditionals) == 0:
            self.conditionals = None
        else:
            self.conditionals = conditionals[:]

    def get_name(self):
        return self.name
    def get_module(self):
        return self.module
    def get_type(self):
        return self.type
    def get_info(self):
        return self.info
    def get_lineno(self):
        return self.lineno
    def get_extra(self):
        return self.extra
    def get_static(self):
        return self.static
    def get_conditionals(self):
        return self.conditionals

    def update(self, module, type = None, info = None, extra=None,
               conditionals=None):
        """Merge new information into this record.

        module and type are only filled in when still unset; info, extra
        and conditionals overwrite the stored value whenever they are not
        None.
        """
        if self.name == debugsym:
            print("=> update %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))
        if module is not None and self.module is None:
            self.set_module(module)
        if type is not None and self.type is None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
        if conditionals is not None:
            self.set_conditionals(conditionals)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000154
155
class index:
    """Symbol tables collected for one source file or for a whole set.

    identifiers holds every symbol by name; functions, variables, includes,
    structs, enums, typedefs and macros hold the same records split by
    kind; references holds the records created through add_ref().
    """
    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}
        self.info = {}

    def _new_identifier(self, name, module, static, type, lineno, info,
                        extra, conditionals):
        # Create the record for name and store it in self.identifiers,
        # replacing any record already stored under that name.
        #
        # NOTE(review): the historic code first tried
        #     d.update(module, type, lineno, info, extra, conditionals)
        # on an existing record, but identifier.update() takes one
        # positional argument less, so that call always raised TypeError
        # and a bare "except:" fell back to creating a new record.  The
        # resulting behaviour (always replace) is kept here on purpose;
        # switching to a real in-place update would change which
        # module/lineno/extra end up recorded.
        d = identifier(name, module, type, lineno, info, extra, conditionals)
        self.identifiers[name] = d
        if static == 1:
            d.set_static(1)
        return d

    def add_ref(self, name, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a reference to name and return its identifier.

        Names starting with '__' are ignored and None is returned.
        """
        if name[0:2] == '__':
            return None
        d = self._new_identifier(name, module, static, type, lineno, info,
                                 extra, conditionals)

        if type is not None:
            self.references[name] = d

        if name == debugsym:
            print("New ref: %s" % (d,))

        return d

    def add(self, name, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a definition of name, file it under the table matching
        type and return its identifier.

        Names starting with '__' are ignored and None is returned.  An
        unknown type is reported on stdout; the record is still kept in
        self.identifiers.
        """
        if name[0:2] == '__':
            return None
        d = self._new_identifier(name, module, static, type, lineno, info,
                                 extra, conditionals)

        if type is not None:
            tables = {
                "function": self.functions,
                "functype": self.functions,
                "variable": self.variables,
                "include": self.includes,
                "struct": self.structs,
                "enum": self.enums,
                "typedef": self.typedefs,
                "macro": self.macros,
            }
            if type in tables:
                tables[type][name] = d
            else:
                print("Unable to register type  %s" % (type,))

        if name == debugsym:
            print("New symbol: %s" % (d,))

        return d

    def _merge_table(self, label, mine, theirs, drop_macro=0, shadowed_by=()):
        # Copy the entries of theirs that are missing from mine (and into
        # self.identifiers); report on stdout the ones already present.
        #   drop_macro:  remove a macro of the same name first, since a macro
        #                might be used to override a function or variable
        #   shadowed_by: tables whose entries take precedence; names found
        #                there are skipped silently
        for name in list(theirs):
            if drop_macro and name in self.macros:
                del self.macros[name]
            shadowed = 0
            for table in shadowed_by:
                if name in table:
                    shadowed = 1
                    break
            if shadowed:
                continue
            if name in mine:
                print("%s %s from %s redeclared in %s" % (
                    label, name, mine[name].module, theirs[name].module))
            else:
                mine[name] = theirs[name]
                self.identifiers[name] = theirs[name]

    def merge(self, idx):
        """Merge the tables of another index into this one.

        The order matters: functions and variables are merged (and evict
        same-named macros) before macros are considered, and macros are
        checked against the enums known before idx's enums are merged.
        """
        self._merge_table("function", self.functions, idx.functions,
                          drop_macro=1)
        self._merge_table("variable", self.variables, idx.variables,
                          drop_macro=1)
        self._merge_table("struct", self.structs, idx.structs)
        self._merge_table("typedef", self.typedefs, idx.typedefs)
        self._merge_table("macro", self.macros, idx.macros,
                          shadowed_by=(self.variables, self.functions,
                                       self.enums))
        self._merge_table("enum", self.enums, idx.enums)

    def merge_public(self, idx):
        """Complete the functions already known here with the type, info
        and extra found for them in idx.

        Functions only present in idx are ignored.  A mismatch between the
        conditionals recorded on both sides is reported on stdout.
        """
        for name in list(idx.functions):
            if name in self.functions:
                up = idx.functions[name]
                mine = self.functions[name]
                # check that function condition agrees with header
                if up.conditionals != mine.conditionals:
                    print("Header condition differs from Function for %s:"
                          % (name,))
                    print(" H: %s" % (mine.conditionals,))
                    print(" C: %s" % (up.conditionals,))
                mine.update(None, up.type, up.info, up.extra)
        #     else:
        #         print "Function %s from %s is not declared in headers" % (
        #                id, idx.functions[id].module)
        # TODO: do the same for variables.

    def analyze_dict(self, type, dict):
        """Print how many entries dict holds and how many are not static.

        Nothing is printed for an empty table.
        """
        count = len(dict)
        public = 0
        for entry in dict.values():
            if entry.static == 0:
                public = public + 1
        if count != public:
            print(" %d %s , %d public" % (count, type, public))
        elif count != 0:
            print(" %d public %s" % (count, type))

    def analyze(self):
        """Print the summary line of each of the main tables."""
        self.analyze_dict("functions", self.functions)
        self.analyze_dict("variables", self.variables)
        self.analyze_dict("structs", self.structs)
        self.analyze_dict("typedefs", self.typedefs)
        self.analyze_dict("macros", self.macros)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000334
class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line"""

    # Characters ending a 'name' token.
    _NAME_BREAK = " \t(){}:;,+-*/%&!|[]=><"
    # Characters each forming a 'sep' token of their own.
    _SEPARATORS = "(){}:;,[]"
    # Characters starting an 'op' token ...
    _OPERATORS = "+-*><=/%&!|."
    # ... and those allowed as second character of a two character operator.
    _OPERATOR_FOLLOW = "+-*><=/%&!|"

    def __init__(self, input):
        self.input = input
        self.tokens = []
        self.line = ""
        self.lineno = 0
        # Defined up front so that debug() can be called before the first
        # token has been produced.
        self.last = None

    def getline(self):
        """Return the next non-blank line, stripped, with lines ending in a
        backslash joined to their continuation; None at end of input."""
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = line.strip()
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = n.strip()
                if not n:
                    break
                else:
                    line = line + n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        """Put token back in front of the queue of pending tokens."""
        self.tokens.insert(0, token)

    def debug(self):
        print("Last token:  %s" % (self.last,))
        print("Token queue:  %s" % (self.tokens,))
        print("Line %d end:  %s" % (self.lineno, self.line))

    def _read_string(self, line):
        # line starts with the opening quote.  Read up to the matching
        # unescaped quote, pulling in further lines if needed, and return a
        # ('string', text) token; None if the input ends first.
        end = line[0]
        line = line[1:]
        found = 0
        tok = ""
        while found == 0:
            i = 0
            l = len(line)
            while i < l:
                if line[i] == end:
                    self.line = line[i+1:]
                    line = line[:i]
                    found = 1
                    break
                if line[i] == '\\':
                    # skip the escaped character
                    i = i + 1
                i = i + 1
            tok = tok + line
            if found == 0:
                line = self.getline()
                if line is None:
                    return None
        self.last = ('string', tok)
        return self.last

    def _read_block_comment(self, line):
        # line is the text following the opening '/*'.  Collect lines,
        # joined with newlines, up to the closing '*/' and return a
        # ('comment', text) token; None if the input ends first.
        found = 0
        tok = ""
        while found == 0:
            i = line.find('*/')
            if i != -1:
                self.line = line[i+2:]
                # Drops the character in front of '*/' as well, normally
                # the separating blank.
                # NOTE(review): for i == 0 this is line[:-1], i.e. the
                # whole line minus its last character (a closing line made
                # of just "*/" contributes "*").  Kept unchanged because
                # the comment parsers consume exactly this text.
                line = line[:i-1]
                found = 1
            if tok != "":
                tok = tok + "\n"
            tok = tok + line
            if found == 0:
                line = self.getline()
                if line is None:
                    return None
        self.last = ('comment', tok)
        return self.last

    def _tokenize_code(self, line):
        # Queue the tokens of line up to the first comment or string start;
        # the remainder is stored in self.line for the next round.
        l = len(line)
        i = 0
        while i < l:
            c = line[i]
            if c == '"' or c == "'" or (
                    c == '/' and i + 1 < l and line[i+1] in '/*'):
                self.line = line[i:]
                line = line[:i]
                break
            i = i + 1
        l = len(line)
        i = 0
        while i < l:
            c = line[i]
            if c == ' ' or c == '\t':
                i = i + 1
                continue
            if c in self._SEPARATORS:
                self.tokens.append(('sep', c))
                i = i + 1
                continue
            if c in self._OPERATORS:
                if c == '.' and i + 2 < l and \
                   line[i+1] == '.' and line[i+2] == '.':
                    self.tokens.append(('name', '...'))
                    i = i + 3
                    continue
                j = i + 1
                if j < l and line[j] in self._OPERATOR_FOLLOW:
                    self.tokens.append(('op', line[i:j+1]))
                    i = j + 1
                else:
                    self.tokens.append(('op', c))
                    i = i + 1
                continue
            # Anything else starts a name running up to the next blank,
            # separator or operator character ('.' does not end a name).
            s = i
            while i < l and line[i] not in self._NAME_BREAK:
                i = i + 1
            self.tokens.append(('name', line[s:i]))

    def token(self):
        """Return the next token as a (kind, text) tuple, None at end of
        input.

        kind is one of 'preproc', 'string', 'comment', 'name', 'sep', 'op'.
        A line starting with '#' is split on whitespace and every word
        becomes a 'preproc' token.
        """
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line is None:
                return None

            if line[0] == '#':
                self.tokens = [('preproc', word) for word in line.split()]
                break
            if line[0] == '"' or line[0] == "'":
                return self._read_string(line)
            if line[0:2] == '/*':
                return self._read_block_comment(line[2:])
            if line[0:2] == '//':
                self.last = ('comment', line[2:])
                return self.last
            self._tokenize_code(line)

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000540
541class CParser:
Daniel Veillardbe586972003-11-18 20:56:51 +0000542 """The C module parser"""
543 def __init__(self, filename, idx = None):
544 self.filename = filename
545 if len(filename) > 2 and filename[-2:] == '.h':
546 self.is_header = 1
547 else:
548 self.is_header = 0
549 self.input = open(filename)
550 self.lexer = CLexer(self.input)
551 if idx == None:
552 self.index = index()
553 else:
554 self.index = idx
555 self.top_comment = ""
556 self.last_comment = ""
557 self.comment = None
558 self.collect_ref = 0
Daniel Veillard1e906612003-12-05 14:57:46 +0000559 self.no_error = 0
Daniel Veillarda2351322004-06-27 12:08:10 +0000560 self.conditionals = []
561 self.defines = []
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000562
Daniel Veillardbe586972003-11-18 20:56:51 +0000563 def collect_references(self):
564 self.collect_ref = 1
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000565
Daniel Veillard1e906612003-12-05 14:57:46 +0000566 def stop_error(self):
567 self.no_error = 1
568
569 def start_error(self):
570 self.no_error = 0
571
Daniel Veillardbe586972003-11-18 20:56:51 +0000572 def lineno(self):
573 return self.lexer.getlineno()
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000574
Daniel Veillardbe586972003-11-18 20:56:51 +0000575 def index_add(self, name, module, static, type, info=None, extra = None):
576 self.index.add(name, module, static, type, self.lineno(),
Daniel Veillarda2351322004-06-27 12:08:10 +0000577 info, extra, self.conditionals)
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000578
Daniel Veillardbe586972003-11-18 20:56:51 +0000579 def index_add_ref(self, name, module, static, type, info=None,
580 extra = None):
581 self.index.add_ref(name, module, static, type, self.lineno(),
Daniel Veillarda2351322004-06-27 12:08:10 +0000582 info, extra, self.conditionals)
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000583
Daniel Veillard1e906612003-12-05 14:57:46 +0000584 def warning(self, msg):
585 if self.no_error:
586 return
587 print msg
588
Daniel Veillardbe586972003-11-18 20:56:51 +0000589 def error(self, msg, token=-1):
Daniel Veillard1e906612003-12-05 14:57:46 +0000590 if self.no_error:
591 return
592
Daniel Veillardbe586972003-11-18 20:56:51 +0000593 print "Parse Error: " + msg
594 if token != -1:
595 print "Got token ", token
596 self.lexer.debug()
597 sys.exit(1)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000598
Daniel Veillardbe586972003-11-18 20:56:51 +0000599 def debug(self, msg, token=-1):
600 print "Debug: " + msg
601 if token != -1:
602 print "Got token ", token
603 self.lexer.debug()
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000604
Daniel Veillardbe586972003-11-18 20:56:51 +0000605 def parseTopComment(self, comment):
606 res = {}
607 lines = string.split(comment, "\n")
608 item = None
609 for line in lines:
610 while line != "" and (line[0] == ' ' or line[0] == '\t'):
611 line = line[1:]
612 while line != "" and line[0] == '*':
613 line = line[1:]
614 while line != "" and (line[0] == ' ' or line[0] == '\t'):
615 line = line[1:]
616 try:
617 (it, line) = string.split(line, ":", 1)
618 item = it
619 while line != "" and (line[0] == ' ' or line[0] == '\t'):
620 line = line[1:]
621 if res.has_key(item):
622 res[item] = res[item] + " " + line
623 else:
624 res[item] = line
625 except:
626 if item != None:
627 if res.has_key(item):
628 res[item] = res[item] + " " + line
629 else:
630 res[item] = line
631 self.index.info = res
632
633 def parseComment(self, token):
634 if self.top_comment == "":
635 self.top_comment = token[1]
636 if self.comment == None or token[1][0] == '*':
637 self.comment = token[1];
638 else:
639 self.comment = self.comment + token[1]
640 token = self.lexer.token()
Daniel Veillard1e906612003-12-05 14:57:46 +0000641
642 if string.find(self.comment, "DOC_DISABLE") != -1:
643 self.stop_error()
644
645 if string.find(self.comment, "DOC_ENABLE") != -1:
646 self.start_error()
647
Daniel Veillardbe586972003-11-18 20:56:51 +0000648 return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000649
    #
    # Parse a comment block associated with a macro
    #
Daniel Veillardbe586972003-11-18 20:56:51 +0000653 def parseMacroComment(self, name, quiet = 0):
654 if name[0:2] == '__':
655 quiet = 1
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000656
Daniel Veillardbe586972003-11-18 20:56:51 +0000657 args = []
658 desc = ""
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000659
Daniel Veillardbe586972003-11-18 20:56:51 +0000660 if self.comment == None:
661 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000662 self.warning("Missing comment for macro %s" % (name))
Daniel Veillardbe586972003-11-18 20:56:51 +0000663 return((args, desc))
664 if self.comment[0] != '*':
665 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000666 self.warning("Missing * in macro comment for %s" % (name))
Daniel Veillardbe586972003-11-18 20:56:51 +0000667 return((args, desc))
668 lines = string.split(self.comment, '\n')
669 if lines[0] == '*':
670 del lines[0]
671 if lines[0] != "* %s:" % (name):
672 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000673 self.warning("Misformatted macro comment for %s" % (name))
674 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
Daniel Veillardbe586972003-11-18 20:56:51 +0000675 return((args, desc))
676 del lines[0]
677 while lines[0] == '*':
678 del lines[0]
679 while len(lines) > 0 and lines[0][0:3] == '* @':
680 l = lines[0][3:]
681 try:
682 (arg, desc) = string.split(l, ':', 1)
683 desc=string.strip(desc)
684 arg=string.strip(arg)
685 except:
686 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000687 self.warning("Misformatted macro comment for %s" % (name))
688 self.warning(" problem with '%s'" % (lines[0]))
Daniel Veillardbe586972003-11-18 20:56:51 +0000689 del lines[0]
690 continue
691 del lines[0]
692 l = string.strip(lines[0])
693 while len(l) > 2 and l[0:3] != '* @':
694 while l[0] == '*':
695 l = l[1:]
696 desc = desc + ' ' + string.strip(l)
697 del lines[0]
698 if len(lines) == 0:
699 break
700 l = lines[0]
701 args.append((arg, desc))
702 while len(lines) > 0 and lines[0] == '*':
703 del lines[0]
704 desc = ""
705 while len(lines) > 0:
706 l = lines[0]
707 while len(l) > 0 and l[0] == '*':
708 l = l[1:]
709 l = string.strip(l)
710 desc = desc + " " + l
711 del lines[0]
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000712
Daniel Veillardbe586972003-11-18 20:56:51 +0000713 desc = string.strip(desc)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000714
Daniel Veillardbe586972003-11-18 20:56:51 +0000715 if quiet == 0:
716 if desc == "":
Daniel Veillard1e906612003-12-05 14:57:46 +0000717 self.warning("Macro comment for %s lack description of the macro" % (name))
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000718
Daniel Veillardbe586972003-11-18 20:56:51 +0000719 return((args, desc))
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000720
    #
    # Parse a comment block and merge the information found in the
    # parameter descriptions, finally returning a block as complete
    # as possible
    #
Daniel Veillardbe586972003-11-18 20:56:51 +0000726 def mergeFunctionComment(self, name, description, quiet = 0):
727 if name == 'main':
728 quiet = 1
729 if name[0:2] == '__':
730 quiet = 1
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000731
Daniel Veillardbe586972003-11-18 20:56:51 +0000732 (ret, args) = description
733 desc = ""
734 retdesc = ""
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000735
Daniel Veillardbe586972003-11-18 20:56:51 +0000736 if self.comment == None:
737 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000738 self.warning("Missing comment for function %s" % (name))
Daniel Veillardbe586972003-11-18 20:56:51 +0000739 return(((ret[0], retdesc), args, desc))
740 if self.comment[0] != '*':
741 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000742 self.warning("Missing * in function comment for %s" % (name))
Daniel Veillardbe586972003-11-18 20:56:51 +0000743 return(((ret[0], retdesc), args, desc))
744 lines = string.split(self.comment, '\n')
745 if lines[0] == '*':
746 del lines[0]
747 if lines[0] != "* %s:" % (name):
748 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000749 self.warning("Misformatted function comment for %s" % (name))
750 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
Daniel Veillardbe586972003-11-18 20:56:51 +0000751 return(((ret[0], retdesc), args, desc))
752 del lines[0]
753 while lines[0] == '*':
754 del lines[0]
755 nbargs = len(args)
756 while len(lines) > 0 and lines[0][0:3] == '* @':
757 l = lines[0][3:]
758 try:
759 (arg, desc) = string.split(l, ':', 1)
760 desc=string.strip(desc)
761 arg=string.strip(arg)
762 except:
763 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000764 self.warning("Misformatted function comment for %s" % (name))
765 self.warning(" problem with '%s'" % (lines[0]))
Daniel Veillardbe586972003-11-18 20:56:51 +0000766 del lines[0]
767 continue
768 del lines[0]
769 l = string.strip(lines[0])
770 while len(l) > 2 and l[0:3] != '* @':
771 while l[0] == '*':
772 l = l[1:]
773 desc = desc + ' ' + string.strip(l)
774 del lines[0]
775 if len(lines) == 0:
776 break
777 l = lines[0]
778 i = 0
779 while i < nbargs:
780 if args[i][1] == arg:
781 args[i] = (args[i][0], arg, desc)
782 break;
783 i = i + 1
784 if i >= nbargs:
785 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000786 self.warning("Unable to find arg %s from function comment for %s" % (
787 arg, name))
Daniel Veillardbe586972003-11-18 20:56:51 +0000788 while len(lines) > 0 and lines[0] == '*':
789 del lines[0]
790 desc = ""
791 while len(lines) > 0:
792 l = lines[0]
793 while len(l) > 0 and l[0] == '*':
794 l = l[1:]
795 l = string.strip(l)
796 if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return":
797 try:
798 l = string.split(l, ' ', 1)[1]
799 except:
800 l = ""
801 retdesc = string.strip(l)
802 del lines[0]
803 while len(lines) > 0:
804 l = lines[0]
805 while len(l) > 0 and l[0] == '*':
806 l = l[1:]
807 l = string.strip(l)
808 retdesc = retdesc + " " + l
809 del lines[0]
810 else:
811 desc = desc + " " + l
812 del lines[0]
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000813
Daniel Veillardbe586972003-11-18 20:56:51 +0000814 retdesc = string.strip(retdesc)
815 desc = string.strip(desc)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000816
Daniel Veillardbe586972003-11-18 20:56:51 +0000817 if quiet == 0:
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000818 #
819 # report missing comments
820 #
Daniel Veillardbe586972003-11-18 20:56:51 +0000821 i = 0
822 while i < nbargs:
823 if args[i][2] == None and args[i][0] != "void" and args[i][1] != None:
William M. Brack21e4ef22005-01-02 09:53:13 +0000824 self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
Daniel Veillardbe586972003-11-18 20:56:51 +0000825 i = i + 1
826 if retdesc == "" and ret[0] != "void":
William M. Brack21e4ef22005-01-02 09:53:13 +0000827 self.warning("Function comment for %s lacks description of return value" % (name))
Daniel Veillardbe586972003-11-18 20:56:51 +0000828 if desc == "":
William M. Brack21e4ef22005-01-02 09:53:13 +0000829 self.warning("Function comment for %s lacks description of the function" % (name))
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000830
831
Daniel Veillardbe586972003-11-18 20:56:51 +0000832 return(((ret[0], retdesc), args, desc))
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000833
def parsePreproc(self, token):
    """Process one preprocessor token and return the next token to examine.

    token is a lexer tuple whose second element is the directive text.
    '#include' and '#define' directives are recorded through
    self.index_add(); conditional directives update self.defines and
    self.conditionals.  Trailing 'preproc' tokens that do not start a new
    directive are skipped before returning.
    """
    if debug:
        print("=> preproc  %s %s" % (token, self.lexer.tokens))
    directive = token[1]

    if directive == "#include":
        token = self.lexer.token()
        if token is None:
            return None
        if token[0] != 'preproc':
            return token
        self.index_add(token[1], self.filename, not self.is_header,
                       "include")
        return self.lexer.token()

    if directive == "#define":
        token = self.lexer.token()
        if token is None:
            return None
        if token[0] == 'preproc':
            # TODO macros with arguments
            macro = token[1]
            # skip the replacement text up to the next directive
            token = self.lexer.token()
            while token is not None and token[0] == 'preproc' \
                    and token[1][0] != '#':
                token = self.lexer.token()
            try:
                # keep only the identifier of a function-like macro
                macro = macro.split('(')[0]
            except:
                pass
            info = self.parseMacroComment(macro, not self.is_header)
            self.index_add(macro, self.filename, not self.is_header,
                           "macro", info)
            return token

    #
    # Conditionals (#ifdef, #ifndef, #if, #else, #endif) are tracked for
    # both headers and modules.  Every condition is pushed on
    # self.defines, but only those containing the string 'ENABLED' are
    # pushed on self.conditionals; other guards (for example
    # '__XML_PARSER_H__') are not about enabling or disabling library
    # features and are deliberately left out.
    #
    if directive == "#ifdef" or directive == "#ifndef":
        symbol = self.lexer.tokens[0][1]
        if directive == "#ifdef":
            template = "defined(%s)"
        else:
            template = "!defined(%s)"
        try:
            self.defines.append(symbol)
            if 'ENABLED' in symbol:
                self.conditionals.append(template % symbol)
        except:
            pass
    elif directive == "#if":
        expr = ""
        for pending in self.lexer.tokens:
            if expr != "":
                expr = expr + " "
            expr = expr + pending[1]
        try:
            self.defines.append(expr)
            if 'ENABLED' in expr:
                self.conditionals.append(expr)
        except:
            pass
    elif directive == "#else":
        if self.conditionals != [] and 'ENABLED' in self.defines[-1]:
            self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
    elif directive == "#endif":
        if self.conditionals != [] and 'ENABLED' in self.defines[-1]:
            # rebind rather than pop in place, as the original did
            self.conditionals = self.conditionals[:-1]
        self.defines = self.defines[:-1]

    # drop the remainder of the directive
    token = self.lexer.token()
    while token is not None and token[0] == 'preproc' \
            and token[1][0] != '#':
        token = self.lexer.token()
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000928
929 #
# Token acquisition on top of the lexer: it internally handles
# preprocessor directives and comments, since they are logically not
# part of the program structure.
933 #
def token(self):
    """Return the next significant token, or None when the lexer is done.

    Comment tokens are handed to self.parseComment() and preprocessor
    tokens to self.parsePreproc(); names listed in ignored_words are
    dropped together with the number of following tokens the table
    gives for them.
    """
    tok = self.lexer.token()
    while tok is not None:
        kind = tok[0]
        if kind == 'comment':
            tok = self.parseComment(tok)
        elif kind == 'preproc':
            tok = self.parsePreproc(tok)
        elif kind == "name" and tok[1] in ignored_words:
            (extra, _reason) = ignored_words[tok[1]]
            # discard the `extra` tokens that follow the ignored word
            while extra > 0:
                tok = self.lexer.token()
                extra = extra - 1
            tok = self.lexer.token()
        else:
            if debug:
                print("=>  %s" % (tok,))
            return tok
    return None
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000958
959 #
960 # Parse a typedef, it records the type and its name.
961 #
def parseTypedef(self, token):
    """Parse the declarators of a typedef and record each declared name.

    token is the first token after the 'typedef' keyword.  The type is
    read with self.parseType(); each following name is registered with
    self.index_add() as a "functype", "struct" or "typedef".  Returns the
    token following the closing ';', or the offending token (None at end
    of input) after reporting an error.
    """
    if token is None:
        return None
    token = self.parseType(token)
    if token is None:
        self.error("parsing typedef")
        return None

    base_type = self.type
    cur_type = base_type
    while token is not None:
        if token[0] != "name":
            self.error("parsing typedef: expecting a name")
            return token

        name = token[1]
        signature = self.signature
        if signature is not None:
            # function type: keep only the return type part
            cur_type = cur_type.split('(')[0]
            info = self.mergeFunctionComment(
                name, ((cur_type, None), signature), 1)
            self.index_add(name, self.filename, not self.is_header,
                           "functype", info)
        elif base_type == "struct":
            self.index_add(name, self.filename, not self.is_header,
                           "struct", cur_type)
            # later declarators refer to the struct by this name
            base_type = "struct " + name
        else:
            self.index_add(name, self.filename, not self.is_header,
                           "typedef", cur_type)
        token = self.token()

        is_sep = token is not None and token[0] == 'sep'
        if is_sep and token[1] == ',':
            cur_type = base_type
            token = self.token()
            while token is not None and token[0] == "op":
                cur_type = cur_type + token[1]
                token = self.token()
        elif is_sep and token[1] == ';':
            break
        elif token is not None and token[0] == 'name':
            cur_type = base_type
        else:
            self.error("parsing typedef: expecting ';'", token)
            return token
    return self.token()
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001011
1012 #
# Parse a C code block (used for function bodies); it parses up to
# and including the balancing '}'.
1015 #
def parseBlock(self, token):
    """Skip a brace-delimited code block and return the token after it.

    token is the first token inside the block.  Nested blocks are
    handled recursively.  When self.collect_ref is 1, identifiers
    starting with "xml", "XML_" or "LIBXML_" are recorded through
    self.index_add_ref() as function, type or typedef references.
    """
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.parseBlock(self.token())
            continue
        if token[0] == "sep" and token[1] == "}":
            self.comment = None
            return self.token()
        if self.collect_ref != 1:
            token = self.token()
            continue

        # reference collection: look at the token together with its successor
        prev = token
        token = self.token()
        if prev[0] != "name":
            continue
        ident = prev[1]
        if ident.startswith("xml"):
            if token[0] == "sep" and token[1] == "(":
                self.index_add_ref(ident, self.filename, 0, "function")
                token = self.token()
            elif token[0] == "name":
                token = self.token()
                if token[0] == "sep" and token[1] in (";", ",", "="):
                    self.index_add_ref(ident, self.filename, 0, "type")
        elif ident.startswith("XML_") or ident.startswith("LIBXML_"):
            self.index_add_ref(ident, self.filename, 0, "typedef")
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001050
1051 #
1052 # Parse a C struct definition till the balancing }
1053 #
def parseStruct(self, token):
    """Parse the members of a struct body up to the balancing '}'.

    token is the first token after the opening '{'.  Each
    "type name;" member is collected as a (type, name, comment) tuple
    and the list is stored in self.struct_fields.  self.type is restored
    to its value on entry after every member.  Returns the token after
    the closing '}' (None if the input ends first).
    """
    members = []
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.parseTypeBlock(self.token())
            continue
        if token[0] == "sep" and token[1] == "}":
            self.struct_fields = members
            return self.token()

        saved_type = self.type
        token = self.parseType(token)
        if token is not None and token[0] == "name":
            field_name = token[1]
            token = self.token()
            if token[0] == "sep" and token[1] == ";":
                # fetching the next token may pick up a trailing comment,
                # which then documents this field
                self.comment = None
                token = self.token()
                members.append((self.type, field_name, self.comment))
                self.comment = None
            else:
                self.error("parseStruct: expecting ;", token)
        elif token is not None and token[0] == "sep" and token[1] == "{":
            # nested anonymous block: skip it and its optional name
            token = self.parseTypeBlock(self.token())
            if token is not None and token[0] == "name":
                token = self.token()
            if token is not None and token[0] == "sep" and token[1] == ";":
                token = self.token()
            else:
                self.error("parseStruct: expecting ;", token)
        else:
            self.error("parseStruct: name", token)
            token = self.token()
        self.type = saved_type
    self.struct_fields = members
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001099
1100 #
1101 # Parse a C enum block, parse till the balancing }
1102 #
def parseEnumBlock(self, token):
    """Parse the body of a C enum up to and including the balancing '}'.

    token is the first token after the opening '{'.  Every enumerator is
    appended to self.enums as a (name, value, comment) tuple, where value
    is the text after '=' when one is given, otherwise the previous
    value plus one.  When the previous value is not a plain integer the
    value cannot be computed: a warning is issued and the value is left
    empty.  Returns the token after the closing '}', or None if the
    input ends first.
    """
    self.enums = []
    name = None
    self.comment = None
    comment = ""
    # C gives an implicit first enumerator the value 0.  Since the value
    # is incremented before use, start one below so the first one gets 0.
    value = "-1"
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseTypeBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            if name is not None:
                if self.comment is not None:
                    comment = self.comment
                    self.comment = None
                self.enums.append((name, value, comment))
            token = self.token()
            return token
        elif token[0] == "name":
            # a new name completes the previous enumerator
            if name is not None:
                if self.comment is not None:
                    comment = self.comment.strip()
                    self.comment = None
                self.enums.append((name, value, comment))
            name = token[1]
            comment = ""
            token = self.token()
            if token is not None and token[0] == "op" \
                    and token[1][0] == "=":
                # explicit value: gather everything up to ',' or '}'
                value = ""
                if len(token[1]) > 1:
                    value = token[1][1:]
                token = self.token()
                while token is not None and (
                        token[0] != "sep" or token[1] not in (',', '}')):
                    value = value + token[1]
                    token = self.token()
            else:
                try:
                    value = "%d" % (int(value) + 1)
                except ValueError:
                    self.warning("Failed to compute value of enum %s" % (name))
                    value = ""
            if token is not None and token[0] == "sep" and token[1] == ",":
                token = self.token()
        else:
            token = self.token()
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001150
1151 #
# Parse a C definition block (used for structs); it parses up to
# the balancing '}'.
1154 #
def parseTypeBlock(self, token):
    """Skip a brace-delimited definition block.

    token is the first token inside the block.  Nested '{' ... '}'
    pairs are skipped recursively.  Returns the token following the
    balancing '}', or None when the input ends first.
    """
    while token is not None:
        is_sep = token[0] == "sep"
        if is_sep and token[1] == "{":
            token = self.parseTypeBlock(self.token())
        elif is_sep and token[1] == "}":
            return self.token()
        else:
            token = self.token()
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001166
1167 #
# Parse a type. The type name can occur either after the definition
# or within it, which makes this a little harder; if it is inside,
# the name token is pushed back before returning.
1171 #
def parseType(self, token):
    """Parse a C type starting at token and store its text in self.type.

    Resets self.type, self.struct_fields and self.signature, then
    consumes qualifiers, the base type (including struct and enum
    bodies), pointer operators, function-pointer declarators and array
    suffixes.  When the declared name has to be read to finish the type,
    the token after it is pushed back on the lexer and the name token is
    returned, so callers normally receive the declared name.  Enum values
    are registered with self.index_add().

    Returns the next token to process (None at end of input).
    """
    def add_word(word):
        # append one space-separated word to the type being built
        if self.type == "":
            self.type = word
        else:
            self.type = self.type + " " + word

    self.type = ""
    self.struct_fields = []
    self.signature = None
    if token is None:
        return token

    while token is not None and token[0] == "name" and (
            token[1] == "const" or token[1] == "unsigned"):
        add_word(token[1])
        token = self.token()
    if token is None:
        return token

    if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
        add_word(token[1])
        # Look ahead for the optional words of a multi-word integer type
        # ("long int", "short int", "long long").  The previous code
        # tested the current token again, which could never be "int",
        # and referenced an undefined name.
        ahead = self.token()
        while ahead is not None and ahead[0] == "name" and \
                ahead[1] in ("long", "int"):
            add_word(ahead[1])
            ahead = self.token()
        if ahead is not None:
            self.lexer.push(ahead)

    elif token[0] == "name" and token[1] == "struct":
        add_word(token[1])
        token = self.token()
        nametok = None
        if token[0] == "name":
            nametok = token
            token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseStruct(token)
        elif token is not None and token[0] == "op" and token[1] == "*":
            self.type = self.type + " " + nametok[1] + " *"
            token = self.token()
            while token is not None and token[0] == "op" and token[1] == "*":
                self.type = self.type + " *"
                token = self.token()
            if token[0] == "name":
                nametok = token
                token = self.token()
            else:
                self.error("struct : expecting name", token)
                return token
        elif token is not None and token[0] == "name" and nametok is not None:
            self.type = self.type + " " + nametok[1]
            return token

        if nametok is not None:
            self.lexer.push(token)
            token = nametok
        return token

    elif token[0] == "name" and token[1] == "enum":
        add_word(token[1])
        self.enums = []
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseEnumBlock(token)
        else:
            self.error("parsing enum: expecting '{'", token)
        enum_type = None
        if token is not None and token[0] != "name":
            # anonymous enum: hand back a synthetic name token
            self.lexer.push(token)
            token = ("name", "enum")
        else:
            enum_type = token[1]
        for enum in self.enums:
            self.index_add(enum[0], self.filename,
                           not self.is_header, "enum",
                           (enum[1], enum[2], enum_type))
        return token

    elif token[0] == "name":
        add_word(token[1])
    else:
        self.error("parsing type %s: expecting a name" % (self.type),
                   token)
        return token

    token = self.token()
    while token is not None and (token[0] == "op" or
            token[0] == "name" and token[1] == "const"):
        self.type = self.type + " " + token[1]
        token = self.token()

    #
    # if there is a parenthesis here, this means a function type
    #
    if token is not None and token[0] == "sep" and token[1] == '(':
        self.type = self.type + token[1]
        token = self.token()
        while token is not None and token[0] == "op" and token[1] == '*':
            self.type = self.type + token[1]
            token = self.token()
        if token is None or token[0] != "name":
            self.error("parsing function type, name expected", token)
            return token
        self.type = self.type + token[1]
        nametok = token
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == ')':
            self.type = self.type + token[1]
            token = self.token()
            if token is not None and token[0] == "sep" and token[1] == '(':
                token = self.token()
                # parseSignature() goes through parseType() again, which
                # overwrites self.type
                saved_type = self.type
                token = self.parseSignature(token)
                self.type = saved_type
            else:
                self.error("parsing function type, '(' expected", token)
                return token
        else:
            self.error("parsing function type, ')' expected", token)
            return token
        self.lexer.push(token)
        token = nametok
        return token

    #
    # do some lookahead for arrays
    #
    if token is not None and token[0] == "name":
        nametok = token
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == '[':
            self.type = self.type + nametok[1]
            while token is not None and token[0] == "sep" and token[1] == '[':
                self.type = self.type + token[1]
                token = self.token()
                while token is not None and token[0] != 'sep' and \
                        token[1] != ']' and token[1] != ';':
                    self.type = self.type + token[1]
                    token = self.token()
                if token is not None and token[0] == 'sep' and token[1] == ']':
                    self.type = self.type + token[1]
                    token = self.token()
                else:
                    self.error("parsing array type, ']' expected", token)
                    return token
        elif token is not None and token[0] == "sep" and token[1] == ':':
            # remove :12 in case it's a limited int size
            token = self.token()
            token = self.token()
        self.lexer.push(token)
        token = nametok

    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001333
1334 #
# Parse a signature: '(' has already been parsed, and we scan the
# parameter types up to and including the ')'.
def parseSignature(self, token):
    """Parse a parameter list whose opening '(' was already consumed.

    Stores a list of (type, name, None) tuples in self.signature; name
    is None when only a type is given and "..." for a "..." type.
    Returns the token following the closing ')'.
    """
    params = []
    if token is not None and token[0] == "sep" and token[1] == ')':
        # empty parameter list
        self.signature = []
        return self.token()

    while token is not None:
        token = self.parseType(token)
        if token is not None:
            if token[0] == "name":
                params.append((self.type, token[1], None))
                token = self.token()
            elif token[0] == "sep" and token[1] == ',':
                token = self.token()
                continue
            elif token[0] == "sep" and token[1] == ')':
                # only the type was provided
                if self.type == "...":
                    params.append((self.type, "...", None))
                else:
                    params.append((self.type, None, None))
        if token is not None and token[0] == "sep":
            if token[1] == ',':
                token = self.token()
                continue
            elif token[1] == ')':
                token = self.token()
                break
    self.signature = params
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001366
1367 #
# Parse a global definition, be it a type, variable or function.
# The extern "C" blocks are a bit nasty and require it to recurse.
1370 #
def parseGlobal(self, token):
    """Parse one top-level C declaration or definition.

    Handles extern "C" { ... } blocks (recursively), 'static'
    declarations, typedefs (delegated to self.parseTypedef()), and
    variable or function declarations, which are registered through
    self.index_add().  Function bodies and initializers are skipped.
    Returns the next token to process, or None at end of input.
    """
    static = 0
    if token[1] == 'extern':
        token = self.token()
        if token is None:
            return token
        if token[0] == 'string':
            if token[1] != 'C':
                return token
            token = self.token()
            if token is None:
                return token
            if token[0] == 'sep' and token[1] == "{":
                token = self.token()
                while token is not None and not (
                        token[0] == 'sep' and token[1] == "}"):
                    if token[0] != 'name':
                        self.error(
                            "token %s %s unexpected at the top level" % (
                                token[0], token[1]))
                    token = self.parseGlobal(token)
                token = self.token()
                return token
    elif token[1] == 'static':
        static = 1
        token = self.token()
        if token is None or token[0] != 'name':
            return token

    if token[1] == 'typedef':
        return self.parseTypedef(self.token())

    token = self.parseType(token)
    type_orig = self.type
    if token is None or token[0] != "name":
        return token
    cur_type = type_orig
    self.name = token[1]
    token = self.token()
    while token is not None and (token[0] == "sep" or token[0] == "op"):
        if token[0] == "sep" and token[1] == "[":
            # array declarator: fold everything up to the ';' into the type
            cur_type = cur_type + token[1]
            token = self.token()
            while token is not None and not (
                    token[0] == "sep" and token[1] == ";"):
                cur_type = cur_type + token[1]
                token = self.token()

        if token is not None and token[0] == "op" and token[1] == "=":
            #
            # Skip the initialization of the variable
            #
            token = self.token()
            if token[0] == 'sep' and token[1] == '{':
                token = self.parseBlock(self.token())
            else:
                self.comment = None
                while token is not None and not (
                        token[0] == "sep" and token[1] in (';', ',')):
                    token = self.token()
            self.comment = None
            if token is None or token[0] != "sep" or \
                    token[1] not in (';', ','):
                self.error("missing ';' or ',' after value")

        if token is not None and token[0] == "sep":
            if token[1] == ";":
                self.comment = None
                token = self.token()
                if cur_type == "struct":
                    self.index_add(self.name, self.filename,
                                   not self.is_header, "struct",
                                   self.struct_fields)
                else:
                    self.index_add(self.name, self.filename,
                                   not self.is_header, "variable", cur_type)
                break
            elif token[1] == "(":
                token = self.parseSignature(self.token())
                if token is None:
                    return None
                if token[0] == "sep" and token[1] == ";":
                    # prototype only
                    info = self.mergeFunctionComment(
                        self.name, ((cur_type, None), self.signature), 1)
                    self.index_add(self.name, self.filename, static,
                                   "function", info)
                    token = self.token()
                elif token[0] == "sep" and token[1] == "{":
                    # definition: record it, then skip the body
                    info = self.mergeFunctionComment(
                        self.name, ((cur_type, None), self.signature),
                        static)
                    self.index_add(self.name, self.filename, static,
                                   "function", info)
                    token = self.parseBlock(self.token())
            elif token[1] == ',':
                # several variables declared in one statement
                self.comment = None
                self.index_add(self.name, self.filename, static,
                               "variable", cur_type)
                cur_type = type_orig
                token = self.token()
                while token is not None and token[0] == "sep":
                    cur_type = cur_type + token[1]
                    token = self.token()
                if token is not None and token[0] == "name":
                    self.name = token[1]
                    token = self.token()
            else:
                break

    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001489
def parse(self):
    """Parse the whole file and return the index built for it.

    Every top-level token is handed to self.parseGlobal().  A token
    that is not a name is reported through self.error(); if that call
    returns, one more parseGlobal() is attempted and the method returns
    None.  Otherwise the top comment is processed and self.index is
    returned.
    """
    self.warning("Parsing %s" % (self.filename))
    token = self.token()
    while token is not None:
        if token[0] != 'name':
            self.error("token %s %s unexpected at the top level" % (
                token[0], token[1]))
            token = self.parseGlobal(token)
            return
        token = self.parseGlobal(token)
    self.parseTopComment(self.top_comment)
    return self.index
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001503
1504
1505class docBuilder:
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001506 """A documentation builder"""
def __init__(self, name, directories=None, excludes=None):
    """Set up an empty documentation builder for project `name`.

    directories: list of directories to scan; defaults to ['.'].
    excludes: substrings of file paths to skip; the keys of the
        module-level ignored_files table are always added to them.

    Fresh lists are created for the defaults on every call, so that
    instances never share (and cannot corrupt) a default list object.
    """
    if directories is None:
        directories = ['.']
    if excludes is None:
        excludes = []
    self.name = name
    self.directories = directories
    self.excludes = excludes + list(ignored_files.keys())
    self.modules = {}
    self.headers = {}
    self.idx = index()
    self.xref = {}
    self.index = {}
    # libxml2 keeps the historical 'libxml' base name
    if name == 'libxml2':
        self.basename = 'libxml'
    else:
        self.basename = name
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001520
def indexString(self, id, str):
    """Add the significant words of `str` to the cross-reference table.

    Punctuation is replaced by spaces and the text is split on
    whitespace.  A word is kept when it starts with an ASCII letter, is
    at least three characters long and is not 'and' or 'the' (compared
    case-insensitively).  For each kept word, `id` is appended to
    self.xref[word].  Does nothing when `str` is None.
    """
    if str is None:
        return
    for punct in "'\"/*[]()<>&#,.;":
        str = str.replace(punct, ' ')
    for word in str.split():
        # split() never yields empty strings, so word[0] always exists
        if word[0] not in string.ascii_letters:
            continue
        if len(word) < 3:
            continue
        # TODO: generalize this a bit
        if word.lower() in ('and', 'the'):
            continue
        self.xref.setdefault(word, []).append(id)
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001558
def analyze(self):
    """Print a one-line project summary, then analyze the merged index."""
    header_count = len(self.headers.keys())
    module_count = len(self.modules.keys())
    print("Project %s : %d headers, %d modules" % (
        self.name, header_count, module_count))
    self.idx.analyze()
1562
def scanHeaders(self):
    """Parse every registered header and merge its index into self.idx."""
    # iterate over a snapshot of the keys since the values are replaced
    for header in list(self.headers.keys()):
        header_index = CParser(header).parse()
        self.headers[header] = header_index
        self.idx.merge(header_index)
1569
def scanModules(self):
    """Parse every registered module and merge it with merge_public()."""
    # iterate over a snapshot of the keys since the values are replaced
    for module in list(self.modules.keys()):
        module_index = CParser(module).parse()
        # module_index.analyze()
        self.modules[module] = module_index
        self.idx.merge_public(module_index)
1577
def scan(self):
    """Collect the .c and .h files of every directory, then parse them.

    A file is skipped when its path contains any of the strings in
    self.excludes.  Kept .c files become keys of self.modules and kept
    .h files keys of self.headers (with None values), after which
    scanHeaders() and scanModules() are run.
    """
    def collect(pattern, table):
        # register every non-excluded path matching pattern in table
        for path in glob.glob(pattern):
            excluded = 0
            for excl in self.excludes:
                if excl in path:
                    excluded = 1
                    break
            if not excluded:
                table[path] = None

    for directory in self.directories:
        collect(directory + "/*.c", self.modules)
        collect(directory + "/*.h", self.headers)
    self.scanHeaders()
    self.scanModules()
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001600
def modulename_file(self, file):
    """Return the base name of `file`, without a trailing '.h' if any."""
    base = os.path.basename(file)
    if base.endswith('.h'):
        return base[:-2]
    return base
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001606
def serialize_enum(self, output, name):
    """Write the <enum .../> element for enum value `name` to output.

    The entry's info is read as (value, comment, type); each part is
    emitted as an attribute only when it is neither None nor empty.
    """
    entry = self.idx.enums[name]
    output.write(" <enum name='%s' file='%s'" % (
        name, self.modulename_file(entry.module)))
    info = entry.info
    if info is not None:
        if info[0] not in (None, ''):
            # NOTE(review): eval() runs text taken from the parsed C
            # sources to fold constant expressions; only use this on
            # trusted source trees.  On failure the raw text is kept.
            try:
                val = eval(info[0])
            except:
                val = info[0]
            output.write(" value='%s'" % (val))
        if info[2] not in (None, ''):
            output.write(" type='%s'" % info[2])
        if info[1] not in (None, ''):
            output.write(" info='%s'" % escape(info[1]))
    output.write("/>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001624
def serialize_macro(self, output, name):
    """Write the <macro> element for macro `name` to output.

    The entry's info, when present, is read as (args, desc), where args
    is a sequence of (arg_name, arg_desc) pairs.  The macro description
    and every argument description are cross-referenced under the macro
    name through self.indexString().
    """
    entry = self.idx.macros[name]
    output.write(" <macro name='%s' file='%s'>\n" % (
        name, self.modulename_file(entry.module)))
    if entry.info is not None:
        try:
            (args, desc) = entry.info
            if desc is not None and desc != "":
                output.write(" <info>%s</info>\n" % (escape(desc)))
                self.indexString(name, desc)
            for arg in args:
                # Use separate names here: reusing `name` made the
                # argument descriptions be indexed under the argument
                # name instead of the macro they belong to.
                (arg_name, arg_desc) = arg
                if arg_desc is not None and arg_desc != "":
                    output.write(" <arg name='%s' info='%s'/>\n" % (
                        arg_name, escape(arg_desc)))
                    self.indexString(name, arg_desc)
                else:
                    output.write(" <arg name='%s'/>\n" % (arg_name))
        except Exception:
            # best effort: info that does not have the expected shape
            # only truncates the element content
            pass
    output.write(" </macro>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001646
def serialize_typedef(self, output, name):
    """Write the element describing typedef `name` to output.

    A typedef whose type text starts with 'struct ' is written as a
    <struct> element; when the struct is known to the index and its
    info is a list or tuple, one <field> child is written per entry.
    Any other typedef is written as an empty <typedef> element.
    """
    entry = self.idx.typedefs[name]
    if entry.info[0:7] != 'struct ':
        output.write(" <typedef name='%s' file='%s' type='%s'/>\n" % (
            name, self.modulename_file(entry.module), entry.info))
        return

    output.write(" <struct name='%s' file='%s' type='%s'" % (
        name, self.modulename_file(entry.module), entry.info))
    struct_name = entry.info[7:]
    structs = self.idx.structs
    if struct_name in structs and \
            type(structs[struct_name].info) in (type(()), type([])):
        output.write(">\n")
        try:
            for field in structs[struct_name].info:
                desc = field[2]
                self.indexString(struct_name, desc)
                if desc is None:
                    desc = ''
                else:
                    desc = escape(desc)
                output.write(" <field name='%s' type='%s' info='%s'/>\n" % (
                    field[1], field[0], desc))
        except:
            print("Failed to serialize struct %s" % (struct_name))
        output.write(" </struct>\n")
    else:
        output.write("/>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001674
def serialize_variable(self, output, name):
    """Write the <variable .../> element for variable `name` to output.

    The type attribute is emitted only when the entry's info is not None.
    """
    entry = self.idx.variables[name]
    module = self.modulename_file(entry.module)
    if entry.info is None:
        output.write(" <variable name='%s' file='%s'/>\n" % (name, module))
    else:
        output.write(" <variable name='%s' file='%s' type='%s'/>\n" % (
            name, module, entry.info))
Daniel Veillardc1eed322002-12-12 11:01:32 +00001683
def serialize_function(self, output, name):
    """Write the element describing function `name` to `output`.

    The element tag is the record's `type`.  It holds an optional <cond>
    child (all conditionals joined with an escaped '&&'), then the
    <info>, <return> and <arg> children taken from the record's info,
    which is unpacked as (ret, params, desc).  Descriptions are also
    handed to self.indexString() under the function name.  If anything
    goes wrong while writing the details a message is printed and the
    element is closed anyway.
    """
    func = self.idx.functions[name]
    if name == debugsym:
        # Debugging aid, same output as the statement form: "=> <record>"
        print("=>" + " " + str(func))

    tag = func.type
    output.write(" <%s name='%s' file='%s'>\n" % (tag, name,
                 self.modulename_file(func.module)))
    #
    # Processing of conditionals modified by Bill 1/1/05
    #
    if func.conditionals != None:
        joined = ""
        for cond in func.conditionals:
            if joined != "":
                joined = joined + " &amp;&amp; "
            joined = joined + cond
        output.write(" <cond>%s</cond>\n"% (joined))
    try:
        (ret, params, desc) = func.info
        output.write(" <info>%s</info>\n" % (escape(desc)))
        self.indexString(name, desc)
        rtype = ret[0]
        if rtype == "void":
            output.write(" <return type='void'/>\n")
        elif rtype != None:
            output.write(" <return type='%s' info='%s'/>\n" % (
                         rtype, escape(ret[1])))
            self.indexString(name, ret[1])
        for param in params:
            # A lone 'void' parameter means "no argument".
            if param[0] == 'void':
                continue
            if param[2] != None:
                output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
                self.indexString(name, param[2])
            else:
                output.write(" <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
    except:
        print("Failed to save function %s info: " % name + " " + repr(func.info))
    output.write(" </%s>\n" % (tag))
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001723
def serialize_exports(self, output, file):
    """Write the <file> element listing what header `file` exports.

    When the header record has an `info` mapping, its 'Summary',
    'Description' and 'Author' entries are written first (a message is
    printed for each one that cannot be written), followed by a
    <deprecated/> marker if the description contains "DEPRECATED".
    Then one <exports> element is written per symbol, grouped by kind
    and sorted by name inside each group.
    """
    module = self.modulename_file(file)
    output.write(" <file name='%s'>\n" % (module))
    header = self.headers[file]
    if header.info != None:
        for data in ('Summary', 'Description', 'Author'):
            try:
                output.write(" <%s>%s</%s>\n" % (
                             data.lower(),
                             escape(header.info[data]),
                             data.lower()))
            except:
                print("Header %s lacks a %s description" % (module, data))
        if 'Description' in header.info:
            if header.info['Description'].find("DEPRECATED") != -1:
                output.write(" <deprecated/>\n")

    # Macros are sometimes used to masquerade as other types: a macro
    # name that is also known as another kind of symbol is not listed
    # as a macro.
    others = (header.functions, header.variables, header.typedefs,
              header.structs, header.enums)
    for symbol in uniq(sorted(header.macros.keys())):
        masked = 0
        for table in others:
            if symbol in table:
                masked = 1
                break
        if not masked:
            output.write(" <exports symbol='%s' type='macro'/>\n" % (symbol))

    # The remaining kinds are all listed the same way, in this order.
    sections = (
        (header.enums, " <exports symbol='%s' type='enum'/>\n"),
        (header.typedefs, " <exports symbol='%s' type='typedef'/>\n"),
        (header.structs, " <exports symbol='%s' type='struct'/>\n"),
        (header.variables, " <exports symbol='%s' type='variable'/>\n"),
        (header.functions, " <exports symbol='%s' type='function'/>\n"),
    )
    for (table, line) in sections:
        for symbol in uniq(sorted(table.keys())):
            output.write(line % (symbol))
    output.write(" </file>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001778
def serialize_xrefs_files(self, output):
    """Write, for every header, a <file> element referencing its symbols.

    Headers are processed in sorted order.  The names of the functions,
    variables, macros, typedefs, structs and enums of a header are
    merged, passed through uniq(), sorted, and written as <ref> elements.
    """
    for file in sorted(self.headers.keys()):
        module = self.modulename_file(file)
        output.write(" <file name='%s'>\n" % (module))
        header = self.headers[file]
        names = []
        for table in (header.functions, header.variables, header.macros,
                      header.typedefs, header.structs, header.enums):
            names.extend(table.keys())
        ids = uniq(names)
        ids.sort()
        for id in ids:
            output.write(" <ref name='%s'/>\n" % (id))
        output.write(" </file>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001794
def serialize_xrefs_functions(self, output):
    """Write, per parameter type, the functions taking such a parameter.

    Every function whose info unpacks as (ret, params, desc) is filed
    under the type (first item) of each of its parameters; functions
    whose info cannot be processed are silently skipped.  One <type>
    element is written per type in sorted order, except for '', 'void',
    'int', 'char *' and 'const char *', and each lists its functions
    once, sorted by name.
    """
    users = {}
    for name in self.idx.functions.keys():
        func = self.idx.functions[name]
        try:
            (ret, params, desc) = func.info
            for param in params:
                if param[0] == 'void':
                    continue
                users.setdefault(param[0], []).append(name)
        except:
            pass

    for typename in sorted(users.keys()):
        # Too common to be of any use as a cross reference.
        if typename in ('', 'void', "int", "char *", "const char *"):
            continue
        output.write(" <type name='%s'>\n" % (typename))
        names = users[typename]
        names.sort()
        # A function with several parameters of this type was recorded
        # several times; once sorted, duplicates are adjacent.
        previous = ''
        for fname in names:
            if fname != previous:
                output.write(" <ref name='%s'/>\n" % (fname))
                previous = fname
        output.write(" </type>\n")
1825
def serialize_xrefs_constructors(self, output):
    """Write, per return type, the functions returning that type.

    Every function whose info unpacks as (ret, params, desc) is filed
    under ret[0] unless that is "void"; functions whose info cannot be
    processed are silently skipped.  One <type> element is written per
    type in sorted order, except for '', 'void', 'int', 'char *' and
    'const char *', listing its functions sorted by name.
    """
    makers = {}
    for name in self.idx.functions.keys():
        func = self.idx.functions[name]
        try:
            (ret, params, desc) = func.info
            if ret[0] == "void":
                continue
            makers.setdefault(ret[0], []).append(name)
        except:
            pass

    for typename in sorted(makers.keys()):
        # Too common to be of any use as a cross reference.
        if typename in ('', 'void', "int", "char *", "const char *"):
            continue
        output.write(" <type name='%s'>\n" % (typename))
        names = makers[typename]
        names.sort()
        for fname in names:
            output.write(" <ref name='%s'/>\n" % (fname))
        output.write(" </type>\n")
1852
def serialize_xrefs_alpha(self, output):
    """Write all identifiers sorted by name, grouped by first character.

    Each run of identifiers sharing the same first character is wrapped
    in a <letter> element holding one <ref> per identifier.  Nothing is
    written when self.idx.identifiers is empty.
    """
    current = None
    for ident in sorted(self.idx.identifiers.keys()):
        initial = ident[0]
        if initial != current:
            # Entering a new group: close the previous one, if any.
            if current is not None:
                output.write(" </letter>\n")
            current = initial
            output.write(" <letter name='%s'>\n" % (current))
        output.write(" <ref name='%s'/>\n" % (ident))
    if current is not None:
        output.write(" </letter>\n")
1866
def serialize_xrefs_references(self, output):
    """Write one <reference> element per identifier, sorted by name.

    The href of each reference is built as
    html/<self.basename>-<module name>.html#<identifier>, the module
    name being self.modulename_file() of the identifier's module.
    """
    for ident in sorted(self.idx.identifiers.keys()):
        module = self.idx.identifiers[ident].module
        href = 'html/' + self.basename + '-' + \
               self.modulename_file(module) + '.html#' + ident
        output.write(" <reference name='%s' href='%s'/>\n" % (ident, href))
1877
def serialize_xrefs_index(self, output):
    """Write the word index held in self.xref, split into chunks.

    self.xref maps a word to the list of symbols recorded for it.  Words
    are written in sorted order, skipping those with more than 30
    entries, as <word> elements holding one <ref> per distinct symbol.
    Words are grouped in <letter> elements by first character, and
    letters in <chunk> elements: a new chunk is started at a letter
    boundary once more than 200 refs were written in the current one.
    A <chunks> summary giving the first and last letter of every chunk
    closes the output.  The symbol lists of self.xref are sorted in
    place as a side effect.

    NOTE(review): the nesting of the closing section was reconstructed
    as "only when at least one word was written"; confirm against the
    original layout.
    """
    index = self.xref
    letter = None
    count = 0       # refs written in the current chunk
    chunk = 0       # number of chunks opened so far
    chunks = []     # [chunk name, first letter, last letter] triples
    for word in sorted(index.keys()):
        if len(index[word]) > 30:
            continue
        initial = word[0]
        if initial != letter:
            new_chunk = (letter == None or count > 200)
            if letter != None:
                output.write(" </letter>\n")
                if new_chunk:
                    output.write(" </chunk>\n")
                    count = 0
                    chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
            if new_chunk:
                output.write(" <chunk name='chunk%s'>\n" % (chunk))
                first_letter = initial
                chunk = chunk + 1
            letter = initial
            output.write(" <letter name='%s'>\n" % (letter))
        output.write(" <word name='%s'>\n" % (word))
        tokens = index[word]
        tokens.sort()
        # Once sorted, duplicates are adjacent: skip repeated symbols.
        last = None
        for token in tokens:
            if last == token:
                continue
            last = token
            output.write(" <ref name='%s'/>\n" % (token))
            count = count + 1
        output.write(" </word>\n")
    if letter != None:
        output.write(" </letter>\n")
        output.write(" </chunk>\n")
        if count != 0:
            chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
        output.write(" <chunks>\n")
        for ch in chunks:
            output.write(" <chunk name='%s' start='%s' end='%s'/>\n" % (
                         ch[0], ch[1], ch[2]))
        output.write(" </chunks>\n")
1924
def serialize_xrefs(self, output):
    """Write every cross reference section to `output`.

    The sections are emitted in a fixed order (references, alpha,
    constructors, functions, files, index), each wrapped in its own
    element and filled by the matching serialize_xrefs_* method.
    """
    sections = (
        (" <references>\n", self.serialize_xrefs_references, " </references>\n"),
        (" <alpha>\n", self.serialize_xrefs_alpha, " </alpha>\n"),
        (" <constructors>\n", self.serialize_xrefs_constructors, " </constructors>\n"),
        (" <functions>\n", self.serialize_xrefs_functions, " </functions>\n"),
        (" <files>\n", self.serialize_xrefs_files, " </files>\n"),
        (" <index>\n", self.serialize_xrefs_index, " </index>\n"),
    )
    for (opening, fill, closing) in sections:
        output.write(opening)
        fill(output)
        output.write(closing)
1944
def serialize(self):
    """Save the API description and its cross references as XML files.

    Two files named after self.name are (re)written, relative to the
    current directory:
      - "<name>-api.xml": the exports of every header, in sorted order,
        then all macros, enums, typedefs, variables and functions of
        self.idx, each kind sorted by name;
      - "<name>-refs.xml": the cross references written by
        self.serialize_xrefs().
    Each file is closed even when a serializer raises, so a failure no
    longer leaks the file handle; the exception still propagates.
    """
    filename = "%s-api.xml" % self.name
    print("Saving XML description %s" % (filename))
    output = open(filename, "w")
    try:
        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
        output.write("<api name='%s'>\n" % self.name)
        output.write(" <files>\n")
        for file in sorted(self.headers.keys()):
            self.serialize_exports(output, file)
        output.write(" </files>\n")
        output.write(" <symbols>\n")
        # Every kind of symbol is dumped the same way: sorted by name
        # and handed to its dedicated serializer.
        kinds = (
            (self.idx.macros, self.serialize_macro),
            (self.idx.enums, self.serialize_enum),
            (self.idx.typedefs, self.serialize_typedef),
            (self.idx.variables, self.serialize_variable),
            (self.idx.functions, self.serialize_function),
        )
        for (table, emit) in kinds:
            for symbol in sorted(table.keys()):
                emit(output, symbol)
        output.write(" </symbols>\n")
        output.write("</api>\n")
    finally:
        output.close()

    filename = "%s-refs.xml" % self.name
    print("Saving XML Cross References %s" % (filename))
    output = open(filename, "w")
    try:
        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
        output.write("<apirefs name='%s'>\n" % self.name)
        self.serialize_xrefs(output)
        output.write("</apirefs>\n")
    finally:
        output.close()
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001990
1991
def rebuild():
    """Guess which module the script is run from and rebuild its API files.

    The module is recognized by probing for a well-known source file
    relative to the current directory; the first probe that matches
    selects the docBuilder arguments.  The builder is then scanned,
    analyzed and serialized.  When ../libexslt/exslt.c exists, a second
    builder for libexslt is processed the same way.

    Returns the main builder, or None (after printing a message) when
    no probe matched.
    """
    # (probe, message, module name, directories, excluded files)
    candidates = (
        ("parser.c", "Rebuilding API description for libxml2",
         "libxml2", [".", "."], ["xmlwin32version.h", "tst.c"]),
        ("../parser.c", "Rebuilding API description for libxml2",
         "libxml2", ["..", "../include/libxml"],
         ["xmlwin32version.h", "tst.c"]),
        ("../libxslt/transform.c", "Rebuilding API description for libxslt",
         "libxslt", ["../libxslt"],
         ["win32config.h", "libxslt.h", "tst.c"]),
    )
    for (probe, message, module, directories, excluded) in candidates:
        if glob.glob(probe) != []:
            print(message)
            builder = docBuilder(module, directories, excluded)
            break
    else:
        print("rebuild() failed, unable to guess the module")
        return None

    builder.scan()
    builder.analyze()
    builder.serialize()
    if glob.glob("../libexslt/exslt.c") != []:
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize()
    return builder
2018
2019#
2020# for debugging the parser
2021#
def parse(filename):
    """Run a CParser over `filename` and return what its parse() yields."""
    return CParser(filename).parse()
2026
if __name__ == "__main__":
    # Without argument rebuild the whole API description; otherwise
    # turn debugging on and only parse the file given as first argument.
    if len(sys.argv) <= 1:
        rebuild()
    else:
        debug = 1
        parse(sys.argv[1])