blob: 8a11db0cd5e30b985a09379370a6dc8044f2e224 [file] [log] [blame]
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001#!/usr/bin/python -u
2#
# This is the API builder: it parses the C sources and builds the
# formal API description in XML.
5#
6# See Copyright for the status of this software.
7#
8# daniel@veillard.com
9#
Daniel Veillard540a31a2003-01-21 11:21:07 +000010import os, sys
Daniel Veillarda9b66d02002-12-11 14:23:49 +000011import string
12import glob
13
# Global debug flag (its consumers are outside this part of the file).
debug = 0

# Name of one symbol to trace: identifier and index print a line each time
# a symbol with this exact name is defined, updated or registered.
# None disables the tracing.
# debugsym = 'ignorableWhitespaceSAXFunc'
debugsym = None
Daniel Veillarde8ba84e2003-11-18 13:54:15 +000017
Daniel Veillardde0a0a52003-04-24 17:12:57 +000018#
19# C parser analysis code
20#
# Files excluded from the analysis, mapped to the reason for skipping them.
ignored_files = {
    # trio portability code
    "trio": "too many non standard macros",
    "trio.c": "too many non standard macros",
    "trionan.c": "too many non standard macros",
    "triostr.c": "too many non standard macros",
    # generated or internal headers
    "acconfig.h": "generated portability layer",
    "config.h": "generated portability layer",
    "libxml.h": "internal only",
    "elfgcchack.h": "not a normal header",
    # out of memory tester
    "testOOM.c": "out of memory tester",
    "testOOMlib.h": "out of memory tester",
    "testOOMlib.c": "out of memory tester",
    # code not yet part of the library
    "rngparser.c": "not yet integrated",
    "rngparser.h": "not yet integrated",
    "tst.c": "not part of the library",
    # command line test tools
    "testHTML.c": "test tool",
    "testReader.c": "test tool",
    "testSchemas.c": "test tool",
    "testXPath.c": "test tool",
    "testAutomata.c": "test tool",
    "testModule.c": "test tool",
    "testRegexp.c": "test tool",
    "testThreads.c": "test tool",
    "testC14N.c": "test tool",
    "testRelax.c": "test tool",
    "testThreadsWin32.c": "test tool",
    "testSAX.c": "test tool",
    "testURI.c": "test tool",
    # regression test programs
    "testapi.c": "generated regression tests",
    "runtest.c": "regression tests program",
    "runsuite.c": "regression tests program",
    "testdso.c": "test for dynamid shared libraries",
}
54
# Words the parser should ignore, mapped to a (count, reason) pair.
# NOTE(review): the count is presumably the number of following tokens to
# drop together with the word (e.g. the "(...)" after __declspec); the code
# consuming this table is not in this part of the file -- confirm there.
ignored_words = {
    # Windows keywords
    "WINAPI": (0, "Windows keyword"),
    "__declspec": (3, "Windows keyword"),
    "__stdcall": (0, "Windows keyword"),
    # export markers for variables
    "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
    "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
    "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
    "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
    # export markers for functions
    "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
    "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
    "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
    # calling convention markers
    "XMLCALL": (0, "Special macro for win32 calls"),
    "XSLTCALL": (0, "Special macro for win32 calls"),
    "XMLCDECL": (0, "Special macro for win32 calls"),
    "EXSLTCALL": (0, "Special macro for win32 calls"),
    # miscellaneous macros
    "ATTRIBUTE_UNUSED": (0, "macro keyword"),
    "LIBEXSLT_PUBLIC": (0, "macro keyword"),
    "X_IN_Y": (5, "macro function builder"),
}
74
def escape(raw):
    """Return raw with the five XML special characters replaced by entities.

    '&' is handled first so that the ampersands introduced by the other
    replacements are not escaped a second time.
    """
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
82
def uniq(items):
    """Return the distinct values found in items.

    The values are collected as dictionary keys, so they must be hashable
    and the order of the result is whatever the dictionary yields.
    """
    return dict.fromkeys(items, 1).keys()
88
class identifier:
    """Description of one symbol collected while parsing.

    Attributes: name, header, module, type, lineno, info, extra,
    static (0 or 1) and conditionals (a private copy of the list given,
    or None when no conditional applies).
    """

    def __init__(self, name, header=None, module=None, type=None, lineno = 0,
                 info=None, extra=None, conditionals = None):
        self.name = name
        self.header = header
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.lineno = lineno
        self.static = 0
        self.set_conditionals(conditionals)
        if self.name == debugsym:
            print("=> define %s : %s" % (debugsym, (module, type, info,
                                                      extra, conditionals)))

    def __repr__(self):
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module is not None:
            r = r + " from %s" % (self.module)
        # info, extra and conditionals are appended in that order when set
        for detail in (self.info, self.extra, self.conditionals):
            if detail is not None:
                r = r + " " + repr(detail)
        return r

    def set_header(self, header):
        self.header = header
    def set_module(self, module):
        self.module = module
    def set_type(self, type):
        self.type = type
    def set_info(self, info):
        self.info = info
    def set_extra(self, extra):
        self.extra = extra
    def set_lineno(self, lineno):
        self.lineno = lineno
    def set_static(self, static):
        self.static = static
    def set_conditionals(self, conditionals):
        # An empty list is normalised to None; otherwise keep a copy so that
        # later changes to the caller's list do not affect this symbol.
        if conditionals is None or len(conditionals) == 0:
            self.conditionals = None
        else:
            self.conditionals = conditionals[:]

    def get_name(self):
        return self.name
    def get_header(self):
        # Fixed: this used to return self.module.
        return self.header
    def get_module(self):
        return self.module
    def get_type(self):
        return self.type
    def get_info(self):
        return self.info
    def get_lineno(self):
        return self.lineno
    def get_extra(self):
        return self.extra
    def get_static(self):
        return self.static
    def get_conditionals(self):
        return self.conditionals

    def update(self, header, module, type = None, info = None, extra=None,
               conditionals=None):
        """Merge new information into this symbol.

        header is only recorded when none is known yet; module is recorded
        when none is known or when the current module is the header itself;
        type is only recorded when none is known.  info, extra and
        conditionals replace the current values whenever they are not None.
        """
        if self.name == debugsym:
            print("=> update %s : %s" % (debugsym, (module, type, info,
                                                      extra, conditionals)))
        if header is not None and self.header is None:
            # Fixed: this used to store module in the header slot.
            self.set_header(header)
        if module is not None and (self.module is None or
                                   self.header == self.module):
            self.set_module(module)
        if type is not None and self.type is None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
        if conditionals is not None:
            self.set_conditionals(conditionals)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000179
class index:
    """Set of symbols found in one or several files.

    identifiers maps every known name to its identifier; functions,
    variables, includes, structs, enums, typedefs and macros hold the same
    objects sorted by kind; references holds the symbols recorded through
    add_ref(); info is a free-form dictionary filled in by the parser.
    """

    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}
        self.info = {}

    def _define(self, name, header, module, static, type, lineno, info,
                extra, conditionals):
        """Create the identifier for name and store it in self.identifiers."""
        # NOTE(review): historically this first tried
        #   self.identifiers[name].update(header, module, type, lineno, ...)
        # inside a bare "except:".  identifier.update() has no lineno
        # parameter, so that call always raised TypeError, the error was
        # swallowed and a fresh identifier replaced the old one.  The
        # replace-on-redefinition result is kept here, without the hidden
        # exception; switching to a real merge would change the output and
        # should be a deliberate decision.
        d = identifier(name, header, module, type, lineno, info, extra,
                       conditionals)
        self.identifiers[name] = d
        if static == 1:
            d.set_static(1)
        return d

    def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a reference to name; names starting with '__' are ignored.

        Returns the identifier, or None for an ignored name.
        """
        if name[0:2] == '__':
            return None
        d = self._define(name, header, module, static, type, lineno, info,
                         extra, conditionals)

        if type is not None:
            self.references[name] = d

        if name == debugsym:
            print("New ref: %s" % (d,))

        return d

    def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record the definition of name; names starting with '__' are ignored.

        The identifier is also stored in the table matching type
        ("functype" shares the functions table).  Returns the identifier,
        or None for an ignored name.
        """
        if name[0:2] == '__':
            return None
        d = self._define(name, header, module, static, type, lineno, info,
                         extra, conditionals)

        if type is not None:
            tables = {
                "function": self.functions,
                "functype": self.functions,
                "variable": self.variables,
                "include": self.includes,
                "struct": self.structs,
                "enum": self.enums,
                "typedef": self.typedefs,
                "macro": self.macros,
            }
            if type in tables:
                tables[type][name] = d
            else:
                print("Unable to register type  %s" % (type,))

        if name == debugsym:
            print("New symbol: %s" % (d,))

        return d

    def _merge_table(self, kind, mine, theirs, overrides_macros = 0):
        """Copy the entries of theirs missing from mine, report the others."""
        for id in theirs.keys():
            if overrides_macros and id in self.macros:
                # macro might be used to override functions or variables
                # definitions
                del self.macros[id]
            if id in mine:
                print("%s %s from %s redeclared in %s" % (
                    kind, id, mine[id].header, theirs[id].header))
            else:
                mine[id] = theirs[id]
                self.identifiers[id] = theirs[id]

    def merge(self, idx):
        """Add the symbols of idx to this index.

        A symbol already present in the table of the same kind is reported
        as redeclared and left untouched.  The order of the steps matters:
        macros are checked against the functions, variables and enums known
        at that point.
        """
        self._merge_table("function", self.functions, idx.functions, 1)
        self._merge_table("variable", self.variables, idx.variables, 1)
        self._merge_table("struct", self.structs, idx.structs)
        self._merge_table("typedef", self.typedefs, idx.typedefs)
        for id in idx.macros.keys():
            #
            # macro might be used to override functions or variables
            # definitions
            #
            if id in self.variables or id in self.functions or \
               id in self.enums:
                continue
            if id in self.macros:
                print("macro %s from %s redeclared in %s" % (
                    id, self.macros[id].header, idx.macros[id].header))
            else:
                self.macros[id] = idx.macros[id]
                self.identifiers[id] = idx.macros[id]
        self._merge_table("enum", self.enums, idx.enums)

    def merge_public(self, idx):
        """Update the functions already known here with the data from idx.

        Functions of idx unknown to this index are ignored.
        """
        for id in idx.functions.keys():
            if id not in self.functions:
                continue
            mine = self.functions[id]
            up = idx.functions[id]
            # check that function condition agrees with header
            if up.conditionals != mine.conditionals:
                print("Header condition differs from Function for %s:" % id)
                print(" H: %s" % mine.conditionals)
                print(" C: %s" % up.conditionals)
            mine.update(None, up.module, up.type, up.info, up.extra)
        # TODO: do the same for variables.

    def analyze_dict(self, type, dict):
        """Print how many entries of dict exist and how many are not static."""
        count = len(dict)
        public = 0
        for name in dict.keys():
            if dict[name].static == 0:
                public = public + 1
        if count != public:
            print(" %d %s , %d public" % (count, type, public))
        elif count != 0:
            print(" %d public %s" % (count, type))

    def analyze(self):
        """Print the statistics for the main symbol tables."""
        self.analyze_dict("functions", self.functions)
        self.analyze_dict("variables", self.variables)
        self.analyze_dict("structs", self.structs)
        self.analyze_dict("typedefs", self.typedefs)
        self.analyze_dict("macros", self.macros)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000358
class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line"""

    # Characters ending a 'name' token.  '.' is not part of it, so a name
    # such as "a.b" is kept in one piece.
    _NAME_STOP = " \t(){}:;,+-*/%&!|[]=><"
    # Characters returned as single 'sep' tokens.
    _SEPARATORS = "(){}:;,[]"
    # Characters starting an 'op' token, and those allowed as second char.
    _OPERATORS = "+-*><=/%&!|."
    _OPERATOR_TAIL = "+-*><=/%&!|"

    def __init__(self, input):
        """input is any object with a readline() method returning strings."""
        self.input = input
        self.tokens = []        # tokens of the current line not yet returned
        self.line = ""          # part of the current line not yet analyzed
        self.lineno = 0
        # Set up front so that debug() also works before the first token.
        self.last = None

    def getline(self):
        """Return the next non-blank line, stripped, or None at end of input.

        A line ending with a backslash is joined with the following ones.
        """
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = line.strip()
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = n.strip()
                if not n:
                    break
                line = line + n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        """Put token back so that it is the next one returned by token()."""
        self.tokens.insert(0, token)

    def debug(self):
        print("Last token:  %s" % (self.last,))
        print("Token queue:  %s" % (self.tokens,))
        print("Line %d end:  %s" % (self.lineno, self.line))

    def _read_string(self, line):
        """Read a string or char literal; line starts with its quote."""
        end = line[0]
        line = line[1:]
        found = 0
        tok = ""
        while found == 0:
            i = 0
            l = len(line)
            while i < l:
                if line[i] == end:
                    self.line = line[i+1:]
                    line = line[:i]
                    found = 1
                    break
                if line[i] == '\\':
                    # skip the escaped character
                    i = i + 1
                i = i + 1
            tok = tok + line
            if found == 0:
                line = self.getline()
                if line is None:
                    return None
        self.last = ('string', tok)
        return self.last

    def _read_comment(self, line):
        """Read a /* ... */ comment; line starts with the opening marker."""
        line = line[2:]
        found = 0
        tok = ""
        while found == 0:
            i = 0
            l = len(line)
            while i < l:
                if line[i] == '*' and i+1 < l and line[i+1] == '/':
                    self.line = line[i+2:]
                    # Historical behaviour, kept on purpose: the character
                    # before the closing marker is dropped as well, and a
                    # closing marker in first position leaves the line minus
                    # its last character (so "*/" yields "*").
                    line = line[:i-1]
                    found = 1
                    break
                i = i + 1
            if tok != "":
                tok = tok + "\n"
            tok = tok + line
            if found == 0:
                line = self.getline()
                if line is None:
                    return None
        self.last = ('comment', tok)
        return self.last

    def _cut_at_literal(self, line):
        """Return the part of line before the first comment or literal.

        The remainder is saved in self.line for the next call to token().
        """
        i = 0
        l = len(line)
        while i < l:
            c = line[i]
            if c == '"' or c == "'" or \
               (c == '/' and i+1 < l and line[i+1] in '/*'):
                self.line = line[i:]
                return line[:i]
            i = i + 1
        return line

    def _tokenize(self, line):
        """Append the name, sep and op tokens of line to self.tokens."""
        i = 0
        l = len(line)
        while i < l:
            c = line[i]
            if c == ' ' or c == '\t':
                i = i + 1
                continue
            if c in self._SEPARATORS:
                self.tokens.append(('sep', c))
                i = i + 1
                continue
            if c in self._OPERATORS:
                if c == '.' and i + 2 < l and \
                   line[i+1] == '.' and line[i+2] == '.':
                    self.tokens.append(('name', '...'))
                    i = i + 3
                    continue
                j = i + 1
                if j < l and line[j] in self._OPERATOR_TAIL:
                    self.tokens.append(('op', line[i:j+1]))
                    i = j + 1
                else:
                    self.tokens.append(('op', c))
                    i = i + 1
                continue
            # Anything else starts a name running up to the next stop char.
            s = i
            while i < l and line[i] not in self._NAME_STOP:
                i = i + 1
            self.tokens.append(('name', line[s:i]))

    def token(self):
        """Return the next token as a (kind, value) pair, None at the end.

        kind is one of 'preproc', 'string', 'comment', 'name', 'sep', 'op'.
        None is also returned when the input ends inside a string or a
        comment.
        """
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line is None:
                return None

            if line[0] == '#':
                # preprocessor line: one token per blank separated word
                self.tokens = [('preproc', word) for word in line.split()]
                break
            if line[0] == '"' or line[0] == "'":
                return self._read_string(line)
            if line[0:2] == '/*':
                return self._read_comment(line)
            if line[0:2] == '//':
                self.last = ('comment', line[2:])
                return self.last
            self._tokenize(self._cut_at_literal(line))

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000564
565class CParser:
Daniel Veillardbe586972003-11-18 20:56:51 +0000566 """The C module parser"""
567 def __init__(self, filename, idx = None):
568 self.filename = filename
569 if len(filename) > 2 and filename[-2:] == '.h':
570 self.is_header = 1
571 else:
572 self.is_header = 0
573 self.input = open(filename)
574 self.lexer = CLexer(self.input)
575 if idx == None:
576 self.index = index()
577 else:
578 self.index = idx
579 self.top_comment = ""
580 self.last_comment = ""
581 self.comment = None
582 self.collect_ref = 0
Daniel Veillard1e906612003-12-05 14:57:46 +0000583 self.no_error = 0
Daniel Veillarda2351322004-06-27 12:08:10 +0000584 self.conditionals = []
585 self.defines = []
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000586
Daniel Veillardbe586972003-11-18 20:56:51 +0000587 def collect_references(self):
588 self.collect_ref = 1
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000589
Daniel Veillard1e906612003-12-05 14:57:46 +0000590 def stop_error(self):
591 self.no_error = 1
592
593 def start_error(self):
594 self.no_error = 0
595
Daniel Veillardbe586972003-11-18 20:56:51 +0000596 def lineno(self):
597 return self.lexer.getlineno()
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000598
Daniel Veillardbe586972003-11-18 20:56:51 +0000599 def index_add(self, name, module, static, type, info=None, extra = None):
Daniel Veillard5d4644e2005-04-01 13:11:58 +0000600 if self.is_header == 1:
601 self.index.add(name, module, module, static, type, self.lineno(),
602 info, extra, self.conditionals)
603 else:
604 self.index.add(name, None, module, static, type, self.lineno(),
605 info, extra, self.conditionals)
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000606
Daniel Veillardbe586972003-11-18 20:56:51 +0000607 def index_add_ref(self, name, module, static, type, info=None,
608 extra = None):
Daniel Veillard5d4644e2005-04-01 13:11:58 +0000609 if self.is_header == 1:
610 self.index.add_ref(name, module, module, static, type,
611 self.lineno(), info, extra, self.conditionals)
612 else:
613 self.index.add_ref(name, None, module, static, type, self.lineno(),
614 info, extra, self.conditionals)
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000615
Daniel Veillard1e906612003-12-05 14:57:46 +0000616 def warning(self, msg):
617 if self.no_error:
618 return
619 print msg
620
Daniel Veillardbe586972003-11-18 20:56:51 +0000621 def error(self, msg, token=-1):
Daniel Veillard1e906612003-12-05 14:57:46 +0000622 if self.no_error:
623 return
624
Daniel Veillardbe586972003-11-18 20:56:51 +0000625 print "Parse Error: " + msg
626 if token != -1:
627 print "Got token ", token
628 self.lexer.debug()
629 sys.exit(1)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000630
Daniel Veillardbe586972003-11-18 20:56:51 +0000631 def debug(self, msg, token=-1):
632 print "Debug: " + msg
633 if token != -1:
634 print "Got token ", token
635 self.lexer.debug()
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000636
Daniel Veillardbe586972003-11-18 20:56:51 +0000637 def parseTopComment(self, comment):
638 res = {}
639 lines = string.split(comment, "\n")
640 item = None
641 for line in lines:
642 while line != "" and (line[0] == ' ' or line[0] == '\t'):
643 line = line[1:]
644 while line != "" and line[0] == '*':
645 line = line[1:]
646 while line != "" and (line[0] == ' ' or line[0] == '\t'):
647 line = line[1:]
648 try:
649 (it, line) = string.split(line, ":", 1)
650 item = it
651 while line != "" and (line[0] == ' ' or line[0] == '\t'):
652 line = line[1:]
653 if res.has_key(item):
654 res[item] = res[item] + " " + line
655 else:
656 res[item] = line
657 except:
658 if item != None:
659 if res.has_key(item):
660 res[item] = res[item] + " " + line
661 else:
662 res[item] = line
663 self.index.info = res
664
665 def parseComment(self, token):
666 if self.top_comment == "":
667 self.top_comment = token[1]
668 if self.comment == None or token[1][0] == '*':
669 self.comment = token[1];
670 else:
671 self.comment = self.comment + token[1]
672 token = self.lexer.token()
Daniel Veillard1e906612003-12-05 14:57:46 +0000673
674 if string.find(self.comment, "DOC_DISABLE") != -1:
675 self.stop_error()
676
677 if string.find(self.comment, "DOC_ENABLE") != -1:
678 self.start_error()
679
Daniel Veillardbe586972003-11-18 20:56:51 +0000680 return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000681
    #
    # Parse the comment block associated with a macro
    #
Daniel Veillardbe586972003-11-18 20:56:51 +0000685 def parseMacroComment(self, name, quiet = 0):
686 if name[0:2] == '__':
687 quiet = 1
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000688
Daniel Veillardbe586972003-11-18 20:56:51 +0000689 args = []
690 desc = ""
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000691
Daniel Veillardbe586972003-11-18 20:56:51 +0000692 if self.comment == None:
693 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000694 self.warning("Missing comment for macro %s" % (name))
Daniel Veillardbe586972003-11-18 20:56:51 +0000695 return((args, desc))
696 if self.comment[0] != '*':
697 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000698 self.warning("Missing * in macro comment for %s" % (name))
Daniel Veillardbe586972003-11-18 20:56:51 +0000699 return((args, desc))
700 lines = string.split(self.comment, '\n')
701 if lines[0] == '*':
702 del lines[0]
703 if lines[0] != "* %s:" % (name):
704 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000705 self.warning("Misformatted macro comment for %s" % (name))
706 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
Daniel Veillardbe586972003-11-18 20:56:51 +0000707 return((args, desc))
708 del lines[0]
709 while lines[0] == '*':
710 del lines[0]
711 while len(lines) > 0 and lines[0][0:3] == '* @':
712 l = lines[0][3:]
713 try:
714 (arg, desc) = string.split(l, ':', 1)
715 desc=string.strip(desc)
716 arg=string.strip(arg)
717 except:
718 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000719 self.warning("Misformatted macro comment for %s" % (name))
720 self.warning(" problem with '%s'" % (lines[0]))
Daniel Veillardbe586972003-11-18 20:56:51 +0000721 del lines[0]
722 continue
723 del lines[0]
724 l = string.strip(lines[0])
725 while len(l) > 2 and l[0:3] != '* @':
726 while l[0] == '*':
727 l = l[1:]
728 desc = desc + ' ' + string.strip(l)
729 del lines[0]
730 if len(lines) == 0:
731 break
732 l = lines[0]
733 args.append((arg, desc))
734 while len(lines) > 0 and lines[0] == '*':
735 del lines[0]
736 desc = ""
737 while len(lines) > 0:
738 l = lines[0]
739 while len(l) > 0 and l[0] == '*':
740 l = l[1:]
741 l = string.strip(l)
742 desc = desc + " " + l
743 del lines[0]
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000744
Daniel Veillardbe586972003-11-18 20:56:51 +0000745 desc = string.strip(desc)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000746
Daniel Veillardbe586972003-11-18 20:56:51 +0000747 if quiet == 0:
748 if desc == "":
Daniel Veillard1e906612003-12-05 14:57:46 +0000749 self.warning("Macro comment for %s lack description of the macro" % (name))
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000750
Daniel Veillardbe586972003-11-18 20:56:51 +0000751 return((args, desc))
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000752
753 #
754 # Parse a comment block and merge the informations found in the
755 # parameters descriptions, finally returns a block as complete
756 # as possible
757 #
def mergeFunctionComment(self, name, description, quiet = 0):
    """Merge the pending comment block into a function description.

    name is the function name, description is a (ret, args) pair where
    ret[0] is the return type and args is a list of
    (type, name, info) tuples.  The comment held in self.comment is
    expected to look like:

        * name:
        * @arg: description of arg
        *
        * Description of the function
        *
        * Returns description of the return value

    Matching entries of args are updated in place with the argument
    description.  The result is ((ret[0], retdesc), args, desc).
    Problems are reported through self.warning() unless quiet is set;
    'main' and names starting with '__' are always quiet.

    Truncated comments (empty text, header only, an @arg on the very
    last line, a continuation made only of '*') are handled without
    raising IndexError.
    """
    if name == 'main' or name[0:2] == '__':
        quiet = 1

    (ret, args) = description
    desc = ""
    retdesc = ""

    if self.comment is None:
        if not quiet:
            self.warning("Missing comment for function %s" % (name))
        return ((ret[0], retdesc), args, desc)
    # slice instead of index so that an empty comment does not raise
    if self.comment[0:1] != '*':
        if not quiet:
            self.warning("Missing * in function comment for %s" % (name))
        return ((ret[0], retdesc), args, desc)

    lines = self.comment.split('\n')
    if lines[0] == '*':
        del lines[0]
    if len(lines) > 0:
        first = lines[0]
    else:
        first = ""
    if first != "* %s:" % (name):
        if not quiet:
            self.warning("Misformatted function comment for %s" % (name))
            self.warning(" Expecting '* %s:' got '%s'" % (name, first))
        return ((ret[0], retdesc), args, desc)
    del lines[0]
    while len(lines) > 0 and lines[0] == '*':
        del lines[0]

    #
    # the '* @arg: description' entries
    #
    nbargs = len(args)
    while len(lines) > 0 and lines[0][0:3] == '* @':
        parts = lines[0][3:].split(':', 1)
        if len(parts) != 2:
            if not quiet:
                self.warning("Misformatted function comment for %s" % (name))
                self.warning(" problem with '%s'" % (lines[0]))
            del lines[0]
            continue
        arg = parts[0].strip()
        desc = parts[1].strip()
        del lines[0]

        # continuation lines of the argument description
        if len(lines) > 0:
            l = lines[0].strip()
        else:
            l = ""
        while len(l) > 2 and l[0:3] != '* @':
            while len(l) > 0 and l[0] == '*':
                l = l[1:]
            desc = desc + ' ' + l.strip()
            del lines[0]
            if len(lines) == 0:
                break
            l = lines[0]

        for i in range(nbargs):
            if args[i][1] == arg:
                args[i] = (args[i][0], arg, desc)
                break
        else:
            if not quiet:
                self.warning("Unable to find arg %s from function comment for %s" % (
                   arg, name))

    while len(lines) > 0 and lines[0] == '*':
        del lines[0]

    #
    # the function description, then everything from the first line
    # starting with "return"/"Return" is the return description
    #
    desc = ""
    while len(lines) > 0:
        l = lines[0]
        while len(l) > 0 and l[0] == '*':
            l = l[1:]
        l = l.strip()
        if l[0:6] == "return" or l[0:6] == "Return":
            words = l.split(' ', 1)
            if len(words) > 1:
                l = words[1]
            else:
                l = ""
            retdesc = l.strip()
            del lines[0]
            while len(lines) > 0:
                l = lines[0]
                while len(l) > 0 and l[0] == '*':
                    l = l[1:]
                l = l.strip()
                retdesc = retdesc + " " + l
                del lines[0]
        else:
            desc = desc + " " + l
            del lines[0]

    retdesc = retdesc.strip()
    desc = desc.strip()

    if quiet == 0:
        #
        # report missing comments
        #
        for i in range(nbargs):
            if args[i][2] is None and args[i][0] != "void" and \
               args[i][1] is not None:
                self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
        if retdesc == "" and ret[0] != "void":
            self.warning("Function comment for %s lacks description of return value" % (name))
        if desc == "":
            self.warning("Function comment for %s lacks description of the function" % (name))

    return ((ret[0], retdesc), args, desc)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000865
def parsePreproc(self, token):
    """Consume one preprocessor directive and return the next token.

    token is the ('preproc', '#xxx') token that opens the directive.
    '#include' and '#define' are recorded with self.index_add().
    '#ifdef', '#ifndef', '#if', '#else' and '#endif' maintain the
    self.defines and self.conditionals stacks; only conditions whose
    text contains 'ENABLED' are tracked in self.conditionals.
    """
    if debug:
        print("=> preproc  %s %s" % (token, self.lexer.tokens))
    name = token[1]

    if name == "#include":
        token = self.lexer.token()
        if token is None:
            return None
        if token[0] != 'preproc':
            return token
        self.index_add(token[1], self.filename, not self.is_header,
                       "include")
        return self.lexer.token()

    if name == "#define":
        token = self.lexer.token()
        if token is None:
            return None
        if token[0] == 'preproc':
            # TODO macros with arguments
            name = token[1]
            body = []
            token = self.lexer.token()
            while token is not None and token[0] == 'preproc' and \
                  token[1][0] != '#':
                body.append(token[1])
                token = self.lexer.token()
            # keep only the macro name, dropping any '(args' suffix
            name = name.split('(')[0]
            info = self.parseMacroComment(name, not self.is_header)
            self.index_add(name, self.filename, not self.is_header,
                           "macro", info)
            return token
        # otherwise fall through: name is still "#define", none of the
        # conditional cases below match and the directive is skipped

    #
    # Conditionals: every condition is pushed on self.defines, but only
    # the ones mentioning 'ENABLED' are pushed on self.conditionals, so
    # that header guards and similar symbols are ignored.
    #
    if name == "#ifdef" or name == "#ifndef":
        apstr = self.lexer.tokens[0][1]
        self.defines.append(apstr)
        if 'ENABLED' in apstr:
            if name == "#ifdef":
                self.conditionals.append("defined(%s)" % apstr)
            else:
                self.conditionals.append("!defined(%s)" % apstr)
    elif name == "#if":
        apstr = ""
        for tok in self.lexer.tokens:
            if apstr:
                apstr += " "
            apstr += tok[1]
        self.defines.append(apstr)
        if 'ENABLED' in apstr:
            self.conditionals.append(apstr)
    elif name == "#else":
        if self.conditionals and 'ENABLED' in self.defines[-1]:
            self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
    elif name == "#endif":
        if self.conditionals and 'ENABLED' in self.defines[-1]:
            self.conditionals = self.conditionals[:-1]
        self.defines = self.defines[:-1]

    # skip the remaining tokens of this directive
    token = self.lexer.token()
    while token is not None and token[0] == 'preproc' and \
          token[1][0] != '#':
        token = self.lexer.token()
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000960
961 #
962 # token acquisition on top of the lexer, it handle internally
963 # preprocessor and comments since they are logically not part of
964 # the program structure.
965 #
def token(self):
    """Return the next token that is part of the program structure.

    Comment and preprocessor tokens coming from self.lexer are handed
    to parseComment()/parsePreproc() and never returned.  '__const' is
    returned as 'const', an '__attribute' is skipped up to the next
    ';' token, and names listed in ignored_words are dropped together
    with the number of following tokens registered for them.
    Returns None once the lexer is exhausted.
    """
    tok = self.lexer.token()
    while tok is not None:
        kind = tok[0]
        if kind == 'comment':
            tok = self.parseComment(tok)
        elif kind == 'preproc':
            tok = self.parsePreproc(tok)
        elif kind == "name" and tok[1] == "__const":
            return ("name", "const")
        elif kind == "name" and tok[1] == "__attribute":
            tok = self.lexer.token()
            while tok is not None and tok[1] != ";":
                tok = self.lexer.token()
            return tok
        elif kind == "name" and tok[1] in ignored_words:
            (n, info) = ignored_words[tok[1]]
            # drop the n tokens attached to the ignored word
            for _ in range(n):
                tok = self.lexer.token()
            tok = self.lexer.token()
        else:
            if debug:
                print("=>  %s" % (tok,))
            return tok
    return None
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000998
999 #
1000 # Parse a typedef, it records the type and its name.
1001 #
def parseTypedef(self, token):
    """Parse the declarators of a typedef and record each new name.

    token is the first token after the 'typedef' keyword.  The type is
    read with parseType(); each declared name is then added to the
    index as a "functype" (when parseType() found a signature), a
    "struct" (when the base type is exactly "struct") or a "typedef".
    Returns the token following the closing ';'.
    """
    if token is None:
        return None
    token = self.parseType(token)
    if token is None:
        self.error("parsing typedef")
        return None

    base_type = self.type
    cur_type = base_type
    while token is not None:
        if token[0] != "name":
            self.error("parsing typedef: expecting a name")
            return token

        name = token[1]
        signature = self.signature
        if signature is not None:
            # keep only the return type of the function type
            cur_type = cur_type.split('(')[0]
            d = self.mergeFunctionComment(name,
                    ((cur_type, None), signature), 1)
            self.index_add(name, self.filename, not self.is_header,
                           "functype", d)
        elif base_type == "struct":
            self.index_add(name, self.filename, not self.is_header,
                           "struct", cur_type)
            # later declarators refer to the now named structure
            base_type = "struct " + name
        else:
            self.index_add(name, self.filename, not self.is_header,
                           "typedef", cur_type)
        token = self.token()

        is_sep = token is not None and token[0] == 'sep'
        if is_sep and token[1] == ',':
            # next declarator: restart from the base type and collect
            # its operators (e.g. '*')
            cur_type = base_type
            token = self.token()
            while token is not None and token[0] == "op":
                cur_type = cur_type + token[1]
                token = self.token()
        elif is_sep and token[1] == ';':
            break
        elif token is not None and token[0] == 'name':
            cur_type = base_type
        else:
            self.error("parsing typedef: expecting ';'", token)
            return token

    token = self.token()
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001051
1052 #
1053 # Parse a C code block, used for functions it parse till
1054 # the balancing } included
1055 #
def parseBlock(self, token):
    """Skip a C code block up to and including its balancing '}'.

    token is the first token after the opening '{'.  Nested blocks are
    handled recursively.  When self.collect_ref is 1, identifiers
    found on the way are recorded with self.index_add_ref():
      - 'xml...' followed by '(' as a "function" reference
      - 'xml...' followed by a name and then ';', ',' or '=' as a
        "type" reference
      - 'XML_...' and 'LIBXML_...' as "typedef" references
    Returns the token following the closing '}', or None when the
    input ends first (a truncated block no longer raises TypeError).
    """
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            self.comment = None
            token = self.token()
            return token
        elif self.collect_ref == 1:
            oldtok = token
            token = self.token()
            if oldtok[0] != "name":
                continue
            ident = oldtok[1]
            if ident[0:3] == "xml":
                if token is None:
                    # end of input right after the identifier
                    continue
                if token[0] == "sep" and token[1] == "(":
                    self.index_add_ref(ident, self.filename,
                                       0, "function")
                    token = self.token()
                elif token[0] == "name":
                    token = self.token()
                    if token is not None and token[0] == "sep" and \
                       token[1] in (";", ",", "="):
                        self.index_add_ref(ident, self.filename,
                                           0, "type")
            elif ident[0:4] == "XML_":
                self.index_add_ref(ident, self.filename,
                                   0, "typedef")
            elif ident[0:7] == "LIBXML_":
                self.index_add_ref(ident, self.filename,
                                   0, "typedef")
        else:
            token = self.token()
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001090
1091 #
1092 # Parse a C struct definition till the balancing }
1093 #
def parseStruct(self, token):
    """Parse the fields of a C struct up to the balancing '}'.

    token is the first token after the opening '{'.  Each
    'type name;' field is collected as (type, name, comment) where
    comment is whatever self.comment holds once the token following
    the ';' has been read, i.e. a comment trailing the field.  Nested
    anonymous blocks are skipped with parseTypeBlock().  The list of
    fields is stored in self.struct_fields and the token following
    the '}' is returned (None if the input ends first).
    """
    fields = []
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseTypeBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            self.struct_fields = fields
            token = self.token()
            return token
        else:
            # parseType() overwrites self.type, restore it afterwards
            base_type = self.type
            token = self.parseType(token)
            if token is not None and token[0] == "name":
                fname = token[1]
                token = self.token()
                # token may be None here if the input is truncated
                if token is not None and token[0] == "sep" and \
                   token[1] == ";":
                    # reset before reading on, so that only a comment
                    # found after the ';' is attached to this field
                    self.comment = None
                    token = self.token()
                    fields.append((self.type, fname, self.comment))
                    self.comment = None
                else:
                    self.error("parseStruct: expecting ;", token)
            elif token is not None and token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseTypeBlock(token)
                if token is not None and token[0] == "name":
                    token = self.token()
                if token is not None and token[0] == "sep" and token[1] == ";":
                    token = self.token()
                else:
                    self.error("parseStruct: expecting ;", token)
            else:
                self.error("parseStruct: name", token)
                token = self.token()
            self.type = base_type
    self.struct_fields = fields
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001139
1140 #
1141 # Parse a C enum block, parse till the balancing }
1142 #
def parseEnumBlock(self, token):
    """Parse the body of a C enum up to the balancing '}'.

    token is the first token after the opening '{'.  The enumerators
    are stored in self.enums as (name, value, comment) tuples, value
    being a string.  An enumerator with an explicit '= expr' keeps the
    expression text; otherwise the value is the previous one plus one,
    starting at 0 for the first enumerator as in C.  When the previous
    value is not a plain integer a warning is issued and the value is
    left empty.  Returns the token following the '}' (None if the
    input ends first).
    """
    self.enums = []
    name = None
    self.comment = None
    comment = ""
    # one less than the implicit value of the first enumerator (0)
    value = "-1"
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseTypeBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            if name is not None:
                if self.comment is not None:
                    comment = self.comment
                    self.comment = None
                self.enums.append((name, value, comment))
            token = self.token()
            return token
        elif token[0] == "name":
            # a new enumerator starts: flush the previous one with the
            # comment collected since
            if name is not None:
                if self.comment is not None:
                    comment = self.comment.strip()
                    self.comment = None
                self.enums.append((name, value, comment))
            name = token[1]
            comment = ""
            token = self.token()
            if token is not None and token[0] == "op" and token[1][0] == "=":
                value = ""
                if len(token[1]) > 1:
                    # the operator token carries the start of the value
                    value = token[1][1:]
                token = self.token()
                while token is not None and (token[0] != "sep" or
                      (token[1] != ',' and token[1] != '}')):
                    value = value + token[1]
                    token = self.token()
            else:
                try:
                    value = "%d" % (int(value) + 1)
                except ValueError:
                    self.warning("Failed to compute value of enum %s" % (name))
                    value = ""
            if token is not None and token[0] == "sep" and token[1] == ",":
                token = self.token()
        else:
            token = self.token()
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001190
1191 #
1192 # Parse a C definition block, used for structs it parse till
1193 # the balancing }
1194 #
def parseTypeBlock(self, token):
    """Skip a brace delimited definition block.

    token is the first token after the opening '{'.  Everything up to
    the balancing '}' is discarded, nested blocks being skipped
    recursively.  Returns the token following the '}', or None when
    the input ends first.
    """
    while token is not None:
        is_sep = token[0] == "sep"
        if is_sep and token[1] == "{":
            token = self.parseTypeBlock(self.token())
        elif is_sep and token[1] == "}":
            return self.token()
        else:
            token = self.token()
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001206
1207 #
1208 # Parse a type: the fact that the type name can either occur after
1209 # the definition or within the definition makes it a little harder
1210 # if inside, the name token is pushed back before returning
1211 #
def parseType(self, token):
    """Parse a C type starting at token and store its text in self.type.

    self.type, self.struct_fields and self.signature are reset first.
    The type name can occur after the definition or within it; when
    the declared name is found inside (function pointers, arrays, bit
    fields, structs) the token following it is pushed back on the
    lexer and the name token is returned.  Otherwise the token
    following the type is returned.  For a function type
    self.signature is filled in by parseSignature().

    'long' and 'short' may be followed by 'int' ('long' also by a
    second 'long' and by 'double'); those words are folded into the
    type instead of being mistaken for the declared name.
    """
    self.type = ""
    self.struct_fields = []
    self.signature = None
    if token is None:
        return token

    # leading qualifiers
    while token is not None and token[0] == "name" and \
          token[1] in ("const", "unsigned", "signed"):
        if self.type == "":
            self.type = token[1]
        else:
            self.type = self.type + " " + token[1]
        token = self.token()

    if token is None or token[0] != "name":
        self.error("parsing type %s: expecting a name" % (self.type),
                   token)
        return token

    if token[1] == "long" or token[1] == "short":
        if self.type == "":
            self.type = token[1]
        else:
            self.type = self.type + " " + token[1]
        #
        # some read ahead for the multi word integer types, a token
        # which is not part of the type is pushed back on the lexer
        #
        if token[1] == "long":
            followers = [("long",), ("int", "double")]
        else:
            followers = [("int",)]
        for words in followers:
            ahead = self.token()
            if ahead is not None and ahead[0] == "name" and \
               ahead[1] in words:
                self.type = self.type + " " + ahead[1]
            elif ahead is not None:
                self.lexer.push(ahead)

    elif token[1] == "struct":
        if self.type == "":
            self.type = token[1]
        else:
            self.type = self.type + " " + token[1]
        token = self.token()
        nametok = None
        if token is not None and token[0] == "name":
            nametok = token
            token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseStruct(token)
        elif token is not None and token[0] == "op" and token[1] == "*":
            if nametok is None:
                # 'struct *' without a structure name
                self.error("struct : expecting name", token)
                return token
            self.type = self.type + " " + nametok[1] + " *"
            token = self.token()
            while token is not None and token[0] == "op" and token[1] == "*":
                self.type = self.type + " *"
                token = self.token()
            if token is not None and token[0] == "name":
                nametok = token
                token = self.token()
            else:
                self.error("struct : expecting name", token)
                return token
        elif token is not None and token[0] == "name" and nametok is not None:
            self.type = self.type + " " + nametok[1]
            return token

        if nametok is not None:
            self.lexer.push(token)
            token = nametok
        return token

    elif token[1] == "enum":
        if self.type == "":
            self.type = token[1]
        else:
            self.type = self.type + " " + token[1]
        self.enums = []
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseEnumBlock(token)
        else:
            self.error("parsing enum: expecting '{'", token)
        enum_type = None
        if token is not None:
            if token[0] != "name":
                # anonymous enum: hand back a placeholder name
                self.lexer.push(token)
                token = ("name", "enum")
            else:
                enum_type = token[1]
        for enum in self.enums:
            self.index_add(enum[0], self.filename,
                           not self.is_header, "enum",
                           (enum[1], enum[2], enum_type))
        return token

    else:
        if self.type == "":
            self.type = token[1]
        else:
            self.type = self.type + " " + token[1]

    token = self.token()
    while token is not None and (token[0] == "op" or
          token[0] == "name" and token[1] == "const"):
        self.type = self.type + " " + token[1]
        token = self.token()

    #
    # if there is a parenthesis here, this means a function type
    #
    if token is not None and token[0] == "sep" and token[1] == '(':
        self.type = self.type + token[1]
        token = self.token()
        while token is not None and token[0] == "op" and token[1] == '*':
            self.type = self.type + token[1]
            token = self.token()
        if token is None or token[0] != "name":
            self.error("parsing function type, name expected", token)
            return token
        self.type = self.type + token[1]
        nametok = token
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == ')':
            self.type = self.type + token[1]
            token = self.token()
            if token is not None and token[0] == "sep" and token[1] == '(':
                token = self.token()
                # parseSignature() runs parseType() on each parameter,
                # which overwrites self.type
                saved_type = self.type
                token = self.parseSignature(token)
                self.type = saved_type
            else:
                self.error("parsing function type, '(' expected", token)
                return token
        else:
            self.error("parsing function type, ')' expected", token)
            return token
        self.lexer.push(token)
        token = nametok
        return token

    #
    # do some lookahead for arrays
    #
    if token is not None and token[0] == "name":
        nametok = token
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == '[':
            self.type = self.type + nametok[1]
            while token is not None and token[0] == "sep" and token[1] == '[':
                self.type = self.type + token[1]
                token = self.token()
                while token is not None and token[0] != 'sep' and \
                      token[1] != ']' and token[1] != ';':
                    self.type = self.type + token[1]
                    token = self.token()
            if token is not None and token[0] == 'sep' and token[1] == ']':
                self.type = self.type + token[1]
                token = self.token()
            else:
                self.error("parsing array type, ']' expected", token)
                return token
        elif token is not None and token[0] == "sep" and token[1] == ':':
            # remove :12 in case it's a limited int size
            token = self.token()
            token = self.token()
        self.lexer.push(token)
        token = nametok

    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001375
1376 #
1377 # Parse a signature: '(' has been parsed and we scan the type definition
1378 # up to the ')' included
def parseSignature(self, token):
    """Parse a function signature up to and including its ')'.

    The '(' has already been consumed and token is the first token
    after it.  Each parameter is read with parseType() and collected
    as a (type, name, None) tuple; name is None when only the type
    was given and "..." for a variable argument list.  A type-only
    parameter is recorded whether it is followed by ',' or by ')'.
    The list is stored in self.signature and the token following the
    ')' is returned.
    """
    signature = []
    if token is not None and token[0] == "sep" and token[1] == ')':
        self.signature = []
        token = self.token()
        return token
    while token is not None:
        token = self.parseType(token)
        if token is None:
            break
        if token[0] == "name":
            signature.append((self.type, token[1], None))
            token = self.token()
        elif token[0] == "sep" and (token[1] == ',' or token[1] == ')'):
            # only the type was provided
            if self.type == "...":
                signature.append((self.type, "...", None))
            else:
                signature.append((self.type, None, None))
        if token is not None and token[0] == "sep":
            if token[1] == ',':
                token = self.token()
                continue
            elif token[1] == ')':
                token = self.token()
                break
    self.signature = signature
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001408
1409 #
1410 # Parse a global definition, be it a type, variable or function
1411 # the extern "C" blocks are a bit nasty and require it to recurse.
1412 #
def parseGlobal(self, token):
    """Parse one global definition: a type, a variable or a function.

    token is the first token of the definition.  An 'extern "C" {'
    block is handled by recursing on each definition it contains.
    A typedef is delegated to parseTypedef().  Otherwise the type is
    read with parseType() and each declared name is added to the index
    as a "variable", a "struct" or a "function"; function bodies and
    brace initializers are skipped with parseBlock().
    Returns the token following the definition.
    """
    static = 0
    keyword = token[1]
    if keyword == 'extern':
        token = self.token()
        if token is None:
            return token
        if token[0] == 'string':
            if token[1] != 'C':
                return token
            token = self.token()
            if token is None:
                return token
            if token[0] == 'sep' and token[1] == "{":
                token = self.token()
                while token is not None and \
                      not (token[0] == 'sep' and token[1] == "}"):
                    if token[0] != 'name':
                        self.error(
                         "token %s %s unexpected at the top level" % (
                                token[0], token[1]))
                    token = self.parseGlobal(token)
                token = self.token()
                return token
    elif keyword == 'static':
        static = 1
        token = self.token()
        if token is None or token[0] != 'name':
            return token

    if token[1] == 'typedef':
        token = self.token()
        return self.parseTypedef(token)

    token = self.parseType(token)
    type_orig = self.type
    if token is None or token[0] != "name":
        return token
    decl_type = type_orig
    self.name = token[1]
    token = self.token()
    while token is not None and (token[0] == "sep" or token[0] == "op"):
        if token[0] == "sep" and token[1] == "[":
            # array: everything up to the ';' is added to the type
            decl_type = decl_type + token[1]
            token = self.token()
            while token is not None and \
                  not (token[0] == "sep" and token[1] == ";"):
                decl_type = decl_type + token[1]
                token = self.token()

        if token is not None and token[0] == "op" and token[1] == "=":
            #
            # Skip the initialization of the variable
            #
            token = self.token()
            if token[0] == 'sep' and token[1] == '{':
                token = self.token()
                token = self.parseBlock(token)
            else:
                self.comment = None
                while token is not None and (token[0] != "sep" or
                      (token[1] != ';' and token[1] != ',')):
                    token = self.token()
            self.comment = None
            if token is None or token[0] != "sep" or \
               (token[1] != ';' and token[1] != ','):
                self.error("missing ';' or ',' after value")

        if token is not None and token[0] == "sep":
            sep = token[1]
            if sep == ";":
                self.comment = None
                token = self.token()
                if decl_type == "struct":
                    self.index_add(self.name, self.filename,
                         not self.is_header, "struct", self.struct_fields)
                else:
                    self.index_add(self.name, self.filename,
                         not self.is_header, "variable", decl_type)
                break
            elif sep == "(":
                token = self.token()
                token = self.parseSignature(token)
                if token is None:
                    return None
                if token[0] == "sep" and token[1] == ";":
                    # prototype: a missing comment is not reported
                    d = self.mergeFunctionComment(self.name,
                            ((decl_type, None), self.signature), 1)
                    self.index_add(self.name, self.filename, static,
                                   "function", d)
                    token = self.token()
                elif token[0] == "sep" and token[1] == "{":
                    # definition: comment problems are reported unless
                    # the function is static
                    d = self.mergeFunctionComment(self.name,
                            ((decl_type, None), self.signature), static)
                    self.index_add(self.name, self.filename, static,
                                   "function", d)
                    token = self.token()
                    token = self.parseBlock(token)
            elif sep == ',':
                # several variables declared at once
                self.comment = None
                self.index_add(self.name, self.filename, static,
                               "variable", decl_type)
                decl_type = type_orig
                token = self.token()
                while token is not None and token[0] == "sep":
                    decl_type = decl_type + token[1]
                    token = self.token()
                if token is not None and token[0] == "name":
                    self.name = token[1]
                    token = self.token()
            else:
                break

    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001531
def parse(self):
    """Parse the whole file and return the resulting index.

    Every top level definition is handed to parseGlobal().  Once the
    tokens are exhausted the top comment is processed with
    parseTopComment() and self.index is returned.  When a token which
    is not a name shows up at the top level an error is raised through
    self.error(); if that call returns, one more parseGlobal() is
    attempted and None is returned.
    """
    self.warning("Parsing %s" % (self.filename))
    token = self.token()
    while token is not None:
        if token[0] != 'name':
            self.error("token %s %s unexpected at the top level" % (
                   token[0], token[1]))
            token = self.parseGlobal(token)
            return
        token = self.parseGlobal(token)
    self.parseTopComment(self.top_comment)
    return self.index
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001545
1546
1547class docBuilder:
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001548 """A documentation builder"""
def __init__(self, name, directories=['.'], excludes=[]):
    """Set up an empty documentation builder for project name.

    directories lists the directories to scan, excludes the file name
    fragments to skip in addition to the keys of ignored_files.  Both
    lists are copied, so neither the caller's lists nor the shared
    default values are aliased by the instance.
    """
    self.name = name
    self.directories = list(directories)
    # list() around keys() so the concatenation does not depend on
    # keys() returning a list
    self.excludes = list(excludes) + list(ignored_files.keys())
    self.modules = {}
    self.headers = {}
    self.idx = index()
    self.xref = {}
    self.index = {}
    # libxml2 keeps its files under the 'libxml' base name
    if name == 'libxml2':
        self.basename = 'libxml'
    else:
        self.basename = name
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001562
def indexString(self, id, str):
    """Add the words of str to the cross reference self.xref.

    str is split on whitespace after replacing the punctuation
    characters ' " / * [ ] ( ) < > & # , . ; by spaces.  A word is
    indexed under its exact spelling when it starts with an ASCII
    letter, is at least 3 characters long and is not 'and' or 'the'
    (compared case-insensitively); id is appended to the list of
    identifiers recorded for that word.  Nothing is done when str is
    None.
    """
    if str is None:
        return
    for ch in "'\"/*[]()<>&#,.;":
        str = str.replace(ch, ' ')
    for token in str.split():
        if token[0] not in string.ascii_letters:
            continue
        if len(token) < 3:
            continue
        lower = token.lower()
        # TODO: generalize this a bit
        if lower == 'and' or lower == 'the':
            continue
        if token in self.xref:
            self.xref[token].append(id)
        else:
            self.xref[token] = [id]
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001600
def analyze(self):
    """Print the number of headers and modules, then analyze the index."""
    nb_headers = len(self.headers.keys())
    nb_modules = len(self.modules.keys())
    summary = "Project %s : %d headers, %d modules" % (
        self.name, nb_headers, nb_modules)
    print(summary)
    self.idx.analyze()
1604
def scanHeaders(self):
    """Parse every registered header and merge its index into self.idx.

    The index produced for each header is also kept in self.headers
    under the header's key.
    """
    for header in list(self.headers.keys()):
        header_index = CParser(header).parse()
        self.headers[header] = header_index
        self.idx.merge(header_index)
1611
def scanModules(self):
    """Parse every registered C module and merge it into self.idx.

    The index produced for each module is kept in self.modules under
    the module's key and merged using merge_public().
    """
    for module in list(self.modules.keys()):
        module_index = CParser(module).parse()
        # module_index.analyze()
        self.modules[module] = module_index
        self.idx.merge_public(module_index)
1619
def scan(self):
    """Collect the C files and headers of all directories and parse them.

    For each directory the '*.c' files are registered in self.modules
    and the '*.h' files in self.headers, skipping any path containing
    one of the self.excludes strings.  scanHeaders() then
    scanModules() are run on the result.
    """
    for directory in self.directories:
        for (pattern, registry) in (("/*.c", self.modules),
                                    ("/*.h", self.headers)):
            for path in glob.glob(directory + pattern):
                for excl in self.excludes:
                    if path.find(excl) != -1:
                        break
                else:
                    # no exclusion matched
                    registry[path] = None
    self.scanHeaders()
    self.scanModules()
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001642
def modulename_file(self, file):
    """Return the base name of *file* without a trailing '.h' or '.c'.

    Any directory part is dropped; other extensions are left untouched.
    """
    base = os.path.basename(file)
    if base[-2:] in ('.h', '.c'):
        return base[:-2]
    return base
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001650
def serialize_enum(self, output, name):
    """Write the <enum> element for the enumeration value called *name*.

    The entry is looked up in ``self.idx.enums``.  When its ``info`` is
    set, item 0 is the value expression, item 1 the description and
    item 2 the type; every non-empty item becomes an attribute.

    The value expression is evaluated so that constant arithmetic such as
    ``1 << 3`` is written as its result.  If evaluation fails the raw
    text is written instead.
    """
    entry = self.idx.enums[name]
    output.write(" <enum name='%s' file='%s'" % (
        name, self.modulename_file(entry.header)))
    info = entry.info
    if info is not None:
        if info[0] is not None and info[0] != '':
            try:
                # NOTE(security): eval() runs on text taken from the parsed
                # C sources.  It gets empty namespaces and no builtins so
                # that an identifier which happens to match a Python
                # builtin, a local of this method or a module global
                # (e.g. ``len``, ``name``) is not resolved to a Python
                # object; it raises NameError and the raw text is kept.
                val = eval(info[0], {"__builtins__": {}}, {})
            except Exception:
                val = info[0]
            output.write(" value='%s'" % (val))
        if info[2] is not None and info[2] != '':
            output.write(" type='%s'" % info[2])
        if info[1] is not None and info[1] != '':
            output.write(" info='%s'" % escape(info[1]))
    output.write("/>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001668
def serialize_macro(self, output, name):
    """Write the <macro> element for the macro called *name*.

    The entry is looked up in ``self.idx.macros``.  Its ``info``, when
    set, is unpacked as ``(args, description)`` where every argument is a
    ``(name, description)`` pair.  Non-empty descriptions are written and
    passed to ``self.indexString`` under the macro's name.
    """
    entry = self.idx.macros[name]
    output.write(" <macro name='%s' file='%s'>\n" % (
        name, self.modulename_file(entry.header)))
    if entry.info is not None:
        try:
            (args, desc) = entry.info
            if desc is not None and desc != "":
                output.write(" <info>%s</info>\n" % (escape(desc)))
                self.indexString(name, desc)
            for arg in args:
                # Separate names: rebinding ``name`` here used to make the
                # argument text be indexed under the argument name rather
                # than under the macro.
                (arg_name, arg_desc) = arg
                if arg_desc is not None and arg_desc != "":
                    output.write(" <arg name='%s' info='%s'/>\n" % (
                        arg_name, escape(arg_desc)))
                    self.indexString(name, arg_desc)
                else:
                    output.write(" <arg name='%s'/>\n" % (arg_name))
        except Exception:
            # Best effort: a malformed info record leaves the element
            # with whatever details were written before the failure.
            pass
    output.write(" </macro>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001690
def serialize_typedef(self, output, name):
    """Write the <typedef> or <struct> element for the typedef *name*.

    A typedef whose ``info`` starts with ``'struct '`` is written as a
    <struct>.  If ``self.idx.structs`` has an entry for the structure
    name with a tuple or list as ``info``, one <field> is written per
    item (read as type, name, description); otherwise the element is
    left empty.  Any other typedef becomes a single <typedef> element.
    """
    entry = self.idx.typedefs[name]
    if entry.info[:7] != 'struct ':
        output.write(" <typedef name='%s' file='%s' type='%s'/>\n" % (
            name, self.modulename_file(entry.header), entry.info))
        return

    output.write(" <struct name='%s' file='%s' type='%s'" % (
        name, self.modulename_file(entry.header), entry.info))
    # From here on the structure is referred to by the name that follows
    # the 'struct ' prefix, not by the typedef name.
    struct_name = entry.info[7:]
    structs = self.idx.structs
    if not (struct_name in structs and
            type(structs[struct_name].info) in (tuple, list)):
        output.write("/>\n")
        return

    output.write(">\n")
    try:
        for field in structs[struct_name].info:
            comment = field[2]
            self.indexString(struct_name, comment)
            if comment is None:
                text = ''
            else:
                text = escape(comment)
            output.write(" <field name='%s' type='%s' info='%s'/>\n" % (
                field[1], field[0], text))
    except:
        print("Failed to serialize struct %s" % (struct_name))
    output.write(" </struct>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001718
def serialize_variable(self, output, name):
    """Write the <variable> element for the global variable *name*.

    The ``type`` attribute is only present when the entry found in
    ``self.idx.variables`` carries an ``info`` value.
    """
    entry = self.idx.variables[name]
    module = self.modulename_file(entry.header)
    if entry.info is None:
        output.write(" <variable name='%s' file='%s'/>\n" % (
            name, module))
        return
    output.write(" <variable name='%s' file='%s' type='%s'/>\n" % (
        name, module, entry.info))
Daniel Veillardc1eed322002-12-12 11:01:32 +00001727
def serialize_function(self, output, name):
    """Write the element describing the function (or functype) *name*.

    The element tag is the entry's ``type``.  Conditionals, when set, are
    joined with an escaped ``&&`` into one <cond> element.  ``info`` is
    unpacked as ``(return, params, description)``; the description, the
    return value and every non-void parameter are written, and their
    texts are passed to ``self.indexString`` under the function name.
    A failure while writing these details is reported on stdout and the
    element is still closed.
    """
    entry = self.idx.functions[name]
    if name == debugsym:
        print("=> %s" % (entry,))

    output.write(" <%s name='%s' file='%s' module='%s'>\n" % (
        entry.type, name,
        self.modulename_file(entry.header),
        self.modulename_file(entry.module)))
    #
    # Processing of conditionals modified by Bill 1/1/05
    #
    if entry.conditionals is not None:
        joined = ""
        for cond in entry.conditionals:
            # The separator is only added once something has been
            # accumulated, exactly as before.
            if joined:
                joined += " &amp;&amp; "
            joined += cond
        output.write(" <cond>%s</cond>\n"% (joined))
    try:
        (ret, params, desc) = entry.info
        output.write(" <info>%s</info>\n" % (escape(desc)))
        self.indexString(name, desc)
        if ret[0] is not None:
            if ret[0] == "void":
                output.write(" <return type='void'/>\n")
            else:
                output.write(" <return type='%s' info='%s'/>\n" % (
                    ret[0], escape(ret[1])))
                self.indexString(name, ret[1])
        for param in params:
            if param[0] == 'void':
                continue
            if param[2] is None:
                output.write(" <arg name='%s' type='%s' info=''/>\n" % (
                    param[1], param[0]))
            else:
                output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (
                    param[1], param[0], escape(param[2])))
                self.indexString(name, param[2])
    except:
        # Same text as the two-item print statement: message, one
        # separating space, then the repr of the info record.
        print("Failed to save function %s info: " % name
              + " " + repr(entry.info))
    output.write(" </%s>\n" % (entry.type))
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001768
def serialize_exports(self, output, file):
    """Write the <file> element listing everything exported by header *file*.

    The header's ``info`` mapping, when set, provides the summary,
    description and author elements; a missing one is reported on stdout.
    A description containing ``DEPRECATED`` adds a <deprecated/> marker.
    Exported symbols follow, grouped by kind and sorted by name.
    """
    module = self.modulename_file(file)
    output.write(" <file name='%s'>\n" % (module))
    index = self.headers[file]
    info = index.info
    if info is not None:
        for field in ('Summary', 'Description', 'Author'):
            tag = field.lower()
            try:
                output.write(" <%s>%s</%s>\n" % (
                    tag, escape(info[field]), tag))
            except:
                print("Header %s lacks a %s description" % (module, field))
        if 'Description' in info:
            if "DEPRECATED" in info['Description']:
                output.write(" <deprecated/>\n")

    # Macros are sometime used to masquerade other types: a macro that is
    # also known under another kind is listed under that kind only.
    masking = (index.functions, index.variables, index.typedefs,
               index.structs, index.enums)
    for symbol in uniq(sorted(index.macros.keys())):
        masked = False
        for table in masking:
            if symbol in table:
                masked = True
                break
        if not masked:
            output.write(" <exports symbol='%s' type='macro'/>\n" % (symbol))

    sections = (
        (index.enums, " <exports symbol='%s' type='enum'/>\n"),
        (index.typedefs, " <exports symbol='%s' type='typedef'/>\n"),
        (index.structs, " <exports symbol='%s' type='struct'/>\n"),
        (index.variables, " <exports symbol='%s' type='variable'/>\n"),
        (index.functions, " <exports symbol='%s' type='function'/>\n"),
    )
    for table, template in sections:
        for symbol in uniq(sorted(table.keys())):
            output.write(template % (symbol))
    output.write(" </file>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001823
def serialize_xrefs_files(self, output):
    """Write one <file> element per header with a <ref> for each symbol.

    Headers are visited in sorted order.  The symbols of a header are the
    de-duplicated, sorted names of its functions, variables, macros,
    typedefs, structs and enums.
    """
    for path in sorted(self.headers.keys()):
        output.write(" <file name='%s'>\n" % (self.modulename_file(path)))
        index = self.headers[path]
        symbols = []
        for table in (index.functions, index.variables, index.macros,
                      index.typedefs, index.structs, index.enums):
            symbols.extend(table.keys())
        for symbol in sorted(uniq(symbols)):
            output.write(" <ref name='%s'/>\n" % (symbol))
        output.write(" </file>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001839
def serialize_xrefs_functions(self, output):
    """Write, for each parameter type, the functions taking that type.

    Functions are grouped by the type of every non-void parameter;
    entries whose ``info`` cannot be unpacked are ignored.  The empty
    type, ``void``, ``int``, ``char *`` and ``const char *`` are not
    listed.  Types and function names are sorted, and a function with
    several parameters of the same type is referenced once.
    """
    by_type = {}
    for name in self.idx.functions.keys():
        entry = self.idx.functions[name]
        try:
            (ret, params, desc) = entry.info
            for param in params:
                if param[0] == 'void':
                    continue
                by_type.setdefault(param[0], []).append(name)
        except:
            pass

    skipped = ('', 'void', "int", "char *", "const char *")
    for type_name in sorted(by_type.keys()):
        if type_name in skipped:
            continue
        output.write(" <type name='%s'>\n" % (type_name))
        previous = ''  # not sure why we have dups, but get rid of them!
        for func in sorted(by_type[type_name]):
            if func != previous:
                output.write(" <ref name='%s'/>\n" % (func))
                previous = func
        output.write(" </type>\n")
1870
def serialize_xrefs_constructors(self, output):
    """Write, for each return type, the functions returning that type.

    Functions returning ``void`` and entries whose ``info`` cannot be
    unpacked are ignored.  The empty type, ``void``, ``int``, ``char *``
    and ``const char *`` are not listed.  Types and function names are
    written in sorted order.
    """
    by_type = {}
    for name in self.idx.functions.keys():
        entry = self.idx.functions[name]
        try:
            (ret, params, desc) = entry.info
            if ret[0] == "void":
                continue
            by_type.setdefault(ret[0], []).append(name)
        except:
            pass

    skipped = ('', 'void', "int", "char *", "const char *")
    for type_name in sorted(by_type.keys()):
        if type_name in skipped:
            continue
        output.write(" <type name='%s'>\n" % (type_name))
        for func in sorted(by_type[type_name]):
            output.write(" <ref name='%s'/>\n" % (func))
        output.write(" </type>\n")
1897
def serialize_xrefs_alpha(self, output):
    """Write every identifier, sorted and grouped by its first character.

    Each run of identifiers sharing a first character is wrapped in a
    <letter> element; nothing is written when there are no identifiers.
    """
    current = None
    for ident in sorted(self.idx.identifiers.keys()):
        initial = ident[0]
        if initial != current:
            # Close the previous group before opening the next one.
            if current is not None:
                output.write(" </letter>\n")
            current = initial
            output.write(" <letter name='%s'>\n" % (current))
        output.write(" <ref name='%s'/>\n" % (ident))
    if current is not None:
        output.write(" </letter>\n")
1911
def serialize_xrefs_references(self, output):
    """Write one <reference> per identifier with the link to its HTML page.

    The link is built from ``self.basename``, the module name of the
    header that declares the identifier, and the identifier itself as
    the fragment.  Identifiers are written in sorted order.
    """
    identifiers = self.idx.identifiers
    for ident in sorted(identifiers.keys()):
        page = self.modulename_file(identifiers[ident].header)
        href = 'html/' + self.basename + '-' + page + '.html#' + ident
        output.write(" <reference name='%s' href='%s'/>\n" % (ident, href))
1922
def serialize_xrefs_index(self, output):
    # Write the word index held in self.xref (word -> list of names) as
    # <chunk>/<letter>/<word>/<ref> elements, followed by a <chunks>
    # summary naming each chunk with its first and last letter.
    #
    # NOTE(review): the nesting below was reconstructed from a listing
    # whose indentation had been lost; confirm it against the real file
    # before relying on the exact chunk boundaries.
    index = self.xref
    typ = index.keys()
    typ.sort()
    letter = None   # first character of the word group currently open
    count = 0       # refs written since the current chunk was opened
    chunk = 0       # number given to the next chunk to open
    chunks = []     # [name, first letter, last letter] of closed chunks
    for id in typ:
        # Words referenced from more than 30 places are left out.
        if len(index[id]) > 30:
            continue
        if id[0] != letter:
            # A chunk is opened at a letter change, either for the very
            # first word or once more than 200 refs have been written.
            if letter == None or count > 200:
                if letter != None:
                    output.write(" </letter>\n")
                    output.write(" </chunk>\n")
                    count = 0
                    # 'chunk' already points at the next chunk, hence -1;
                    # 'letter' is still the last letter of the closed one.
                    chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
                output.write(" <chunk name='chunk%s'>\n" % (chunk))
                first_letter = id[0]
                chunk = chunk + 1
            # Always true here: the branch above handled letter == None.
            elif letter != None:
                output.write(" </letter>\n")
            letter = id[0]
            output.write(" <letter name='%s'>\n" % (letter))
        output.write(" <word name='%s'>\n" % (id))
        tokens = index[id];
        # Sorts the list stored in self.xref in place.
        tokens.sort()
        tok = None
        for token in tokens:
            # Equal names are adjacent after sorting; write each once.
            if tok == token:
                continue
            tok = token
            output.write(" <ref name='%s'/>\n" % (token))
            count = count + 1
        output.write(" </word>\n")
    # Nothing at all is written when no word was emitted.
    if letter != None:
        output.write(" </letter>\n")
        output.write(" </chunk>\n")
        if count != 0:
            chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
        output.write(" <chunks>\n")
        for ch in chunks:
            output.write(" <chunk name='%s' start='%s' end='%s'/>\n" % (
                ch[0], ch[1], ch[2]))
        output.write(" </chunks>\n")
1969
def serialize_xrefs(self, output):
    """Write every cross-reference section, each inside its own element.

    The sections are, in order: references, alpha, constructors,
    functions, files and index.
    """
    sections = (
        (" <references>\n", self.serialize_xrefs_references,
         " </references>\n"),
        (" <alpha>\n", self.serialize_xrefs_alpha,
         " </alpha>\n"),
        (" <constructors>\n", self.serialize_xrefs_constructors,
         " </constructors>\n"),
        (" <functions>\n", self.serialize_xrefs_functions,
         " </functions>\n"),
        (" <files>\n", self.serialize_xrefs_files,
         " </files>\n"),
        (" <index>\n", self.serialize_xrefs_index,
         " </index>\n"),
    )
    for opening, write_section, closing in sections:
        output.write(opening)
        write_section(output)
        output.write(closing)
1989
def serialize(self):
    """Write the API description and the cross references as XML files.

    Two files named after ``self.name`` are produced:

    * ``<name>-api.xml``: one <file> entry per header (sorted), then all
      macros, enums, typedefs, variables and functions of ``self.idx``,
      each group sorted by name;
    * ``<name>-refs.xml``: the output of ``self.serialize_xrefs``.

    Each file is closed even when one of the serialization helpers
    raises, so a failure does not leak the handle.
    """
    filename = "%s-api.xml" % self.name
    print("Saving XML description %s" % (filename))
    output = open(filename, "w")
    try:
        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
        output.write("<api name='%s'>\n" % self.name)
        output.write(" <files>\n")
        for header in sorted(self.headers.keys()):
            self.serialize_exports(output, header)
        output.write(" </files>\n")
        output.write(" <symbols>\n")
        # Symbol kinds in the order they appear in the document.
        groups = (
            (self.idx.macros, self.serialize_macro),
            (self.idx.enums, self.serialize_enum),
            (self.idx.typedefs, self.serialize_typedef),
            (self.idx.variables, self.serialize_variable),
            (self.idx.functions, self.serialize_function),
        )
        for table, write_symbol in groups:
            for symbol in sorted(table.keys()):
                write_symbol(output, symbol)
        output.write(" </symbols>\n")
        output.write("</api>\n")
    finally:
        output.close()

    filename = "%s-refs.xml" % self.name
    print("Saving XML Cross References %s" % (filename))
    output = open(filename, "w")
    try:
        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
        output.write("<apirefs name='%s'>\n" % self.name)
        self.serialize_xrefs(output)
        output.write("</apirefs>\n")
    finally:
        output.close()
Daniel Veillarda9b66d02002-12-11 14:23:49 +00002035
2036
def rebuild():
    """Guess which project surrounds the current directory and rebuild its API.

    The project is recognised by the first marker source file found:
    ``parser.c`` or ``../parser.c`` for libxml2, ``../libxslt/transform.c``
    for libxslt.  The matching builder is scanned, analyzed and
    serialized.  When ``../libexslt/exslt.c`` exists as well, a second
    builder does the same for libexslt.

    Returns the main builder, or None when no marker file is found.
    """
    # (marker file, message, project name, directories, excluded names)
    candidates = (
        ("parser.c", "Rebuilding API description for libxml2",
         "libxml2", [".", "."],
         ["xmlwin32version.h", "tst.c"]),
        ("../parser.c", "Rebuilding API description for libxml2",
         "libxml2", ["..", "../include/libxml"],
         ["xmlwin32version.h", "tst.c"]),
        ("../libxslt/transform.c", "Rebuilding API description for libxslt",
         "libxslt", ["../libxslt"],
         ["win32config.h", "libxslt.h", "tst.c"]),
    )
    for marker, message, project, directories, excludes in candidates:
        if glob.glob(marker):
            print(message)
            builder = docBuilder(project, directories, excludes)
            break
    else:
        print("rebuild() failed, unable to guess the module")
        return None

    builder.scan()
    builder.analyze()
    builder.serialize()
    if glob.glob("../libexslt/exslt.c"):
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize()
    return builder
2063
2064#
2065# for debugging the parser
2066#
def parse(filename):
    """Parse the single C file *filename* and return the resulting index."""
    return CParser(filename).parse()
2071
if __name__ == "__main__":
    # Without arguments the whole API description is rebuilt; with one,
    # only that file is parsed, with the module-level debug flag set.
    if len(sys.argv) <= 1:
        rebuild()
    else:
        debug = 1
        parse(sys.argv[1])