blob: f1a795fd103b8fb5547d76fde86ad427c16b4838 [file] [log] [blame]
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001#!/usr/bin/python -u
2#
# This is the API builder: it parses the C sources and builds the
# formal API description in XML.
5#
6# See Copyright for the status of this software.
7#
8# daniel@veillard.com
9#
Daniel Veillard540a31a2003-01-21 11:21:07 +000010import os, sys
Daniel Veillarda9b66d02002-12-11 14:23:49 +000011import string
12import glob
13
# Global debugging switches.
# debug:    numeric flag, 0 by default (its consumers are not in this part
#           of the file).
# debugsym: when set to a symbol name, identifier/index code prints a trace
#           line every time that symbol is defined or updated.
debug = 0
#debugsym = 'ignorableWhitespaceSAXFunc'
debugsym = None
Daniel Veillarde8ba84e2003-11-18 13:54:15 +000017
Daniel Veillardde0a0a52003-04-24 17:12:57 +000018#
19# C parser analysis code
20#
# Files which must not be analyzed, mapped to the reason they are skipped.
ignored_files = {
    # trio portability layer
    "trio": "too many non standard macros",
    "trio.c": "too many non standard macros",
    "trionan.c": "too many non standard macros",
    "triostr.c": "too many non standard macros",
    # generated or internal headers
    "acconfig.h": "generated portability layer",
    "config.h": "generated portability layer",
    "libxml.h": "internal only",
    "elfgcchack.h": "not a normal header",
    # out of memory tester
    "testOOM.c": "out of memory tester",
    "testOOMlib.h": "out of memory tester",
    "testOOMlib.c": "out of memory tester",
    # code which is not integrated in the library
    "rngparser.c": "not yet integrated",
    "rngparser.h": "not yet integrated",
    "tst.c": "not part of the library",
    # command line test tools
    "testHTML.c": "test tool",
    "testReader.c": "test tool",
    "testSchemas.c": "test tool",
    "testXPath.c": "test tool",
    "testAutomata.c": "test tool",
    "testModule.c": "test tool",
    "testRegexp.c": "test tool",
    "testThreads.c": "test tool",
    "testC14N.c": "test tool",
    "testRelax.c": "test tool",
    "testThreadsWin32.c": "test tool",
    "testSAX.c": "test tool",
    "testURI.c": "test tool",
    # regression test programs
    "testapi.c": "generated regression tests",
    "runtest.c": "regression tests program",
    "runsuite.c": "regression tests program",
    "testdso.c": "test for dynamid shared libraries",
}
54
# Words the C parser has to ignore, mapped to a (count, reason) pair.
# NOTE(review): the count presumably is the number of following tokens to
# drop together with the word (3 for "__declspec", 5 for "X_IN_Y"); the
# code consuming this table is not in this part of the file - confirm.
ignored_words = {
    # Windows specific keywords
    "WINAPI": (0, "Windows keyword"),
    "__declspec": (3, "Windows keyword"),
    "__stdcall": (0, "Windows keyword"),
    "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
    # export markers for variables
    "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
    "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
    "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
    # export markers for functions
    "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
    "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
    "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
    # calling convention markers
    "XMLCALL": (0, "Special macro for win32 calls"),
    "XSLTCALL": (0, "Special macro for win32 calls"),
    "XMLCDECL": (0, "Special macro for win32 calls"),
    "EXSLTCALL": (0, "Special macro for win32 calls"),
    # other macros
    "ATTRIBUTE_UNUSED": (0, "macro keyword"),
    "LIBEXSLT_PUBLIC": (0, "macro keyword"),
    "X_IN_Y": (5, "macro function builder"),
}
74
def escape(raw):
    """Return raw with the five XML special characters replaced by their
    predefined entities, so the text can be used in XML content or in an
    attribute value delimited by either quote character.

    The ampersand has to be handled first, otherwise the '&' introduced
    by the other replacements would be escaped a second time.
    """
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
82
def uniq(items):
    """Return a list holding each distinct element of items exactly once.

    The elements must be hashable.  The order of the result is whatever
    order the underlying dictionary gives and must not be relied upon.
    A real list is always returned (and not a dictionary view), so the
    caller can sort or index it on any Python version.
    """
    d = {}
    for item in items:
        d[item] = 1
    return list(d.keys())
88
class identifier:
    """Description of one symbol collected while parsing the C sources.

    Attributes:
      name          the symbol name
      header        the header declaring the symbol, None if unknown
      module        the module (file) the symbol was found in
      type          the kind of symbol, the index class registers
                    "function", "functype", "variable", "include",
                    "struct", "enum", "typedef" and "macro"
      info, extra   parser provided data, their layout depends on type
      lineno        line number where the symbol was seen
      static        1 if the symbol was flagged as static, 0 otherwise
      conditionals  private copy of the list of preprocessor conditions
                    guarding the symbol, None when there is none
    """
    def __init__(self, name, header=None, module=None, type=None, lineno = 0,
                 info=None, extra=None, conditionals = None):
        self.name = name
        self.header = header
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.lineno = lineno
        self.static = 0
        # normalizes an empty list to None and copies the list otherwise
        self.set_conditionals(conditionals)
        if self.name == debugsym:
            print("=> define %s : %s" % (debugsym, (module, type, info,
                                                   extra, conditionals)))

    def __repr__(self):
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module is not None:
            r = r + " from %s" % (self.module)
        if self.info is not None:
            r = r + " " + repr(self.info)
        if self.extra is not None:
            r = r + " " + repr(self.extra)
        if self.conditionals is not None:
            r = r + " " + repr(self.conditionals)
        return r

    def set_header(self, header):
        self.header = header
    def set_module(self, module):
        self.module = module
    def set_type(self, type):
        self.type = type
    def set_info(self, info):
        self.info = info
    def set_extra(self, extra):
        self.extra = extra
    def set_lineno(self, lineno):
        self.lineno = lineno
    def set_static(self, static):
        self.static = static
    def set_conditionals(self, conditionals):
        # keep a private copy, the caller goes on modifying its own list
        if conditionals is None or len(conditionals) == 0:
            self.conditionals = None
        else:
            self.conditionals = conditionals[:]

    def get_name(self):
        return self.name
    def get_header(self):
        # used to return self.module by mistake
        return self.header
    def get_module(self):
        return self.module
    def get_type(self):
        return self.type
    def get_info(self):
        return self.info
    def get_lineno(self):
        return self.lineno
    def get_extra(self):
        return self.extra
    def get_static(self):
        return self.static
    def get_conditionals(self):
        return self.conditionals

    def update(self, header, module, type = None, info = None, extra=None,
               conditionals=None):
        """Merge the informations of a new occurrence of the symbol.

        header and type are only set when still unknown; module is set
        when unknown or when it is still equal to the header; info,
        extra and conditionals are overwritten by any non-None value.
        """
        if self.name == debugsym:
            print("=> update %s : %s" % (debugsym, (module, type, info,
                                                   extra, conditionals)))
        if header is not None and self.header is None:
            # used to store module in the header field
            self.set_header(header)
        if module is not None and (self.module is None or
                                   self.header == self.module):
            self.set_module(module)
        if type is not None and self.type is None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
        if conditionals is not None:
            self.set_conditionals(conditionals)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000179
class index:
    """A set of symbols, kept both in one global table and per kind.

    identifiers holds every symbol registered with add() or add_ref(),
    functions, variables, includes, structs, enums, typedefs and macros
    hold the symbols registered by add() according to their type,
    references holds the ones registered by add_ref(), and info is a
    free form dictionary (the parser stores the top comment fields there).
    """
    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}
        self.info = {}

    def _register(self, name, header, module, static, type, lineno, info,
                  extra, conditionals):
        # Return the identifier for name, updating the existing one or
        # creating it.  The previous code passed lineno as an extra
        # argument to identifier.update(); the resulting TypeError was
        # swallowed by a bare except and the known symbol was silently
        # replaced by a new one instead of being updated.
        try:
            d = self.identifiers[name]
        except KeyError:
            d = identifier(name, header, module, type, lineno, info, extra,
                           conditionals)
            self.identifiers[name] = d
        else:
            d.update(header, module, type, info, extra, conditionals)
            # update() has no lineno parameter, keep the latest position
            d.set_lineno(lineno)
        if static == 1:
            d.set_static(1)
        return d

    def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a reference to a symbol; names starting with '__' are
        ignored and None is returned for them.  The identifier is stored
        in self.references when type is not None."""
        if name[0:2] == '__':
            return None
        d = self._register(name, header, module, static, type, lineno, info,
                           extra, conditionals)

        if type is not None:
            self.references[name] = d

        if name == debugsym:
            print("New ref: %s" % (d))

        return d

    def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a symbol definition; names starting with '__' are
        ignored and None is returned for them.  The identifier is also
        stored in the table matching type, "functype" sharing the table
        of "function"."""
        if name[0:2] == '__':
            return None
        d = self._register(name, header, module, static, type, lineno, info,
                           extra, conditionals)

        if type is not None:
            tables = {
                "function": self.functions,
                "functype": self.functions,
                "variable": self.variables,
                "include": self.includes,
                "struct": self.structs,
                "enum": self.enums,
                "typedef": self.typedefs,
                "macro": self.macros,
            }
            table = tables.get(type)
            if table is None:
                print("Unable to register type  %s" % (type,))
            else:
                table[name] = d

        if name == debugsym:
            print("New symbol: %s" % (d))

        return d

    def _merge_table(self, kind, mine, theirs, overrides_macro = 0):
        # Copy the entries of theirs missing from mine, reporting the
        # ones present on both sides; mine always wins.
        for id in list(theirs.keys()):
            #
            # macro might be used to override functions or variables
            # definitions
            #
            if overrides_macro and id in self.macros:
                del self.macros[id]
            if id in mine:
                print("%s %s from %s redeclared in %s" % (
                      kind, id, mine[id].header, theirs[id].header))
            else:
                mine[id] = theirs[id]
                self.identifiers[id] = theirs[id]

    def merge(self, idx):
        """Import the functions, variables, structs, typedefs, macros and
        enums of idx, in that order.  Symbols already known are reported
        and left untouched."""
        self._merge_table("function", self.functions, idx.functions, 1)
        self._merge_table("variable", self.variables, idx.variables, 1)
        self._merge_table("struct", self.structs, idx.structs)
        self._merge_table("typedef", self.typedefs, idx.typedefs)
        for id in list(idx.macros.keys()):
            #
            # macro might be used to override functions or variables
            # definitions
            #
            if id in self.variables or id in self.functions or \
               id in self.enums:
                continue
            if id in self.macros:
                print("macro %s from %s redeclared in %s" % (
                      id, self.macros[id].header, idx.macros[id].header))
            else:
                self.macros[id] = idx.macros[id]
                self.identifiers[id] = idx.macros[id]
        self._merge_table("enum", self.enums, idx.enums)

    def merge_public(self, idx):
        """Complete the functions already known here (from the headers)
        with the informations idx has about them (from the C modules);
        functions only present in idx are ignored."""
        for id in list(idx.functions.keys()):
            if id in self.functions:
                up = idx.functions[id]
                # check that function condition agrees with header
                if up.conditionals != self.functions[id].conditionals:
                    print("Header condition differs from Function for %s:" \
                          % id)
                    print("  H: %s" % self.functions[id].conditionals)
                    print("  C: %s" % up.conditionals)
                self.functions[id].update(None, up.module, up.type, up.info, up.extra)
        #     else:
        #         print "Function %s from %s is not declared in headers" % (
        #               id, idx.functions[id].module)
        # TODO: do the same for variables.

    def analyze_dict(self, type, dict):
        """Print how many symbols dict holds and how many of them are
        public (not static); type is the label used in the message and
        nothing is printed for an empty table."""
        count = len(dict)
        public = 0
        for name in dict.keys():
            if dict[name].static == 0:
                public = public + 1
        if count != public:
            print("  %d %s , %d public" % (count, type, public))
        elif count != 0:
            print("  %d public %s" % (count, type))


    def analyze(self):
        """Print the statistics of the main symbol tables."""
        self.analyze_dict("functions", self.functions)
        self.analyze_dict("variables", self.variables)
        self.analyze_dict("structs", self.structs)
        self.analyze_dict("typedefs", self.typedefs)
        self.analyze_dict("macros", self.macros)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000358
class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line.

       input is any object with a readline() method.  token() returns
       (kind, value) pairs where kind is one of 'preproc', 'string',
       'comment', 'name', 'sep' or 'op', and None at the end of input."""

    # one character separators
    _SEPARATORS = "(){}:;,[]"
    # characters starting an operator; all but '.' can also be the second
    # character of a two characters operator
    _OPERATORS = "+-*><=/%&!|."
    # characters ending a name
    _NAME_END = " \t(){}:;,+-*/%&!|[]=><"

    def __init__(self, input):
        self.input = input
        self.tokens = []
        self.line = ""
        self.lineno = 0
        # last token returned; used to be left undefined until the first
        # token, which made debug() raise AttributeError
        self.last = None

    def getline(self):
        """Return the next non blank line, stripped and with the lines
        continued by a trailing backslash joined, or None at the end of
        the input."""
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = line.strip()
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = n.strip()
                if not n:
                    # blank line or end of input: stop joining; if nothing
                    # is left the outer loop reads the following line
                    break
                line = line + n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        """Put back a token, it will be the next one returned."""
        self.tokens.insert(0, token)

    def debug(self):
        print("Last token:  %s" % (self.last,))
        print("Token queue:  %s" % (self.tokens,))
        print("Line %d end:  %s" % (self.lineno, self.line))

    def _read_string(self, line):
        # line starts with a quote: collect the text up to the matching
        # unescaped quote, reading further lines if needed.  The value
        # of the token does not include the quotes.
        end = line[0]
        line = line[1:]
        found = 0
        tok = ""
        while found == 0:
            i = 0
            l = len(line)
            while i < l:
                if line[i] == end:
                    self.line = line[i+1:]
                    line = line[:i]
                    found = 1
                    break
                if line[i] == '\\':
                    # skip the escaped character
                    i = i + 1
                i = i + 1
            tok = tok + line
            if found == 0:
                line = self.getline()
                if line is None:
                    return None
        self.last = ('string', tok)
        return self.last

    def _read_block_comment(self, line):
        # line starts with '/*': collect the text up to the closing
        # '*/', the pieces coming from different lines being joined
        # with a newline.
        line = line[2:]
        found = 0
        tok = ""
        while found == 0:
            i = 0
            l = len(line)
            while i < l:
                if line[i] == '*' and i+1 < l and line[i+1] == '/':
                    self.line = line[i+2:]
                    # historical behaviour kept on purpose: the character
                    # preceding '*/' is dropped too, and a line made of
                    # '*/' alone yields '*'
                    line = line[:i-1]
                    found = 1
                    break
                i = i + 1
            if tok != "":
                tok = tok + "\n"
            tok = tok + line
            if found == 0:
                line = self.getline()
                if line is None:
                    return None
        self.last = ('comment', tok)
        return self.last

    def _split_code(self, line):
        # Queue the tokens of the code found at the start of line; the
        # part starting at the first comment or quote is saved in
        # self.line for the next round.
        l = len(line)
        i = 0
        while i < l:
            c = line[i]
            if c == '"' or c == "'" or \
               (c == '/' and line[i+1:i+2] in ('/', '*')):
                self.line = line[i:]
                line = line[:i]
                break
            i = i + 1
        l = len(line)
        i = 0
        while i < l:
            c = line[i]
            if c == ' ' or c == '\t':
                i = i + 1
                continue
            if c in self._SEPARATORS:
                self.tokens.append(('sep', c))
                i = i + 1
                continue
            if c in self._OPERATORS:
                if c == '.' and line[i:i+3] == '...':
                    self.tokens.append(('name', '...'))
                    i = i + 3
                    continue
                j = i + 1
                if j < l and line[j] != '.' and line[j] in self._OPERATORS:
                    self.tokens.append(('op', line[i:j+1]))
                    i = j + 1
                else:
                    self.tokens.append(('op', c))
                    i = i + 1
                continue
            # anything else starts a name running up to the next blank,
            # separator or operator ('.' does not end a name)
            s = i
            while i < l and line[i] not in self._NAME_END:
                i = i + 1
            self.tokens.append(('name', line[s:i]))

    def token(self):
        """Return the next token as a (kind, value) pair, or None when
        the input is exhausted (including in the middle of a string or
        of a comment)."""
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line is None:
                return None

            if line[0] == '#':
                # preprocessor line, split on blanks only; must be a
                # real list since it is compared to [] above
                self.tokens = [('preproc', word) for word in line.split()]
                break
            if line[0] == '"' or line[0] == "'":
                return self._read_string(line)
            if line[0:2] == '/*':
                return self._read_block_comment(line)
            if line[0:2] == '//':
                self.last = ('comment', line[2:])
                return self.last
            self._split_code(line)

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000564
565class CParser:
Daniel Veillardbe586972003-11-18 20:56:51 +0000566 """The C module parser"""
567 def __init__(self, filename, idx = None):
568 self.filename = filename
569 if len(filename) > 2 and filename[-2:] == '.h':
570 self.is_header = 1
571 else:
572 self.is_header = 0
573 self.input = open(filename)
574 self.lexer = CLexer(self.input)
575 if idx == None:
576 self.index = index()
577 else:
578 self.index = idx
579 self.top_comment = ""
580 self.last_comment = ""
581 self.comment = None
582 self.collect_ref = 0
Daniel Veillard1e906612003-12-05 14:57:46 +0000583 self.no_error = 0
Daniel Veillarda2351322004-06-27 12:08:10 +0000584 self.conditionals = []
585 self.defines = []
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000586
Daniel Veillardbe586972003-11-18 20:56:51 +0000587 def collect_references(self):
588 self.collect_ref = 1
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000589
Daniel Veillard1e906612003-12-05 14:57:46 +0000590 def stop_error(self):
591 self.no_error = 1
592
593 def start_error(self):
594 self.no_error = 0
595
Daniel Veillardbe586972003-11-18 20:56:51 +0000596 def lineno(self):
597 return self.lexer.getlineno()
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000598
Daniel Veillardbe586972003-11-18 20:56:51 +0000599 def index_add(self, name, module, static, type, info=None, extra = None):
Daniel Veillard5d4644e2005-04-01 13:11:58 +0000600 if self.is_header == 1:
601 self.index.add(name, module, module, static, type, self.lineno(),
602 info, extra, self.conditionals)
603 else:
604 self.index.add(name, None, module, static, type, self.lineno(),
605 info, extra, self.conditionals)
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000606
Daniel Veillardbe586972003-11-18 20:56:51 +0000607 def index_add_ref(self, name, module, static, type, info=None,
608 extra = None):
Daniel Veillard5d4644e2005-04-01 13:11:58 +0000609 if self.is_header == 1:
610 self.index.add_ref(name, module, module, static, type,
611 self.lineno(), info, extra, self.conditionals)
612 else:
613 self.index.add_ref(name, None, module, static, type, self.lineno(),
614 info, extra, self.conditionals)
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000615
Daniel Veillard1e906612003-12-05 14:57:46 +0000616 def warning(self, msg):
617 if self.no_error:
618 return
619 print msg
620
Daniel Veillardbe586972003-11-18 20:56:51 +0000621 def error(self, msg, token=-1):
Daniel Veillard1e906612003-12-05 14:57:46 +0000622 if self.no_error:
623 return
624
Daniel Veillardbe586972003-11-18 20:56:51 +0000625 print "Parse Error: " + msg
626 if token != -1:
627 print "Got token ", token
628 self.lexer.debug()
629 sys.exit(1)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000630
Daniel Veillardbe586972003-11-18 20:56:51 +0000631 def debug(self, msg, token=-1):
632 print "Debug: " + msg
633 if token != -1:
634 print "Got token ", token
635 self.lexer.debug()
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000636
Daniel Veillardbe586972003-11-18 20:56:51 +0000637 def parseTopComment(self, comment):
638 res = {}
639 lines = string.split(comment, "\n")
640 item = None
641 for line in lines:
642 while line != "" and (line[0] == ' ' or line[0] == '\t'):
643 line = line[1:]
644 while line != "" and line[0] == '*':
645 line = line[1:]
646 while line != "" and (line[0] == ' ' or line[0] == '\t'):
647 line = line[1:]
648 try:
649 (it, line) = string.split(line, ":", 1)
650 item = it
651 while line != "" and (line[0] == ' ' or line[0] == '\t'):
652 line = line[1:]
653 if res.has_key(item):
654 res[item] = res[item] + " " + line
655 else:
656 res[item] = line
657 except:
658 if item != None:
659 if res.has_key(item):
660 res[item] = res[item] + " " + line
661 else:
662 res[item] = line
663 self.index.info = res
664
665 def parseComment(self, token):
666 if self.top_comment == "":
667 self.top_comment = token[1]
668 if self.comment == None or token[1][0] == '*':
669 self.comment = token[1];
670 else:
671 self.comment = self.comment + token[1]
672 token = self.lexer.token()
Daniel Veillard1e906612003-12-05 14:57:46 +0000673
674 if string.find(self.comment, "DOC_DISABLE") != -1:
675 self.stop_error()
676
677 if string.find(self.comment, "DOC_ENABLE") != -1:
678 self.start_error()
679
Daniel Veillardbe586972003-11-18 20:56:51 +0000680 return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000681
Daniel Veillard99b78502005-09-11 22:04:08 +0000682 #
683 # Parse a comment block associate to a typedef
684 #
685 def parseTypeComment(self, name, quiet = 0):
686 if name[0:2] == '__':
687 quiet = 1
688
689 args = []
690 desc = ""
691
692 if self.comment == None:
693 if not quiet:
694 self.warning("Missing comment for type %s" % (name))
695 return((args, desc))
696 if self.comment[0] != '*':
697 if not quiet:
698 self.warning("Missing * in type comment for %s" % (name))
699 return((args, desc))
700 lines = string.split(self.comment, '\n')
701 if lines[0] == '*':
702 del lines[0]
703 if lines[0] != "* %s:" % (name):
704 if not quiet:
705 self.warning("Misformatted type comment for %s" % (name))
706 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
707 return((args, desc))
708 del lines[0]
709 while len(lines) > 0 and lines[0] == '*':
710 del lines[0]
711 desc = ""
712 while len(lines) > 0:
713 l = lines[0]
714 while len(l) > 0 and l[0] == '*':
715 l = l[1:]
716 l = string.strip(l)
717 desc = desc + " " + l
718 del lines[0]
719
720 desc = string.strip(desc)
721
722 if quiet == 0:
723 if desc == "":
724 self.warning("Type comment for %s lack description of the macro" % (name))
725
726 return(desc)
727 #
728 # Parse a comment block associate to a macro
729 #
Daniel Veillardbe586972003-11-18 20:56:51 +0000730 def parseMacroComment(self, name, quiet = 0):
731 if name[0:2] == '__':
732 quiet = 1
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000733
Daniel Veillardbe586972003-11-18 20:56:51 +0000734 args = []
735 desc = ""
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000736
Daniel Veillardbe586972003-11-18 20:56:51 +0000737 if self.comment == None:
738 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000739 self.warning("Missing comment for macro %s" % (name))
Daniel Veillardbe586972003-11-18 20:56:51 +0000740 return((args, desc))
741 if self.comment[0] != '*':
742 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000743 self.warning("Missing * in macro comment for %s" % (name))
Daniel Veillardbe586972003-11-18 20:56:51 +0000744 return((args, desc))
745 lines = string.split(self.comment, '\n')
746 if lines[0] == '*':
747 del lines[0]
748 if lines[0] != "* %s:" % (name):
749 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000750 self.warning("Misformatted macro comment for %s" % (name))
751 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
Daniel Veillardbe586972003-11-18 20:56:51 +0000752 return((args, desc))
753 del lines[0]
754 while lines[0] == '*':
755 del lines[0]
756 while len(lines) > 0 and lines[0][0:3] == '* @':
757 l = lines[0][3:]
758 try:
759 (arg, desc) = string.split(l, ':', 1)
760 desc=string.strip(desc)
761 arg=string.strip(arg)
762 except:
763 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000764 self.warning("Misformatted macro comment for %s" % (name))
765 self.warning(" problem with '%s'" % (lines[0]))
Daniel Veillardbe586972003-11-18 20:56:51 +0000766 del lines[0]
767 continue
768 del lines[0]
769 l = string.strip(lines[0])
770 while len(l) > 2 and l[0:3] != '* @':
771 while l[0] == '*':
772 l = l[1:]
773 desc = desc + ' ' + string.strip(l)
774 del lines[0]
775 if len(lines) == 0:
776 break
777 l = lines[0]
778 args.append((arg, desc))
779 while len(lines) > 0 and lines[0] == '*':
780 del lines[0]
781 desc = ""
782 while len(lines) > 0:
783 l = lines[0]
784 while len(l) > 0 and l[0] == '*':
785 l = l[1:]
786 l = string.strip(l)
787 desc = desc + " " + l
788 del lines[0]
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000789
Daniel Veillardbe586972003-11-18 20:56:51 +0000790 desc = string.strip(desc)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000791
Daniel Veillardbe586972003-11-18 20:56:51 +0000792 if quiet == 0:
793 if desc == "":
Daniel Veillard1e906612003-12-05 14:57:46 +0000794 self.warning("Macro comment for %s lack description of the macro" % (name))
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000795
Daniel Veillardbe586972003-11-18 20:56:51 +0000796 return((args, desc))
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000797
798 #
799 # Parse a comment block and merge the informations found in the
800 # parameters descriptions, finally returns a block as complete
801 # as possible
802 #
def mergeFunctionComment(self, name, description, quiet = 0):
    """Merge the pending comment block into a function description.

    ``description`` is a ``(ret, args)`` pair: ``ret[0]`` is the return
    type and ``args`` is a list of ``(type, name, info)`` tuples.  Each
    ``* @name: text`` line of ``self.comment`` replaces, in place, the
    entry of ``args`` with the same name by ``(type, name, text)``.

    Returns ``((return type, return description), args, description)``.
    Problems are reported through ``self.warning`` unless ``quiet`` is
    true; ``quiet`` is forced for ``main`` and for names starting
    with ``__``.
    """
    if name == 'main' or name[0:2] == '__':
        quiet = 1

    (ret, args) = description
    desc = ""
    retdesc = ""

    if self.comment is None:
        if not quiet:
            self.warning("Missing comment for function %s" % (name))
        return(((ret[0], retdesc), args, desc))
    # slice instead of index so that an empty comment is reported, not fatal
    if self.comment[0:1] != '*':
        if not quiet:
            self.warning("Missing * in function comment for %s" % (name))
        return(((ret[0], retdesc), args, desc))

    lines = self.comment.split('\n')
    if lines[0] == '*':
        del lines[0]
    if len(lines) == 0 or lines[0] != "* %s:" % (name):
        if not quiet:
            if len(lines) > 0:
                got = lines[0]
            else:
                got = ""
            self.warning("Misformatted function comment for %s" % (name))
            self.warning(" Expecting '* %s:' got '%s'" % (name, got))
        return(((ret[0], retdesc), args, desc))
    del lines[0]
    # the comment may stop right after the header line
    while len(lines) > 0 and lines[0] == '*':
        del lines[0]

    #
    # the "* @arg: description" lines, possibly continued on the next lines
    #
    nbargs = len(args)
    while len(lines) > 0 and lines[0][0:3] == '* @':
        l = lines[0][3:]
        try:
            (arg, desc) = l.split(':', 1)
        except ValueError:
            # no ':' separator on the line
            if not quiet:
                self.warning("Misformatted function comment for %s" % (name))
                self.warning(" problem with '%s'" % (lines[0]))
            del lines[0]
            continue
        desc = desc.strip()
        arg = arg.strip()
        del lines[0]
        # the @arg line may have been the last line of the comment
        if len(lines) > 0:
            l = lines[0].strip()
        else:
            l = ""
        while len(l) > 2 and l[0:3] != '* @':
            while l[0:1] == '*':
                l = l[1:]
            desc = desc + ' ' + l.strip()
            del lines[0]
            if len(lines) == 0:
                break
            l = lines[0]
        for i in range(nbargs):
            if args[i][1] == arg:
                args[i] = (args[i][0], arg, desc)
                break
        else:
            if not quiet:
                self.warning("Unable to find arg %s from function comment for %s" % (
                   arg, name))

    #
    # the free text: everything from the first line starting with
    # "return"/"Return" onward describes the return value
    #
    while len(lines) > 0 and lines[0] == '*':
        del lines[0]
    desc = ""
    while len(lines) > 0:
        l = lines[0]
        while len(l) > 0 and l[0] == '*':
            l = l[1:]
        l = l.strip()
        if l[0:6] == "return" or l[0:6] == "Return":
            try:
                l = l.split(' ', 1)[1]
            except IndexError:
                l = ""
            retdesc = l.strip()
            del lines[0]
            while len(lines) > 0:
                l = lines[0]
                while len(l) > 0 and l[0] == '*':
                    l = l[1:]
                l = l.strip()
                retdesc = retdesc + " " + l
                del lines[0]
        else:
            desc = desc + " " + l
            del lines[0]

    retdesc = retdesc.strip()
    desc = desc.strip()

    if quiet == 0:
        #
        # report missing comments
        #
        for i in range(nbargs):
            if args[i][2] is None and args[i][0] != "void" and \
               args[i][1] is not None:
                self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
        if retdesc == "" and ret[0] != "void":
            self.warning("Function comment for %s lacks description of return value" % (name))
        if desc == "":
            self.warning("Function comment for %s lacks description of the function" % (name))

    return(((ret[0], retdesc), args, desc))
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000910
def parsePreproc(self, token):
    """Handle one preprocessor directive and return the next token.

    ``token[1]`` is the directive name.  ``#include`` and ``#define``
    targets are recorded with ``self.index_add``.  ``#ifdef``, ``#ifndef``
    and ``#if`` push their operand on ``self.defines``; operands containing
    the string 'ENABLED' are also pushed, as an expression, on
    ``self.conditionals``.  ``#else`` negates and ``#endif`` pops the
    innermost such entry.  Any other directive is skipped.
    """
    if debug:
        print("=> preproc  %s %s" % (token, self.lexer.tokens))
    name = token[1]
    if name == "#include":
        token = self.lexer.token()
        if token is None:
            return None
        if token[0] == 'preproc':
            self.index_add(token[1], self.filename, not self.is_header,
                           "include")
            return self.lexer.token()
        return token
    if name == "#define":
        token = self.lexer.token()
        if token is None:
            return None
        if token[0] == 'preproc':
            # TODO macros with arguments
            name = token[1]
            # skip the macro body, up to the next directive
            token = self.lexer.token()
            while token is not None and token[0] == 'preproc' and \
                  token[1][0] != '#':
                token = self.lexer.token()
            # only the identifier is indexed, not the argument list
            name = name.split('(')[0]
            info = self.parseMacroComment(name, not self.is_header)
            self.index_add(name, self.filename, not self.is_header,
                           "macro", info)
            return token

    #
    # Conditionals: every #ifdef/#ifndef/#if pushes on self.defines so that
    # #endif stays balanced, but only those mentioning 'ENABLED' are kept
    # in self.conditionals.
    #
    if name == "#ifdef" or name == "#ifndef":
        pending = self.lexer.tokens
        if len(pending) > 0:
            apstr = pending[0][1]
        else:
            # directive without operand: keep the stack balanced anyway
            apstr = ""
        self.defines.append(apstr)
        if apstr.find('ENABLED') != -1:
            if name == "#ifdef":
                self.conditionals.append("defined(%s)" % apstr)
            else:
                self.conditionals.append("!defined(%s)" % apstr)
    elif name == "#if":
        apstr = ""
        for tok in self.lexer.tokens:
            if apstr != "":
                apstr = apstr + " "
            apstr = apstr + tok[1]
        self.defines.append(apstr)
        if apstr.find('ENABLED') != -1:
            self.conditionals.append(apstr)
    elif name == "#else":
        if self.conditionals != [] and \
           self.defines[-1].find('ENABLED') != -1:
            self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
    elif name == "#endif":
        if self.conditionals != [] and \
           self.defines[-1].find('ENABLED') != -1:
            self.conditionals = self.conditionals[:-1]
        self.defines = self.defines[:-1]

    # drop the rest of the directive
    token = self.lexer.token()
    while token is not None and token[0] == 'preproc' and \
          token[1][0] != '#':
        token = self.lexer.token()
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001005
1006 #
1007 # token acquisition on top of the lexer, it handle internally
1008 # preprocessor and comments since they are logically not part of
1009 # the program structure.
1010 #
def token(self):
    """Return the next significant token from the lexer, None at the end.

    Comments and preprocessor tokens are handed to ``parseComment`` and
    ``parsePreproc``.  ``__const`` is returned as ``const``, an
    ``__attribute`` is skipped up to the following ';' (which is
    returned), and a name listed in ``ignored_words`` is dropped together
    with the number of following tokens recorded for it.
    """
    global ignored_words

    tok = self.lexer.token()
    while tok is not None:
        kind = tok[0]
        if kind == 'comment':
            tok = self.parseComment(tok)
        elif kind == 'preproc':
            tok = self.parsePreproc(tok)
        elif kind == "name" and tok[1] == "__const":
            return ("name", "const")
        elif kind == "name" and tok[1] == "__attribute":
            tok = self.lexer.token()
            while tok is not None and tok[1] != ";":
                tok = self.lexer.token()
            return tok
        elif kind == "name" and tok[1] in ignored_words:
            (n, info) = ignored_words[tok[1]]
            # swallow the n tokens attached to the ignored word ...
            for skipped in range(n):
                tok = self.lexer.token()
            # ... then resume with the token that follows them
            tok = self.lexer.token()
        else:
            if debug:
                print("=>  %s" % (tok,))
            return tok
    return None
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001043
1044 #
1045 # Parse a typedef, it records the type and its name.
1046 #
def parseTypedef(self, token):
    """Parse the declarators of a typedef and index each declared name.

    ``token`` is the first token after the ``typedef`` keyword.  A name is
    indexed as "functype" when ``parseType`` left a signature, as "struct"
    when the base type is exactly "struct", and as "typedef" otherwise.
    Returns the token following the closing ';', or the offending token
    after an error has been reported.
    """
    if token is None:
        return None
    token = self.parseType(token)
    if token is None:
        self.error("parsing typedef")
        return None
    base_type = self.type
    cur_type = base_type
    while token is not None:
        if token[0] != "name":
            self.error("parsing typedef: expecting a name")
            return token

        name = token[1]
        signature = self.signature
        if signature is not None:
            # keep only the return type in front of "(*name)"
            cur_type = cur_type.split('(')[0]
            d = self.mergeFunctionComment(name,
                    ((cur_type, None), signature), 1)
            self.index_add(name, self.filename, not self.is_header,
                           "functype", d)
        elif base_type == "struct":
            self.index_add(name, self.filename, not self.is_header,
                           "struct", cur_type)
            base_type = "struct " + name
        else:
            # TODO report missing or misformatted comments
            info = self.parseTypeComment(name, 1)
            self.index_add(name, self.filename, not self.is_header,
                           "typedef", cur_type, info)
        token = self.token()

        if token is not None and token[0] == 'sep' and token[1] == ',':
            # another declarator: restart from the base type and collect
            # its leading operators
            cur_type = base_type
            token = self.token()
            while token is not None and token[0] == "op":
                cur_type = cur_type + token[1]
                token = self.token()
        elif token is not None and token[0] == 'sep' and token[1] == ';':
            break
        elif token is not None and token[0] == 'name':
            cur_type = base_type
        else:
            self.error("parsing typedef: expecting ';'", token)
            return token
    token = self.token()
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001098
1099 #
1100 # Parse a C code block, used for functions it parse till
1101 # the balancing } included
1102 #
def parseBlock(self, token):
    """Skip a C code block up to and including its balancing '}'.

    Nested blocks are handled recursively.  When ``self.collect_ref`` is 1,
    names starting with ``xml``, ``XML_`` or ``LIBXML_`` are recorded with
    ``self.index_add_ref``: an ``xml*`` name followed by '(' as
    "function", an ``xml*`` name followed by another name and then ';',
    ',' or '=' as "type", the upper-case prefixes as "typedef".
    Returns the token after the closing '}', or None when the input ends
    first.
    """
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            self.comment = None
            token = self.token()
            return token
        elif self.collect_ref == 1:
            oldtok = token
            token = self.token()
            if oldtok[0] != "name":
                continue
            ident = oldtok[1]
            # token can be None here when the input is truncated
            if ident[0:3] == "xml":
                if token is not None and token[0] == "sep" and \
                   token[1] == "(":
                    self.index_add_ref(ident, self.filename, 0, "function")
                    token = self.token()
                elif token is not None and token[0] == "name":
                    token = self.token()
                    if token is not None and token[0] == "sep" and \
                       token[1] in (";", ",", "="):
                        self.index_add_ref(ident, self.filename, 0, "type")
            elif ident[0:4] == "XML_" or ident[0:7] == "LIBXML_":
                self.index_add_ref(ident, self.filename, 0, "typedef")
        else:
            token = self.token()
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001137
1138 #
1139 # Parse a C struct definition till the balancing }
1140 #
def parseStruct(self, token):
    """Parse the body of a C struct up to its balancing '}'.

    Each ``type name;`` member is collected as ``(type, name, comment)``
    and the list is stored in ``self.struct_fields``.  ``self.type`` is
    restored after every member.  Returns the token following the closing
    '}', or None if the input ends first.
    """
    fields = []
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseTypeBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            self.struct_fields = fields
            token = self.token()
            return token
        else:
            base_type = self.type
            token = self.parseType(token)
            if token is not None and token[0] == "name":
                fname = token[1]
                token = self.token()
                # token is None if the input stops right after the name
                if token is not None and token[0] == "sep" and \
                   token[1] == ";":
                    # self.comment is cleared first, so what gets stored is
                    # whatever fetching the next token leaves in it
                    self.comment = None
                    token = self.token()
                    fields.append((self.type, fname, self.comment))
                    self.comment = None
                else:
                    self.error("parseStruct: expecting ;", token)
            elif token is not None and token[0] == "sep" and token[1] == "{":
                # nested anonymous block: skipped, optional name then ';'
                token = self.token()
                token = self.parseTypeBlock(token)
                if token is not None and token[0] == "name":
                    token = self.token()
                if token is not None and token[0] == "sep" and \
                   token[1] == ";":
                    token = self.token()
                else:
                    self.error("parseStruct: expecting ;", token)
            else:
                self.error("parseStruct: name", token)
                token = self.token()
            self.type = base_type
    self.struct_fields = fields
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001186
1187 #
1188 # Parse a C enum block, parse till the balancing }
1189 #
def parseEnumBlock(self, token):
    """Parse the body of a C enum up to its balancing '}'.

    ``self.enums`` receives one ``(name, value, comment)`` tuple per
    enumerator.  ``value`` is a string: the text of the explicit
    initializer if there is one, otherwise the previous value plus one,
    starting from 0 as in C.  When the previous value is not a plain
    integer a warning is issued and the value is left empty.
    Returns the token following the closing '}', or None if the input
    ends first.
    """
    self.enums = []
    name = None
    self.comment = None
    comment = ""
    # "-1" so that a first enumerator without initializer gets 0, as in C
    value = "-1"
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseTypeBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            if name is not None:
                if self.comment is not None:
                    comment = self.comment
                    self.comment = None
                self.enums.append((name, value, comment))
            token = self.token()
            return token
        elif token[0] == "name":
            # the previous enumerator is complete only now: a comment that
            # followed it has been collected in the meantime
            if name is not None:
                if self.comment is not None:
                    comment = self.comment.strip()
                    self.comment = None
                self.enums.append((name, value, comment))
            name = token[1]
            comment = ""
            token = self.token()
            if token is not None and token[0] == "op" and \
               token[1][0] == "=":
                value = ""
                if len(token[1]) > 1:
                    value = token[1][1:]
                token = self.token()
                while token is not None and (token[0] != "sep" or
                      (token[1] != ',' and token[1] != '}')):
                    value = value + token[1]
                    token = self.token()
            else:
                try:
                    value = "%d" % (int(value) + 1)
                except ValueError:
                    self.warning("Failed to compute value of enum %s" % (name))
                    value = ""
            if token is not None and token[0] == "sep" and token[1] == ",":
                token = self.token()
        else:
            token = self.token()
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001237
1238 #
1239 # Parse a C definition block, used for structs it parse till
1240 # the balancing }
1241 #
def parseTypeBlock(self, token):
    """Skip a brace-delimited block, nested blocks included.

    The opening '{' has already been consumed by the caller.  Returns the
    token that follows the balancing '}', or None when the input ends
    before the block is closed.
    """
    depth = 0    # number of nested '{' still open
    while token is not None:
        is_sep = token[0] == "sep"
        if is_sep and token[1] == "{":
            depth = depth + 1
            token = self.token()
        elif is_sep and token[1] == "}":
            token = self.token()
            if depth == 0:
                return token
            depth = depth - 1
        else:
            token = self.token()
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001253
1254 #
1255 # Parse a type: the fact that the type name can either occur after
1256 # the definition or within the definition makes it a little harder
1257 # if inside, the name token is pushed back before returning
1258 #
def parseType(self, token):
    """Parse a C type starting at ``token`` and store its text in self.type.

    ``self.struct_fields`` and ``self.signature`` are reset first; they are
    filled by ``parseStruct`` / ``parseSignature`` when the type is a
    struct definition or a function type.  Enumerators found in an enum
    body are indexed directly.  Because the declared name may appear
    inside the type (function pointers, arrays), the token following the
    name is pushed back on the lexer and the name token is what gets
    returned in those cases.
    """
    self.type = ""
    self.struct_fields = []
    self.signature = None
    if token is None:
        return token

    def add_word(word):
        # append one word to self.type, space separated
        if self.type == "":
            self.type = word
        else:
            self.type = self.type + " " + word

    while token is not None and token[0] == "name" and \
          token[1] in ("const", "unsigned", "signed"):
        add_word(token[1])
        token = self.token()

    if token is None:
        self.error("parsing type %s: expecting a name" % (self.type),
                   token)
        return token

    if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
        add_word(token[1])
        # look ahead for "long long" and for the optional trailing "int";
        # anything else is pushed back for the common code below
        ahead = self.token()
        if token[1] == "long" and ahead is not None and \
           ahead[0] == "name" and ahead[1] == "long":
            add_word(ahead[1])
            ahead = self.token()
        if ahead is not None and ahead[0] == "name" and ahead[1] == "int":
            add_word(ahead[1])
        elif ahead is not None:
            self.lexer.push(ahead)

    elif token[0] == "name" and token[1] == "struct":
        add_word(token[1])
        token = self.token()
        nametok = None
        if token is not None and token[0] == "name":
            nametok = token
            token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseStruct(token)
        elif token is not None and token[0] == "op" and token[1] == "*":
            self.type = self.type + " " + nametok[1] + " *"
            token = self.token()
            while token is not None and token[0] == "op" and \
                  token[1] == "*":
                self.type = self.type + " *"
                token = self.token()
            if token is not None and token[0] == "name":
                nametok = token
                token = self.token()
            else:
                self.error("struct : expecting name", token)
                return token
        elif token is not None and token[0] == "name" and \
             nametok is not None:
            self.type = self.type + " " + nametok[1]
            return token

        if nametok is not None:
            self.lexer.push(token)
            token = nametok
        return token

    elif token[0] == "name" and token[1] == "enum":
        add_word(token[1])
        self.enums = []
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseEnumBlock(token)
        else:
            self.error("parsing enum: expecting '{'", token)
        enum_type = None
        if token is not None and token[0] != "name":
            # anonymous enum: hand back a placeholder name token
            self.lexer.push(token)
            token = ("name", "enum")
        elif token is not None:
            enum_type = token[1]
        for enum in self.enums:
            self.index_add(enum[0], self.filename,
                           not self.is_header, "enum",
                           (enum[1], enum[2], enum_type))
        return token

    elif token[0] == "name":
        add_word(token[1])
    else:
        self.error("parsing type %s: expecting a name" % (self.type),
                   token)
        return token

    token = self.token()
    while token is not None and (token[0] == "op" or
          token[0] == "name" and token[1] == "const"):
        self.type = self.type + " " + token[1]
        token = self.token()

    #
    # if there is a parenthesis here, this means a function type
    #
    if token is not None and token[0] == "sep" and token[1] == '(':
        self.type = self.type + token[1]
        token = self.token()
        while token is not None and token[0] == "op" and token[1] == '*':
            self.type = self.type + token[1]
            token = self.token()
        if token is None or token[0] != "name":
            self.error("parsing function type, name expected", token)
            return token
        self.type = self.type + token[1]
        nametok = token
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == ')':
            self.type = self.type + token[1]
            token = self.token()
            if token is not None and token[0] == "sep" and token[1] == '(':
                token = self.token()
                # parseSignature parses the argument types through
                # parseType, which overwrites self.type
                saved_type = self.type
                token = self.parseSignature(token)
                self.type = saved_type
            else:
                self.error("parsing function type, '(' expected", token)
                return token
        else:
            self.error("parsing function type, ')' expected", token)
            return token
        self.lexer.push(token)
        token = nametok
        return token

    #
    # do some lookahead for arrays
    #
    if token is not None and token[0] == "name":
        nametok = token
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == '[':
            self.type = self.type + nametok[1]
            while token is not None and token[0] == "sep" and \
                  token[1] == '[':
                self.type = self.type + token[1]
                token = self.token()
                while token is not None and token[0] != 'sep' and \
                      token[1] != ']' and token[1] != ';':
                    self.type = self.type + token[1]
                    token = self.token()
            if token is not None and token[0] == 'sep' and token[1] == ']':
                self.type = self.type + token[1]
                token = self.token()
            else:
                self.error("parsing array type, ']' expected", token)
                return token
        elif token is not None and token[0] == "sep" and token[1] == ':':
            # remove :12 in case it's a limited int size
            token = self.token()
            token = self.token()
        self.lexer.push(token)
        token = nametok

    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001422
1423 #
1424 # Parse a signature: '(' has been parsed and we scan the type definition
1425 # up to the ')' included
def parseSignature(self, token):
    """Parse a function argument list; the '(' is already consumed.

    ``self.signature`` is set to a list of ``(type, name, None)`` tuples.
    An argument given by type only gets the name None, except the
    ellipsis which is recorded as ``("...", "...", None)``.
    Returns the token following the closing ')'.
    """
    collected = []
    if token is not None and token[0] == "sep" and token[1] == ')':
        # empty argument list
        self.signature = []
        return self.token()

    while token is not None:
        token = self.parseType(token)
        if token is None:
            break
        kind = token[0]
        if kind == "name":
            collected.append((self.type, token[1], None))
            token = self.token()
        elif kind == "sep" and token[1] == ',':
            token = self.token()
            continue
        elif kind == "sep" and token[1] == ')':
            # only the type was provided
            if self.type == "...":
                collected.append((self.type, "...", None))
            else:
                collected.append((self.type, None, None))

        if token is not None and token[0] == "sep":
            separator = token[1]
            if separator == ',':
                token = self.token()
            elif separator == ')':
                token = self.token()
                break
    self.signature = collected
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001455
1456 #
1457 # Parse a global definition, be it a type, variable or function
1458 # the extern "C" blocks are a bit nasty and require it to recurse.
1459 #
def parseGlobal(self, token):
    """Parse one top-level definition: a type, variable(s) or a function.

    An ``extern "C" { ... }`` block is handled by recursing on its
    content.  Typedefs are delegated to ``parseTypedef``.  Variables,
    structs and functions are recorded with ``self.index_add``; function
    comments are merged with ``mergeFunctionComment`` and function bodies
    and initializers are skipped.  Returns the first token after the
    definition.
    """
    static = 0
    if token[1] == 'extern':
        token = self.token()
        if token is None:
            return token
        if token[0] == 'string':
            if token[1] != 'C':
                return token
            token = self.token()
            if token is None:
                return token
            if token[0] == 'sep' and token[1] == "{":
                token = self.token()
                while token is not None and (token[0] != 'sep' or
                      token[1] != "}"):
                    if token[0] != 'name':
                        self.error(
                         "token %s %s unexpected at the top level" % (
                                token[0], token[1]))
                    token = self.parseGlobal(token)
                token = self.token()
                return token
    elif token[1] == 'static':
        static = 1
        token = self.token()
        if token is None or token[0] != 'name':
            return token

    if token[1] == 'typedef':
        token = self.token()
        return self.parseTypedef(token)

    token = self.parseType(token)
    type_orig = self.type
    if token is None or token[0] != "name":
        return token
    cur_type = type_orig
    self.name = token[1]
    token = self.token()
    while token is not None and (token[0] == "sep" or token[0] == "op"):
        if token[0] == "sep" and token[1] == "[":
            # array declarator: everything up to the ';' joins the type
            cur_type = cur_type + token[1]
            token = self.token()
            while token is not None and (token[0] != "sep" or
                  token[1] != ";"):
                cur_type = cur_type + token[1]
                token = self.token()

        if token is not None and token[0] == "op" and token[1] == "=":
            #
            # Skip the initialization of the variable
            #
            token = self.token()
            if token is not None and token[0] == 'sep' and token[1] == '{':
                token = self.token()
                token = self.parseBlock(token)
            else:
                self.comment = None
                while token is not None and (token[0] != "sep" or
                      (token[1] != ';' and token[1] != ',')):
                    token = self.token()
            self.comment = None
            if token is None or token[0] != "sep" or (token[1] != ';' and
               token[1] != ','):
                self.error("missing ';' or ',' after value")

        if token is not None and token[0] == "sep":
            if token[1] == ";":
                self.comment = None
                token = self.token()
                if cur_type == "struct":
                    self.index_add(self.name, self.filename,
                         not self.is_header, "struct", self.struct_fields)
                else:
                    self.index_add(self.name, self.filename,
                         not self.is_header, "variable", cur_type)
                break
            elif token[1] == "(":
                token = self.token()
                token = self.parseSignature(token)
                if token is None:
                    return None
                if token[0] == "sep" and token[1] == ";":
                    # prototype: comment problems are never reported
                    d = self.mergeFunctionComment(self.name,
                            ((cur_type, None), self.signature), 1)
                    self.index_add(self.name, self.filename, static,
                                   "function", d)
                    token = self.token()
                elif token[0] == "sep" and token[1] == "{":
                    # definition: reported unless the function is static
                    d = self.mergeFunctionComment(self.name,
                            ((cur_type, None), self.signature), static)
                    self.index_add(self.name, self.filename, static,
                                   "function", d)
                    token = self.token()
                    token = self.parseBlock(token)
            elif token[1] == ',':
                # several variables declared at once
                self.comment = None
                self.index_add(self.name, self.filename, static,
                               "variable", cur_type)
                cur_type = type_orig
                token = self.token()
                while token is not None and token[0] == "sep":
                    cur_type = cur_type + token[1]
                    token = self.token()
                if token is not None and token[0] == "name":
                    self.name = token[1]
                    token = self.token()
            else:
                break
        elif token is not None:
            # an operator other than '=' cannot continue a declaration;
            # nothing above consumes it, so stop instead of looping forever
            break

    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001578
def parse(self):
    """Parse the whole file and return the index that was filled.

    Every top-level definition is handed to ``parseGlobal``.  A token that
    is not a name is reported with ``self.error``; if that call returns,
    one more ``parseGlobal`` is attempted and the method gives up,
    returning None.
    """
    self.warning("Parsing %s" % (self.filename))
    tok = self.token()
    while tok is not None:
        if tok[0] != 'name':
            self.error("token %s %s unexpected at the top level" % (
                   tok[0], tok[1]))
            tok = self.parseGlobal(tok)
            return
        tok = self.parseGlobal(tok)
    self.parseTopComment(self.top_comment)
    return self.index
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001592
1593
1594class docBuilder:
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001595 """A documentation builder"""
def __init__(self, name, directories=['.'], excludes=[]):
    """Set up an empty documentation builder for project ``name``.

    ``directories`` lists the directories to scan.  ``excludes`` lists
    extra file name fragments to skip, in addition to the keys of the
    module-level ``ignored_files`` table.
    """
    self.name = name
    # copy the lists: the default values are shared between all calls and
    # must not end up being modified through one instance
    self.directories = list(directories)
    self.excludes = list(excludes) + list(ignored_files.keys())
    self.modules = {}
    self.headers = {}
    self.idx = index()
    self.xref = {}
    self.index = {}
    if name == 'libxml2':
        self.basename = 'libxml'
    else:
        self.basename = name
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001609
def indexString(self, id, str):
    """Record in ``self.xref`` the words of ``str`` as referring to ``id``.

    Punctuation is replaced by blanks before splitting.  A word is kept
    when it starts with an ASCII letter, is at least 3 characters long
    and is not 'and' or 'the' (case-insensitive).  ``self.xref`` maps each
    kept word, case preserved, to the list of ids where it was seen.
    Nothing is done when ``str`` is None.
    """
    if str is None:
        return
    for punct in "'\"/*[]()<>&#,.;":
        str = str.replace(punct, ' ')
    for word in str.split():
        if word[0] not in string.ascii_letters:
            continue
        if len(word) < 3:
            continue
        # TODO: generalize this a bit
        if word.lower() in ('and', 'the'):
            continue
        if word in self.xref:
            self.xref[word].append(id)
        else:
            self.xref[word] = [id]
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001647
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001648 def analyze(self):
1649 print "Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys()))
1650 self.idx.analyze()
1651
1652 def scanHeaders(self):
1653 for header in self.headers.keys():
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001654 parser = CParser(header)
1655 idx = parser.parse()
1656 self.headers[header] = idx;
1657 self.idx.merge(idx)
1658
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001659 def scanModules(self):
1660 for module in self.modules.keys():
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001661 parser = CParser(module)
1662 idx = parser.parse()
1663 # idx.analyze()
1664 self.modules[module] = idx
1665 self.idx.merge_public(idx)
1666
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001667 def scan(self):
1668 for directory in self.directories:
1669 files = glob.glob(directory + "/*.c")
1670 for file in files:
1671 skip = 0
1672 for excl in self.excludes:
1673 if string.find(file, excl) != -1:
1674 skip = 1;
1675 break
1676 if skip == 0:
1677 self.modules[file] = None;
1678 files = glob.glob(directory + "/*.h")
1679 for file in files:
1680 skip = 0
1681 for excl in self.excludes:
1682 if string.find(file, excl) != -1:
1683 skip = 1;
1684 break
1685 if skip == 0:
1686 self.headers[file] = None;
1687 self.scanHeaders()
1688 self.scanModules()
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001689
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001690 def modulename_file(self, file):
1691 module = os.path.basename(file)
1692 if module[-2:] == '.h':
1693 module = module[:-2]
Daniel Veillard5d4644e2005-04-01 13:11:58 +00001694 elif module[-2:] == '.c':
1695 module = module[:-2]
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001696 return module
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001697
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001698 def serialize_enum(self, output, name):
1699 id = self.idx.enums[name]
1700 output.write(" <enum name='%s' file='%s'" % (name,
Daniel Veillard5d4644e2005-04-01 13:11:58 +00001701 self.modulename_file(id.header)))
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001702 if id.info != None:
1703 info = id.info
1704 if info[0] != None and info[0] != '':
1705 try:
1706 val = eval(info[0])
1707 except:
1708 val = info[0]
1709 output.write(" value='%s'" % (val));
1710 if info[2] != None and info[2] != '':
1711 output.write(" type='%s'" % info[2]);
1712 if info[1] != None and info[1] != '':
1713 output.write(" info='%s'" % escape(info[1]));
1714 output.write("/>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001715
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001716 def serialize_macro(self, output, name):
1717 id = self.idx.macros[name]
1718 output.write(" <macro name='%s' file='%s'>\n" % (name,
Daniel Veillard5d4644e2005-04-01 13:11:58 +00001719 self.modulename_file(id.header)))
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001720 if id.info != None:
1721 try:
1722 (args, desc) = id.info
1723 if desc != None and desc != "":
1724 output.write(" <info>%s</info>\n" % (escape(desc)))
1725 self.indexString(name, desc)
1726 for arg in args:
1727 (name, desc) = arg
1728 if desc != None and desc != "":
1729 output.write(" <arg name='%s' info='%s'/>\n" % (
1730 name, escape(desc)))
1731 self.indexString(name, desc)
1732 else:
1733 output.write(" <arg name='%s'/>\n" % (name))
1734 except:
1735 pass
1736 output.write(" </macro>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001737
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001738 def serialize_typedef(self, output, name):
1739 id = self.idx.typedefs[name]
1740 if id.info[0:7] == 'struct ':
1741 output.write(" <struct name='%s' file='%s' type='%s'" % (
Daniel Veillard5d4644e2005-04-01 13:11:58 +00001742 name, self.modulename_file(id.header), id.info))
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001743 name = id.info[7:]
1744 if self.idx.structs.has_key(name) and ( \
1745 type(self.idx.structs[name].info) == type(()) or
Daniel Veillardc1eed322002-12-12 11:01:32 +00001746 type(self.idx.structs[name].info) == type([])):
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001747 output.write(">\n");
1748 try:
1749 for field in self.idx.structs[name].info:
1750 desc = field[2]
1751 self.indexString(name, desc)
1752 if desc == None:
1753 desc = ''
1754 else:
1755 desc = escape(desc)
1756 output.write(" <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc))
1757 except:
1758 print "Failed to serialize struct %s" % (name)
1759 output.write(" </struct>\n")
1760 else:
1761 output.write("/>\n");
1762 else :
Daniel Veillard99b78502005-09-11 22:04:08 +00001763 output.write(" <typedef name='%s' file='%s' type='%s'" % (
1764 name, self.modulename_file(id.header), id.info))
1765 try:
1766 desc = id.extra
1767 if desc != None and desc != "":
1768 output.write(">\n <info>%s</info>\n" % (escape(desc)))
1769 output.write(" </typedef>\n")
1770 else:
1771 output.write("/>\n")
1772 except:
1773 output.write("/>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001774
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001775 def serialize_variable(self, output, name):
1776 id = self.idx.variables[name]
1777 if id.info != None:
1778 output.write(" <variable name='%s' file='%s' type='%s'/>\n" % (
Daniel Veillard5d4644e2005-04-01 13:11:58 +00001779 name, self.modulename_file(id.header), id.info))
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001780 else:
1781 output.write(" <variable name='%s' file='%s'/>\n" % (
Daniel Veillard5d4644e2005-04-01 13:11:58 +00001782 name, self.modulename_file(id.header)))
Daniel Veillardc1eed322002-12-12 11:01:32 +00001783
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001784 def serialize_function(self, output, name):
1785 id = self.idx.functions[name]
Daniel Veillarda2351322004-06-27 12:08:10 +00001786 if name == debugsym:
1787 print "=>", id
1788
Daniel Veillard5d4644e2005-04-01 13:11:58 +00001789 output.write(" <%s name='%s' file='%s' module='%s'>\n" % (id.type,
1790 name, self.modulename_file(id.header),
1791 self.modulename_file(id.module)))
William M. Brack21e4ef22005-01-02 09:53:13 +00001792 #
1793 # Processing of conditionals modified by Bill 1/1/05
1794 #
Daniel Veillarda2351322004-06-27 12:08:10 +00001795 if id.conditionals != None:
William M. Brack21e4ef22005-01-02 09:53:13 +00001796 apstr = ""
Daniel Veillarda2351322004-06-27 12:08:10 +00001797 for cond in id.conditionals:
William M. Brack21e4ef22005-01-02 09:53:13 +00001798 if apstr != "":
1799 apstr = apstr + " &amp;&amp; "
1800 apstr = apstr + cond
1801 output.write(" <cond>%s</cond>\n"% (apstr));
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001802 try:
1803 (ret, params, desc) = id.info
1804 output.write(" <info>%s</info>\n" % (escape(desc)))
1805 self.indexString(name, desc)
1806 if ret[0] != None:
1807 if ret[0] == "void":
1808 output.write(" <return type='void'/>\n")
1809 else:
1810 output.write(" <return type='%s' info='%s'/>\n" % (
1811 ret[0], escape(ret[1])))
1812 self.indexString(name, ret[1])
1813 for param in params:
1814 if param[0] == 'void':
1815 continue
1816 if param[2] == None:
1817 output.write(" <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
1818 else:
1819 output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
1820 self.indexString(name, param[2])
1821 except:
1822 print "Failed to save function %s info: " % name, `id.info`
1823 output.write(" </%s>\n" % (id.type))
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001824
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001825 def serialize_exports(self, output, file):
1826 module = self.modulename_file(file)
1827 output.write(" <file name='%s'>\n" % (module))
1828 dict = self.headers[file]
Daniel Veillardbe586972003-11-18 20:56:51 +00001829 if dict.info != None:
1830 for data in ('Summary', 'Description', 'Author'):
1831 try:
1832 output.write(" <%s>%s</%s>\n" % (
1833 string.lower(data),
1834 escape(dict.info[data]),
1835 string.lower(data)))
1836 except:
1837 print "Header %s lacks a %s description" % (module, data)
1838 if dict.info.has_key('Description'):
1839 desc = dict.info['Description']
1840 if string.find(desc, "DEPRECATED") != -1:
1841 output.write(" <deprecated/>\n")
1842
Daniel Veillard1a792412003-11-18 23:52:38 +00001843 ids = dict.macros.keys()
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001844 ids.sort()
1845 for id in uniq(ids):
Daniel Veillard1a792412003-11-18 23:52:38 +00001846 # Macros are sometime used to masquerade other types.
1847 if dict.functions.has_key(id):
1848 continue
1849 if dict.variables.has_key(id):
1850 continue
1851 if dict.typedefs.has_key(id):
1852 continue
1853 if dict.structs.has_key(id):
1854 continue
1855 if dict.enums.has_key(id):
1856 continue
1857 output.write(" <exports symbol='%s' type='macro'/>\n" % (id))
1858 ids = dict.enums.keys()
1859 ids.sort()
1860 for id in uniq(ids):
1861 output.write(" <exports symbol='%s' type='enum'/>\n" % (id))
1862 ids = dict.typedefs.keys()
1863 ids.sort()
1864 for id in uniq(ids):
1865 output.write(" <exports symbol='%s' type='typedef'/>\n" % (id))
1866 ids = dict.structs.keys()
1867 ids.sort()
1868 for id in uniq(ids):
1869 output.write(" <exports symbol='%s' type='struct'/>\n" % (id))
1870 ids = dict.variables.keys()
1871 ids.sort()
1872 for id in uniq(ids):
1873 output.write(" <exports symbol='%s' type='variable'/>\n" % (id))
1874 ids = dict.functions.keys()
1875 ids.sort()
1876 for id in uniq(ids):
1877 output.write(" <exports symbol='%s' type='function'/>\n" % (id))
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001878 output.write(" </file>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001879
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001880 def serialize_xrefs_files(self, output):
1881 headers = self.headers.keys()
1882 headers.sort()
1883 for file in headers:
1884 module = self.modulename_file(file)
1885 output.write(" <file name='%s'>\n" % (module))
1886 dict = self.headers[file]
William M. Bracka2e844a2004-01-06 11:52:13 +00001887 ids = uniq(dict.functions.keys() + dict.variables.keys() + \
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001888 dict.macros.keys() + dict.typedefs.keys() + \
William M. Bracka2e844a2004-01-06 11:52:13 +00001889 dict.structs.keys() + dict.enums.keys())
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001890 ids.sort()
William M. Bracka2e844a2004-01-06 11:52:13 +00001891 for id in ids:
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001892 output.write(" <ref name='%s'/>\n" % (id))
1893 output.write(" </file>\n")
1894 pass
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001895
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001896 def serialize_xrefs_functions(self, output):
1897 funcs = {}
1898 for name in self.idx.functions.keys():
1899 id = self.idx.functions[name]
1900 try:
1901 (ret, params, desc) = id.info
1902 for param in params:
1903 if param[0] == 'void':
1904 continue
1905 if funcs.has_key(param[0]):
1906 funcs[param[0]].append(name)
1907 else:
1908 funcs[param[0]] = [name]
1909 except:
1910 pass
1911 typ = funcs.keys()
1912 typ.sort()
1913 for type in typ:
1914 if type == '' or type == 'void' or type == "int" or \
1915 type == "char *" or type == "const char *" :
1916 continue
1917 output.write(" <type name='%s'>\n" % (type))
1918 ids = funcs[type]
1919 ids.sort()
William M. Brackcf9eadf2003-12-25 13:24:05 +00001920 pid = '' # not sure why we have dups, but get rid of them!
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001921 for id in ids:
William M. Brackcf9eadf2003-12-25 13:24:05 +00001922 if id != pid:
1923 output.write(" <ref name='%s'/>\n" % (id))
1924 pid = id
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001925 output.write(" </type>\n")
1926
1927 def serialize_xrefs_constructors(self, output):
1928 funcs = {}
1929 for name in self.idx.functions.keys():
1930 id = self.idx.functions[name]
1931 try:
1932 (ret, params, desc) = id.info
1933 if ret[0] == "void":
1934 continue
1935 if funcs.has_key(ret[0]):
1936 funcs[ret[0]].append(name)
1937 else:
1938 funcs[ret[0]] = [name]
1939 except:
1940 pass
1941 typ = funcs.keys()
1942 typ.sort()
1943 for type in typ:
1944 if type == '' or type == 'void' or type == "int" or \
1945 type == "char *" or type == "const char *" :
1946 continue
1947 output.write(" <type name='%s'>\n" % (type))
1948 ids = funcs[type]
William M. Brack09a4d0a2004-01-05 14:28:43 +00001949 ids.sort()
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001950 for id in ids:
1951 output.write(" <ref name='%s'/>\n" % (id))
1952 output.write(" </type>\n")
1953
1954 def serialize_xrefs_alpha(self, output):
1955 letter = None
1956 ids = self.idx.identifiers.keys()
1957 ids.sort()
1958 for id in ids:
1959 if id[0] != letter:
1960 if letter != None:
1961 output.write(" </letter>\n")
1962 letter = id[0]
1963 output.write(" <letter name='%s'>\n" % (letter))
1964 output.write(" <ref name='%s'/>\n" % (id))
1965 if letter != None:
1966 output.write(" </letter>\n")
1967
1968 def serialize_xrefs_references(self, output):
1969 typ = self.idx.identifiers.keys()
1970 typ.sort()
1971 for id in typ:
1972 idf = self.idx.identifiers[id]
Daniel Veillard5d4644e2005-04-01 13:11:58 +00001973 module = idf.header
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001974 output.write(" <reference name='%s' href='%s'/>\n" % (id,
1975 'html/' + self.basename + '-' +
1976 self.modulename_file(module) + '.html#' +
1977 id))
1978
1979 def serialize_xrefs_index(self, output):
1980 index = self.xref
1981 typ = index.keys()
1982 typ.sort()
1983 letter = None
1984 count = 0
1985 chunk = 0
1986 chunks = []
1987 for id in typ:
1988 if len(index[id]) > 30:
1989 continue
1990 if id[0] != letter:
1991 if letter == None or count > 200:
1992 if letter != None:
1993 output.write(" </letter>\n")
1994 output.write(" </chunk>\n")
1995 count = 0
1996 chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
1997 output.write(" <chunk name='chunk%s'>\n" % (chunk))
1998 first_letter = id[0]
1999 chunk = chunk + 1
2000 elif letter != None:
2001 output.write(" </letter>\n")
2002 letter = id[0]
2003 output.write(" <letter name='%s'>\n" % (letter))
2004 output.write(" <word name='%s'>\n" % (id))
2005 tokens = index[id];
2006 tokens.sort()
2007 tok = None
William M. Bracka2e844a2004-01-06 11:52:13 +00002008 for token in tokens:
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00002009 if tok == token:
2010 continue
2011 tok = token
2012 output.write(" <ref name='%s'/>\n" % (token))
2013 count = count + 1
2014 output.write(" </word>\n")
2015 if letter != None:
2016 output.write(" </letter>\n")
2017 output.write(" </chunk>\n")
William M. Brack966668a2003-12-20 02:10:28 +00002018 if count != 0:
2019 chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00002020 output.write(" <chunks>\n")
2021 for ch in chunks:
2022 output.write(" <chunk name='%s' start='%s' end='%s'/>\n" % (
2023 ch[0], ch[1], ch[2]))
2024 output.write(" </chunks>\n")
2025
2026 def serialize_xrefs(self, output):
2027 output.write(" <references>\n")
2028 self.serialize_xrefs_references(output)
2029 output.write(" </references>\n")
2030 output.write(" <alpha>\n")
2031 self.serialize_xrefs_alpha(output)
2032 output.write(" </alpha>\n")
2033 output.write(" <constructors>\n")
2034 self.serialize_xrefs_constructors(output)
2035 output.write(" </constructors>\n")
2036 output.write(" <functions>\n")
2037 self.serialize_xrefs_functions(output)
2038 output.write(" </functions>\n")
2039 output.write(" <files>\n")
2040 self.serialize_xrefs_files(output)
2041 output.write(" </files>\n")
2042 output.write(" <index>\n")
2043 self.serialize_xrefs_index(output)
2044 output.write(" </index>\n")
2045
2046 def serialize(self):
2047 filename = "%s-api.xml" % self.name
2048 print "Saving XML description %s" % (filename)
2049 output = open(filename, "w")
2050 output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
2051 output.write("<api name='%s'>\n" % self.name)
2052 output.write(" <files>\n")
2053 headers = self.headers.keys()
2054 headers.sort()
2055 for file in headers:
2056 self.serialize_exports(output, file)
2057 output.write(" </files>\n")
2058 output.write(" <symbols>\n")
2059 macros = self.idx.macros.keys()
2060 macros.sort()
2061 for macro in macros:
2062 self.serialize_macro(output, macro)
2063 enums = self.idx.enums.keys()
2064 enums.sort()
2065 for enum in enums:
2066 self.serialize_enum(output, enum)
2067 typedefs = self.idx.typedefs.keys()
2068 typedefs.sort()
2069 for typedef in typedefs:
2070 self.serialize_typedef(output, typedef)
2071 variables = self.idx.variables.keys()
2072 variables.sort()
2073 for variable in variables:
2074 self.serialize_variable(output, variable)
2075 functions = self.idx.functions.keys()
2076 functions.sort()
2077 for function in functions:
2078 self.serialize_function(output, function)
2079 output.write(" </symbols>\n")
2080 output.write("</api>\n")
2081 output.close()
2082
2083 filename = "%s-refs.xml" % self.name
2084 print "Saving XML Cross References %s" % (filename)
2085 output = open(filename, "w")
2086 output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
2087 output.write("<apirefs name='%s'>\n" % self.name)
2088 self.serialize_xrefs(output)
2089 output.write("</apirefs>\n")
2090 output.close()
Daniel Veillarda9b66d02002-12-11 14:23:49 +00002091
2092
def rebuild():
    """Rebuild the API description of the project found around the cwd.

    The project is guessed from the presence of well-known source files:
    parser.c in the current or in the parent directory means libxml2,
    ../libxslt/transform.c means libxslt. The sources are then scanned,
    analyzed and serialized. When ../libexslt/exslt.c exists too, a
    separate libexslt description is generated as well.

    Returns the main docBuilder, or None if no project was recognized.
    """
    if glob.glob("parser.c"):
        print("Rebuilding API description for libxml2")
        builder = docBuilder("libxml2", [".", "."],
                             ["xmlwin32version.h", "tst.c"])
    elif glob.glob("../parser.c"):
        print("Rebuilding API description for libxml2")
        builder = docBuilder("libxml2", ["..", "../include/libxml"],
                             ["xmlwin32version.h", "tst.c"])
    elif glob.glob("../libxslt/transform.c"):
        print("Rebuilding API description for libxslt")
        builder = docBuilder("libxslt", ["../libxslt"],
                             ["win32config.h", "libxslt.h", "tst.c"])
    else:
        print("rebuild() failed, unable to guess the module")
        return None
    builder.scan()
    builder.analyze()
    builder.serialize()
    if glob.glob("../libexslt/exslt.c"):
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize()
    return builder
2119
2120#
2121# for debugging the parser
2122#
def parse(filename):
    """Run the C parser on a single file and return the index it built."""
    return CParser(filename).parse()
2127
if __name__ == "__main__":
    # Without argument the whole API description is rebuilt; with one, only
    # that file is parsed, with debugging turned on.
    if len(sys.argv) <= 1:
        rebuild()
    else:
        debug = 1
        parse(sys.argv[1])