#!/usr/bin/python -u
#
# This is the API builder: it parses the C sources and builds the
# formal API description in XML.
#
# See Copyright for the status of this software.
#
# daniel@veillard.com
#
Daniel Veillard540a31a2003-01-21 11:21:07 +000010import os, sys
Daniel Veillarda9b66d02002-12-11 14:23:49 +000011import string
12import glob
13
# Module-wide debug flag (not consulted by the code visible in this part
# of the file).
debug = 0

# Name of one symbol to trace: identifier and index print a line each time
# a symbol with this exact name is defined, updated or registered.
# None disables the tracing.
# Example: debugsym = 'ignorableWhitespaceSAXFunc'
debugsym = None
Daniel Veillarde8ba84e2003-11-18 13:54:15 +000017
Daniel Veillardde0a0a52003-04-24 17:12:57 +000018#
19# C parser analysis code
20#
# File names mapped to the human-readable reason they are listed here.
# NOTE(review): presumably the scanner skips these files -- the consumer
# is outside this part of the file, confirm there.
ignored_files = {
    # trio portability code
    'trio':         "too many non standard macros",
    'trio.c':       "too many non standard macros",
    'trionan.c':    "too many non standard macros",
    'triostr.c':    "too many non standard macros",
    # generated configuration headers
    'acconfig.h':   "generated portability layer",
    'config.h':     "generated portability layer",
    # internal / special headers
    'libxml.h':     "internal only",
    'elfgcchack.h': "not a normal header",
    # test programs
    'testOOM.c':    "out of memory tester",
    'testOOMlib.h': "out of memory tester",
    'testOOMlib.c': "out of memory tester",
    'testapi.c':    "generated regression tests",
    'testdso.c':    "test for dynamid shared libraries",
    'tst.c':        "not part of the library",
    # work in progress
    'rngparser.c':  "not yet integrated",
    'rngparser.h':  "not yet integrated",
}
39
# C words mapped to a (count, reason) pair.
# NOTE(review): the count is presumably the number of tokens to drop after
# the word itself (0 for plain keywords, 3 for "__declspec ( x )") -- the
# consumer is outside this part of the file, confirm there.
ignored_words = {
    # Windows calling-convention / linkage keywords
    'WINAPI':            (0, "Windows keyword"),
    '__stdcall':         (0, "Windows keyword"),
    '__declspec':        (3, "Windows keyword"),
    'LIBXML_DLL_IMPORT': (0, "Special macro to flag external keywords"),
    # exported variables
    'XMLPUBVAR':         (0, "Special macro for extern vars for win32"),
    'XSLTPUBVAR':        (0, "Special macro for extern vars for win32"),
    'EXSLTPUBVAR':       (0, "Special macro for extern vars for win32"),
    # exported functions
    'XMLPUBFUN':         (0, "Special macro for extern funcs for win32"),
    'XSLTPUBFUN':        (0, "Special macro for extern funcs for win32"),
    'EXSLTPUBFUN':       (0, "Special macro for extern funcs for win32"),
    # calling conventions
    'XMLCALL':           (0, "Special macro for win32 calls"),
    'XSLTCALL':          (0, "Special macro for win32 calls"),
    'EXSLTCALL':         (0, "Special macro for win32 calls"),
    # miscellaneous macros
    'ATTRIBUTE_UNUSED':  (0, "macro keyword"),
    'LIBEXSLT_PUBLIC':   (0, "macro keyword"),
    'X_IN_Y':            (5, "macro function builder"),
}
58
def escape(raw):
    """Return raw with the five XML special characters turned into entities.

    '&', '<', '>', "'" and '"' become '&amp;', '&lt;', '&gt;', '&apos;'
    and '&quot;' respectively, so the result can be embedded in XML text
    or in an attribute value.
    """
    # The ampersand has to go first: every other replacement introduces
    # new '&' characters that must not be escaped a second time.
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
66
def uniq(items):
    """Return a list holding each distinct (hashable) element of items once.

    The order of the result is whatever order the underlying dictionary
    yields its keys in; callers needing a stable order must sort.
    """
    seen = {}
    for entry in items:
        seen[entry] = 1
    return list(seen)
72
class identifier:
    """Everything recorded about one named C symbol.

    Attributes:
      name          the symbol name
      header        value passed as 'header' by whoever registered the symbol
      module        value passed as 'module' by whoever registered the symbol
      type          kind of symbol as a string (index.add dispatches on
                    "function", "functype", "variable", "include",
                    "struct", "enum", "typedef" and "macro")
      lineno        line number supplied at registration
      info, extra   opaque payloads supplied by the parser
      static        0 by default, set to 1 through set_static()
      conditionals  private copy of the conditional list active when the
                    symbol was seen, or None when that list is empty
    """

    def __init__(self, name, header=None, module=None, type=None, lineno = 0,
                 info=None, extra=None, conditionals = None):
        self.name = name
        self.header = header
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.lineno = lineno
        self.static = 0
        self.set_conditionals(conditionals)
        if self.name == debugsym:
            print("=> define %s : %s" % (debugsym, (module, type, info,
                                                    extra, conditionals)))

    def __repr__(self):
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module is not None:
            r = r + " from %s" % (self.module)
        if self.info is not None:
            r = r + " " + repr(self.info)
        if self.extra is not None:
            r = r + " " + repr(self.extra)
        if self.conditionals is not None:
            r = r + " " + repr(self.conditionals)
        return r

    def set_header(self, header):
        self.header = header
    def set_module(self, module):
        self.module = module
    def set_type(self, type):
        self.type = type
    def set_info(self, info):
        self.info = info
    def set_extra(self, extra):
        self.extra = extra
    def set_lineno(self, lineno):
        self.lineno = lineno
    def set_static(self, static):
        self.static = static
    def set_conditionals(self, conditionals):
        """Store a copy of conditionals; None or an empty list is kept as None."""
        if conditionals is None or len(conditionals) == 0:
            self.conditionals = None
        else:
            # Copy: the caller keeps mutating its own list while parsing.
            self.conditionals = conditionals[:]

    def get_name(self):
        return self.name
    def get_header(self):
        # Fixed: this used to return self.module.
        return self.header
    def get_module(self):
        return self.module
    def get_type(self):
        return self.type
    def get_info(self):
        return self.info
    def get_lineno(self):
        return self.lineno
    def get_extra(self):
        return self.extra
    def get_static(self):
        return self.static
    def get_conditionals(self):
        return self.conditionals

    def update(self, header, module, type = None, info = None, extra=None,
               conditionals=None):
        """Merge a later sighting of the same symbol into this record.

        header and type only fill a slot that is still empty.  module
        replaces the current one when it is empty or merely repeats the
        header.  info, extra and conditionals overwrite the stored value
        whenever they are not None.
        """
        if self.name == debugsym:
            print("=> update %s : %s" % (debugsym, (module, type, info,
                                                    extra, conditionals)))
        if header is not None and self.header is None:
            # Fixed: the module used to be stored in the header slot.
            self.set_header(header)
        if module is not None and (self.module is None or
                                   self.header == self.module):
            self.set_module(module)
        if type is not None and self.type is None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
        if conditionals is not None:
            self.set_conditionals(conditionals)
        # NOTE(review): hard-coded trace for a single symbol, looks like
        # leftover debugging; kept so the tool output does not change.
        if self.name == 'xmlCleanupPredefinedEntities':
            print("updating xmlCleanupPredefinedEntities : header %s module %s info %s" % (self.header, self.module, self.info))
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000165
166
class index:
    """Symbol table: identifier records grouped by kind.

    identifiers maps every registered name to its record; functions,
    variables, includes, structs, enums, typedefs and macros hold the
    records registered through add() under the matching type;
    references holds the records registered through add_ref().
    """

    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}
        self.info = {}

    def _register(self, name, header, module, static, type, lineno,
                  info, extra, conditionals):
        """Return the record for name, creating or updating it as needed.

        This used to be a try/except around identifier.update() called
        with one positional argument too many (lineno); the resulting
        TypeError was swallowed by a bare except, so an existing record
        was always thrown away and rebuilt.  The lookup is now explicit,
        update() gets the arguments it accepts and the line number is
        refreshed separately.
        """
        entry = self.identifiers.get(name)
        if entry is None:
            entry = identifier(name, header, module, type, lineno,
                               info, extra, conditionals)
            self.identifiers[name] = entry
        else:
            entry.update(header, module, type, info, extra, conditionals)
            entry.set_lineno(lineno)
        if static == 1:
            entry.set_static(1)
        return entry

    def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a reference to name; returns the record.

        Names starting with '__' are ignored and yield None.
        """
        if name[0:2] == '__':
            return None
        # NOTE(review): hard-coded trace, looks like leftover debugging.
        if name == 'xmlCleanupPredefinedEntities':
            print("adding ref xmlCleanupPredefinedEntities : header %s module %s info %s" % (header, module, info))
        d = self._register(name, header, module, static, type, lineno,
                           info, extra, conditionals)

        if type is not None:
            self.references[name] = d

        if name == debugsym:
            print("New ref: %s" % (d,))

        return d

    def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a definition of name and file it under its type.

        Names starting with '__' are ignored and yield None.  An unknown
        type is reported on stdout; the record is still kept in
        identifiers and returned.
        """
        if name[0:2] == '__':
            return None
        # NOTE(review): hard-coded trace, looks like leftover debugging.
        if name == 'xmlCleanupPredefinedEntities':
            print("adding xmlCleanupPredefinedEntities : header %s module %s info %s" % (header, module, info))
        d = self._register(name, header, module, static, type, lineno,
                           info, extra, conditionals)

        if type is not None:
            tables = {
                "function": self.functions,
                "functype": self.functions,
                "variable": self.variables,
                "include": self.includes,
                "struct": self.structs,
                "enum": self.enums,
                "typedef": self.typedefs,
                "macro": self.macros,
            }
            if type in tables:
                tables[type][name] = d
            else:
                print("Unable to register type  %s" % (type,))

        if name == debugsym:
            print("New symbol: %s" % (d,))

        return d

    def _merge_table(self, label, mine, theirs, overrides_macro = 0):
        """Copy the entries of theirs missing from mine, reporting clashes.

        With overrides_macro set, a macro of the same name is dropped
        first, because a macro might be used to override a function or
        variable definition.
        """
        for name in theirs.keys():
            if overrides_macro and name in self.macros:
                del self.macros[name]
            if name in mine:
                print("%s %s from %s redeclared in %s" % (
                    label, name, mine[name].header, theirs[name].header))
            else:
                mine[name] = theirs[name]
                self.identifiers[name] = theirs[name]

    def merge(self, idx):
        """Fold the symbols of idx into this index.

        A symbol already present here is kept and a "redeclared" line is
        printed.  includes and references are not merged.
        """
        self._merge_table("function", self.functions, idx.functions, 1)
        self._merge_table("variable", self.variables, idx.variables, 1)
        self._merge_table("struct", self.structs, idx.structs)
        self._merge_table("typedef", self.typedefs, idx.typedefs)
        for name in idx.macros.keys():
            #
            # macro might be used to override functions or variables
            # definitions
            #
            if name in self.variables or name in self.functions or \
               name in self.enums:
                continue
            if name in self.macros:
                print("macro %s from %s redeclared in %s" % (
                    name, self.macros[name].header, idx.macros[name].header))
            else:
                self.macros[name] = idx.macros[name]
                self.identifiers[name] = idx.macros[name]
        self._merge_table("enum", self.enums, idx.enums)

    def merge_public(self, idx):
        """Complete the functions known here with what idx knows about them.

        Functions of idx that are not already registered here are ignored.
        """
        for name in idx.functions.keys():
            if name not in self.functions:
                continue
            mine = self.functions[name]
            up = idx.functions[name]
            # check that function condition agrees with header
            if up.conditionals != mine.conditionals:
                print("Header condition differs from Function for %s:" % name)
                print(" H: %s" % mine.conditionals)
                print(" C: %s" % up.conditionals)
            mine.update(None, up.module, up.type, up.info, up.extra)
        # TODO: do the same for variables.

    def analyze_dict(self, type, dict):
        """Print how many entries dict holds and how many are not static."""
        count = 0
        public = 0
        for name in dict.keys():
            count = count + 1
            if dict[name].static == 0:
                public = public + 1
        if count != public:
            print(" %d %s , %d public" % (count, type, public))
        elif count != 0:
            print(" %d public %s" % (count, type))

    def analyze(self):
        """Print the counts for functions, variables, structs, typedefs, macros."""
        self.analyze_dict("functions", self.functions)
        self.analyze_dict("variables", self.variables)
        self.analyze_dict("structs", self.structs)
        self.analyze_dict("typedefs", self.typedefs)
        self.analyze_dict("macros", self.macros)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000349
class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line.

       token() returns (kind, text) tuples where kind is one of 'preproc',
       'string', 'comment', 'name', 'sep' or 'op', and None once the
       input is exhausted.  input only needs a readline() method."""

    # Characters ending a 'name' token.  Note that '.' and '_' are not
    # part of the set, so "a.b" or "_x" come out as a single name.
    _NAME_STOP = " \t(){}:;,+-*/%&!|[]=><"
    _SEPARATORS = "(){}:;,[]"
    _OP_FIRST = "+-*><=/%&!|."
    _OP_SECOND = "+-*><=/%&!|"

    def __init__(self, input):
        self.input = input
        self.tokens = []      # tokens already split but not yet delivered
        self.line = ""        # unprocessed remainder of the current line
        self.lineno = 0
        # Initialised here so that debug() works before the first token
        # has been read (it used to raise AttributeError).
        self.last = None

    def getline(self):
        """Return the next non-blank line, stripped, or None at end of input.

        A trailing backslash glues the following line on; a blank line or
        the end of input stops the gluing.
        """
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = line.strip()
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = n.strip()
                if not n:
                    # line may be empty here; the outer loop then goes on
                    # reading.
                    break
                line = line + n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        """Put token back so that it is the next one returned by token()."""
        self.tokens.insert(0, token)

    def debug(self):
        print("Last token:  %s" % (self.last,))
        print("Token queue:  %s" % (self.tokens,))
        print("Line %d end:  %s" % (self.lineno, self.line))

    def _read_string(self, line):
        """Lex a string or character literal starting at line[0].

        The token text excludes the quotes and keeps backslash escapes
        untouched.  Returns None if the input ends inside the literal.
        """
        end = line[0]
        line = line[1:]
        found = 0
        tok = ""
        while found == 0:
            i = 0
            l = len(line)
            while i < l:
                if line[i] == end:
                    self.line = line[i+1:]
                    line = line[:i]
                    found = 1
                    break
                if line[i] == '\\':
                    # skip the escaped character
                    i = i + 1
                i = i + 1
            tok = tok + line
            if found == 0:
                line = self.getline()
                if line is None:
                    return None
        self.last = ('string', tok)
        return self.last

    def _read_block_comment(self, line):
        """Lex a /* ... */ comment starting at line[0]; lines are joined
        with a newline.  Returns None if the input ends inside the comment.
        """
        line = line[2:]
        found = 0
        tok = ""
        while found == 0:
            i = line.find('*/')
            if i != -1:
                self.line = line[i+2:]
                # The character in front of the terminator is dropped as
                # well.  When the terminator starts the line (i == 0) the
                # slice is [:-1], which leaves a lone '*' as last comment
                # line; the comment parsers of this file skip such lines,
                # so this is kept exactly as it was.
                line = line[:i-1]
                found = 1
            if tok != "":
                tok = tok + "\n"
            tok = tok + line
            if found == 0:
                line = self.getline()
                if line is None:
                    return None
        self.last = ('comment', tok)
        return self.last

    def _cut_code(self, line):
        """Return the part of line before the first comment or literal;
        the rest is saved in self.line for the next round."""
        l = len(line)
        i = 0
        while i < l:
            c = line[i]
            if c == '"' or c == "'" or \
               (c == '/' and i+1 < l and (line[i+1] == '/' or
                                          line[i+1] == '*')):
                self.line = line[i:]
                return line[:i]
            i = i + 1
        return line

    def _tokenize_code(self, line):
        """Split plain code (no comment, no literal) and queue the tokens."""
        l = len(line)
        i = 0
        while i < l:
            c = line[i]
            if c == ' ' or c == '\t':
                i = i + 1
                continue
            if c in self._SEPARATORS:
                self.tokens.append(('sep', c))
                i = i + 1
                continue
            if c in self._OP_FIRST:
                if c == '.' and i + 2 < l and \
                   line[i+1] == '.' and line[i+2] == '.':
                    self.tokens.append(('name', '...'))
                    i = i + 3
                    continue
                j = i + 1
                if j < l and line[j] in self._OP_SECOND:
                    self.tokens.append(('op', line[i:j+1]))
                    i = j + 1
                else:
                    self.tokens.append(('op', c))
                    i = i + 1
                continue
            # Anything else starts a name running up to the next stop
            # character.  c itself is never a stop character here (they
            # were all handled above), so the loop always makes progress.
            s = i
            while i < l and line[i] not in self._NAME_STOP:
                i = i + 1
            self.tokens.append(('name', line[s:i]))

    def token(self):
        """Return the next token, or None at end of input."""
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line is None:
                return None

            if line[0] == '#':
                # a preprocessor line is only split on white space
                self.tokens = [('preproc', word) for word in line.split()]
                break
            if line[0] == '"' or line[0] == "'":
                return self._read_string(line)
            if line[0:2] == '/*':
                return self._read_block_comment(line)
            if line[0:2] == '//':
                self.last = ('comment', line[2:])
                return self.last
            self._tokenize_code(self._cut_code(line))

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000555
556class CParser:
Daniel Veillardbe586972003-11-18 20:56:51 +0000557 """The C module parser"""
558 def __init__(self, filename, idx = None):
559 self.filename = filename
560 if len(filename) > 2 and filename[-2:] == '.h':
561 self.is_header = 1
562 else:
563 self.is_header = 0
564 self.input = open(filename)
565 self.lexer = CLexer(self.input)
566 if idx == None:
567 self.index = index()
568 else:
569 self.index = idx
570 self.top_comment = ""
571 self.last_comment = ""
572 self.comment = None
573 self.collect_ref = 0
Daniel Veillard1e906612003-12-05 14:57:46 +0000574 self.no_error = 0
Daniel Veillarda2351322004-06-27 12:08:10 +0000575 self.conditionals = []
576 self.defines = []
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000577
Daniel Veillardbe586972003-11-18 20:56:51 +0000578 def collect_references(self):
579 self.collect_ref = 1
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000580
Daniel Veillard1e906612003-12-05 14:57:46 +0000581 def stop_error(self):
582 self.no_error = 1
583
584 def start_error(self):
585 self.no_error = 0
586
Daniel Veillardbe586972003-11-18 20:56:51 +0000587 def lineno(self):
588 return self.lexer.getlineno()
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000589
Daniel Veillardbe586972003-11-18 20:56:51 +0000590 def index_add(self, name, module, static, type, info=None, extra = None):
Daniel Veillard5d4644e2005-04-01 13:11:58 +0000591 if self.is_header == 1:
592 self.index.add(name, module, module, static, type, self.lineno(),
593 info, extra, self.conditionals)
594 else:
595 self.index.add(name, None, module, static, type, self.lineno(),
596 info, extra, self.conditionals)
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000597
Daniel Veillardbe586972003-11-18 20:56:51 +0000598 def index_add_ref(self, name, module, static, type, info=None,
599 extra = None):
Daniel Veillard5d4644e2005-04-01 13:11:58 +0000600 if self.is_header == 1:
601 self.index.add_ref(name, module, module, static, type,
602 self.lineno(), info, extra, self.conditionals)
603 else:
604 self.index.add_ref(name, None, module, static, type, self.lineno(),
605 info, extra, self.conditionals)
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000606
Daniel Veillard1e906612003-12-05 14:57:46 +0000607 def warning(self, msg):
608 if self.no_error:
609 return
610 print msg
611
Daniel Veillardbe586972003-11-18 20:56:51 +0000612 def error(self, msg, token=-1):
Daniel Veillard1e906612003-12-05 14:57:46 +0000613 if self.no_error:
614 return
615
Daniel Veillardbe586972003-11-18 20:56:51 +0000616 print "Parse Error: " + msg
617 if token != -1:
618 print "Got token ", token
619 self.lexer.debug()
620 sys.exit(1)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000621
Daniel Veillardbe586972003-11-18 20:56:51 +0000622 def debug(self, msg, token=-1):
623 print "Debug: " + msg
624 if token != -1:
625 print "Got token ", token
626 self.lexer.debug()
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000627
Daniel Veillardbe586972003-11-18 20:56:51 +0000628 def parseTopComment(self, comment):
629 res = {}
630 lines = string.split(comment, "\n")
631 item = None
632 for line in lines:
633 while line != "" and (line[0] == ' ' or line[0] == '\t'):
634 line = line[1:]
635 while line != "" and line[0] == '*':
636 line = line[1:]
637 while line != "" and (line[0] == ' ' or line[0] == '\t'):
638 line = line[1:]
639 try:
640 (it, line) = string.split(line, ":", 1)
641 item = it
642 while line != "" and (line[0] == ' ' or line[0] == '\t'):
643 line = line[1:]
644 if res.has_key(item):
645 res[item] = res[item] + " " + line
646 else:
647 res[item] = line
648 except:
649 if item != None:
650 if res.has_key(item):
651 res[item] = res[item] + " " + line
652 else:
653 res[item] = line
654 self.index.info = res
655
656 def parseComment(self, token):
657 if self.top_comment == "":
658 self.top_comment = token[1]
659 if self.comment == None or token[1][0] == '*':
660 self.comment = token[1];
661 else:
662 self.comment = self.comment + token[1]
663 token = self.lexer.token()
Daniel Veillard1e906612003-12-05 14:57:46 +0000664
665 if string.find(self.comment, "DOC_DISABLE") != -1:
666 self.stop_error()
667
668 if string.find(self.comment, "DOC_ENABLE") != -1:
669 self.start_error()
670
Daniel Veillardbe586972003-11-18 20:56:51 +0000671 return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000672
    #
    # Parse a comment block associated with a macro
    #
Daniel Veillardbe586972003-11-18 20:56:51 +0000676 def parseMacroComment(self, name, quiet = 0):
677 if name[0:2] == '__':
678 quiet = 1
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000679
Daniel Veillardbe586972003-11-18 20:56:51 +0000680 args = []
681 desc = ""
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000682
Daniel Veillardbe586972003-11-18 20:56:51 +0000683 if self.comment == None:
684 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000685 self.warning("Missing comment for macro %s" % (name))
Daniel Veillardbe586972003-11-18 20:56:51 +0000686 return((args, desc))
687 if self.comment[0] != '*':
688 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000689 self.warning("Missing * in macro comment for %s" % (name))
Daniel Veillardbe586972003-11-18 20:56:51 +0000690 return((args, desc))
691 lines = string.split(self.comment, '\n')
692 if lines[0] == '*':
693 del lines[0]
694 if lines[0] != "* %s:" % (name):
695 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000696 self.warning("Misformatted macro comment for %s" % (name))
697 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
Daniel Veillardbe586972003-11-18 20:56:51 +0000698 return((args, desc))
699 del lines[0]
700 while lines[0] == '*':
701 del lines[0]
702 while len(lines) > 0 and lines[0][0:3] == '* @':
703 l = lines[0][3:]
704 try:
705 (arg, desc) = string.split(l, ':', 1)
706 desc=string.strip(desc)
707 arg=string.strip(arg)
708 except:
709 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000710 self.warning("Misformatted macro comment for %s" % (name))
711 self.warning(" problem with '%s'" % (lines[0]))
Daniel Veillardbe586972003-11-18 20:56:51 +0000712 del lines[0]
713 continue
714 del lines[0]
715 l = string.strip(lines[0])
716 while len(l) > 2 and l[0:3] != '* @':
717 while l[0] == '*':
718 l = l[1:]
719 desc = desc + ' ' + string.strip(l)
720 del lines[0]
721 if len(lines) == 0:
722 break
723 l = lines[0]
724 args.append((arg, desc))
725 while len(lines) > 0 and lines[0] == '*':
726 del lines[0]
727 desc = ""
728 while len(lines) > 0:
729 l = lines[0]
730 while len(l) > 0 and l[0] == '*':
731 l = l[1:]
732 l = string.strip(l)
733 desc = desc + " " + l
734 del lines[0]
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000735
Daniel Veillardbe586972003-11-18 20:56:51 +0000736 desc = string.strip(desc)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000737
Daniel Veillardbe586972003-11-18 20:56:51 +0000738 if quiet == 0:
739 if desc == "":
Daniel Veillard1e906612003-12-05 14:57:46 +0000740 self.warning("Macro comment for %s lack description of the macro" % (name))
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000741
Daniel Veillardbe586972003-11-18 20:56:51 +0000742 return((args, desc))
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000743
    #
    # Parse a comment block and merge the information found in the
    # parameter descriptions; finally return a block as complete
    # as possible
    #
Daniel Veillardbe586972003-11-18 20:56:51 +0000749 def mergeFunctionComment(self, name, description, quiet = 0):
750 if name == 'main':
751 quiet = 1
752 if name[0:2] == '__':
753 quiet = 1
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000754
Daniel Veillardbe586972003-11-18 20:56:51 +0000755 (ret, args) = description
756 desc = ""
757 retdesc = ""
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000758
Daniel Veillardbe586972003-11-18 20:56:51 +0000759 if self.comment == None:
760 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000761 self.warning("Missing comment for function %s" % (name))
Daniel Veillardbe586972003-11-18 20:56:51 +0000762 return(((ret[0], retdesc), args, desc))
763 if self.comment[0] != '*':
764 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000765 self.warning("Missing * in function comment for %s" % (name))
Daniel Veillardbe586972003-11-18 20:56:51 +0000766 return(((ret[0], retdesc), args, desc))
767 lines = string.split(self.comment, '\n')
768 if lines[0] == '*':
769 del lines[0]
770 if lines[0] != "* %s:" % (name):
771 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000772 self.warning("Misformatted function comment for %s" % (name))
773 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
Daniel Veillardbe586972003-11-18 20:56:51 +0000774 return(((ret[0], retdesc), args, desc))
775 del lines[0]
776 while lines[0] == '*':
777 del lines[0]
778 nbargs = len(args)
779 while len(lines) > 0 and lines[0][0:3] == '* @':
780 l = lines[0][3:]
781 try:
782 (arg, desc) = string.split(l, ':', 1)
783 desc=string.strip(desc)
784 arg=string.strip(arg)
785 except:
786 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000787 self.warning("Misformatted function comment for %s" % (name))
788 self.warning(" problem with '%s'" % (lines[0]))
Daniel Veillardbe586972003-11-18 20:56:51 +0000789 del lines[0]
790 continue
791 del lines[0]
792 l = string.strip(lines[0])
793 while len(l) > 2 and l[0:3] != '* @':
794 while l[0] == '*':
795 l = l[1:]
796 desc = desc + ' ' + string.strip(l)
797 del lines[0]
798 if len(lines) == 0:
799 break
800 l = lines[0]
801 i = 0
802 while i < nbargs:
803 if args[i][1] == arg:
804 args[i] = (args[i][0], arg, desc)
805 break;
806 i = i + 1
807 if i >= nbargs:
808 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000809 self.warning("Unable to find arg %s from function comment for %s" % (
810 arg, name))
Daniel Veillardbe586972003-11-18 20:56:51 +0000811 while len(lines) > 0 and lines[0] == '*':
812 del lines[0]
813 desc = ""
814 while len(lines) > 0:
815 l = lines[0]
816 while len(l) > 0 and l[0] == '*':
817 l = l[1:]
818 l = string.strip(l)
819 if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return":
820 try:
821 l = string.split(l, ' ', 1)[1]
822 except:
823 l = ""
824 retdesc = string.strip(l)
825 del lines[0]
826 while len(lines) > 0:
827 l = lines[0]
828 while len(l) > 0 and l[0] == '*':
829 l = l[1:]
830 l = string.strip(l)
831 retdesc = retdesc + " " + l
832 del lines[0]
833 else:
834 desc = desc + " " + l
835 del lines[0]
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000836
Daniel Veillardbe586972003-11-18 20:56:51 +0000837 retdesc = string.strip(retdesc)
838 desc = string.strip(desc)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000839
Daniel Veillardbe586972003-11-18 20:56:51 +0000840 if quiet == 0:
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000841 #
842 # report missing comments
843 #
Daniel Veillardbe586972003-11-18 20:56:51 +0000844 i = 0
845 while i < nbargs:
846 if args[i][2] == None and args[i][0] != "void" and args[i][1] != None:
William M. Brack21e4ef22005-01-02 09:53:13 +0000847 self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
Daniel Veillardbe586972003-11-18 20:56:51 +0000848 i = i + 1
849 if retdesc == "" and ret[0] != "void":
William M. Brack21e4ef22005-01-02 09:53:13 +0000850 self.warning("Function comment for %s lacks description of return value" % (name))
Daniel Veillardbe586972003-11-18 20:56:51 +0000851 if desc == "":
William M. Brack21e4ef22005-01-02 09:53:13 +0000852 self.warning("Function comment for %s lacks description of the function" % (name))
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000853
854
Daniel Veillardbe586972003-11-18 20:56:51 +0000855 return(((ret[0], retdesc), args, desc))
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000856
Daniel Veillardbe586972003-11-18 20:56:51 +0000857 def parsePreproc(self, token):
Daniel Veillarda2351322004-06-27 12:08:10 +0000858 if debug:
859 print "=> preproc ", token, self.lexer.tokens
Daniel Veillardbe586972003-11-18 20:56:51 +0000860 name = token[1]
861 if name == "#include":
862 token = self.lexer.token()
863 if token == None:
864 return None
865 if token[0] == 'preproc':
866 self.index_add(token[1], self.filename, not self.is_header,
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000867 "include")
Daniel Veillardbe586972003-11-18 20:56:51 +0000868 return self.lexer.token()
869 return token
870 if name == "#define":
871 token = self.lexer.token()
872 if token == None:
873 return None
874 if token[0] == 'preproc':
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000875 # TODO macros with arguments
Daniel Veillardbe586972003-11-18 20:56:51 +0000876 name = token[1]
877 lst = []
878 token = self.lexer.token()
879 while token != None and token[0] == 'preproc' and \
880 token[1][0] != '#':
881 lst.append(token[1])
882 token = self.lexer.token()
883 try:
884 name = string.split(name, '(') [0]
885 except:
886 pass
887 info = self.parseMacroComment(name, not self.is_header)
888 self.index_add(name, self.filename, not self.is_header,
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000889 "macro", info)
Daniel Veillardbe586972003-11-18 20:56:51 +0000890 return token
William M. Brack21e4ef22005-01-02 09:53:13 +0000891
892 #
893 # Processing of conditionals modified by Bill 1/1/05
894 #
895 # We process conditionals (i.e. tokens from #ifdef, #ifndef,
896 # #if, #else and #endif) for headers and mainline code,
897 # store the ones from the header in libxml2-api.xml, and later
898 # (in the routine merge_public) verify that the two (header and
899 # mainline code) agree.
900 #
901 # There is a small problem with processing the headers. Some of
902 # the variables are not concerned with enabling / disabling of
903 # library functions (e.g. '__XML_PARSER_H__'), and we don't want
904 # them to be included in libxml2-api.xml, or involved in
905 # the check between the header and the mainline code. To
906 # accomplish this, we ignore any conditional which doesn't include
907 # the string 'ENABLED'
908 #
909 if name == "#ifdef":
910 apstr = self.lexer.tokens[0][1]
Daniel Veillarda2351322004-06-27 12:08:10 +0000911 try:
William M. Brack21e4ef22005-01-02 09:53:13 +0000912 self.defines.append(apstr)
913 if string.find(apstr, 'ENABLED') != -1:
914 self.conditionals.append("defined(%s)" % apstr)
Daniel Veillarda2351322004-06-27 12:08:10 +0000915 except:
916 pass
William M. Brack21e4ef22005-01-02 09:53:13 +0000917 elif name == "#ifndef":
918 apstr = self.lexer.tokens[0][1]
919 try:
920 self.defines.append(apstr)
921 if string.find(apstr, 'ENABLED') != -1:
922 self.conditionals.append("!defined(%s)" % apstr)
923 except:
924 pass
925 elif name == "#if":
926 apstr = ""
927 for tok in self.lexer.tokens:
928 if apstr != "":
929 apstr = apstr + " "
930 apstr = apstr + tok[1]
931 try:
932 self.defines.append(apstr)
933 if string.find(apstr, 'ENABLED') != -1:
934 self.conditionals.append(apstr)
935 except:
936 pass
937 elif name == "#else":
938 if self.conditionals != [] and \
939 string.find(self.defines[-1], 'ENABLED') != -1:
940 self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
941 elif name == "#endif":
942 if self.conditionals != [] and \
943 string.find(self.defines[-1], 'ENABLED') != -1:
Daniel Veillarda2351322004-06-27 12:08:10 +0000944 self.conditionals = self.conditionals[:-1]
945 self.defines = self.defines[:-1]
Daniel Veillardbe586972003-11-18 20:56:51 +0000946 token = self.lexer.token()
947 while token != None and token[0] == 'preproc' and \
948 token[1][0] != '#':
949 token = self.lexer.token()
950 return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000951
    #
    # Token acquisition on top of the lexer; it handles the preprocessor
    # and comments internally since they are logically not part of
    # the program structure.
    #
Daniel Veillardbe586972003-11-18 20:56:51 +0000957 def token(self):
958 global ignored_words
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000959
Daniel Veillardbe586972003-11-18 20:56:51 +0000960 token = self.lexer.token()
961 while token != None:
962 if token[0] == 'comment':
963 token = self.parseComment(token)
964 continue
965 elif token[0] == 'preproc':
966 token = self.parsePreproc(token)
967 continue
968 elif token[0] == "name" and ignored_words.has_key(token[1]):
969 (n, info) = ignored_words[token[1]]
970 i = 0
971 while i < n:
972 token = self.lexer.token()
973 i = i + 1
974 token = self.lexer.token()
975 continue
976 else:
977 if debug:
978 print "=> ", token
979 return token
980 return None
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000981
982 #
983 # Parse a typedef, it records the type and its name.
984 #
Daniel Veillardbe586972003-11-18 20:56:51 +0000985 def parseTypedef(self, token):
986 if token == None:
987 return None
988 token = self.parseType(token)
989 if token == None:
990 self.error("parsing typedef")
991 return None
992 base_type = self.type
993 type = base_type
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000994 #self.debug("end typedef type", token)
Daniel Veillardbe586972003-11-18 20:56:51 +0000995 while token != None:
996 if token[0] == "name":
997 name = token[1]
998 signature = self.signature
999 if signature != None:
1000 type = string.split(type, '(')[0]
1001 d = self.mergeFunctionComment(name,
1002 ((type, None), signature), 1)
1003 self.index_add(name, self.filename, not self.is_header,
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001004 "functype", d)
Daniel Veillardbe586972003-11-18 20:56:51 +00001005 else:
1006 if base_type == "struct":
1007 self.index_add(name, self.filename, not self.is_header,
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001008 "struct", type)
Daniel Veillardbe586972003-11-18 20:56:51 +00001009 base_type = "struct " + name
1010 else:
1011 self.index_add(name, self.filename, not self.is_header,
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001012 "typedef", type)
Daniel Veillardbe586972003-11-18 20:56:51 +00001013 token = self.token()
1014 else:
1015 self.error("parsing typedef: expecting a name")
1016 return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001017 #self.debug("end typedef", token)
Daniel Veillardbe586972003-11-18 20:56:51 +00001018 if token != None and token[0] == 'sep' and token[1] == ',':
1019 type = base_type
1020 token = self.token()
1021 while token != None and token[0] == "op":
1022 type = type + token[1]
1023 token = self.token()
1024 elif token != None and token[0] == 'sep' and token[1] == ';':
1025 break;
1026 elif token != None and token[0] == 'name':
1027 type = base_type
1028 continue;
1029 else:
1030 self.error("parsing typedef: expecting ';'", token)
1031 return token
1032 token = self.token()
1033 return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001034
    #
    # Parse a C code block, used for function bodies: it parses up to
    # and including the balancing }
    #
Daniel Veillardbe586972003-11-18 20:56:51 +00001039 def parseBlock(self, token):
1040 while token != None:
1041 if token[0] == "sep" and token[1] == "{":
1042 token = self.token()
1043 token = self.parseBlock(token)
1044 elif token[0] == "sep" and token[1] == "}":
1045 self.comment = None
1046 token = self.token()
1047 return token
1048 else:
1049 if self.collect_ref == 1:
1050 oldtok = token
1051 token = self.token()
1052 if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
1053 if token[0] == "sep" and token[1] == "(":
1054 self.index_add_ref(oldtok[1], self.filename,
Daniel Veillardd8cf9062003-11-11 21:12:36 +00001055 0, "function")
Daniel Veillardbe586972003-11-18 20:56:51 +00001056 token = self.token()
1057 elif token[0] == "name":
1058 token = self.token()
1059 if token[0] == "sep" and (token[1] == ";" or
1060 token[1] == "," or token[1] == "="):
1061 self.index_add_ref(oldtok[1], self.filename,
Daniel Veillardd8cf9062003-11-11 21:12:36 +00001062 0, "type")
Daniel Veillardbe586972003-11-18 20:56:51 +00001063 elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
1064 self.index_add_ref(oldtok[1], self.filename,
Daniel Veillardd8cf9062003-11-11 21:12:36 +00001065 0, "typedef")
Daniel Veillardbe586972003-11-18 20:56:51 +00001066 elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
1067 self.index_add_ref(oldtok[1], self.filename,
Daniel Veillardd8cf9062003-11-11 21:12:36 +00001068 0, "typedef")
1069
Daniel Veillardbe586972003-11-18 20:56:51 +00001070 else:
1071 token = self.token()
1072 return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001073
1074 #
1075 # Parse a C struct definition till the balancing }
1076 #
Daniel Veillardbe586972003-11-18 20:56:51 +00001077 def parseStruct(self, token):
1078 fields = []
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001079 #self.debug("start parseStruct", token)
Daniel Veillardbe586972003-11-18 20:56:51 +00001080 while token != None:
1081 if token[0] == "sep" and token[1] == "{":
1082 token = self.token()
1083 token = self.parseTypeBlock(token)
1084 elif token[0] == "sep" and token[1] == "}":
1085 self.struct_fields = fields
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001086 #self.debug("end parseStruct", token)
1087 #print fields
Daniel Veillardbe586972003-11-18 20:56:51 +00001088 token = self.token()
1089 return token
1090 else:
1091 base_type = self.type
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001092 #self.debug("before parseType", token)
Daniel Veillardbe586972003-11-18 20:56:51 +00001093 token = self.parseType(token)
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001094 #self.debug("after parseType", token)
Daniel Veillardbe586972003-11-18 20:56:51 +00001095 if token != None and token[0] == "name":
1096 fname = token[1]
1097 token = self.token()
1098 if token[0] == "sep" and token[1] == ";":
1099 self.comment = None
1100 token = self.token()
1101 fields.append((self.type, fname, self.comment))
1102 self.comment = None
1103 else:
1104 self.error("parseStruct: expecting ;", token)
1105 elif token != None and token[0] == "sep" and token[1] == "{":
1106 token = self.token()
1107 token = self.parseTypeBlock(token)
1108 if token != None and token[0] == "name":
1109 token = self.token()
1110 if token != None and token[0] == "sep" and token[1] == ";":
1111 token = self.token()
1112 else:
1113 self.error("parseStruct: expecting ;", token)
1114 else:
1115 self.error("parseStruct: name", token)
1116 token = self.token()
1117 self.type = base_type;
1118 self.struct_fields = fields
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001119 #self.debug("end parseStruct", token)
1120 #print fields
Daniel Veillardbe586972003-11-18 20:56:51 +00001121 return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001122
1123 #
1124 # Parse a C enum block, parse till the balancing }
1125 #
Daniel Veillardbe586972003-11-18 20:56:51 +00001126 def parseEnumBlock(self, token):
1127 self.enums = []
1128 name = None
1129 self.comment = None
1130 comment = ""
1131 value = "0"
1132 while token != None:
1133 if token[0] == "sep" and token[1] == "{":
1134 token = self.token()
1135 token = self.parseTypeBlock(token)
1136 elif token[0] == "sep" and token[1] == "}":
1137 if name != None:
1138 if self.comment != None:
1139 comment = self.comment
1140 self.comment = None
1141 self.enums.append((name, value, comment))
1142 token = self.token()
1143 return token
1144 elif token[0] == "name":
1145 if name != None:
1146 if self.comment != None:
1147 comment = string.strip(self.comment)
1148 self.comment = None
1149 self.enums.append((name, value, comment))
1150 name = token[1]
1151 comment = ""
1152 token = self.token()
1153 if token[0] == "op" and token[1][0] == "=":
1154 value = ""
1155 if len(token[1]) > 1:
1156 value = token[1][1:]
1157 token = self.token()
1158 while token[0] != "sep" or (token[1] != ',' and
1159 token[1] != '}'):
1160 value = value + token[1]
1161 token = self.token()
1162 else:
1163 try:
1164 value = "%d" % (int(value) + 1)
1165 except:
Daniel Veillard1e906612003-12-05 14:57:46 +00001166 self.warning("Failed to compute value of enum %s" % (name))
Daniel Veillardbe586972003-11-18 20:56:51 +00001167 value=""
1168 if token[0] == "sep" and token[1] == ",":
1169 token = self.token()
1170 else:
1171 token = self.token()
1172 return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001173
    #
    # Parse a C definition block, used for structs: it parses up to
    # and including the balancing }
    #
Daniel Veillardbe586972003-11-18 20:56:51 +00001178 def parseTypeBlock(self, token):
1179 while token != None:
1180 if token[0] == "sep" and token[1] == "{":
1181 token = self.token()
1182 token = self.parseTypeBlock(token)
1183 elif token[0] == "sep" and token[1] == "}":
1184 token = self.token()
1185 return token
1186 else:
1187 token = self.token()
1188 return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001189
1190 #
1191 # Parse a type: the fact that the type name can either occur after
1192 # the definition or within the definition makes it a little harder
1193 # if inside, the name token is pushed back before returning
1194 #
Daniel Veillardbe586972003-11-18 20:56:51 +00001195 def parseType(self, token):
1196 self.type = ""
1197 self.struct_fields = []
1198 self.signature = None
1199 if token == None:
1200 return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001201
Daniel Veillardbe586972003-11-18 20:56:51 +00001202 while token[0] == "name" and (
1203 token[1] == "const" or token[1] == "unsigned"):
1204 if self.type == "":
1205 self.type = token[1]
1206 else:
1207 self.type = self.type + " " + token[1]
1208 token = self.token()
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001209
Daniel Veillardbe586972003-11-18 20:56:51 +00001210 if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
1211 if self.type == "":
1212 self.type = token[1]
1213 else:
1214 self.type = self.type + " " + token[1]
1215 if token[0] == "name" and token[1] == "int":
1216 if self.type == "":
1217 self.type = tmp[1]
1218 else:
1219 self.type = self.type + " " + tmp[1]
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001220
Daniel Veillardbe586972003-11-18 20:56:51 +00001221 elif token[0] == "name" and token[1] == "struct":
1222 if self.type == "":
1223 self.type = token[1]
1224 else:
1225 self.type = self.type + " " + token[1]
1226 token = self.token()
1227 nametok = None
1228 if token[0] == "name":
1229 nametok = token
1230 token = self.token()
1231 if token != None and token[0] == "sep" and token[1] == "{":
1232 token = self.token()
1233 token = self.parseStruct(token)
1234 elif token != None and token[0] == "op" and token[1] == "*":
1235 self.type = self.type + " " + nametok[1] + " *"
1236 token = self.token()
1237 while token != None and token[0] == "op" and token[1] == "*":
1238 self.type = self.type + " *"
1239 token = self.token()
1240 if token[0] == "name":
1241 nametok = token
1242 token = self.token()
1243 else:
1244 self.error("struct : expecting name", token)
1245 return token
1246 elif token != None and token[0] == "name" and nametok != None:
1247 self.type = self.type + " " + nametok[1]
1248 return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001249
Daniel Veillardbe586972003-11-18 20:56:51 +00001250 if nametok != None:
1251 self.lexer.push(token)
1252 token = nametok
1253 return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001254
Daniel Veillardbe586972003-11-18 20:56:51 +00001255 elif token[0] == "name" and token[1] == "enum":
1256 if self.type == "":
1257 self.type = token[1]
1258 else:
1259 self.type = self.type + " " + token[1]
1260 self.enums = []
1261 token = self.token()
1262 if token != None and token[0] == "sep" and token[1] == "{":
1263 token = self.token()
1264 token = self.parseEnumBlock(token)
1265 else:
1266 self.error("parsing enum: expecting '{'", token)
1267 enum_type = None
1268 if token != None and token[0] != "name":
1269 self.lexer.push(token)
1270 token = ("name", "enum")
1271 else:
1272 enum_type = token[1]
1273 for enum in self.enums:
1274 self.index_add(enum[0], self.filename,
1275 not self.is_header, "enum",
1276 (enum[1], enum[2], enum_type))
1277 return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001278
Daniel Veillardbe586972003-11-18 20:56:51 +00001279 elif token[0] == "name":
1280 if self.type == "":
1281 self.type = token[1]
1282 else:
1283 self.type = self.type + " " + token[1]
1284 else:
1285 self.error("parsing type %s: expecting a name" % (self.type),
1286 token)
1287 return token
1288 token = self.token()
1289 while token != None and (token[0] == "op" or
1290 token[0] == "name" and token[1] == "const"):
1291 self.type = self.type + " " + token[1]
1292 token = self.token()
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001293
1294 #
1295 # if there is a parenthesis here, this means a function type
1296 #
Daniel Veillardbe586972003-11-18 20:56:51 +00001297 if token != None and token[0] == "sep" and token[1] == '(':
1298 self.type = self.type + token[1]
1299 token = self.token()
1300 while token != None and token[0] == "op" and token[1] == '*':
1301 self.type = self.type + token[1]
1302 token = self.token()
1303 if token == None or token[0] != "name" :
1304 self.error("parsing function type, name expected", token);
1305 return token
1306 self.type = self.type + token[1]
1307 nametok = token
1308 token = self.token()
1309 if token != None and token[0] == "sep" and token[1] == ')':
1310 self.type = self.type + token[1]
1311 token = self.token()
1312 if token != None and token[0] == "sep" and token[1] == '(':
1313 token = self.token()
1314 type = self.type;
1315 token = self.parseSignature(token);
1316 self.type = type;
1317 else:
1318 self.error("parsing function type, '(' expected", token);
1319 return token
1320 else:
1321 self.error("parsing function type, ')' expected", token);
1322 return token
1323 self.lexer.push(token)
1324 token = nametok
1325 return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001326
1327 #
1328 # do some lookahead for arrays
1329 #
Daniel Veillardbe586972003-11-18 20:56:51 +00001330 if token != None and token[0] == "name":
1331 nametok = token
1332 token = self.token()
1333 if token != None and token[0] == "sep" and token[1] == '[':
1334 self.type = self.type + nametok[1]
1335 while token != None and token[0] == "sep" and token[1] == '[':
1336 self.type = self.type + token[1]
1337 token = self.token()
1338 while token != None and token[0] != 'sep' and \
1339 token[1] != ']' and token[1] != ';':
1340 self.type = self.type + token[1]
1341 token = self.token()
1342 if token != None and token[0] == 'sep' and token[1] == ']':
1343 self.type = self.type + token[1]
1344 token = self.token()
1345 else:
1346 self.error("parsing array type, ']' expected", token);
1347 return token
1348 elif token != None and token[0] == "sep" and token[1] == ':':
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001349 # remove :12 in case it's a limited int size
Daniel Veillardbe586972003-11-18 20:56:51 +00001350 token = self.token()
1351 token = self.token()
1352 self.lexer.push(token)
1353 token = nametok
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001354
Daniel Veillardbe586972003-11-18 20:56:51 +00001355 return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001356
1357 #
1358 # Parse a signature: '(' has been parsed and we scan the type definition
1359 # up to the ')' included
Daniel Veillardbe586972003-11-18 20:56:51 +00001360 def parseSignature(self, token):
1361 signature = []
1362 if token != None and token[0] == "sep" and token[1] == ')':
1363 self.signature = []
1364 token = self.token()
1365 return token
1366 while token != None:
1367 token = self.parseType(token)
1368 if token != None and token[0] == "name":
1369 signature.append((self.type, token[1], None))
1370 token = self.token()
1371 elif token != None and token[0] == "sep" and token[1] == ',':
1372 token = self.token()
1373 continue
1374 elif token != None and token[0] == "sep" and token[1] == ')':
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001375 # only the type was provided
Daniel Veillardbe586972003-11-18 20:56:51 +00001376 if self.type == "...":
1377 signature.append((self.type, "...", None))
1378 else:
1379 signature.append((self.type, None, None))
1380 if token != None and token[0] == "sep":
1381 if token[1] == ',':
1382 token = self.token()
1383 continue
1384 elif token[1] == ')':
1385 token = self.token()
1386 break
1387 self.signature = signature
1388 return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001389
1390 #
1391 # Parse a global definition, be it a type, variable or function
1392 # the extern "C" blocks are a bit nasty and require it to recurse.
1393 #
    def parseGlobal(self, token):
        """Parse one top-level definition starting at token.

        Handles 'extern "C" { ... }' blocks (by recursing on their
        content), 'static' and 'extern' declarations, typedefs (delegated
        to self.parseTypedef()), variables and functions.  Variables,
        structs and functions are recorded through self.index_add();
        function comments are merged with self.mergeFunctionComment().
        Returns the token to continue with, or None at end of input.
        """
        # static is passed to index_add() for functions, and as the
        # 'quiet' flag of mergeFunctionComment() for function definitions
        static = 0
        if token[1] == 'extern':
            token = self.token()
            if token == None:
                return token
            if token[0] == 'string':
                if token[1] == 'C':
                    token = self.token()
                    if token == None:
                        return token
                    if token[0] == 'sep' and token[1] == "{":
                        token = self.token()
                        # print 'Entering extern "C line ', self.lineno()
                        # parse every global found up to the closing '}'
                        while token != None and (token[0] != 'sep' or
                              token[1] != "}"):
                            if token[0] == 'name':
                                token = self.parseGlobal(token)
                            else:
                                self.error(
                                 "token %s %s unexpected at the top level" % (
                                        token[0], token[1]))
                                token = self.parseGlobal(token)
                        # print 'Exiting extern "C" line', self.lineno()
                        token = self.token()
                        return token
                else:
                    return token
        elif token[1] == 'static':
            static = 1
            token = self.token()
            if token == None or token[0] != 'name':
                return token

        if token[1] == 'typedef':
            token = self.token()
            return self.parseTypedef(token)
        else:
            token = self.parseType(token)
            type_orig = self.type
        if token == None or token[0] != "name":
            return token
        # type is the type of the current declarator, type_orig the one
        # shared by all declarators of the statement
        type = type_orig
        self.name = token[1]
        token = self.token()
        while token != None and (token[0] == "sep" or token[0] == "op"):
            if token[0] == "sep":
                if token[1] == "[":
                    # array: everything up to the ';' is added to the type
                    type = type + token[1]
                    token = self.token()
                    while token != None and (token[0] != "sep" or \
                          token[1] != ";"):
                        type = type + token[1]
                        token = self.token()

            if token != None and token[0] == "op" and token[1] == "=":
                #
                # Skip the initialization of the variable
                #
                token = self.token()
                # NOTE(review): token is not checked against None here, so
                # an input ending right after '=' would fail on token[0]
                if token[0] == 'sep' and token[1] == '{':
                    token = self.token()
                    token = self.parseBlock(token)
                else:
                    self.comment = None
                    while token != None and (token[0] != "sep" or \
                          (token[1] != ';' and token[1] != ',')):
                        token = self.token()
                self.comment = None
                if token == None or token[0] != "sep" or (token[1] != ';' and
                   token[1] != ','):
                    self.error("missing ';' or ',' after value")

            if token != None and token[0] == "sep":
                if token[1] == ";":
                    # end of a variable (or struct) declaration
                    self.comment = None
                    token = self.token()
                    if type == "struct":
                        self.index_add(self.name, self.filename,
                             not self.is_header, "struct", self.struct_fields)
                    else:
                        self.index_add(self.name, self.filename,
                             not self.is_header, "variable", type)
                    break
                elif token[1] == "(":
                    # function prototype or definition
                    token = self.token()
                    token = self.parseSignature(token)
                    if token == None:
                        return None
                    if token[0] == "sep" and token[1] == ";":
                        # prototype: the comment is merged quietly
                        d = self.mergeFunctionComment(self.name,
                                ((type, None), self.signature), 1)
                        self.index_add(self.name, self.filename, static,
                                        "function", d)
                        token = self.token()
                    elif token[0] == "sep" and token[1] == "{":
                        # definition: comment problems are reported unless
                        # the function is static, then the body is skipped
                        d = self.mergeFunctionComment(self.name,
                                ((type, None), self.signature), static)
                        self.index_add(self.name, self.filename, static,
                                        "function", d)
                        token = self.token()
                        token = self.parseBlock(token);
                elif token[1] == ',':
                    # several variables declared in one statement
                    self.comment = None
                    self.index_add(self.name, self.filename, static,
                                    "variable", type)
                    type = type_orig
                    token = self.token()
                    while token != None and token[0] == "sep":
                        type = type + token[1]
                        token = self.token()
                    if token != None and token[0] == "name":
                        self.name = token[1]
                        token = self.token()
                else:
                    break

        return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001512
Daniel Veillardbe586972003-11-18 20:56:51 +00001513 def parse(self):
Daniel Veillard1e906612003-12-05 14:57:46 +00001514 self.warning("Parsing %s" % (self.filename))
Daniel Veillardbe586972003-11-18 20:56:51 +00001515 token = self.token()
1516 while token != None:
1517 if token[0] == 'name':
1518 token = self.parseGlobal(token)
1519 else:
1520 self.error("token %s %s unexpected at the top level" % (
1521 token[0], token[1]))
1522 token = self.parseGlobal(token)
1523 return
1524 self.parseTopComment(self.top_comment)
1525 return self.index
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001526
1527
1528class docBuilder:
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001529 """A documentation builder"""
1530 def __init__(self, name, directories=['.'], excludes=[]):
1531 self.name = name
1532 self.directories = directories
1533 self.excludes = excludes + ignored_files.keys()
1534 self.modules = {}
1535 self.headers = {}
1536 self.idx = index()
1537 self.xref = {}
1538 self.index = {}
1539 if name == 'libxml2':
1540 self.basename = 'libxml'
1541 else:
1542 self.basename = name
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001543
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001544 def indexString(self, id, str):
1545 if str == None:
1546 return
1547 str = string.replace(str, "'", ' ')
1548 str = string.replace(str, '"', ' ')
1549 str = string.replace(str, "/", ' ')
1550 str = string.replace(str, '*', ' ')
1551 str = string.replace(str, "[", ' ')
1552 str = string.replace(str, "]", ' ')
1553 str = string.replace(str, "(", ' ')
1554 str = string.replace(str, ")", ' ')
1555 str = string.replace(str, "<", ' ')
1556 str = string.replace(str, '>', ' ')
1557 str = string.replace(str, "&", ' ')
1558 str = string.replace(str, '#', ' ')
1559 str = string.replace(str, ",", ' ')
1560 str = string.replace(str, '.', ' ')
1561 str = string.replace(str, ';', ' ')
1562 tokens = string.split(str)
1563 for token in tokens:
1564 try:
1565 c = token[0]
1566 if string.find(string.letters, c) < 0:
1567 pass
1568 elif len(token) < 3:
1569 pass
1570 else:
1571 lower = string.lower(token)
1572 # TODO: generalize this a bit
1573 if lower == 'and' or lower == 'the':
1574 pass
1575 elif self.xref.has_key(token):
1576 self.xref[token].append(id)
1577 else:
1578 self.xref[token] = [id]
1579 except:
1580 pass
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001581
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001582 def analyze(self):
1583 print "Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys()))
1584 self.idx.analyze()
1585
1586 def scanHeaders(self):
1587 for header in self.headers.keys():
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001588 parser = CParser(header)
1589 idx = parser.parse()
1590 self.headers[header] = idx;
1591 self.idx.merge(idx)
1592
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001593 def scanModules(self):
1594 for module in self.modules.keys():
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001595 parser = CParser(module)
1596 idx = parser.parse()
1597 # idx.analyze()
1598 self.modules[module] = idx
1599 self.idx.merge_public(idx)
1600
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001601 def scan(self):
1602 for directory in self.directories:
1603 files = glob.glob(directory + "/*.c")
1604 for file in files:
1605 skip = 0
1606 for excl in self.excludes:
1607 if string.find(file, excl) != -1:
1608 skip = 1;
1609 break
1610 if skip == 0:
1611 self.modules[file] = None;
1612 files = glob.glob(directory + "/*.h")
1613 for file in files:
1614 skip = 0
1615 for excl in self.excludes:
1616 if string.find(file, excl) != -1:
1617 skip = 1;
1618 break
1619 if skip == 0:
1620 self.headers[file] = None;
1621 self.scanHeaders()
1622 self.scanModules()
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001623
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001624 def modulename_file(self, file):
1625 module = os.path.basename(file)
1626 if module[-2:] == '.h':
1627 module = module[:-2]
Daniel Veillard5d4644e2005-04-01 13:11:58 +00001628 elif module[-2:] == '.c':
1629 module = module[:-2]
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001630 return module
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001631
def serialize_enum(self, output, name):
    """Write the ``<enum>`` element for the enum value *name* to *output*.

    The entry is looked up in ``self.idx.enums``.  When it carries an
    ``info`` sequence, ``info[0]`` is emitted as ``value``, ``info[2]`` as
    ``type`` and ``info[1]`` (escaped) as ``info``; empty or ``None`` items
    are omitted.
    """
    entry = self.idx.enums[name]
    output.write(" <enum name='%s' file='%s'" % (
        name, self.modulename_file(entry.header)))
    info = entry.info
    if info is not None:
        raw_value = info[0]
        if raw_value is not None and raw_value != '':
            # NOTE(review): eval() is applied to text coming from the parsed
            # sources.  It is run with empty namespaces and no builtins so
            # that an identifier in the expression can never resolve to a
            # Python local or global; constant arithmetic still folds.
            try:
                val = eval(raw_value, {"__builtins__": {}}, {})
            except Exception:
                # Not a constant expression: keep the text as written.
                val = raw_value
            # One-element tuple so a tuple-valued result cannot be mistaken
            # for the formatting arguments.
            output.write(" value='%s'" % (val,))
        if info[2] is not None and info[2] != '':
            output.write(" type='%s'" % (info[2],))
        if info[1] is not None and info[1] != '':
            output.write(" info='%s'" % escape(info[1]))
    output.write("/>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001649
def serialize_macro(self, output, name):
    """Write the ``<macro>`` element for macro *name* to *output*.

    The entry is looked up in ``self.idx.macros``.  Its ``info`` is expected
    to unpack as ``(args, desc)`` where each item of ``args`` unpacks as
    ``(arg_name, arg_desc)``.  Non-empty descriptions are escaped, written
    out and passed to ``self.indexString`` under the macro's name.
    """
    entry = self.idx.macros[name]
    output.write(" <macro name='%s' file='%s'>\n" % (
        name, self.modulename_file(entry.header)))
    if entry.info is not None:
        try:
            (args, desc) = entry.info
            if desc is not None and desc != "":
                output.write(" <info>%s</info>\n" % (escape(desc)))
                self.indexString(name, desc)
            for arg in args:
                # Separate locals: rebinding ``name`` here used to make the
                # argument text be indexed under the argument's own name
                # instead of the macro it belongs to.
                (arg_name, arg_desc) = arg
                if arg_desc is not None and arg_desc != "":
                    output.write(" <arg name='%s' info='%s'/>\n" % (
                        arg_name, escape(arg_desc)))
                    self.indexString(name, arg_desc)
                else:
                    output.write(" <arg name='%s'/>\n" % (arg_name,))
        except Exception:
            # Best effort: an info value of unexpected shape still yields a
            # well-formed (possibly partial) <macro> element.
            pass
    output.write(" </macro>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001671
def serialize_typedef(self, output, name):
    """Write the ``<typedef>`` or ``<struct>`` element for typedef *name*.

    The entry is looked up in ``self.idx.typedefs``.  When its ``info``
    starts with ``'struct '`` a ``<struct>`` element is written; if the
    struct named after that prefix is present in ``self.idx.structs`` with a
    tuple or list ``info``, one ``<field>`` child is written per item
    (``field[1]`` as name, ``field[0]`` as type, ``field[2]`` as
    description).  Any other typedef becomes an empty ``<typedef>`` element.
    """
    entry = self.idx.typedefs[name]
    module = self.modulename_file(entry.header)
    if entry.info[0:7] != 'struct ':
        output.write(" <typedef name='%s' file='%s' type='%s'/>\n" % (
            name, module, entry.info))
        return

    output.write(" <struct name='%s' file='%s' type='%s'" % (
        name, module, entry.info))
    # Everything below works on the struct's own name, not the typedef's.
    struct_name = entry.info[7:]
    fields = None
    if struct_name in self.idx.structs:
        fields = self.idx.structs[struct_name].info
    if not isinstance(fields, (tuple, list)):
        # Unknown struct, or no field list recorded: empty element.
        output.write("/>\n")
        return

    output.write(">\n")
    try:
        for field in fields:
            desc = field[2]
            self.indexString(struct_name, desc)
            if desc is None:
                desc = ''
            else:
                desc = escape(desc)
            output.write(" <field name='%s' type='%s' info='%s'/>\n" % (
                field[1], field[0], desc))
    except Exception:
        print("Failed to serialize struct %s" % (struct_name,))
    output.write(" </struct>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001699
def serialize_variable(self, output, name):
    """Write the ``<variable>`` element for variable *name* to *output*.

    The entry is looked up in ``self.idx.variables``; the ``type`` attribute
    is only emitted when the entry carries an ``info`` value.
    """
    entry = self.idx.variables[name]
    if entry.info is None:
        output.write(" <variable name='%s' file='%s'/>\n" % (
            name, self.modulename_file(entry.header)))
        return
    output.write(" <variable name='%s' file='%s' type='%s'/>\n" % (
        name, self.modulename_file(entry.header), entry.info))
Daniel Veillardc1eed322002-12-12 11:01:32 +00001708
def serialize_function(self, output, name):
    """Write the element describing function *name* to *output*.

    The entry is looked up in ``self.idx.functions``; its ``type`` attribute
    is used as the element name.  The element carries an optional ``<cond>``
    (all conditionals joined with an escaped ``&&``), then ``<info>``,
    ``<return>`` and one ``<arg>`` per non-void parameter, taken from
    ``info`` unpacked as ``(ret, params, desc)``.  Descriptions are also
    passed to ``self.indexString`` under the function's name.  If the info
    cannot be processed a message is printed and the element is closed with
    whatever was written so far.
    """
    entry = self.idx.functions[name]
    if name == debugsym:
        print("=> %s" % (entry,))

    output.write(" <%s name='%s' file='%s' module='%s'>\n" % (
        entry.type, name,
        self.modulename_file(entry.header),
        self.modulename_file(entry.module)))
    #
    # Processing of conditionals modified by Bill 1/1/05
    #
    if entry.conditionals is not None:
        apstr = ""
        for cond in entry.conditionals:
            if apstr != "":
                apstr = apstr + " &amp;&amp; "
            apstr = apstr + cond
        output.write(" <cond>%s</cond>\n" % (apstr,))
    try:
        (ret, params, desc) = entry.info
        output.write(" <info>%s</info>\n" % (escape(desc)))
        self.indexString(name, desc)
        if ret[0] is not None:
            if ret[0] == "void":
                output.write(" <return type='void'/>\n")
            else:
                output.write(" <return type='%s' info='%s'/>\n" % (
                    ret[0], escape(ret[1])))
                self.indexString(name, ret[1])
        for param in params:
            if param[0] == 'void':
                continue
            if param[2] is None:
                output.write(" <arg name='%s' type='%s' info=''/>\n" % (
                    param[1], param[0]))
            else:
                output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (
                    param[1], param[0], escape(param[2])))
            self.indexString(name, param[2])
    except Exception:
        # Same text as the former two-argument print statement: message,
        # a separating space, then the repr of the info value.
        print("Failed to save function %s info: " % name
              + " " + repr(entry.info))
    output.write(" </%s>\n" % (entry.type,))
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001749
def serialize_exports(self, output, file):
    """Write the ``<file>`` element listing what header *file* exports.

    The header's index is taken from ``self.headers[file]``.  When it has an
    ``info`` mapping, its 'Summary', 'Description' and 'Author' entries are
    written as lower-cased elements (a message is printed for each one that
    cannot be written) and ``<deprecated/>`` is added when the description
    contains "DEPRECATED".  Then one ``<exports>`` element is written per
    symbol, grouped as macro, enum, typedef, struct, variable, function,
    each group in sorted order as filtered by ``uniq``.
    """
    module = self.modulename_file(file)
    output.write(" <file name='%s'>\n" % (module,))
    index = self.headers[file]
    if index.info is not None:
        for data in ('Summary', 'Description', 'Author'):
            tag = data.lower()
            try:
                output.write(" <%s>%s</%s>\n" % (
                    tag, escape(index.info[data]), tag))
            except Exception:
                print("Header %s lacks a %s description" % (module, data))
        if 'Description' in index.info:
            if index.info['Description'].find("DEPRECATED") != -1:
                output.write(" <deprecated/>\n")

    # Macros are sometime used to masquerade other types: a macro whose name
    # is also known as another kind of symbol is not exported as a macro.
    shadowing = (index.functions, index.variables, index.typedefs,
                 index.structs, index.enums)
    ids = list(index.macros.keys())
    ids.sort()
    for symbol in uniq(ids):
        for table in shadowing:
            if symbol in table:
                break
        else:
            output.write(" <exports symbol='%s' type='macro'/>\n" % (symbol,))

    for kind, table in (('enum', index.enums),
                        ('typedef', index.typedefs),
                        ('struct', index.structs),
                        ('variable', index.variables),
                        ('function', index.functions)):
        ids = list(table.keys())
        ids.sort()
        for symbol in uniq(ids):
            output.write(" <exports symbol='%s' type='%s'/>\n" % (
                symbol, kind))
    output.write(" </file>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001804
def serialize_xrefs_files(self, output):
    """Write, for every header, a ``<file>`` element listing its symbols.

    Headers are processed in sorted order.  For each one the names of its
    functions, variables, macros, typedefs, structs and enums are combined,
    passed through ``uniq`` and written as sorted ``<ref>`` elements.
    """
    headers = list(self.headers.keys())
    headers.sort()
    for header in headers:
        module = self.modulename_file(header)
        output.write(" <file name='%s'>\n" % (module,))
        index = self.headers[header]
        names = []
        # extend() accepts lists and key views alike, unlike "+".
        for table in (index.functions, index.variables, index.macros,
                      index.typedefs, index.structs, index.enums):
            names.extend(table.keys())
        ids = list(uniq(names))
        ids.sort()
        for symbol in ids:
            output.write(" <ref name='%s'/>\n" % (symbol,))
        output.write(" </file>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001820
def serialize_xrefs_functions(self, output):
    """Write, per parameter type, the functions taking that type.

    Every function in ``self.idx.functions`` whose ``info`` unpacks as
    ``(ret, params, desc)`` is recorded under the type (``param[0]``) of
    each of its parameters.  One ``<type>`` element is then written per type
    in sorted order, holding sorted, de-duplicated ``<ref>`` children.  The
    types '', 'void', 'int', 'char *' and 'const char *' are not listed.
    """
    by_type = {}
    for name in self.idx.functions.keys():
        entry = self.idx.functions[name]
        try:
            (ret, params, desc) = entry.info
            for param in params:
                ptype = param[0]
                # A missing type cannot name a group (and None does not sort
                # against strings on Python 3).
                if ptype == 'void' or ptype is None:
                    continue
                by_type.setdefault(ptype, []).append(name)
        except Exception:
            # Entries whose info does not have the expected shape are left
            # out of the cross references.
            pass

    unlisted = ('', 'void', 'int', 'char *', 'const char *')
    type_names = list(by_type.keys())
    type_names.sort()
    for type_name in type_names:
        if type_name in unlisted:
            continue
        output.write(" <type name='%s'>\n" % (type_name,))
        refs = by_type[type_name]
        refs.sort()
        # A function with several parameters of one type appears several
        # times; after sorting, duplicates are adjacent.
        previous = ''
        for ref in refs:
            if ref != previous:
                output.write(" <ref name='%s'/>\n" % (ref,))
                previous = ref
        output.write(" </type>\n")
1851
def serialize_xrefs_constructors(self, output):
    """Write, per return type, the functions returning that type.

    Every function in ``self.idx.functions`` whose ``info`` unpacks as
    ``(ret, params, desc)`` is recorded under its return type (``ret[0]``).
    One ``<type>`` element is then written per type in sorted order, holding
    sorted ``<ref>`` children.  The types '', 'void', 'int', 'char *' and
    'const char *' are not listed.
    """
    by_type = {}
    for name in self.idx.functions.keys():
        entry = self.idx.functions[name]
        try:
            (ret, params, desc) = entry.info
            rtype = ret[0]
            # A missing return type cannot name a group (and None does not
            # sort against strings on Python 3).
            if rtype == "void" or rtype is None:
                continue
            by_type.setdefault(rtype, []).append(name)
        except Exception:
            # Entries whose info does not have the expected shape are left
            # out of the cross references.
            pass

    unlisted = ('', 'void', 'int', 'char *', 'const char *')
    type_names = list(by_type.keys())
    type_names.sort()
    for type_name in type_names:
        if type_name in unlisted:
            continue
        output.write(" <type name='%s'>\n" % (type_name,))
        refs = by_type[type_name]
        refs.sort()
        for ref in refs:
            output.write(" <ref name='%s'/>\n" % (ref,))
        output.write(" </type>\n")
1878
def serialize_xrefs_alpha(self, output):
    """Write all identifiers grouped by their first character.

    The keys of ``self.idx.identifiers`` are sorted; a ``<letter>`` element
    is opened each time the first character changes and every identifier is
    written as a ``<ref>`` inside the current one.  Nothing is written when
    there are no identifiers.
    """
    # list(...) first: on Python 3 keys() is a view without sort().
    ids = list(self.idx.identifiers.keys())
    ids.sort()
    letter = None
    for ident in ids:
        if ident[0] != letter:
            if letter is not None:
                output.write(" </letter>\n")
            letter = ident[0]
            output.write(" <letter name='%s'>\n" % (letter,))
        output.write(" <ref name='%s'/>\n" % (ident,))
    if letter is not None:
        output.write(" </letter>\n")
1892
def serialize_xrefs_references(self, output):
    """Write one ``<reference>`` element per identifier, in sorted order.

    The ``href`` is built as ``html/<basename>-<module>.html#<identifier>``
    where ``<basename>`` is ``self.basename`` and ``<module>`` is
    ``self.modulename_file()`` applied to the identifier's ``header``.
    """
    # list(...) first: on Python 3 keys() is a view without sort().
    names = list(self.idx.identifiers.keys())
    names.sort()
    for ident in names:
        header = self.idx.identifiers[ident].header
        href = ('html/' + self.basename + '-' +
                self.modulename_file(header) + '.html#' + ident)
        output.write(" <reference name='%s' href='%s'/>\n" % (ident, href))
1903
# Write the word index held in self.xref: words grouped by first character
# into <letter> elements, letters grouped into numbered <chunk> elements,
# followed by a <chunks> summary of the chunks recorded along the way.
1904 def serialize_xrefs_index(self, output):
1905 index = self.xref
# NOTE(review): sort() is called on the result of keys(), which requires
# keys() to return a list.
1906 typ = index.keys()
1907 typ.sort()
# letter: first character of the word group currently open (None before the
# first word); count: number of <ref> lines written since it was last reset;
# chunk: number used to name the next chunk; chunks: [name, start, end]
# records for the summary.
1908 letter = None
1909 count = 0
1910 chunk = 0
1911 chunks = []
1912 for id in typ:
# Words with more than 30 entries are left out of the index.
1913 if len(index[id]) > 30:
1914 continue
1915 if id[0] != letter:
# A new chunk is started for the very first word, and at a change of first
# character once count has gone above 200.
1916 if letter == None or count > 200:
1917 if letter != None:
1918 output.write(" </letter>\n")
1919 output.write(" </chunk>\n")
1920 count = 0
# chunk was already incremented when the chunk being closed was opened,
# hence the "- 1" to recover its name.
1921 chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
1922 output.write(" <chunk name='chunk%s'>\n" % (chunk))
1923 first_letter = id[0]
1924 chunk = chunk + 1
1925 elif letter != None:
1926 output.write(" </letter>\n")
1927 letter = id[0]
1928 output.write(" <letter name='%s'>\n" % (letter))
1929 output.write(" <word name='%s'>\n" % (id))
# The list stored in self.xref is sorted in place; an entry equal to the one
# just seen is skipped, which removes duplicates once sorted.
1930 tokens = index[id];
1931 tokens.sort()
1932 tok = None
William M. Bracka2e844a2004-01-06 11:52:13 +00001933 for token in tokens:
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001934 if tok == token:
1935 continue
1936 tok = token
1937 output.write(" <ref name='%s'/>\n" % (token))
1938 count = count + 1
1939 output.write(" </word>\n")
# Close whatever is still open, record the last chunk, then write the
# <chunks> summary (name, first and last letter of each recorded chunk).
1940 if letter != None:
1941 output.write(" </letter>\n")
1942 output.write(" </chunk>\n")
William M. Brack966668a2003-12-20 02:10:28 +00001943 if count != 0:
1944 chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001945 output.write(" <chunks>\n")
1946 for ch in chunks:
1947 output.write(" <chunk name='%s' start='%s' end='%s'/>\n" % (
1948 ch[0], ch[1], ch[2]))
1949 output.write(" </chunks>\n")
1950
def serialize_xrefs(self, output):
    """Write every cross-reference section to *output*.

    The sections are written in a fixed order -- references, alpha,
    constructors, functions, files, index -- each one wrapped in its own
    element and filled in by the matching ``serialize_xrefs_*`` method.
    """
    sections = (
        (" <references>\n", self.serialize_xrefs_references,
         " </references>\n"),
        (" <alpha>\n", self.serialize_xrefs_alpha,
         " </alpha>\n"),
        (" <constructors>\n", self.serialize_xrefs_constructors,
         " </constructors>\n"),
        (" <functions>\n", self.serialize_xrefs_functions,
         " </functions>\n"),
        (" <files>\n", self.serialize_xrefs_files,
         " </files>\n"),
        (" <index>\n", self.serialize_xrefs_index,
         " </index>\n"),
    )
    for opening, write_section, closing in sections:
        output.write(opening)
        write_section(output)
        output.write(closing)
1970
def serialize(self):
    """Write the API description and cross-reference XML files.

    Two files are created in the current directory, named after
    ``self.name``:

    * ``<name>-api.xml``: one entry per header (``serialize_exports``)
      followed by every macro, enum, typedef, variable and function of
      ``self.idx``, each group in sorted order;
    * ``<name>-refs.xml``: the output of ``serialize_xrefs``.

    Each file is closed even if one of the serializers raises.
    """
    filename = "%s-api.xml" % self.name
    print("Saving XML description %s" % (filename,))
    output = open(filename, "w")
    try:
        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
        output.write("<api name='%s'>\n" % self.name)
        output.write(" <files>\n")
        headers = list(self.headers.keys())
        headers.sort()
        for header in headers:
            self.serialize_exports(output, header)
        output.write(" </files>\n")
        output.write(" <symbols>\n")
        for table, write_symbol in (
                (self.idx.macros, self.serialize_macro),
                (self.idx.enums, self.serialize_enum),
                (self.idx.typedefs, self.serialize_typedef),
                (self.idx.variables, self.serialize_variable),
                (self.idx.functions, self.serialize_function)):
            names = list(table.keys())
            names.sort()
            for symbol in names:
                write_symbol(output, symbol)
        output.write(" </symbols>\n")
        output.write("</api>\n")
    finally:
        output.close()

    filename = "%s-refs.xml" % self.name
    print("Saving XML Cross References %s" % (filename,))
    output = open(filename, "w")
    try:
        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
        output.write("<apirefs name='%s'>\n" % self.name)
        self.serialize_xrefs(output)
        output.write("</apirefs>\n")
    finally:
        output.close()
Daniel Veillarda9b66d02002-12-11 14:23:49 +00002016
2017
def rebuild():
    """Regenerate the API description for the project found around the cwd.

    The project is guessed from which marker file exists: ``parser.c`` or
    ``../parser.c`` selects libxml2, ``../libxslt/transform.c`` selects
    libxslt.  The selected builder is scanned, analyzed and serialized; when
    ``../libexslt/exslt.c`` exists a second builder does the same for
    libexslt.

    Returns the main builder, or ``None`` (after printing a message) when no
    marker file is found.
    """
    if glob.glob("parser.c"):
        print("Rebuilding API description for libxml2")
        builder = docBuilder("libxml2", [".", "."],
                             ["xmlwin32version.h", "tst.c"])
    elif glob.glob("../parser.c"):
        print("Rebuilding API description for libxml2")
        builder = docBuilder("libxml2", ["..", "../include/libxml"],
                             ["xmlwin32version.h", "tst.c"])
    elif glob.glob("../libxslt/transform.c"):
        print("Rebuilding API description for libxslt")
        builder = docBuilder("libxslt", ["../libxslt"],
                             ["win32config.h", "libxslt.h", "tst.c"])
    else:
        print("rebuild() failed, unable to guess the module")
        return None
    builder.scan()
    builder.analyze()
    builder.serialize()
    if glob.glob("../libexslt/exslt.c"):
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize()
    return builder
2044
2045#
2046# for debugging the parser
2047#
def parse(filename):
    """Run ``CParser`` on *filename* and return the index it produces."""
    return CParser(filename).parse()
2052
if __name__ == "__main__":
    # With a command-line argument: turn the debug flag on and parse only
    # that file.  Without one: run rebuild().
    cli_args = sys.argv[1:]
    if cli_args:
        debug = 1
        parse(cli_args[0])
    else:
        rebuild()