blob: 56862389fc1780d04b9b28608c722c2231b2afdc [file] [log] [blame]
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001#!/usr/bin/python -u
2#
3# This is the API builder, it parses the C sources and build the
4# API formal description in XML.
5#
6# See Copyright for the status of this software.
7#
8# daniel@veillard.com
9#
Daniel Veillard540a31a2003-01-21 11:21:07 +000010import os, sys
Daniel Veillarda9b66d02002-12-11 14:23:49 +000011import string
12import glob
13
# Global debugging switches: 'debug' enables verbose tracing, 'debugsym'
# names a single symbol whose definitions/updates are traced (None = off).
debug = 0
#debugsym='ignorableWhitespaceSAXFunc'
debugsym = None

#
# C parser analysis code
#

# Files never analyzed, keyed by file name; the value records the reason.
ignored_files = {
    # trio portability library
    "trio": "too many non standard macros",
    "trio.c": "too many non standard macros",
    "trionan.c": "too many non standard macros",
    "triostr.c": "too many non standard macros",
    # generated or internal headers
    "acconfig.h": "generated portability layer",
    "config.h": "generated portability layer",
    "libxml.h": "internal only",
    "elfgcchack.h": "not a normal header",
    # test programs and experiments
    "testOOM.c": "out of memory tester",
    "testOOMlib.h": "out of memory tester",
    "testOOMlib.c": "out of memory tester",
    "rngparser.c": "not yet integrated",
    "rngparser.h": "not yet integrated",
    "testapi.c": "generated regression tests",
    "tst.c": "not part of the library",
    "testdso.c": "test for dynamid shared libraries",
}

# Words dropped from the token stream, keyed by word.  Each value is an
# (int, reason) pair.
# NOTE(review): the int is presumably the number of following tokens to
# skip as well; the consumer is outside this part of the file - confirm.
ignored_words = {
    # Windows / calling convention keywords
    "WINAPI": (0, "Windows keyword"),
    "__declspec": (3, "Windows keyword"),
    "__stdcall": (0, "Windows keyword"),
    "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
    # export macros for variables
    "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
    "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
    "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
    # export macros for functions
    "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
    "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
    "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
    # calling convention macros
    "XMLCALL": (0, "Special macro for win32 calls"),
    "XSLTCALL": (0, "Special macro for win32 calls"),
    "EXSLTCALL": (0, "Special macro for win32 calls"),
    # miscellaneous macros
    "ATTRIBUTE_UNUSED": (0, "macro keyword"),
    "LIBEXSLT_PUBLIC": (0, "macro keyword"),
    "X_IN_Y": (5, "macro function builder"),
}
58
def escape(raw):
    """Return `raw` with the five XML special characters replaced by
       their predefined entities (&amp; &lt; &gt; &apos; &quot;)."""
    # The ampersand must be handled first, otherwise the '&' introduced
    # by the other entities would be escaped a second time.
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
66
def uniq(items):
    """Return the distinct elements of `items`, in no particular order.
       The elements must be hashable."""
    # Dictionary keys are unique by construction, the values are unused.
    return dict.fromkeys(items, 1).keys()
72
class identifier:
    """Record describing one symbol collected by the parser: its name,
       the header and module it was seen in, its type, free-form info and
       extra data, the line number, a static flag and the list of
       preprocessor conditionals active when it was found."""

    def __init__(self, name, header=None, module=None, type=None, lineno = 0,
                 info=None, extra=None, conditionals = None):
        self.name = name
        self.header = header
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.lineno = lineno
        self.static = 0
        # Stores a private copy, or None when there are no conditionals.
        self.set_conditionals(conditionals)
        if self.name == debugsym:
            print("=> define %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))

    def __repr__(self):
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module is not None:
            r = r + " from %s" % (self.module)
        # Optional fields are appended in a fixed order when present.
        for value in (self.info, self.extra, self.conditionals):
            if value is not None:
                r = r + " " + repr(value)
        return r

    def set_header(self, header):
        self.header = header
    def set_module(self, module):
        self.module = module
    def set_type(self, type):
        self.type = type
    def set_info(self, info):
        self.info = info
    def set_extra(self, extra):
        self.extra = extra
    def set_lineno(self, lineno):
        self.lineno = lineno
    def set_static(self, static):
        self.static = static
    def set_conditionals(self, conditionals):
        # An empty list is normalized to None; otherwise keep a copy so
        # later changes made by the caller do not leak into this record.
        if conditionals is None or len(conditionals) == 0:
            self.conditionals = None
        else:
            self.conditionals = conditionals[:]

    def get_name(self):
        return self.name
    def get_header(self):
        # Used to return self.module by mistake.
        return self.header
    def get_module(self):
        return self.module
    def get_type(self):
        return self.type
    def get_info(self):
        return self.info
    def get_lineno(self):
        return self.lineno
    def get_extra(self):
        return self.extra
    def get_static(self):
        return self.static
    def get_conditionals(self):
        return self.conditionals

    def update(self, header, module, type = None, info = None, extra=None,
               conditionals=None):
        """Merge new information into this record.  The header and the
           type are only filled in when still unknown, the module is set
           when unknown or when it still equals the header, while info,
           extra and conditionals are overwritten whenever given."""
        if self.name == debugsym:
            print("=> update %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))
        if header is not None and self.header is None:
            # Used to store 'module' in the header slot.
            self.set_header(header)
        if module is not None and (self.module is None or
                                   self.header == self.module):
            self.set_module(module)
        if type is not None and self.type is None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
        if conditionals is not None:
            self.set_conditionals(conditionals)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000163
164class index:
Daniel Veillardbe586972003-11-18 20:56:51 +0000165 def __init__(self, name = "noname"):
166 self.name = name
167 self.identifiers = {}
168 self.functions = {}
169 self.variables = {}
170 self.includes = {}
171 self.structs = {}
172 self.enums = {}
173 self.typedefs = {}
174 self.macros = {}
175 self.references = {}
176 self.info = {}
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000177
Daniel Veillard5d4644e2005-04-01 13:11:58 +0000178 def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
Daniel Veillardbe586972003-11-18 20:56:51 +0000179 if name[0:2] == '__':
180 return None
181 d = None
182 try:
183 d = self.identifiers[name]
Daniel Veillard5d4644e2005-04-01 13:11:58 +0000184 d.update(header, module, type, lineno, info, extra, conditionals)
Daniel Veillardbe586972003-11-18 20:56:51 +0000185 except:
Daniel Veillard5d4644e2005-04-01 13:11:58 +0000186 d = identifier(name, header, module, type, lineno, info, extra, conditionals)
Daniel Veillardbe586972003-11-18 20:56:51 +0000187 self.identifiers[name] = d
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000188
Daniel Veillardbe586972003-11-18 20:56:51 +0000189 if d != None and static == 1:
190 d.set_static(1)
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000191
Daniel Veillardbe586972003-11-18 20:56:51 +0000192 if d != None and name != None and type != None:
193 self.references[name] = d
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000194
Daniel Veillarda2351322004-06-27 12:08:10 +0000195 if name == debugsym:
196 print "New ref: %s" % (d)
197
198 return d
199
Daniel Veillard5d4644e2005-04-01 13:11:58 +0000200 def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
Daniel Veillardbe586972003-11-18 20:56:51 +0000201 if name[0:2] == '__':
202 return None
203 d = None
204 try:
205 d = self.identifiers[name]
Daniel Veillard5d4644e2005-04-01 13:11:58 +0000206 d.update(header, module, type, lineno, info, extra, conditionals)
Daniel Veillardbe586972003-11-18 20:56:51 +0000207 except:
Daniel Veillard5d4644e2005-04-01 13:11:58 +0000208 d = identifier(name, header, module, type, lineno, info, extra, conditionals)
Daniel Veillardbe586972003-11-18 20:56:51 +0000209 self.identifiers[name] = d
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000210
Daniel Veillardbe586972003-11-18 20:56:51 +0000211 if d != None and static == 1:
212 d.set_static(1)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000213
Daniel Veillardbe586972003-11-18 20:56:51 +0000214 if d != None and name != None and type != None:
215 if type == "function":
216 self.functions[name] = d
217 elif type == "functype":
218 self.functions[name] = d
219 elif type == "variable":
220 self.variables[name] = d
221 elif type == "include":
222 self.includes[name] = d
223 elif type == "struct":
224 self.structs[name] = d
225 elif type == "enum":
226 self.enums[name] = d
227 elif type == "typedef":
228 self.typedefs[name] = d
229 elif type == "macro":
230 self.macros[name] = d
231 else:
232 print "Unable to register type ", type
Daniel Veillarda2351322004-06-27 12:08:10 +0000233
234 if name == debugsym:
235 print "New symbol: %s" % (d)
236
Daniel Veillardbe586972003-11-18 20:56:51 +0000237 return d
238
239 def merge(self, idx):
240 for id in idx.functions.keys():
241 #
242 # macro might be used to override functions or variables
243 # definitions
244 #
245 if self.macros.has_key(id):
246 del self.macros[id]
247 if self.functions.has_key(id):
248 print "function %s from %s redeclared in %s" % (
Daniel Veillard5d4644e2005-04-01 13:11:58 +0000249 id, self.functions[id].header, idx.functions[id].header)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000250 else:
Daniel Veillardbe586972003-11-18 20:56:51 +0000251 self.functions[id] = idx.functions[id]
252 self.identifiers[id] = idx.functions[id]
253 for id in idx.variables.keys():
254 #
255 # macro might be used to override functions or variables
256 # definitions
257 #
258 if self.macros.has_key(id):
259 del self.macros[id]
260 if self.variables.has_key(id):
261 print "variable %s from %s redeclared in %s" % (
Daniel Veillard5d4644e2005-04-01 13:11:58 +0000262 id, self.variables[id].header, idx.variables[id].header)
Daniel Veillardbe586972003-11-18 20:56:51 +0000263 else:
264 self.variables[id] = idx.variables[id]
265 self.identifiers[id] = idx.variables[id]
266 for id in idx.structs.keys():
267 if self.structs.has_key(id):
268 print "struct %s from %s redeclared in %s" % (
Daniel Veillard5d4644e2005-04-01 13:11:58 +0000269 id, self.structs[id].header, idx.structs[id].header)
Daniel Veillardbe586972003-11-18 20:56:51 +0000270 else:
271 self.structs[id] = idx.structs[id]
272 self.identifiers[id] = idx.structs[id]
273 for id in idx.typedefs.keys():
274 if self.typedefs.has_key(id):
275 print "typedef %s from %s redeclared in %s" % (
Daniel Veillard5d4644e2005-04-01 13:11:58 +0000276 id, self.typedefs[id].header, idx.typedefs[id].header)
Daniel Veillardbe586972003-11-18 20:56:51 +0000277 else:
278 self.typedefs[id] = idx.typedefs[id]
279 self.identifiers[id] = idx.typedefs[id]
280 for id in idx.macros.keys():
281 #
282 # macro might be used to override functions or variables
283 # definitions
284 #
285 if self.variables.has_key(id):
286 continue
287 if self.functions.has_key(id):
288 continue
289 if self.enums.has_key(id):
290 continue
291 if self.macros.has_key(id):
292 print "macro %s from %s redeclared in %s" % (
Daniel Veillard5d4644e2005-04-01 13:11:58 +0000293 id, self.macros[id].header, idx.macros[id].header)
Daniel Veillardbe586972003-11-18 20:56:51 +0000294 else:
295 self.macros[id] = idx.macros[id]
296 self.identifiers[id] = idx.macros[id]
297 for id in idx.enums.keys():
298 if self.enums.has_key(id):
299 print "enum %s from %s redeclared in %s" % (
Daniel Veillard5d4644e2005-04-01 13:11:58 +0000300 id, self.enums[id].header, idx.enums[id].header)
Daniel Veillardbe586972003-11-18 20:56:51 +0000301 else:
302 self.enums[id] = idx.enums[id]
303 self.identifiers[id] = idx.enums[id]
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000304
Daniel Veillardbe586972003-11-18 20:56:51 +0000305 def merge_public(self, idx):
306 for id in idx.functions.keys():
307 if self.functions.has_key(id):
William M. Brack21e4ef22005-01-02 09:53:13 +0000308 # check that function condition agrees with header
309 if idx.functions[id].conditionals != \
310 self.functions[id].conditionals:
311 print "Header condition differs from Function for %s:" \
312 % id
313 print " H: %s" % self.functions[id].conditionals
314 print " C: %s" % idx.functions[id].conditionals
Daniel Veillardbe586972003-11-18 20:56:51 +0000315 up = idx.functions[id]
Daniel Veillard5d4644e2005-04-01 13:11:58 +0000316 self.functions[id].update(None, up.module, up.type, up.info, up.extra)
Daniel Veillardc1eed322002-12-12 11:01:32 +0000317 # else:
318 # print "Function %s from %s is not declared in headers" % (
William M. Brack21e4ef22005-01-02 09:53:13 +0000319 # id, idx.functions[id].module)
Daniel Veillardc1eed322002-12-12 11:01:32 +0000320 # TODO: do the same for variables.
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000321
Daniel Veillardbe586972003-11-18 20:56:51 +0000322 def analyze_dict(self, type, dict):
323 count = 0
324 public = 0
325 for name in dict.keys():
326 id = dict[name]
327 count = count + 1
328 if id.static == 0:
329 public = public + 1
330 if count != public:
331 print " %d %s , %d public" % (count, type, public)
332 elif count != 0:
333 print " %d public %s" % (count, type)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000334
335
Daniel Veillardbe586972003-11-18 20:56:51 +0000336 def analyze(self):
337 self.analyze_dict("functions", self.functions)
338 self.analyze_dict("variables", self.variables)
339 self.analyze_dict("structs", self.structs)
340 self.analyze_dict("typedefs", self.typedefs)
341 self.analyze_dict("macros", self.macros)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000342
class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line.  Tokens are (kind, text) pairs where
       kind is one of 'preproc', 'string', 'comment', 'name', 'sep' or
       'op'."""

    # Characters ending a 'name' token.
    NAME_STOP = " \t(){}:;,+-*/%&!|[]=><"
    # Single character separators.
    SEPARATORS = "(){}:;,[]"
    # Characters starting an operator, and those allowed as second one.
    OPERATORS = "+-*><=/%&!|."
    OPERATOR_TAIL = "+-*><=/%&!|"

    def __init__(self, input):
        self.input = input
        self.tokens = []
        self.line = ""
        self.lineno = 0
        # Defined up front so that debug() can be called before the
        # first token was produced.
        self.last = None

    def getline(self):
        """Return the next non blank line, stripped, with the lines
           continued by a trailing backslash joined; None at end of
           input."""
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = line.strip()
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = n.strip()
                if not n:
                    break
                line = line + n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        """Put `token` back in front of the pending tokens."""
        self.tokens.insert(0, token)

    def debug(self):
        print("Last token:  %s" % (self.last,))
        print("Token queue:  %s" % (self.tokens,))
        print("Line %d end:  %s" % (self.lineno, self.line))

    def _string_token(self, line):
        """Read a string or character literal starting at line[0]; the
           quotes are dropped and backslash escapes are kept verbatim.
           Returns None when the input ends before the closing quote."""
        end = line[0]
        line = line[1:]
        tok = ""
        while 1:
            found = 0
            i = 0
            l = len(line)
            while i < l:
                if line[i] == end:
                    self.line = line[i+1:]
                    line = line[:i]
                    found = 1
                    break
                if line[i] == '\\':
                    # skip the escaped character
                    i = i + 1
                i = i + 1
            tok = tok + line
            if found:
                break
            line = self.getline()
            if line is None:
                return None
        self.last = ('string', tok)
        return self.last

    def _block_comment_token(self, line):
        """Read a /* ... */ comment starting at line[0], the lines being
           joined with newlines.  Returns None when the input ends before
           the comment is closed."""
        line = line[2:]
        tok = ""
        while 1:
            end = line.find('*/')
            if end != -1:
                self.line = line[end+2:]
                # The character preceding the terminator is dropped too.
                # When the terminator starts the line the slice [:-1]
                # keeps everything but the last character, so a lone
                # "*/" yields "*"; the comment parsers rely on this.
                line = line[:end-1]
            if tok != "":
                tok = tok + "\n"
            tok = tok + line
            if end != -1:
                break
            line = self.getline()
            if line is None:
                return None
        self.last = ('comment', tok)
        return self.last

    def _code_prefix(self, line):
        """Return the part of `line` preceding the first comment or
           string; the remainder is saved in self.line for later."""
        l = len(line)
        i = 0
        while i < l:
            ch = line[i]
            if ch == '"' or ch == "'" or \
               (ch == '/' and line[i+1:i+2] in ('/', '*')):
                self.line = line[i:]
                return line[:i]
            i = i + 1
        return line

    def _split_tokens(self, line):
        """Append the names, separators and operators found in `line`
           to the pending tokens."""
        l = len(line)
        i = 0
        while i < l:
            ch = line[i]
            if ch == ' ' or ch == '\t':
                i = i + 1
            elif ch in self.SEPARATORS:
                self.tokens.append(('sep', ch))
                i = i + 1
            elif ch in self.OPERATORS:
                if line[i:i+3] == '...':
                    self.tokens.append(('name', '...'))
                    i = i + 3
                elif i + 1 < l and line[i+1] in self.OPERATOR_TAIL:
                    # two characters operator
                    self.tokens.append(('op', line[i:i+2]))
                    i = i + 2
                else:
                    self.tokens.append(('op', ch))
                    i = i + 1
            else:
                # anything else starts a name running up to the next
                # blank, separator or operator ('.' does not end it)
                s = i
                while i < l and line[i] not in self.NAME_STOP:
                    i = i + 1
                self.tokens.append(('name', line[s:i]))

    def token(self):
        """Return the next token, or None at the end of the input."""
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line is None:
                return None

            if line[0] == '#':
                # preprocessor line: one token per blank separated word
                self.tokens = [('preproc', word) for word in line.split()]
                break
            if line[0] == '"' or line[0] == "'":
                return self._string_token(line)
            if line[0:2] == '/*':
                return self._block_comment_token(line)
            if line[0:2] == '//':
                self.last = ('comment', line[2:])
                return self.last
            self._split_tokens(self._code_prefix(line))

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000548
549class CParser:
Daniel Veillardbe586972003-11-18 20:56:51 +0000550 """The C module parser"""
551 def __init__(self, filename, idx = None):
552 self.filename = filename
553 if len(filename) > 2 and filename[-2:] == '.h':
554 self.is_header = 1
555 else:
556 self.is_header = 0
557 self.input = open(filename)
558 self.lexer = CLexer(self.input)
559 if idx == None:
560 self.index = index()
561 else:
562 self.index = idx
563 self.top_comment = ""
564 self.last_comment = ""
565 self.comment = None
566 self.collect_ref = 0
Daniel Veillard1e906612003-12-05 14:57:46 +0000567 self.no_error = 0
Daniel Veillarda2351322004-06-27 12:08:10 +0000568 self.conditionals = []
569 self.defines = []
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000570
def collect_references(self):
    """Turn on the collect_ref flag of this parser."""
    self.collect_ref = 1
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000573
def stop_error(self):
    """Silence warning() and error() until start_error() is called."""
    self.no_error = 1
576
def start_error(self):
    """Re-enable the reports silenced by stop_error()."""
    self.no_error = 0
579
def lineno(self):
    """Return the current line number as counted by the lexer."""
    lexer = self.lexer
    return lexer.getlineno()
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000582
def index_add(self, name, module, static, type, info=None, extra = None):
    """Register a symbol definition in the index at the current line,
       with the conditionals currently active."""
    # When parsing a header the module also serves as the header name,
    # otherwise no header is recorded.
    header = None
    if self.is_header == 1:
        header = module
    self.index.add(name, header, module, static, type, self.lineno(),
                   info, extra, self.conditionals)
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000590
def index_add_ref(self, name, module, static, type, info=None,
                  extra = None):
    """Register a symbol reference in the index at the current line,
       with the conditionals currently active."""
    # When parsing a header the module also serves as the header name,
    # otherwise no header is recorded.
    header = None
    if self.is_header == 1:
        header = module
    self.index.add_ref(name, header, module, static, type, self.lineno(),
                       info, extra, self.conditionals)
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000599
def warning(self, msg):
    """Print `msg` unless the reports are currently silenced."""
    if not self.no_error:
        print(msg)
604
def error(self, msg, token=-1):
    """Report a parse error, dump the lexer state and exit with status
       1.  Does nothing while the reports are silenced."""
    if self.no_error:
        return

    print("Parse Error: " + msg)
    # -1 stands for "no token given"
    if token != -1:
        print("Got token  %s" % (token,))
    self.lexer.debug()
    sys.exit(1)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000614
def debug(self, msg, token=-1):
    """Print a debug message, the token when one is given, and the
       lexer state."""
    print("Debug: " + msg)
    # -1 stands for "no token given"
    if token != -1:
        print("Got token  %s" % (token,))
    self.lexer.debug()
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000620
def parseTopComment(self, comment):
    """Parse the first comment of a file as a set of 'Item: text' lines
       and store the resulting dictionary in self.index.info.  A line
       without colon continues the previous item; such lines found before
       the first item are dropped."""
    res = {}
    item = None
    for line in comment.split("\n"):
        # remove the comment decoration: blanks, stars, blanks
        line = line.lstrip(" \t").lstrip("*").lstrip(" \t")
        pos = line.find(":")
        if pos != -1:
            # new item; an explicit test replaces the former bare except
            # around the tuple unpacking
            item = line[:pos]
            line = line[pos+1:].lstrip(" \t")
        if item is None:
            continue
        if item in res:
            res[item] = res[item] + " " + line
        else:
            res[item] = line
    self.index.info = res
648
def parseComment(self, token):
    """Record the text of a comment token and return the token following
       it.  A comment starting with '*' replaces the pending comment,
       any other one is appended to it.  DOC_DISABLE / DOC_ENABLE found
       in the pending comment switch the reports off / on."""
    text = token[1]
    if self.top_comment == "":
        self.top_comment = text
    # Slicing keeps an empty comment (e.g. a bare "//") from raising an
    # IndexError.
    if self.comment is None or text[0:1] == '*':
        self.comment = text
    else:
        self.comment = self.comment + text
    token = self.lexer.token()

    if self.comment.find("DOC_DISABLE") != -1:
        self.stop_error()

    if self.comment.find("DOC_ENABLE") != -1:
        self.start_error()

    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000665
666 #
667 # Parse a comment block associate to a macro
668 #
def parseMacroComment(self, name, quiet = 0):
    """Parse the pending comment as the documentation of macro `name`.
       The comment must start with a '* name:' line, followed by optional
       '* @arg: text' blocks and the description.  Returns (args, desc)
       where args is a list of (arg, description) pairs; both are empty
       when the comment is missing or misformatted.  Problems are
       reported with self.warning() unless `quiet` is set or the name
       starts with '__'."""
    if name[0:2] == '__':
        quiet = 1

    args = []
    desc = ""

    if self.comment is None:
        if not quiet:
            self.warning("Missing comment for macro %s" % (name))
        return((args, desc))
    # slices and length tests below keep short or truncated comments
    # from raising an IndexError
    if self.comment[0:1] != '*':
        if not quiet:
            self.warning("Missing * in macro comment for %s" % (name))
        return((args, desc))
    lines = self.comment.split('\n')
    if lines[0] == '*':
        del lines[0]
    if len(lines) == 0 or lines[0] != "* %s:" % (name):
        if not quiet:
            got = ""
            if len(lines) > 0:
                got = lines[0]
            self.warning("Misformatted macro comment for %s" % (name))
            self.warning(" Expecting '* %s:' got '%s'" % (name, got))
        return((args, desc))
    del lines[0]
    while len(lines) > 0 and lines[0] == '*':
        del lines[0]
    while len(lines) > 0 and lines[0][0:3] == '* @':
        l = lines[0][3:]
        try:
            (arg, desc) = l.split(':', 1)
        except ValueError:
            # no colon after the argument name
            if not quiet:
                self.warning("Misformatted macro comment for %s" % (name))
                self.warning(" problem with '%s'" % (lines[0]))
            del lines[0]
            continue
        desc = desc.strip()
        arg = arg.strip()
        del lines[0]
        # gather the continuation lines of the argument description
        l = ""
        if len(lines) > 0:
            l = lines[0].strip()
        while len(l) > 2 and l[0:3] != '* @':
            while len(l) > 0 and l[0] == '*':
                l = l[1:]
            desc = desc + ' ' + l.strip()
            del lines[0]
            if len(lines) == 0:
                break
            l = lines[0]
        args.append((arg, desc))
    while len(lines) > 0 and lines[0] == '*':
        del lines[0]
    # everything left is the description of the macro
    desc = ""
    while len(lines) > 0:
        l = lines[0]
        while len(l) > 0 and l[0] == '*':
            l = l[1:]
        l = l.strip()
        desc = desc + " " + l
        del lines[0]

    desc = desc.strip()

    if quiet == 0:
        if desc == "":
            self.warning("Macro comment for %s lack description of the macro" % (name))

    return((args, desc))
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000736
737 #
738 # Parse a comment block and merge the informations found in the
739 # parameters descriptions, finally returns a block as complete
740 # as possible
741 #
def mergeFunctionComment(self, name, description, quiet = 0):
    """Merge the pending comment into the description of function
       `name`.  `description` is a (ret, args) pair, args being a list of
       (type, name, info) tuples; the documented arguments are updated in
       place in that list.  The comment must start with a '* name:'
       line, followed by optional '* @arg: text' blocks, the description
       and a final part starting with 'return' or 'Return'.  Returns
       ((ret[0], return description), args, description).  Problems are
       reported with self.warning() unless `quiet` is set, the function
       is 'main' or its name starts with '__'."""
    if name == 'main':
        quiet = 1
    if name[0:2] == '__':
        quiet = 1

    (ret, args) = description
    desc = ""
    retdesc = ""

    if self.comment is None:
        if not quiet:
            self.warning("Missing comment for function %s" % (name))
        return(((ret[0], retdesc), args, desc))
    # slices and length tests below keep short or truncated comments
    # from raising an IndexError
    if self.comment[0:1] != '*':
        if not quiet:
            self.warning("Missing * in function comment for %s" % (name))
        return(((ret[0], retdesc), args, desc))
    lines = self.comment.split('\n')
    if lines[0] == '*':
        del lines[0]
    if len(lines) == 0 or lines[0] != "* %s:" % (name):
        if not quiet:
            got = ""
            if len(lines) > 0:
                got = lines[0]
            self.warning("Misformatted function comment for %s" % (name))
            self.warning(" Expecting '* %s:' got '%s'" % (name, got))
        return(((ret[0], retdesc), args, desc))
    del lines[0]
    while len(lines) > 0 and lines[0] == '*':
        del lines[0]
    nbargs = len(args)
    while len(lines) > 0 and lines[0][0:3] == '* @':
        l = lines[0][3:]
        try:
            (arg, desc) = l.split(':', 1)
        except ValueError:
            # no colon after the argument name
            if not quiet:
                self.warning("Misformatted function comment for %s" % (name))
                self.warning(" problem with '%s'" % (lines[0]))
            del lines[0]
            continue
        desc = desc.strip()
        arg = arg.strip()
        del lines[0]
        # gather the continuation lines of the argument description
        l = ""
        if len(lines) > 0:
            l = lines[0].strip()
        while len(l) > 2 and l[0:3] != '* @':
            while len(l) > 0 and l[0] == '*':
                l = l[1:]
            desc = desc + ' ' + l.strip()
            del lines[0]
            if len(lines) == 0:
                break
            l = lines[0]
        # attach the description to the first argument of that name
        i = 0
        while i < nbargs:
            if args[i][1] == arg:
                args[i] = (args[i][0], arg, desc)
                break
            i = i + 1
        if i >= nbargs:
            if not quiet:
                self.warning("Unable to find arg %s from function comment for %s" % (
                   arg, name))
    while len(lines) > 0 and lines[0] == '*':
        del lines[0]
    desc = ""
    while len(lines) > 0:
        l = lines[0]
        while len(l) > 0 and l[0] == '*':
            l = l[1:]
        l = l.strip()
        if l[0:6] == "return" or l[0:6] == "Return":
            # drop the first word, all the remaining lines describe the
            # return value
            try:
                l = l.split(' ', 1)[1]
            except IndexError:
                l = ""
            retdesc = l.strip()
            del lines[0]
            while len(lines) > 0:
                l = lines[0]
                while len(l) > 0 and l[0] == '*':
                    l = l[1:]
                l = l.strip()
                retdesc = retdesc + " " + l
                del lines[0]
        else:
            desc = desc + " " + l
            del lines[0]

    retdesc = retdesc.strip()
    desc = desc.strip()

    if quiet == 0:
        #
        # report missing comments
        #
        i = 0
        while i < nbargs:
            if args[i][2] is None and args[i][0] != "void" and \
               args[i][1] is not None:
                self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
            i = i + 1
        if retdesc == "" and ret[0] != "void":
            self.warning("Function comment for %s lacks description of return value" % (name))
        if desc == "":
            self.warning("Function comment for %s lacks description of the function" % (name))

    return(((ret[0], retdesc), args, desc))
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000849
def parsePreproc(self, token):
    """Consume one preprocessor directive and return the token after it.

    `token` is the 'preproc' token naming the directive; token[1] holds
    the directive text ("#include", "#define", "#ifdef", ...).

    - "#include": the included name is recorded with index_add(...,
      "include").
    - "#define": the macro name (text before any '(') is recorded with
      index_add(..., "macro", info), info coming from parseMacroComment().
    - "#ifdef" / "#ifndef" / "#if" / "#else" / "#endif": maintain the
      self.defines and self.conditionals stacks (see the comment below).

    Returns the next token to process, or None when the lexer has no
    more tokens.
    """
    if debug:
        print "=> preproc ", token, self.lexer.tokens
    name = token[1]
    if name == "#include":
        token = self.lexer.token()
        if token == None:
            return None
        if token[0] == 'preproc':
            self.index_add(token[1], self.filename, not self.is_header,
                           "include")
            return self.lexer.token()
        return token
    if name == "#define":
        token = self.lexer.token()
        if token == None:
            return None
        if token[0] == 'preproc':
            # TODO macros with arguments
            name = token[1]
            # The macro body tokens are collected in lst but not used
            # afterwards; the loop mainly advances past the definition.
            lst = []
            token = self.lexer.token()
            while token != None and token[0] == 'preproc' and \
                  token[1][0] != '#':
                lst.append(token[1])
                token = self.lexer.token()
            # Keep only the macro name, dropping any "(args" suffix.
            try:
                name = string.split(name, '(') [0]
            except:
                pass
            info = self.parseMacroComment(name, not self.is_header)
            self.index_add(name, self.filename, not self.is_header,
                           "macro", info)
            return token
        # NOTE(review): a "#define" not followed by a 'preproc' token falls
        # through to the generic skipping code at the end - confirm intended.

    #
    # Processing of conditionals modified by Bill 1/1/05
    #
    # We process conditionals (i.e. tokens from #ifdef, #ifndef,
    # #if, #else and #endif) for headers and mainline code,
    # store the ones from the header in libxml2-api.xml, and later
    # (in the routine merge_public) verify that the two (header and
    # mainline code) agree.
    #
    # There is a small problem with processing the headers. Some of
    # the variables are not concerned with enabling / disabling of
    # library functions (e.g. '__XML_PARSER_H__'), and we don't want
    # them to be included in libxml2-api.xml, or involved in
    # the check between the header and the mainline code.  To
    # accomplish this, we ignore any conditional which doesn't include
    # the string 'ENABLED'
    #
    if name == "#ifdef":
        # Peek at the first pending lexer token: the tested symbol.
        # NOTE(review): this lookup sits outside the try block, so an
        # empty self.lexer.tokens raises IndexError here.
        apstr = self.lexer.tokens[0][1]
        try:
            # Every conditional is pushed on self.defines so that #else and
            # #endif can tell whether the innermost one was an ENABLED one.
            self.defines.append(apstr)
            if string.find(apstr, 'ENABLED') != -1:
                self.conditionals.append("defined(%s)" % apstr)
        except:
            pass
    elif name == "#ifndef":
        apstr = self.lexer.tokens[0][1]
        try:
            self.defines.append(apstr)
            if string.find(apstr, 'ENABLED') != -1:
                self.conditionals.append("!defined(%s)" % apstr)
        except:
            pass
    elif name == "#if":
        # Rebuild the whole condition by joining the pending tokens with
        # single spaces.
        apstr = ""
        for tok in self.lexer.tokens:
            if apstr != "":
                apstr = apstr + " "
            apstr = apstr + tok[1]
        try:
            self.defines.append(apstr)
            if string.find(apstr, 'ENABLED') != -1:
                self.conditionals.append(apstr)
        except:
            pass
    elif name == "#else":
        # Negate the innermost recorded condition, but only when the
        # innermost conditional is one that was recorded (mentions ENABLED).
        if self.conditionals != [] and \
           string.find(self.defines[-1], 'ENABLED') != -1:
            self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
    elif name == "#endif":
        # Pop the recorded condition (if the innermost conditional had one)
        # and always pop the matching entry of self.defines.
        if self.conditionals != [] and \
           string.find(self.defines[-1], 'ENABLED') != -1:
            self.conditionals = self.conditionals[:-1]
        self.defines = self.defines[:-1]
    # Skip the remaining 'preproc' tokens of this directive; stop at the
    # next directive (text starting with '#') or at a non-preproc token.
    token = self.lexer.token()
    while token != None and token[0] == 'preproc' and \
          token[1][0] != '#':
        token = self.lexer.token()
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000944
945 #
# Token acquisition on top of the lexer: it handles preprocessor
# directives and comments internally, since they are logically not
# part of the program structure.
949 #
Daniel Veillardbe586972003-11-18 20:56:51 +0000950 def token(self):
951 global ignored_words
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000952
Daniel Veillardbe586972003-11-18 20:56:51 +0000953 token = self.lexer.token()
954 while token != None:
955 if token[0] == 'comment':
956 token = self.parseComment(token)
957 continue
958 elif token[0] == 'preproc':
959 token = self.parsePreproc(token)
960 continue
Daniel Veillard99dd7632005-05-06 11:40:56 +0000961 elif token[0] == "name" and token[1] == "__const":
962 token = ("name", "const")
963 return token
964 elif token[0] == "name" and token[1] == "__attribute":
965 token = self.lexer.token()
966 while token != None and token[1] != ";":
967 token = self.lexer.token()
968 return token
Daniel Veillardbe586972003-11-18 20:56:51 +0000969 elif token[0] == "name" and ignored_words.has_key(token[1]):
970 (n, info) = ignored_words[token[1]]
971 i = 0
972 while i < n:
973 token = self.lexer.token()
974 i = i + 1
975 token = self.lexer.token()
976 continue
977 else:
978 if debug:
979 print "=> ", token
980 return token
981 return None
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000982
983 #
984 # Parse a typedef, it records the type and its name.
985 #
def parseTypedef(self, token):
    """Parse the declarations of a typedef and index every declared name.

    `token` is the first token after the 'typedef' keyword.  The type is
    parsed with parseType(); each declared name is then registered with
    index_add() as a "functype" (when parseType() found a signature), a
    "struct" (when the base type is exactly "struct") or a "typedef".
    Returns the token following the closing ';', the offending token on a
    parse error, or None at end of input.
    """
    if token is None:
        return None
    token = self.parseType(token)
    if token is None:
        self.error("parsing typedef")
        return None

    base_type = self.type
    declared = base_type
    while token is not None:
        if token[0] != "name":
            self.error("parsing typedef: expecting a name")
            return token

        name = token[1]
        signature = self.signature
        if signature is not None:
            # Function type: keep only the return type part.
            declared = declared.split('(')[0]
            info = self.mergeFunctionComment(name,
                                             ((declared, None), signature), 1)
            self.index_add(name, self.filename, not self.is_header,
                           "functype", info)
        elif base_type == "struct":
            self.index_add(name, self.filename, not self.is_header,
                           "struct", declared)
            # Later declarators of the same typedef refer to this struct.
            base_type = "struct " + name
        else:
            self.index_add(name, self.filename, not self.is_header,
                           "typedef", declared)
        token = self.token()

        kind = None
        text = None
        if token is not None:
            kind = token[0]
            text = token[1]

        if kind == 'sep' and text == ',':
            # Another declarator follows: restart from the base type and
            # absorb its leading operators (e.g. '*').
            declared = base_type
            token = self.token()
            while token is not None and token[0] == "op":
                declared = declared + token[1]
                token = self.token()
        elif kind == 'sep' and text == ';':
            break
        elif kind == 'name':
            declared = base_type
        else:
            self.error("parsing typedef: expecting ';'", token)
            return token

    token = self.token()
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001035
1036 #
# Parse a C code block (used for function bodies): it parses up to and
# including the balancing }
1039 #
def parseBlock(self, token):
    """Skip a C code block up to and including its balancing '}'.

    `token` is the first token inside the block.  Nested blocks are handled
    recursively.  When self.collect_ref is 1, names are inspected while
    skipping and references are recorded with index_add_ref():
      - "xml..." followed by '('                    -> "function"
      - "xml..." followed by a name and ';' ',' '=' -> "type"
      - "XML_..." or "LIBXML_..."                   -> "typedef"

    Returns the token following the closing '}', or None when the input
    ends first.  The lookahead tokens are checked against None so that a
    truncated block no longer raises TypeError.
    """
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            self.comment = None
            token = self.token()
            return token
        elif self.collect_ref != 1:
            token = self.token()
        else:
            oldtok = token
            token = self.token()
            if oldtok[0] != "name":
                continue
            ident = oldtok[1]
            if ident[0:3] == "xml":
                if token is None:
                    # End of input right after the identifier: nothing
                    # left to look at.
                    pass
                elif token[0] == "sep" and token[1] == "(":
                    self.index_add_ref(ident, self.filename,
                                       0, "function")
                    token = self.token()
                elif token[0] == "name":
                    token = self.token()
                    if token is not None and token[0] == "sep" and \
                       token[1] in (";", ",", "="):
                        self.index_add_ref(ident, self.filename,
                                           0, "type")
            elif ident[0:4] == "XML_" or ident[0:7] == "LIBXML_":
                self.index_add_ref(ident, self.filename,
                                   0, "typedef")
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001074
1075 #
1076 # Parse a C struct definition till the balancing }
1077 #
def parseStruct(self, token):
    """Parse the body of a C struct definition up to the balancing '}'.

    `token` is the first token after the opening '{'.  Each "type name ;"
    member is collected as a (type, name, comment) tuple, the comment being
    whatever self.comment holds after the token following the ';' has been
    read.  The collected list is stored in self.struct_fields.  self.type
    is restored to its value on entry after every member, because
    parseType() overwrites it.  Returns the token following the closing
    '}', or None if the input ends first.
    """
    members = []
    while token is not None:
        is_sep = token[0] == "sep"
        if is_sep and token[1] == "{":
            token = self.parseTypeBlock(self.token())
            continue
        if is_sep and token[1] == "}":
            self.struct_fields = members
            return self.token()

        saved_type = self.type
        token = self.parseType(token)
        if token is not None and token[0] == "name":
            member = token[1]
            token = self.token()
            if token[0] == "sep" and token[1] == ";":
                # Reset the comment so that only a comment met while
                # fetching the next token is attached to this member.
                self.comment = None
                token = self.token()
                members.append((self.type, member, self.comment))
                self.comment = None
            else:
                self.error("parseStruct: expecting ;", token)
        elif token is not None and token[0] == "sep" and token[1] == "{":
            # Nested anonymous block: skip it, plus an optional member name.
            token = self.parseTypeBlock(self.token())
            if token is not None and token[0] == "name":
                token = self.token()
            if token is not None and token[0] == "sep" and token[1] == ";":
                token = self.token()
            else:
                self.error("parseStruct: expecting ;", token)
        else:
            self.error("parseStruct: name", token)
            token = self.token()
        self.type = saved_type

    self.struct_fields = members
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001123
1124 #
1125 # Parse a C enum block, parse till the balancing }
1126 #
def parseEnumBlock(self, token):
    """Parse the body of a C enum up to and including the balancing '}'.

    `token` is the first token after the opening '{'.  Every enumerator is
    appended to self.enums as a (name, value, comment) tuple of strings.
    An explicit "= expr" value is stored as the concatenated token texts.
    Otherwise the value is the previous numeric value plus one, starting
    at 0 for the first enumerator as in C.  When the previous value is not
    a plain decimal integer a warning is issued and the value is left
    empty.

    Returns the token following the closing '}', or None if the input ends
    before the block is closed.
    """
    self.enums = []
    self.comment = None
    name = None
    comment = ""
    # Start one below zero: the implicit value is computed by incrementing,
    # and the first enumerator of a C enum is 0.
    value = "-1"

    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseTypeBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            if name is not None:
                if self.comment is not None:
                    comment = self.comment.strip()
                    self.comment = None
                self.enums.append((name, value, comment))
            token = self.token()
            return token
        elif token[0] == "name":
            # A new enumerator starts: flush the previous one, with the
            # comment gathered since it was read.
            if name is not None:
                if self.comment is not None:
                    comment = self.comment.strip()
                    self.comment = None
                self.enums.append((name, value, comment))
            name = token[1]
            comment = ""
            token = self.token()
            if token is not None and token[0] == "op" and \
               token[1][0] == "=":
                # Explicit value; the operator token may carry extra
                # characters after the '='.
                value = token[1][1:]
                token = self.token()
                while token is not None and (
                      token[0] != "sep" or
                      (token[1] != ',' and token[1] != '}')):
                    value = value + token[1]
                    token = self.token()
            else:
                try:
                    value = "%d" % (int(value) + 1)
                except ValueError:
                    self.warning("Failed to compute value of enum %s" % (name))
                    value = ""
            if token is not None and token[0] == "sep" and token[1] == ",":
                token = self.token()
        else:
            token = self.token()
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001174
1175 #
# Parse a C definition block (used for structs): it parses up to and
# including the balancing }
1178 #
def parseTypeBlock(self, token):
    """Skip a brace-delimited definition block, nested blocks included.

    `token` is the first token inside the block.  Returns the token that
    follows the balancing '}', or None if the input ends first.
    """
    while token is not None:
        opening = token[0] == "sep" and token[1] == "{"
        closing = token[0] == "sep" and token[1] == "}"
        following = self.token()
        if opening:
            # Recurse to consume the nested block.
            token = self.parseTypeBlock(following)
        elif closing:
            return following
        else:
            token = following
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001190
1191 #
# Parse a type: the fact that the type name can occur either after
# the definition or within it makes this a little harder. If the name
# is inside, the name token is pushed back before returning.
1195 #
def parseType(self, token):
    """Parse a C type starting at `token` and store its text in self.type.

    Also resets self.struct_fields and self.signature, which are filled in
    when a struct body or a function-pointer signature is met (through
    parseStruct() / parseSignature()).  Enumerators of an inline enum are
    registered with index_add().

    The token returned is normally the name being declared; when the
    parser had to look past that name, the extra token is pushed back on
    the lexer first.  On a parse error the offending token is returned
    after self.error() has been called.  None is returned at end of input.

    Changes compared to the previous implementation:
      - "long int", "short int" and "long long [int]" are now recognised
        as part of the type.  The old code tested the current token for
        "int" inside the long/short branch (which can never be true) and
        referred to an undefined variable `tmp`.
      - tokens obtained by lookahead are checked against None, so a
        truncated input no longer raises TypeError.
    """
    def add_word(word):
        # Append a word to self.type, space separated.
        if self.type == "":
            self.type = word
        else:
            self.type = self.type + " " + word

    self.type = ""
    self.struct_fields = []
    self.signature = None
    if token is None:
        return token

    while token is not None and token[0] == "name" and \
          token[1] in ("const", "unsigned", "signed"):
        add_word(token[1])
        token = self.token()
    if token is None:
        return token

    if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
        add_word(token[1])
        # Look ahead for the optional "long" / "int" completing the type.
        following = self.token()
        while following is not None and following[0] == "name" and \
              following[1] in ("long", "int"):
            add_word(following[1])
            following = self.token()
        if following is not None:
            # Not part of the type: give it back, it is read again below.
            self.lexer.push(following)

    elif token[0] == "name" and token[1] == "struct":
        add_word(token[1])
        token = self.token()
        nametok = None
        if token is not None and token[0] == "name":
            nametok = token
            token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseStruct(token)
        elif token is not None and token[0] == "op" and token[1] == "*":
            if nametok is None:
                self.error("struct : expecting name", token)
                return token
            self.type = self.type + " " + nametok[1] + " *"
            token = self.token()
            while token is not None and token[0] == "op" and token[1] == "*":
                self.type = self.type + " *"
                token = self.token()
            if token is not None and token[0] == "name":
                nametok = token
                token = self.token()
            else:
                self.error("struct : expecting name", token)
                return token
        elif token is not None and token[0] == "name" and nametok is not None:
            self.type = self.type + " " + nametok[1]
            return token

        if nametok is not None:
            self.lexer.push(token)
            token = nametok
        return token

    elif token[0] == "name" and token[1] == "enum":
        add_word(token[1])
        self.enums = []
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseEnumBlock(token)
        else:
            self.error("parsing enum: expecting '{'", token)
        enum_type = None
        if token is not None and token[0] != "name":
            # Anonymous enum: report it under the generic name "enum".
            self.lexer.push(token)
            token = ("name", "enum")
        elif token is not None:
            enum_type = token[1]
        for enum in self.enums:
            self.index_add(enum[0], self.filename,
                           not self.is_header, "enum",
                           (enum[1], enum[2], enum_type))
        return token

    elif token[0] == "name":
        add_word(token[1])
    else:
        self.error("parsing type %s: expecting a name" % (self.type),
                   token)
        return token

    token = self.token()
    while token is not None and (token[0] == "op" or
          token[0] == "name" and token[1] == "const"):
        self.type = self.type + " " + token[1]
        token = self.token()

    #
    # if there is a parenthesis here, this means a function type
    #
    if token is not None and token[0] == "sep" and token[1] == '(':
        self.type = self.type + token[1]
        token = self.token()
        while token is not None and token[0] == "op" and token[1] == '*':
            self.type = self.type + token[1]
            token = self.token()
        if token is None or token[0] != "name":
            self.error("parsing function type, name expected", token)
            return token
        self.type = self.type + token[1]
        nametok = token
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == ')':
            self.type = self.type + token[1]
            token = self.token()
            if token is not None and token[0] == "sep" and token[1] == '(':
                token = self.token()
                # parseSignature() parses the argument types and so
                # overwrites self.type: save and restore it.
                saved_type = self.type
                token = self.parseSignature(token)
                self.type = saved_type
            else:
                self.error("parsing function type, '(' expected", token)
                return token
        else:
            self.error("parsing function type, ')' expected", token)
            return token
        self.lexer.push(token)
        token = nametok
        return token

    #
    # do some lookahead for arrays
    #
    if token is not None and token[0] == "name":
        nametok = token
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == '[':
            self.type = self.type + nametok[1]
            while token is not None and token[0] == "sep" and token[1] == '[':
                self.type = self.type + token[1]
                token = self.token()
                while token is not None and token[0] != 'sep' and \
                      token[1] != ']' and token[1] != ';':
                    self.type = self.type + token[1]
                    token = self.token()
            if token is not None and token[0] == 'sep' and token[1] == ']':
                self.type = self.type + token[1]
                token = self.token()
            else:
                self.error("parsing array type, ']' expected", token)
                return token
        elif token is not None and token[0] == "sep" and token[1] == ':':
            # remove :12 in case it's a limited int size
            token = self.token()
            token = self.token()
        self.lexer.push(token)
        token = nametok

    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001359
1360 #
1361 # Parse a signature: '(' has been parsed and we scan the type definition
1362 # up to the ')' included
def parseSignature(self, token):
    """Parse a function argument list; the opening '(' is already consumed.

    Each argument is parsed with parseType() and collected as a
    (type, name, None) tuple.  An argument given by type only gets the name
    None, except for the type "..." whose name is "...".  The list is
    stored in self.signature.  Returns the token following the closing
    ')', or None if the input ends first.
    """
    params = []
    if token is not None and token[0] == "sep" and token[1] == ')':
        # Empty argument list.
        self.signature = []
        return self.token()

    while token is not None:
        token = self.parseType(token)
        if token is None:
            break
        kind = token[0]
        text = token[1]
        if kind == "name":
            params.append((self.type, text, None))
            token = self.token()
        elif kind == "sep" and text == ',':
            token = self.token()
            continue
        elif kind == "sep" and text == ')':
            # only the type was provided
            label = None
            if self.type == "...":
                label = "..."
            params.append((self.type, label, None))

        if token is not None and token[0] == "sep":
            if token[1] == ',':
                token = self.token()
                continue
            if token[1] == ')':
                token = self.token()
                break

    self.signature = params
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001392
1393 #
1394 # Parse a global definition, be it a type, variable or function
1395 # the extern "C" blocks are a bit nasty and require it to recurse.
1396 #
def parseGlobal(self, token):
    """Parse one top-level definition: a type, variable(s) or function.

    `token` is the first token of the definition.  Handles the leading
    'extern' (recursing over the content of an extern "C" { ... } block)
    and 'static' keywords, delegates typedefs to parseTypedef(), and
    otherwise parses the type with parseType() and registers what follows
    through index_add() as "struct", "variable" or "function".
    Initializers and function bodies are skipped.

    Returns the next token to process, or None at end of input.
    """
    static = 0
    if token[1] == 'extern':
        token = self.token()
        if token == None:
            return token
        if token[0] == 'string':
            if token[1] == 'C':
                token = self.token()
                if token == None:
                    return token
                if token[0] == 'sep' and token[1] == "{":
                    token = self.token()
#                    print 'Entering extern "C line ', self.lineno()
                    # Process every definition of the extern "C" block
                    # until its closing brace.
                    while token != None and (token[0] != 'sep' or
                          token[1] != "}"):
                        if token[0] == 'name':
                            token = self.parseGlobal(token)
                        else:
                            self.error(
                             "token %s %s unexpected at the top level" % (
                                    token[0], token[1]))
                            token = self.parseGlobal(token)
#                    print 'Exiting extern "C" line', self.lineno()
                    token = self.token()
                    return token
            else:
                return token
    elif token[1] == 'static':
        static = 1
        token = self.token()
        if token == None or token[0] != 'name':
            return token

    if token[1] == 'typedef':
        token = self.token()
        return self.parseTypedef(token)
    else:
        token = self.parseType(token)
        type_orig = self.type
    if token == None or token[0] != "name":
        return token
    type = type_orig
    self.name = token[1]
    token = self.token()
    # One iteration per declarator of the definition.
    while token != None and (token[0] == "sep" or token[0] == "op"):
        if token[0] == "sep":
            if token[1] == "[":
                # Array declaration: append everything up to the ';' to
                # the type text.
                type = type + token[1]
                token = self.token()
                while token != None and (token[0] != "sep" or \
                      token[1] != ";"):
                    type = type + token[1]
                    token = self.token()

        if token != None and token[0] == "op" and token[1] == "=":
            #
            # Skip the initialization of the variable
            #
            token = self.token()
            # NOTE(review): token is not checked against None before being
            # indexed here - confirm the input cannot end after '='.
            if token[0] == 'sep' and token[1] == '{':
                token = self.token()
                token = self.parseBlock(token)
            else:
                self.comment = None
                while token != None and (token[0] != "sep" or \
                      (token[1] != ';' and token[1] != ',')):
                    token = self.token()
            self.comment = None
            if token == None or token[0] != "sep" or (token[1] != ';' and
               token[1] != ','):
                self.error("missing ';' or ',' after value")

        if token != None and token[0] == "sep":
            if token[1] == ";":
                # End of a variable (or struct) definition.
                self.comment = None
                token = self.token()
                if type == "struct":
                    self.index_add(self.name, self.filename,
                         not self.is_header, "struct", self.struct_fields)
                else:
                    self.index_add(self.name, self.filename,
                         not self.is_header, "variable", type)
                break
            elif token[1] == "(":
                # Function prototype or definition.
                token = self.token()
                token = self.parseSignature(token)
                if token == None:
                    return None
                if token[0] == "sep" and token[1] == ";":
                    # Prototype: the comment is merged with quiet=1.
                    d = self.mergeFunctionComment(self.name,
                            ((type, None), self.signature), 1)
                    self.index_add(self.name, self.filename, static,
                                   "function", d)
                    token = self.token()
                elif token[0] == "sep" and token[1] == "{":
                    # Definition: comment problems are reported unless the
                    # function is static; the body is then skipped.
                    d = self.mergeFunctionComment(self.name,
                            ((type, None), self.signature), static)
                    self.index_add(self.name, self.filename, static,
                                   "function", d)
                    token = self.token()
                    token = self.parseBlock(token);
            elif token[1] == ',':
                # Several variables declared at once: register this one and
                # restart from the base type for the next declarator.
                self.comment = None
                self.index_add(self.name, self.filename, static,
                               "variable", type)
                type = type_orig
                token = self.token()
                while token != None and token[0] == "sep":
                    type = type + token[1]
                    token = self.token()
                if token != None and token[0] == "name":
                    self.name = token[1]
                    token = self.token()
            else:
                break

    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001515
def parse(self):
    """Parse the whole file and return the index built from it.

    Top-level definitions are handled one after the other by parseGlobal().
    When a definition does not start with a name token, the error is
    reported, parseGlobal() is still called once and the method then
    returns None without processing the top comment.
    """
    self.warning("Parsing %s" % (self.filename))
    current = self.token()
    while current is not None:
        if current[0] != 'name':
            self.error("token %s %s unexpected at the top level" % (
                       current[0], current[1]))
            self.parseGlobal(current)
            return
        current = self.parseGlobal(current)
    self.parseTopComment(self.top_comment)
    return self.index
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001529
1530
1531class docBuilder:
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001532 """A documentation builder"""
def __init__(self, name, directories=['.'], excludes=[]):
    """Set up an empty documentation builder for project `name`.

    `directories` lists the directories to scan.  `excludes` lists file
    name fragments to skip; the keys of the module-level ignored_files
    table are always added to it.  The default lists are not modified
    here (excludes is combined into a new list).
    """
    # The libxml2 project uses 'libxml' as its base name.
    basename = name
    if name == 'libxml2':
        basename = 'libxml'

    self.name = name
    self.directories = directories
    self.excludes = excludes + list(ignored_files.keys())
    self.modules = {}
    self.headers = {}
    self.idx = index()
    self.xref = {}
    self.index = {}
    self.basename = basename
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001546
def indexString(self, id, str):
    """Add the words of `str` to the cross-reference table self.xref.

    Punctuation characters are turned into spaces and the text is split on
    whitespace.  A word is indexed when it starts with an ASCII letter, is
    at least 3 characters long and is not 'and' or 'the' (compared in lower
    case).  self.xref maps each word to the list of ids it was seen with;
    `id` is appended once per occurrence.  Nothing is done when `str` is
    None.

    Compared to the previous implementation, the blanket "except: pass"
    was removed (no statement here is expected to fail, and real errors
    should not be hidden) and the locale-dependent string.letters was
    replaced by string.ascii_letters.
    """
    if str is None:
        return
    for separator in "'\"/*[]()<>&#,.;":
        str = str.replace(separator, ' ')
    for token in str.split():
        if token[0] not in string.ascii_letters:
            continue
        if len(token) < 3:
            continue
        # TODO: generalize this a bit
        if token.lower() in ('and', 'the'):
            continue
        self.xref.setdefault(token, []).append(id)
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001584
def analyze(self):
    """Print the number of headers and modules, then analyze the index."""
    header_count = len(self.headers)
    module_count = len(self.modules)
    # A single parenthesised expression prints the same text with the
    # print statement.
    print("Project %s : %d headers, %d modules" % (
        self.name, header_count, module_count))
    self.idx.analyze()
1588
def scanHeaders(self):
    """Parse every registered header and merge its index into self.idx.

    The index returned by the parser is also stored in self.headers under
    the header's path.
    """
    for path in list(self.headers.keys()):
        parsed = CParser(path).parse()
        self.headers[path] = parsed
        self.idx.merge(parsed)
1595
def scanModules(self):
    """Parse every registered C module and merge it into self.idx.

    The index returned by the parser is stored in self.modules under the
    module's path and merged with merge_public().
    """
    for path in list(self.modules.keys()):
        parsed = CParser(path).parse()
        self.modules[path] = parsed
        self.idx.merge_public(parsed)
1603
def scan(self):
    """Collect the .c and .h files of all directories, then parse them.

    Files whose path contains one of the self.excludes fragments are
    skipped.  The remaining .c files are registered in self.modules and the
    .h files in self.headers (with the value None) before scanHeaders() and
    scanModules() are run.
    """
    def excluded(path):
        # True when any exclusion fragment occurs in the path.
        for fragment in self.excludes:
            if path.find(fragment) != -1:
                return 1
        return 0

    for directory in self.directories:
        for suffix, registry in (("/*.c", self.modules),
                                 ("/*.h", self.headers)):
            for path in glob.glob(directory + suffix):
                if not excluded(path):
                    registry[path] = None
    self.scanHeaders()
    self.scanModules()
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001626
def modulename_file(self, file):
    """Return the module name of `file`: its base name without .h or .c."""
    module = os.path.basename(file)
    if module[-2:] in ('.h', '.c'):
        module = module[:-2]
    return module
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001634
def serialize_enum(self, output, name):
    """Write the <enum .../> element describing enum value `name`.

    The entry is looked up in self.idx.enums.  Its info, when present, is
    read as (value, description, type); the non-empty parts are written as
    the value, type and info attributes, in that order.
    """
    id = self.idx.enums[name]
    output.write(" <enum name='%s' file='%s'" % (
                 name, self.modulename_file(id.header)))
    info = id.info
    if info is not None:
        if info[0] not in (None, ''):
            # The value text comes from the parsed C sources; it is
            # evaluated to reduce constant expressions and kept verbatim
            # when that fails.  NOTE(review): eval() runs with this
            # function's namespace - only use on trusted sources.
            try:
                val = eval(info[0])
            except:
                val = info[0]
            output.write(" value='%s'" % (val))
        if info[2] not in (None, ''):
            output.write(" type='%s'" % info[2])
        if info[1] not in (None, ''):
            output.write(" info='%s'" % escape(info[1]))
    output.write("/>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001652
def serialize_macro(self, output, name):
    """Write the ``<macro>`` element describing macro *name*.

    ``output`` is a writable file-like object.  The symbol is looked up in
    ``self.idx.macros``; its ``info``, when present, is expected to unpack
    as ``(args, desc)`` where ``args`` is a sequence of
    ``(arg_name, arg_desc)`` pairs.  Descriptions are also fed to
    ``self.indexString`` under the macro's name.
    """
    sym = self.idx.macros[name]
    output.write(" <macro name='%s' file='%s'>\n" % (
        name, self.modulename_file(sym.header)))
    if sym.info is not None:
        try:
            (args, desc) = sym.info
            if desc not in (None, ""):
                output.write(" <info>%s</info>\n" % (escape(desc)))
                self.indexString(name, desc)
            # Distinct loop names: reusing `name` here used to index the
            # argument descriptions under the argument name instead of the
            # macro they belong to.
            for (arg_name, arg_desc) in args:
                if arg_desc not in (None, ""):
                    output.write(" <arg name='%s' info='%s'/>\n" % (
                        arg_name, escape(arg_desc)))
                    self.indexString(name, arg_desc)
                else:
                    output.write(" <arg name='%s'/>\n" % (arg_name,))
        except Exception:
            # Best effort: malformed info must not abort the whole run, but
            # say so instead of failing silently.
            print("Failed to serialize macro %s" % (name,))
    output.write(" </macro>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001674
def serialize_typedef(self, output, name):
    """Write the ``<typedef/>`` or ``<struct>`` element for typedef *name*.

    ``output`` is a writable file-like object.  When the typedef's ``info``
    text starts with ``'struct '`` a ``<struct>`` element is written; if
    the struct tag is known in ``self.idx.structs`` with a tuple or list of
    fields, one ``<field/>`` child is written per field (each field is
    indexed as ``(type, name, description)``).  Any other typedef becomes a
    single ``<typedef/>`` element.
    """
    sym = self.idx.typedefs[name]
    module = self.modulename_file(sym.header)
    if sym.info[0:7] != 'struct ':
        output.write(" <typedef name='%s' file='%s' type='%s'/>\n" % (
            name, module, sym.info))
        return

    output.write(" <struct name='%s' file='%s' type='%s'" % (
        name, module, sym.info))
    # From here on the struct is addressed by its tag, not the typedef name.
    tag = sym.info[7:]
    structs = self.idx.structs
    if tag in structs and isinstance(structs[tag].info, (tuple, list)):
        output.write(">\n")
        try:
            for field in structs[tag].info:
                desc = field[2]
                self.indexString(tag, desc)
                if desc is None:
                    desc = ''
                else:
                    desc = escape(desc)
                output.write(" <field name='%s' type='%s' info='%s'/>\n" % (
                    field[1], field[0], desc))
        except Exception:
            # Best effort: report and still close the element.
            print("Failed to serialize struct %s" % (tag,))
        output.write(" </struct>\n")
    else:
        output.write("/>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001702
def serialize_variable(self, output, name):
    """Write the ``<variable/>`` element describing variable *name*.

    ``output`` is a writable file-like object.  The ``type`` attribute is
    only emitted when the symbol found in ``self.idx.variables`` carries a
    non-``None`` ``info``.
    """
    sym = self.idx.variables[name]
    module = self.modulename_file(sym.header)
    if sym.info is None:
        output.write(" <variable name='%s' file='%s'/>\n" % (name, module))
    else:
        output.write(" <variable name='%s' file='%s' type='%s'/>\n" % (
            name, module, sym.info))
Daniel Veillardc1eed322002-12-12 11:01:32 +00001711
def serialize_function(self, output, name):
    """Write the element describing function *name*.

    ``output`` is a writable file-like object.  The element tag is the
    symbol's own ``type``; the ``file`` and ``module`` attributes are
    derived from its ``header`` and ``module`` paths.  The symbol's
    ``info`` is expected to unpack as ``(ret, params, desc)`` where ``ret``
    is ``(type, description)`` and each parameter is indexed as
    ``(type, name, description)``.  Descriptions are also fed to
    ``self.indexString`` under the function name.  If the info cannot be
    serialized a diagnostic is printed and the element is still closed.
    """
    sym = self.idx.functions[name]
    if name == debugsym:
        print("=> %s" % (sym,))

    output.write(" <%s name='%s' file='%s' module='%s'>\n" % (
        sym.type, name, self.modulename_file(sym.header),
        self.modulename_file(sym.module)))
    #
    # Processing of conditionals modified by Bill 1/1/05
    #
    if sym.conditionals is not None:
        # All conditions are combined with an XML-escaped "&&".
        apstr = ""
        for cond in sym.conditionals:
            if apstr != "":
                apstr = apstr + " &amp;&amp; "
            apstr = apstr + cond
        output.write(" <cond>%s</cond>\n" % (apstr,))
    try:
        (ret, params, desc) = sym.info
        output.write(" <info>%s</info>\n" % (escape(desc)))
        self.indexString(name, desc)
        if ret[0] is not None:
            if ret[0] == "void":
                output.write(" <return type='void'/>\n")
            else:
                output.write(" <return type='%s' info='%s'/>\n" % (
                    ret[0], escape(ret[1])))
                self.indexString(name, ret[1])
        for param in params:
            if param[0] == 'void':
                # f(void): no real argument to describe.
                continue
            if param[2] is None:
                output.write(" <arg name='%s' type='%s' info=''/>\n" % (
                    param[1], param[0]))
            else:
                output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (
                    param[1], param[0], escape(param[2])))
                self.indexString(name, param[2])
    except Exception:
        # Best effort: report the offending info and keep the XML balanced.
        print("Failed to save function %s info:  %s" % (name, repr(sym.info)))
    output.write(" </%s>\n" % (sym.type,))
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001752
def serialize_exports(self, output, file):
    """Write the ``<file>`` element listing what header *file* exports.

    ``output`` is a writable file-like object and *file* a key of
    ``self.headers``.  The element starts with the header's ``Summary``,
    ``Description`` and ``Author`` entries when its ``info`` mapping is
    set (a missing entry is reported on stdout), adds ``<deprecated/>``
    when the description contains ``DEPRECATED``, then lists the exported
    macros, enums, typedefs, structs, variables and functions in sorted
    order.
    """
    module = self.modulename_file(file)
    output.write(" <file name='%s'>\n" % (module,))
    hdr = self.headers[file]
    if hdr.info is not None:
        for data in ('Summary', 'Description', 'Author'):
            tag = data.lower()
            try:
                output.write(" <%s>%s</%s>\n" % (
                    tag, escape(hdr.info[data]), tag))
            except Exception:
                print("Header %s lacks a %s description" % (module, data))
        if 'Description' in hdr.info:
            if "DEPRECATED" in hdr.info['Description']:
                output.write(" <deprecated/>\n")

    # Sort after uniq(), as serialize_xrefs_files does, so the output order
    # does not depend on the order uniq() happens to return.
    for symbol in sorted(uniq(list(hdr.macros.keys()))):
        # Macros are sometime used to masquerade other types.
        if (symbol in hdr.functions or symbol in hdr.variables or
                symbol in hdr.typedefs or symbol in hdr.structs or
                symbol in hdr.enums):
            continue
        output.write(" <exports symbol='%s' type='macro'/>\n" % (symbol,))

    for kind, table in (('enum', hdr.enums),
                        ('typedef', hdr.typedefs),
                        ('struct', hdr.structs),
                        ('variable', hdr.variables),
                        ('function', hdr.functions)):
        for symbol in sorted(uniq(list(table.keys()))):
            output.write(" <exports symbol='%s' type='%s'/>\n" % (
                symbol, kind))
    output.write(" </file>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001807
def serialize_xrefs_files(self, output):
    """Write one ``<file>`` element per header with all of its symbols.

    Headers are visited in sorted order; for each one, the names of its
    functions, variables, macros, typedefs, structs and enums are merged,
    de-duplicated with ``uniq`` and written as sorted ``<ref/>`` children.
    """
    for path in sorted(self.headers.keys()):
        output.write(" <file name='%s'>\n" % (self.modulename_file(path),))
        hdr = self.headers[path]
        names = []
        for table in (hdr.functions, hdr.variables, hdr.macros,
                      hdr.typedefs, hdr.structs, hdr.enums):
            names.extend(table.keys())
        for symbol in sorted(uniq(names)):
            output.write(" <ref name='%s'/>\n" % (symbol,))
        output.write(" </file>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001823
def serialize_xrefs_functions(self, output):
    """Write, for each parameter type, the functions taking that type.

    Every function in ``self.idx.functions`` whose ``info`` unpacks as
    ``(ret, params, desc)`` is filed under the type (``param[0]``) of each
    of its parameters.  One ``<type>`` element is written per type, in
    sorted order, with sorted ``<ref/>`` children; a function taking the
    same type several times is listed once.  ``void`` parameters and the
    ubiquitous types ``''``, ``int``, ``char *`` and ``const char *`` are
    left out, as is a missing (``None``) type.
    """
    by_type = {}
    for name in self.idx.functions.keys():
        try:
            (ret, params, desc) = self.idx.functions[name].info
            for param in params:
                if param[0] == 'void':
                    continue
                by_type.setdefault(param[0], []).append(name)
        except (TypeError, ValueError, IndexError):
            # Missing or malformed info: this function is simply not
            # cross-referenced.
            pass

    skipped = ('', 'void', 'int', 'char *', 'const char *')
    # A None type carries no information and cannot be ordered against
    # strings on Python 3, so drop it before sorting.
    kinds = [kind for kind in by_type.keys()
             if kind is not None and kind not in skipped]
    kinds.sort()
    for kind in kinds:
        output.write(" <type name='%s'>\n" % (kind,))
        users = by_type[kind]
        users.sort()
        previous = ''  # duplicates are adjacent once sorted
        for func in users:
            if func != previous:
                output.write(" <ref name='%s'/>\n" % (func,))
                previous = func
        output.write(" </type>\n")
1854
def serialize_xrefs_constructors(self, output):
    """Write, for each return type, the functions returning that type.

    Every function in ``self.idx.functions`` whose ``info`` unpacks as
    ``(ret, params, desc)`` is filed under its return type (``ret[0]``).
    One ``<type>`` element is written per type, in sorted order, with
    sorted ``<ref/>`` children.  ``void`` and the ubiquitous types ``''``,
    ``int``, ``char *`` and ``const char *`` are left out, as is a missing
    (``None``) return type.
    """
    by_type = {}
    for name in self.idx.functions.keys():
        try:
            (ret, params, desc) = self.idx.functions[name].info
            if ret[0] == "void":
                continue
            by_type.setdefault(ret[0], []).append(name)
        except (TypeError, ValueError, IndexError):
            # Missing or malformed info: this function is simply not
            # cross-referenced.
            pass

    skipped = ('', 'void', 'int', 'char *', 'const char *')
    # A None type carries no information and cannot be ordered against
    # strings on Python 3, so drop it before sorting.
    kinds = [kind for kind in by_type.keys()
             if kind is not None and kind not in skipped]
    kinds.sort()
    for kind in kinds:
        output.write(" <type name='%s'>\n" % (kind,))
        makers = by_type[kind]
        makers.sort()
        for func in makers:
            output.write(" <ref name='%s'/>\n" % (func,))
        output.write(" </type>\n")
1881
def serialize_xrefs_alpha(self, output):
    """Write all identifiers grouped by their first character.

    The keys of ``self.idx.identifiers`` are sorted; each run of names
    sharing the same first character is wrapped in a ``<letter>`` element
    containing one ``<ref/>`` per name.  Nothing is written when there is
    no identifier.
    """
    letter = None
    for ident in sorted(self.idx.identifiers.keys()):
        initial = ident[0]
        if initial != letter:
            if letter is not None:
                # Close the previous group before opening the next one.
                output.write(" </letter>\n")
            letter = initial
            output.write(" <letter name='%s'>\n" % (letter,))
        output.write(" <ref name='%s'/>\n" % (ident,))
    if letter is not None:
        output.write(" </letter>\n")
1895
def serialize_xrefs_references(self, output):
    """Write one ``<reference/>`` element per identifier, in sorted order.

    The ``href`` of each identifier is built as
    ``html/<basename>-<module>.html#<identifier>`` where ``<basename>`` is
    ``self.basename`` and ``<module>`` is derived from the identifier's
    ``header`` through ``self.modulename_file``.
    """
    for ident in sorted(self.idx.identifiers.keys()):
        header = self.idx.identifiers[ident].header
        href = ('html/' + self.basename + '-' +
                self.modulename_file(header) + '.html#' + ident)
        output.write(" <reference name='%s' href='%s'/>\n" % (ident, href))
1906
def serialize_xrefs_index(self, output):
    """Write the word index held in ``self.xref`` split into chunks.

    ``self.xref`` maps a word to the list of names referencing it.  Words
    are visited in sorted order and grouped by first character into
    ``<letter>`` elements, themselves grouped into ``<chunk>`` elements.  A
    new chunk is started on a letter change once more than 200 references
    were written in the current one.  Words with more than 30 references
    are skipped.  A trailing ``<chunks>`` element lists each chunk with its
    first and last letter.  Nothing is written when no word qualifies.
    """
    index = self.xref
    letter = None        # first character of the group being written
    first_letter = None  # first character of the chunk being written
    count = 0            # references written in the current chunk
    chunk = 0            # number of chunks opened so far
    chunks = []
    for word in sorted(index.keys()):
        if len(index[word]) > 30:
            # Too common to be a useful index entry.
            continue
        if word[0] != letter:
            if letter is None or count > 200:
                if letter is not None:
                    # Current chunk is full: close it and remember its span.
                    output.write(" </letter>\n")
                    output.write(" </chunk>\n")
                    count = 0
                    chunks.append(["chunk%s" % (chunk - 1),
                                   first_letter, letter])
                output.write(" <chunk name='chunk%s'>\n" % (chunk,))
                first_letter = word[0]
                chunk = chunk + 1
            elif letter is not None:
                output.write(" </letter>\n")
            letter = word[0]
            output.write(" <letter name='%s'>\n" % (letter,))
        output.write(" <word name='%s'>\n" % (word,))
        tokens = index[word]
        tokens.sort()
        previous = None
        for token in tokens:
            if previous == token:
                # Sorted, so duplicates are adjacent.
                continue
            previous = token
            output.write(" <ref name='%s'/>\n" % (token,))
            count = count + 1
        output.write(" </word>\n")
    if letter is not None:
        output.write(" </letter>\n")
        output.write(" </chunk>\n")
        if count != 0:
            chunks.append(["chunk%s" % (chunk - 1), first_letter, letter])
        output.write(" <chunks>\n")
        for ch in chunks:
            output.write(" <chunk name='%s' start='%s' end='%s'/>\n" % (
                ch[0], ch[1], ch[2]))
        output.write(" </chunks>\n")
1953
def serialize_xrefs(self, output):
    """Write every cross-reference section to *output*.

    The sections are written in a fixed order -- references, alpha,
    constructors, functions, files, index -- each wrapped in an element of
    the same name and filled by the matching ``serialize_xrefs_<section>``
    method.
    """
    sections = (
        ("references", self.serialize_xrefs_references),
        ("alpha", self.serialize_xrefs_alpha),
        ("constructors", self.serialize_xrefs_constructors),
        ("functions", self.serialize_xrefs_functions),
        ("files", self.serialize_xrefs_files),
        ("index", self.serialize_xrefs_index),
    )
    for tag, writer in sections:
        output.write(" <%s>\n" % (tag,))
        writer(output)
        output.write(" </%s>\n" % (tag,))
1973
def serialize(self):
    """Write ``<name>-api.xml`` and ``<name>-refs.xml`` in the current directory.

    The API file lists the exports of every header (sorted by path)
    followed by all macros, enums, typedefs, variables and functions of
    ``self.idx``, each group sorted by name.  The references file holds
    whatever ``self.serialize_xrefs`` writes.  Both files are closed even
    when one of the writers raises.
    """
    filename = "%s-api.xml" % self.name
    print("Saving XML description %s" % (filename,))
    output = open(filename, "w")
    try:
        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
        output.write("<api name='%s'>\n" % self.name)
        output.write(" <files>\n")
        for header in sorted(self.headers.keys()):
            self.serialize_exports(output, header)
        output.write(" </files>\n")
        output.write(" <symbols>\n")
        # Symbol groups in the order they appear in the document.
        for table, writer in ((self.idx.macros, self.serialize_macro),
                              (self.idx.enums, self.serialize_enum),
                              (self.idx.typedefs, self.serialize_typedef),
                              (self.idx.variables, self.serialize_variable),
                              (self.idx.functions, self.serialize_function)):
            for symbol in sorted(table.keys()):
                writer(output, symbol)
        output.write(" </symbols>\n")
        output.write("</api>\n")
    finally:
        output.close()

    filename = "%s-refs.xml" % self.name
    print("Saving XML Cross References %s" % (filename,))
    output = open(filename, "w")
    try:
        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
        output.write("<apirefs name='%s'>\n" % self.name)
        self.serialize_xrefs(output)
        output.write("</apirefs>\n")
    finally:
        output.close()
Daniel Veillarda9b66d02002-12-11 14:23:49 +00002019
2020
def rebuild():
    """Regenerate the API description for the project found near the cwd.

    The project is guessed from the first marker file that exists:
    ``parser.c`` or ``../parser.c`` select libxml2, and
    ``../libxslt/transform.c`` selects libxslt.  The matching ``docBuilder``
    is scanned, analyzed and serialized.  When ``../libexslt/exslt.c`` is
    also present, a second builder does the same for libexslt.

    Returns the main builder, or ``None`` (after printing a message) when
    no marker file is found.
    """
    # (marker file, project name, directories to scan, excluded names)
    candidates = (
        ("parser.c", "libxml2", [".", "."],
         ["xmlwin32version.h", "tst.c"]),
        ("../parser.c", "libxml2", ["..", "../include/libxml"],
         ["xmlwin32version.h", "tst.c"]),
        ("../libxslt/transform.c", "libxslt", ["../libxslt"],
         ["win32config.h", "libxslt.h", "tst.c"]),
    )
    builder = None
    for marker, project, directories, excludes in candidates:
        if glob.glob(marker) != []:
            print("Rebuilding API description for %s" % (project,))
            builder = docBuilder(project, directories, excludes)
            break
    if builder is None:
        print("rebuild() failed, unable to guess the module")
        return None

    builder.scan()
    builder.analyze()
    builder.serialize()
    if glob.glob("../libexslt/exslt.c") != []:
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize()
    return builder
2047
2048#
2049# for debugging the parser
2050#
def parse(filename):
    """Parse the single file *filename* with ``CParser`` and return its index."""
    return CParser(filename).parse()
2055
if __name__ == "__main__":
    if len(sys.argv) > 1:
        # A file argument means "debug the parser on that one file".
        debug = 1
        parse(sys.argv[1])
    else:
        rebuild()