blob: 505a0bc3423879890572ae600d5eabf6fc815b65 [file] [log] [blame]
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001#!/usr/bin/python -u
2#
# This is the API builder: it parses the C sources and builds the
# formal API description in XML.
5#
6# See Copyright for the status of this software.
7#
8# daniel@veillard.com
9#
Daniel Veillard540a31a2003-01-21 11:21:07 +000010import os, sys
Daniel Veillarda9b66d02002-12-11 14:23:49 +000011import string
12import glob
13
# Debug switch.  It is not read anywhere in this part of the file
# (presumably consulted further down -- TODO confirm).
debug=0
# Name of one symbol to trace: the identifier and index classes print a
# line whenever a symbol with exactly this name is defined, updated or
# registered.  None disables the tracing.
#debugsym='ignorableWhitespaceSAXFunc'
debugsym=None
Daniel Veillarde8ba84e2003-11-18 13:54:15 +000017
Daniel Veillardde0a0a52003-04-24 17:12:57 +000018#
19# C parser analysis code
20#
# Maps a file name to a short human-readable reason.  The table is not
# read in this part of the file; presumably the builder skips the files
# listed here -- TODO confirm against the code that consumes it.
ignored_files = {
  "trio": "too many non standard macros",
  "trio.c": "too many non standard macros",
  "trionan.c": "too many non standard macros",
  "triostr.c": "too many non standard macros",
  "acconfig.h": "generated portability layer",
  "config.h": "generated portability layer",
  "libxml.h": "internal only",
  "testOOM.c": "out of memory tester",
  "testOOMlib.h": "out of memory tester",
  "testOOMlib.c": "out of memory tester",
  "rngparser.c": "not yet integrated",
  "rngparser.h": "not yet integrated",
  "elfgcchack.h": "not a normal header",
  "testHTML.c": "test tool",
  "testReader.c": "test tool",
  "testSchemas.c": "test tool",
  "testXPath.c": "test tool",
  "testAutomata.c": "test tool",
  "testModule.c": "test tool",
  "testRegexp.c": "test tool",
  "testThreads.c": "test tool",
  "testC14N.c": "test tool",
  "testRelax.c": "test tool",
  "testThreadsWin32.c": "test tool",
  "testSAX.c": "test tool",
  "testURI.c": "test tool",
  "testapi.c": "generated regression tests",
  "runtest.c": "regression tests program",
  "runsuite.c": "regression tests program",
  "tst.c": "not part of the library",
  "test.c": "not part of the library",
  "testdso.c": "test for dynamid shared libraries",
}
55
# Maps a word found in the C sources to a (count, reason) pair.  The
# table is not read in this part of the file; the count is presumably
# the number of tokens following the word that must be dropped together
# with it (0 = just the word itself) -- TODO confirm against the parser
# code that consumes it.
ignored_words = {
  "WINAPI": (0, "Windows keyword"),
  "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
  "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
  "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XMLCALL": (0, "Special macro for win32 calls"),
  "XSLTCALL": (0, "Special macro for win32 calls"),
  "XMLCDECL": (0, "Special macro for win32 calls"),
  "EXSLTCALL": (0, "Special macro for win32 calls"),
  "__declspec": (3, "Windows keyword"),
  "__stdcall": (0, "Windows keyword"),
  "ATTRIBUTE_UNUSED": (0, "macro keyword"),
  "LIBEXSLT_PUBLIC": (0, "macro keyword"),
  "X_IN_Y": (5, "macro function builder"),
}
75
def escape(raw):
    """Return raw with the five XML special characters turned into entities.

    '&' becomes '&amp;', '<' becomes '&lt;', '>' becomes '&gt;',
    "'" becomes '&apos;' and '"' becomes '&quot;'.
    """
    # The ampersand must be handled first, otherwise the '&' introduced by
    # the other entities would be escaped a second time.
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
83
def uniq(items):
    """Return the distinct values of items (as the keys of a dictionary).

    The values must be hashable; the order of the result is whatever
    order the dictionary yields its keys in.
    """
    seen = {}
    for entry in items:
        # Every entry maps to 1; re-inserting a known entry is a no-op.
        seen.setdefault(entry, 1)
    return seen.keys()
89
class identifier:
    """Description of one symbol registered in an index.

    Attributes:
      name         -- the symbol name
      header       -- value passed as 'header' by the creator, or None
      module       -- value passed as 'module' by the creator, or None
      type         -- kind string (index.add() knows "function",
                      "functype", "variable", "include", "struct",
                      "enum", "typedef" and "macro")
      info, extra  -- opaque payloads, stored exactly as given
      lineno       -- line number supplied by the creator
      static       -- 0 by default, changed through set_static()
      conditionals -- private copy of the list of conditionals, or None
                      when that list is missing or empty
    """
    def __init__(self, name, header=None, module=None, type=None, lineno = 0,
                 info=None, extra=None, conditionals = None):
        self.name = name
        self.header = header
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.lineno = lineno
        self.static = 0
        self.set_conditionals(conditionals)
        if self.name == debugsym:
            print("=> define %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))

    def __repr__(self):
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module is not None:
            r = r + " from %s" % (self.module)
        if self.info is not None:
            r = r + " " + repr(self.info)
        if self.extra is not None:
            r = r + " " + repr(self.extra)
        if self.conditionals is not None:
            r = r + " " + repr(self.conditionals)
        return r


    def set_header(self, header):
        self.header = header
    def set_module(self, module):
        self.module = module
    def set_type(self, type):
        self.type = type
    def set_info(self, info):
        self.info = info
    def set_extra(self, extra):
        self.extra = extra
    def set_lineno(self, lineno):
        self.lineno = lineno
    def set_static(self, static):
        self.static = static
    def set_conditionals(self, conditionals):
        """Store a copy of conditionals; None or an empty list gives None."""
        if conditionals is None or len(conditionals) == 0:
            self.conditionals = None
        else:
            # Copy, so later changes to the caller's list do not leak in.
            self.conditionals = conditionals[:]

    def get_name(self):
        return self.name
    def get_header(self):
        # This used to return self.module by mistake.
        return self.header
    def get_module(self):
        return self.module
    def get_type(self):
        return self.type
    def get_info(self):
        return self.info
    def get_lineno(self):
        return self.lineno
    def get_extra(self):
        return self.extra
    def get_static(self):
        return self.static
    def get_conditionals(self):
        return self.conditionals

    def update(self, header, module, type = None, info = None, extra=None,
               conditionals=None):
        """Merge new information into this identifier.

        header and type are only filled in when still unset.  module is
        set when it is unset or when it currently equals the header.
        info, extra and conditionals overwrite the stored value whenever
        the new value is not None.
        """
        if self.name == debugsym:
            print("=> update %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))
        if header is not None and self.header is None:
            # This used to store 'module' as the header.
            self.set_header(header)
        if module is not None and (self.module is None or
                                   self.header == self.module):
            self.set_module(module)
        if type is not None and self.type is None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
        if conditionals is not None:
            self.set_conditionals(conditionals)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000180
class index:
    """Symbol table: one dictionary of identifier objects per symbol kind.

    'identifiers' holds the latest identifier registered for every name,
    'references' the ones registered through add_ref(), and the other
    dictionaries (functions, variables, includes, structs, enums,
    typedefs, macros) the ones registered through add() by kind.
    """
    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}
        self.info = {}

    def _register(self, name, header, module, static, type, lineno,
                  info, extra, conditionals):
        """Create a fresh identifier for name and store it in identifiers.

        NOTE: earlier code first tried identifier.update() on an existing
        entry, but passed 'lineno' as an extra positional argument.  That
        call always raised TypeError, which a bare 'except:' swallowed, so
        an existing entry was in fact always replaced.  The replacement is
        now explicit and the effective behaviour is unchanged.
        """
        d = identifier(name, header, module, type, lineno, info, extra,
                       conditionals)
        self.identifiers[name] = d
        if static == 1:
            d.set_static(1)
        return d

    def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Register a reference to name; names starting with '__' are
        ignored (None is returned).  Returns the new identifier."""
        if name[0:2] == '__':
            return None
        d = self._register(name, header, module, static, type, lineno,
                           info, extra, conditionals)

        if type is not None:
            self.references[name] = d

        if name == debugsym:
            print("New ref: %s" % (d))

        return d

    def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Register the definition of name in the dictionary matching type.

        Names starting with '__' are ignored (None is returned).  An
        unknown type is reported on stdout; the identifier is then still
        recorded in 'identifiers'.  Returns the new identifier.
        """
        if name[0:2] == '__':
            return None
        d = self._register(name, header, module, static, type, lineno,
                           info, extra, conditionals)

        if type is not None:
            tables = {
                "function": self.functions,
                "functype": self.functions,
                "variable": self.variables,
                "include": self.includes,
                "struct": self.structs,
                "enum": self.enums,
                "typedef": self.typedefs,
                "macro": self.macros,
            }
            if type in tables:
                tables[type][name] = d
            else:
                print("Unable to register type  %s" % (type,))

        if name == debugsym:
            print("New symbol: %s" % (d))

        return d

    def _merge_table(self, kind, mine, theirs, overrides_macros = 0):
        """Copy the entries of theirs that are missing from mine.

        A name present on both sides is reported on stdout and the
        existing entry is kept.  With overrides_macros set, a macro of
        the same name is dropped first, because a macro might be used to
        override a function or variable definition.
        """
        for name in list(theirs.keys()):
            if overrides_macros and name in self.macros:
                del self.macros[name]
            if name in mine:
                print("%s %s from %s redeclared in %s" % (
                    kind, name, mine[name].header, theirs[name].header))
            else:
                mine[name] = theirs[name]
                self.identifiers[name] = theirs[name]

    def merge(self, idx):
        """Merge the symbols of idx into this index."""
        self._merge_table("function", self.functions, idx.functions, 1)
        self._merge_table("variable", self.variables, idx.variables, 1)
        self._merge_table("struct", self.structs, idx.structs)
        self._merge_table("typedef", self.typedefs, idx.typedefs)
        for name in list(idx.macros.keys()):
            #
            # macro might be used to override functions or variables
            # definitions
            #
            if name in self.variables:
                continue
            if name in self.functions:
                continue
            if name in self.enums:
                continue
            if name in self.macros:
                print("macro %s from %s redeclared in %s" % (
                    name, self.macros[name].header, idx.macros[name].header))
            else:
                self.macros[name] = idx.macros[name]
                self.identifiers[name] = idx.macros[name]
        # Enums are merged last, so the macro pass above only sees the
        # enums that were already in this index.
        self._merge_table("enum", self.enums, idx.enums)

    def merge_public(self, idx):
        """Update the functions already known here with the data of idx.

        Functions of idx that are unknown to this index are ignored.  A
        difference between the two sets of conditionals is reported on
        stdout.
        """
        for name in list(idx.functions.keys()):
            if name in self.functions:
                up = idx.functions[name]
                mine = self.functions[name]
                # check that function condition agrees with header
                if up.conditionals != mine.conditionals:
                    print("Header condition differs from Function for %s:"
                          % name)
                    print(" H: %s" % (mine.conditionals,))
                    print(" C: %s" % (up.conditionals,))
                mine.update(None, up.module, up.type, up.info, up.extra)
        #     else:
        #         print "Function %s from %s is not declared in headers" % (
        #               id, idx.functions[id].module)
        # TODO: do the same for variables.

    def analyze_dict(self, type, dict):
        """Print how many entries dict holds and how many are not static."""
        count = 0
        public = 0
        for name in dict.keys():
            count = count + 1
            if dict[name].static == 0:
                public = public + 1
        if count != public:
            print(" %d %s , %d public" % (count, type, public))
        elif count != 0:
            print(" %d public %s" % (count, type))


    def analyze(self):
        """Print the statistics of the main symbol dictionaries."""
        self.analyze_dict("functions", self.functions)
        self.analyze_dict("variables", self.variables)
        self.analyze_dict("structs", self.structs)
        self.analyze_dict("typedefs", self.typedefs)
        self.analyze_dict("macros", self.macros)
Daniel Veillarddee23482008-04-11 12:58:43 +0000359
class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line.

       token() returns (kind, text) pairs where kind is one of 'preproc',
       'string', 'comment', 'name', 'sep' or 'op', and None at the end of
       the input."""

    # Characters ending a 'name' token.
    _NAME_END = " \t(){}:;,+-*/%&!|[]=><"
    # Single character separators.
    _SEPARATORS = "(){}:;,[]"
    # Characters which can start an operator, and characters which can
    # follow one to make a two character operator.
    _OP_START = "+-*><=/%&!|."
    _OP_NEXT = "+-*><=/%&!|"

    def __init__(self, input):
        self.input = input
        self.tokens = []
        self.line = ""
        self.lineno = 0
        # Last token returned; set here so that debug() also works before
        # the first call to token().
        self.last = None

    def getline(self):
        """Return the next non blank line, stripped, with the lines
           continued by a trailing backslash joined; None at the end of
           the input."""
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = line.strip()
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = n.strip()
                if not n:
                    # A blank continuation ends the logical line; if
                    # nothing is left the outer loop reads further.
                    break
                line = line + n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        """Put token back, it will be the next one returned by token()."""
        self.tokens.insert(0, token)

    def debug(self):
        print("Last token:  %s" % (self.last,))
        print("Token queue:  %s" % (self.tokens,))
        print("Line %d end:  %s" % (self.lineno, self.line))

    def _read_string(self, line):
        """Lex a string or character literal; line starts with its opening
           quote.  The text between the quotes is returned without
           unescaping.  Returns None if the input ends first."""
        end = line[0]
        line = line[1:]
        found = 0
        tok = ""
        while found == 0:
            i = 0
            l = len(line)
            while i < l:
                if line[i] == end:
                    self.line = line[i+1:]
                    line = line[:i]
                    found = 1
                    break
                if line[i] == '\\':
                    # skip the escaped character
                    i = i + 1
                i = i + 1
            tok = tok + line
            if found == 0:
                line = self.getline()
                if line is None:
                    return None
        self.last = ('string', tok)
        return self.last

    def _read_comment(self, line):
        """Lex a /* */ comment; line is the text following the opening
           marker.  The lines of the comment are joined with newlines.
           Returns None if the input ends before the closing marker."""
        found = 0
        tok = ""
        while found == 0:
            i = 0
            l = len(line)
            while i < l:
                if line[i] == '*' and i+1 < l and line[i+1] == '/':
                    self.line = line[i+2:]
                    # NOTE(review): this also drops the character before
                    # the closing marker, and for i == 0 the slice is
                    # line[:-1], so a closing line "*/" yields "*".  Kept
                    # as is: the comment parsers skip lines equal to '*'.
                    line = line[:i-1]
                    found = 1
                    break
                i = i + 1
            if tok != "":
                tok = tok + "\n"
            tok = tok + line
            if found == 0:
                line = self.getline()
                if line is None:
                    return None
        self.last = ('comment', tok)
        return self.last

    def _tokenize(self, line):
        """Append the 'name', 'sep' and 'op' tokens of line to the queue.
           Anything from the first comment or quote on is saved in
           self.line for the next round."""
        l = len(line)
        i = 0
        while i < l:
            c = line[i]
            if c == '"' or c == "'" or line[i:i+2] == '//' or \
               line[i:i+2] == '/*':
                self.line = line[i:]
                line = line[:i]
                break
            i = i + 1
        l = len(line)
        i = 0
        while i < l:
            c = line[i]
            if c == ' ' or c == '\t':
                i = i + 1
                continue
            if c in self._SEPARATORS:
                self.tokens.append(('sep', c))
                i = i + 1
                continue
            if c in self._OP_START:
                if c == '.' and i + 2 < l and \
                   line[i+1] == '.' and line[i+2] == '.':
                    self.tokens.append(('name', '...'))
                    i = i + 3
                    continue

                j = i + 1
                if j < l and line[j] in self._OP_NEXT:
                    self.tokens.append(('op', line[i:j+1]))
                    i = j + 1
                else:
                    self.tokens.append(('op', c))
                    i = i + 1
                continue
            # Everything else starts a name, which runs up to the next
            # blank, separator or operator character.  The first character
            # cannot be one of those here, so the loop always advances.
            s = i
            while i < l and line[i] not in self._NAME_END:
                i = i + 1
            self.tokens.append(('name', line[s:i]))

    def token(self):
        """Return the next token, or None at the end of the input."""
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line is None:
                return None

            if line[0] == '#':
                # a preprocessor line is split on blanks only
                self.tokens = [('preproc', word) for word in line.split()]
                break
            if line[0] == '"' or line[0] == "'":
                return self._read_string(line)
            if line[0:2] == '/*':
                return self._read_comment(line[2:])
            if line[0:2] == '//':
                self.last = ('comment', line[2:])
                return self.last
            self._tokenize(line)

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok
Daniel Veillarddee23482008-04-11 12:58:43 +0000565
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000566class CParser:
Daniel Veillardbe586972003-11-18 20:56:51 +0000567 """The C module parser"""
def __init__(self, filename, idx = None):
    """Open filename and prepare to parse it.

    filename -- path of the C source or header; a name longer than two
                characters ending in '.h' marks the file as a header
    idx      -- index receiving the symbols; a new one is created when
                it is None

    The file is opened here and handed to a CLexer; it is not closed by
    this method.
    """
    self.filename = filename
    ends_with_dot_h = len(filename) > 2 and filename[-2:] == '.h'
    self.is_header = int(ends_with_dot_h)
    self.input = open(filename)
    self.lexer = CLexer(self.input)
    if idx is None:
        idx = index()
    self.index = idx
    # comment bookkeeping
    self.top_comment = ""
    self.last_comment = ""
    self.comment = None
    # flags, both off at the start
    self.collect_ref = 0
    self.no_error = 0
    self.conditionals = []
    self.defines = []
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000587
def collect_references(self):
    """Switch the collect_ref flag on (set it to 1)."""
    self.collect_ref = 1
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000590
def stop_error(self):
    """Set the no_error flag to 1; warning() and error() are then silent."""
    self.no_error = 1
593
def start_error(self):
    """Reset the no_error flag to 0; warning() and error() report again."""
    self.no_error = 0
596
def lineno(self):
    """Return the current line number, as reported by the lexer."""
    lexer = self.lexer
    return lexer.getlineno()
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000599
def index_add(self, name, module, static, type, info=None, extra = None):
    """Register a symbol definition in the index.

    When the parsed file is a header, module is also passed as the
    header of the symbol; otherwise the header is None.  The current
    line number and the current conditionals are added to the call.
    """
    header = None
    if self.is_header == 1:
        header = module
    self.index.add(name, header, module, static, type, self.lineno(),
                   info, extra, self.conditionals)
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000607
def index_add_ref(self, name, module, static, type, info=None,
                  extra = None):
    """Register a reference to a symbol in the index.

    When the parsed file is a header, module is also passed as the
    header of the symbol; otherwise the header is None.  The current
    line number and the current conditionals are added to the call.
    """
    header = None
    if self.is_header == 1:
        header = module
    self.index.add_ref(name, header, module, static, type, self.lineno(),
                       info, extra, self.conditionals)
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000616
def warning(self, msg):
    """Print msg on stdout unless reporting is disabled (no_error set)."""
    if self.no_error:
        return
    # Parenthesised single argument: same output with the Python 2 print
    # statement and the Python 3 print function.
    print(msg)
621
def error(self, msg, token=-1):
    """Report a parse error on stdout and exit with status 1.

    msg   -- description of the problem
    token -- offending token, also printed unless left at -1

    The state of the lexer is dumped before exiting.  Nothing happens at
    all when reporting is disabled (no_error set).
    """
    if self.no_error:
        return

    # Each print takes one parenthesised string, so the output is the
    # same with the Python 2 statement and the Python 3 function.
    print("Parse Error: " + msg)
    if token != -1:
        print("Got token  %s" % (token,))
    self.lexer.debug()
    sys.exit(1)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000631
def debug(self, msg, token=-1):
    """Print a debug message on stdout, then dump the state of the lexer.

    msg   -- text printed after the "Debug: " prefix
    token -- token to show as well, unless left at -1
    """
    # Each print takes one parenthesised string, so the output is the
    # same with the Python 2 statement and the Python 3 function.
    print("Debug: " + msg)
    if token != -1:
        print("Got token  %s" % (token,))
    self.lexer.debug()
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000637
def parseTopComment(self, comment):
    """Split the top comment of a file into "Item: text" entries.

    Leading blanks and '*' are removed from every line.  A line holding
    a ':' starts (or extends) the item named before the first ':'; a
    line without one is appended to the current item, or dropped when no
    item was seen yet.  The resulting dictionary is stored in
    self.index.info; nothing is returned.
    """
    res = {}
    item = None
    for line in comment.split("\n"):
        line = line.lstrip(" \t").lstrip("*").lstrip(" \t")
        if ":" in line:
            (item, line) = line.split(":", 1)
            line = line.lstrip(" \t")
        elif item is None:
            # continuation line before any item: nothing to attach it to
            continue
        if item in res:
            res[item] = res[item] + " " + line
        else:
            res[item] = line
    self.index.info = res
665
def parseComment(self, token):
    """Record the text of a comment token and return the next token.

    The first comment seen becomes the top comment.  A comment whose
    text starts with '*' replaces the current comment, any other one is
    appended to it.  DOC_DISABLE / DOC_ENABLE found in the resulting
    comment switch error reporting off / on.
    """
    text = token[1]
    if self.top_comment == "":
        self.top_comment = text
    # Slice rather than index: an empty comment (e.g. a bare "//") used
    # to raise IndexError here.
    if self.comment is None or text[0:1] == '*':
        self.comment = text
    else:
        self.comment = self.comment + text
    token = self.lexer.token()

    if "DOC_DISABLE" in self.comment:
        self.stop_error()

    if "DOC_ENABLE" in self.comment:
        self.start_error()

    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000682
Daniel Veillard99b78502005-09-11 22:04:08 +0000683 #
    # Parse a comment block associated with a typedef
685 #
def parseTypeComment(self, name, quiet = 0):
    """Extract the description of type name from the current comment.

    The comment must start with '*' and its first line (after an
    optional line holding only '*') must be "* name:".  The remaining
    lines, without their leading '*', are joined with single blanks.

    name  -- name of the type; a name starting with '__' forces quiet
    quiet -- when true no warning is issued

    Returns the description string on success.  NOTE: when the comment
    is missing or misformatted the return value is the pair ([], "")
    instead; this has always been so and is kept for the callers.
    """
    if name[0:2] == '__':
        quiet = 1

    args = []
    desc = ""

    if self.comment is None:
        if not quiet:
            self.warning("Missing comment for type %s" % (name))
        return((args, desc))
    # Slice rather than index so that an empty comment is reported
    # instead of raising IndexError.
    if self.comment[0:1] != '*':
        if not quiet:
            self.warning("Missing * in type comment for %s" % (name))
        return((args, desc))
    lines = self.comment.split('\n')
    if lines[0] == '*':
        del lines[0]
    # The comment may have consisted of the single '*' line only.
    if len(lines) == 0 or lines[0] != "* %s:" % (name):
        if not quiet:
            got = ""
            if len(lines) > 0:
                got = lines[0]
            self.warning("Misformatted type comment for %s" % (name))
            self.warning(" Expecting '* %s:' got '%s'" % (name, got))
        return((args, desc))
    del lines[0]
    # Lines holding only '*' contribute an empty word, which the final
    # strip() and the single-blank joining make harmless.
    words = []
    for l in lines:
        words.append(l.lstrip('*').strip())
    desc = " ".join(words).strip()

    if quiet == 0:
        if desc == "":
            self.warning("Type comment for %s lack description of the macro" % (name))

    return(desc)
728 #
    # Parse a comment block associated with a macro
730 #
def parseMacroComment(self, name, quiet = 0):
    """Extract arguments and description of macro name from the comment.

    The comment must start with '*' and its first line (after an
    optional line holding only '*') must be "* name:".  Lines of the
    form "* @arg: text" which follow describe the arguments; such a
    description can continue on the next lines.  What remains is the
    description of the macro.

    name  -- name of the macro; a name starting with '__' forces quiet
    quiet -- when true no warning is issued

    Returns (args, desc) where args is a list of (argument, description)
    pairs; ([], "") when the comment is missing or misformatted.
    """
    if name[0:2] == '__':
        quiet = 1

    args = []
    desc = ""

    if self.comment is None:
        if not quiet:
            self.warning("Missing comment for macro %s" % (name))
        return((args, desc))
    # Slice rather than index so that an empty comment is reported
    # instead of raising IndexError.
    if self.comment[0:1] != '*':
        if not quiet:
            self.warning("Missing * in macro comment for %s" % (name))
        return((args, desc))
    lines = self.comment.split('\n')
    if lines[0] == '*':
        del lines[0]
    # The comment may have consisted of the single '*' line only.
    if len(lines) == 0 or lines[0] != "* %s:" % (name):
        if not quiet:
            got = ""
            if len(lines) > 0:
                got = lines[0]
            self.warning("Misformatted macro comment for %s" % (name))
            self.warning(" Expecting '* %s:' got '%s'" % (name, got))
        return((args, desc))
    del lines[0]
    # Every access to lines[0] below is guarded: a comment may end right
    # after its header line or after an argument line.
    while len(lines) > 0 and lines[0] == '*':
        del lines[0]
    while len(lines) > 0 and lines[0][0:3] == '* @':
        l = lines[0][3:]
        if ':' not in l:
            if not quiet:
                self.warning("Misformatted macro comment for %s" % (name))
                self.warning(" problem with '%s'" % (lines[0]))
            del lines[0]
            continue
        (arg, desc) = l.split(':', 1)
        desc = desc.strip()
        arg = arg.strip()
        del lines[0]
        if len(lines) > 0:
            l = lines[0].strip()
            # continuation lines of the argument description
            while len(l) > 2 and l[0:3] != '* @':
                desc = desc + ' ' + l.lstrip('*').strip()
                del lines[0]
                if len(lines) == 0:
                    break
                l = lines[0]
        args.append((arg, desc))
    while len(lines) > 0 and lines[0] == '*':
        del lines[0]
    desc = ""
    while len(lines) > 0:
        l = lines[0].lstrip('*').strip()
        desc = desc + " " + l
        del lines[0]

    desc = desc.strip()

    if quiet == 0:
        if desc == "":
            self.warning("Macro comment for %s lack description of the macro" % (name))

    return((args, desc))
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000798
799 #
    # Parse a comment block and merge the information found in the
    # parameter descriptions; finally return a block as complete
    # as possible
803 #
def mergeFunctionComment(self, name, description, quiet = 0):
    """Merge the pending comment block into a function description.

    name        -- name of the function (or function type) documented
    description -- pair (ret, args); ret[0] is the return type and args
                   is the list of (type, name, info) tuples.  Entries
                   whose name matches an '@name:' line of the comment
                   are replaced in place with the documented info.
    quiet       -- when true no warning is emitted; forced on for
                   'main' and for names starting with '__'

    Returns ((return type, return description), args, description).

    The layout expected in self.comment is:
        * name:
        * @arg: description, possibly continued on following lines
        *
        * free text description
        *
        * Returns description of the returned value
    A missing or misformatted comment yields empty descriptions.
    """
    if name == 'main' or name[0:2] == '__':
        quiet = 1

    (ret, args) = description
    desc = ""
    retdesc = ""

    if self.comment is None:
        if not quiet:
            self.warning("Missing comment for function %s" % (name))
        return ((ret[0], retdesc), args, desc)
    # Slicing (instead of indexing) keeps an empty comment on the
    # warning path rather than raising IndexError.
    if self.comment[0:1] != '*':
        if not quiet:
            self.warning("Missing * in function comment for %s" % (name))
        return ((ret[0], retdesc), args, desc)

    lines = self.comment.split('\n')
    if lines[0] == '*':
        del lines[0]
    if len(lines) == 0 or lines[0] != "* %s:" % (name):
        if not quiet:
            got = ""
            if len(lines) > 0:
                got = lines[0]
            self.warning("Misformatted function comment for %s" % (name))
            self.warning(" Expecting '* %s:' got '%s'" % (name, got))
        return ((ret[0], retdesc), args, desc)
    del lines[0]
    # The comment may stop right after the header, hence the length test.
    while len(lines) > 0 and lines[0] == '*':
        del lines[0]

    #
    # first the '* @arg: description' entries
    #
    nbargs = len(args)
    while len(lines) > 0 and lines[0][0:3] == '* @':
        parts = lines[0][3:].split(':', 1)
        if len(parts) != 2:
            if not quiet:
                self.warning("Misformatted function comment for %s" % (name))
                self.warning(" problem with '%s'" % (lines[0]))
            del lines[0]
            continue
        arg = parts[0].strip()
        desc = parts[1].strip()
        del lines[0]
        # Continuation lines: anything longer than two characters which
        # does not start a new '@arg' entry.  The entry may also be the
        # very last line of the comment.
        if len(lines) > 0:
            l = lines[0].strip()
            while len(l) > 2 and l[0:3] != '* @':
                # lstrip copes with lines made only of '*'
                desc = desc + ' ' + l.lstrip('*').strip()
                del lines[0]
                if len(lines) == 0:
                    break
                l = lines[0]
        for i in range(nbargs):
            if args[i][1] == arg:
                args[i] = (args[i][0], arg, desc)
                break
        else:
            if not quiet:
                self.warning("Unable to find arg %s from function comment for %s" % (
                   arg, name))

    while len(lines) > 0 and lines[0] == '*':
        del lines[0]

    #
    # then the description; everything from the first line starting
    # with "return"/"Return" onward describes the return value
    #
    desc = ""
    while len(lines) > 0:
        l = lines[0].lstrip('*').strip()
        del lines[0]
        if l[0:6] == "return" or l[0:6] == "Return":
            # drop the leading "Returns" word itself
            pieces = l.split(' ', 1)
            if len(pieces) > 1:
                retdesc = pieces[1].strip()
            else:
                retdesc = ""
            for rest in lines:
                retdesc = retdesc + " " + rest.lstrip('*').strip()
            del lines[:]
        else:
            desc = desc + " " + l

    retdesc = retdesc.strip()
    desc = desc.strip()

    if not quiet:
        #
        # report missing comments
        #
        for param in args:
            if param[2] is None and param[0] != "void" and \
               param[1] is not None:
                self.warning("Function comment for %s lacks description of arg %s" % (name, param[1]))
        if retdesc == "" and ret[0] != "void":
            self.warning("Function comment for %s lacks description of return value" % (name))
        if desc == "":
            self.warning("Function comment for %s lacks description of the function" % (name))

    return ((ret[0], retdesc), args, desc)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000911
def parsePreproc(self, token):
    """Handle one preprocessor directive token and return the next token.

    token[1] is the directive.  '#include' and '#define' are recorded
    through index_add(); '#ifdef', '#ifndef', '#if', '#else' and
    '#endif' maintain self.defines and self.conditionals.  Remaining
    'preproc' tokens that do not start with '#' are skipped before the
    following token is returned (None at the end of the input).
    """
    if debug:
        print("%s %s %s" % ("=> preproc ", token, self.lexer.tokens))
    directive = token[1]

    if directive == "#include":
        token = self.lexer.token()
        if token is None:
            return None
        if token[0] != 'preproc':
            return token
        self.index_add(token[1], self.filename, not self.is_header,
                       "include")
        return self.lexer.token()

    if directive == "#define":
        token = self.lexer.token()
        if token is None:
            return None
        if token[0] == 'preproc':
            # TODO macros with arguments
            macro = token[1]
            body = []
            token = self.lexer.token()
            while token is not None and token[0] == 'preproc' and \
                  token[1][0] != '#':
                body.append(token[1])
                token = self.lexer.token()
            try:
                # keep only the macro name, without its argument list
                macro = macro.split('(')[0]
            except:
                pass
            info = self.parseMacroComment(macro, not self.is_header)
            self.index_add(macro, self.filename, not self.is_header,
                           "macro", info)
            return token

    #
    # Processing of conditionals modified by Bill 1/1/05
    #
    # Conditionals (i.e. tokens from #ifdef, #ifndef, #if, #else and
    # #endif) are processed for headers and mainline code; the ones
    # from the header are stored in libxml2-api.xml, and later (in the
    # routine merge_public) the two (header and mainline code) are
    # checked for agreement.
    #
    # Some of the variables found in headers are not concerned with
    # enabling / disabling of library functions (e.g.
    # '__XML_PARSER_H__'); they must not end up in libxml2-api.xml nor
    # take part in that check, so any conditional which doesn't include
    # the string 'ENABLED' is ignored.
    #
    if directive in ("#ifdef", "#ifndef"):
        symbol = self.lexer.tokens[0][1]
        if directive == "#ifdef":
            template = "defined(%s)"
        else:
            template = "!defined(%s)"
        try:
            self.defines.append(symbol)
            if symbol.find('ENABLED') != -1:
                self.conditionals.append(template % symbol)
        except:
            pass
    elif directive == "#if":
        # rebuild the expression from the pending tokens
        expr = ""
        for tok in self.lexer.tokens:
            if expr != "":
                expr = expr + " "
            expr = expr + tok[1]
        try:
            self.defines.append(expr)
            if expr.find('ENABLED') != -1:
                self.conditionals.append(expr)
        except:
            pass
    elif directive == "#else":
        if self.conditionals != [] and \
           self.defines[-1].find('ENABLED') != -1:
            self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
    elif directive == "#endif":
        if self.conditionals != [] and \
           self.defines[-1].find('ENABLED') != -1:
            # rebind to a shortened copy, the previous list is left untouched
            self.conditionals = self.conditionals[:-1]
        self.defines = self.defines[:-1]

    # skip whatever is left of the directive
    token = self.lexer.token()
    while token is not None and token[0] == 'preproc' and \
          token[1][0] != '#':
        token = self.lexer.token()
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001006
1007 #
# Token acquisition on top of the lexer; it handles the
# preprocessor and comments internally since they are logically not
# part of the program structure.
1011 #
def token(self):
    """Return the next significant token, or None at the end of input.

    Comment and preprocessor tokens are handed to parseComment() and
    parsePreproc(), which return the token to carry on with.  The name
    '__const' is reported as 'const'; '__attribute' is dropped together
    with everything up to the next token whose text is ';', which is
    the token returned.  A name found in the module-level ignored_words
    table is dropped along with the number of following tokens
    registered for it.
    """
    global ignored_words

    tok = self.lexer.token()
    while tok is not None:
        kind = tok[0]
        if kind == 'comment':
            tok = self.parseComment(tok)
            continue
        if kind == 'preproc':
            tok = self.parsePreproc(tok)
            continue
        if kind == "name":
            word = tok[1]
            if word == "__const":
                return ("name", "const")
            if word == "__attribute":
                tok = self.lexer.token()
                while tok is not None and tok[1] != ";":
                    tok = self.lexer.token()
                return tok
            if word in ignored_words:
                (skip, info) = ignored_words[word]
                for unused in range(skip):
                    self.lexer.token()
                tok = self.lexer.token()
                continue
        if debug:
            print("%s %s" % ("=> ", tok))
        return tok
    return None
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001044
1045 #
1046 # Parse a typedef, it records the type and its name.
1047 #
def parseTypedef(self, token):
    """Parse the declarators of a typedef and index every declared name.

    token is the first token after the 'typedef' keyword.  The type is
    read with parseType(); then each declared name is recorded with
    index_add() as a "functype" (when parseType() left a signature), a
    "struct" (when the base type is exactly "struct") or a "typedef".
    Returns the token following the closing ';', the offending token
    after an error was reported, or None at the end of the input.
    """
    if token is None:
        return None
    token = self.parseType(token)
    if token is None:
        self.error("parsing typedef")
        return None

    base_type = self.type
    cur_type = base_type
    while token is not None:
        if token[0] != "name":
            self.error("parsing typedef: expecting a name")
            return token

        name = token[1]
        signature = self.signature
        if signature is not None:
            # function type: keep only the return type part
            cur_type = cur_type.split('(')[0]
            d = self.mergeFunctionComment(name,
                    ((cur_type, None), signature), 1)
            self.index_add(name, self.filename, not self.is_header,
                           "functype", d)
        elif base_type == "struct":
            self.index_add(name, self.filename, not self.is_header,
                           "struct", cur_type)
            # the following declarators refer to the struct by name
            base_type = "struct " + name
        else:
            # TODO report missing or misformatted comments
            info = self.parseTypeComment(name, 1)
            self.index_add(name, self.filename, not self.is_header,
                           "typedef", cur_type, info)
        token = self.token()

        is_sep = token is not None and token[0] == 'sep'
        if is_sep and token[1] == ',':
            # next declarator, possibly prefixed with operators like '*'
            cur_type = base_type
            token = self.token()
            while token is not None and token[0] == "op":
                cur_type = cur_type + token[1]
                token = self.token()
        elif is_sep and token[1] == ';':
            break
        elif token is not None and token[0] == 'name':
            cur_type = base_type
        else:
            self.error("parsing typedef: expecting ';'", token)
            return token
    return self.token()
Daniel Veillarddee23482008-04-11 12:58:43 +00001099
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001100 #
# Parse a C code block, used for functions; it parses up to and
# including the balancing }
1103 #
def parseBlock(self, token):
    """Skip a C code block up to and including the balancing '}'.

    token is the first token inside the block.  Nested blocks are
    handled recursively.  When self.collect_ref is 1 the names met on
    the way are recorded with index_add_ref(): names starting with
    'xml' followed by '(' as "function", names starting with 'xml'
    followed by another name and then ';', ',' or '=' as "type", and
    names starting with 'XML_' or 'LIBXML_' as "typedef".
    Returns the token following the closing '}', or None when the input
    ends first.
    """
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            self.comment = None
            token = self.token()
            return token
        elif self.collect_ref == 1:
            oldtok = token
            token = self.token()
            if oldtok[0] != "name":
                continue
            word = oldtok[1]
            if word[0:3] == "xml":
                # The lookahead token is None when the input ends in
                # the middle of the block: nothing to record then.
                if token is None:
                    pass
                elif token[0] == "sep" and token[1] == "(":
                    self.index_add_ref(word, self.filename,
                                       0, "function")
                    token = self.token()
                elif token[0] == "name":
                    token = self.token()
                    if token is not None and token[0] == "sep" and \
                       token[1] in (";", ",", "="):
                        self.index_add_ref(word, self.filename,
                                           0, "type")
            elif word[0:4] == "XML_" or word[0:7] == "LIBXML_":
                self.index_add_ref(word, self.filename,
                                   0, "typedef")
        else:
            token = self.token()
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001138
1139 #
1140 # Parse a C struct definition till the balancing }
1141 #
def parseStruct(self, token):
    """Parse the fields of a C struct up to the balancing '}'.

    token is the first token after the opening '{'.  Every field is
    read with parseType() and stored as (type, name, comment) where
    comment is whatever self.comment holds once the token following the
    field's ';' has been fetched.  Nested '{' blocks are skipped with
    parseTypeBlock().  The list of fields is stored in
    self.struct_fields and self.type is restored after each field.
    Returns the token following the closing '}', or None when the input
    ends first.
    """
    fields = []
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseTypeBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            self.struct_fields = fields
            token = self.token()
            return token
        else:
            # parseType() overwrites self.type, keep the struct's own
            base_type = self.type
            token = self.parseType(token)
            if token is not None and token[0] == "name":
                fname = token[1]
                token = self.token()
                # token is None if the input ends after the field name
                if token is not None and token[0] == "sep" and \
                   token[1] == ";":
                    # reset first so that only a comment met while
                    # fetching the next token is attached to the field
                    self.comment = None
                    token = self.token()
                    fields.append((self.type, fname, self.comment))
                    self.comment = None
                else:
                    self.error("parseStruct: expecting ;", token)
            elif token is not None and token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseTypeBlock(token)
                if token is not None and token[0] == "name":
                    token = self.token()
                if token is not None and token[0] == "sep" and token[1] == ";":
                    token = self.token()
                else:
                    self.error("parseStruct: expecting ;", token)
            else:
                self.error("parseStruct: name", token)
                token = self.token()
            self.type = base_type
    self.struct_fields = fields
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001187
1188 #
1189 # Parse a C enum block, parse till the balancing }
1190 #
def parseEnumBlock(self, token):
    """Parse the body of a C enum up to the balancing '}'.

    token is the first token after the opening '{'.  self.enums is
    filled with one (name, value, comment) tuple per enumerator.  The
    value is the text following '=' when present, otherwise the
    previous value plus one, starting at 0 as in C.  When the previous
    value is not a plain integer a warning is emitted and the value is
    left empty.  The comment is the stripped content of self.comment at
    the time the next enumerator or the closing '}' is reached.
    Returns the token following the closing '}', or None when the input
    ends first.
    """
    self.enums = []
    name = None
    self.comment = None
    comment = ""
    # Implicit values are computed as "previous + 1": start one below
    # zero so that the first enumerator gets 0, as the C language says.
    value = "-1"
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseTypeBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            if name is not None:
                if self.comment is not None:
                    comment = self.comment.strip()
                    self.comment = None
                self.enums.append((name, value, comment))
            token = self.token()
            return token
        elif token[0] == "name":
            # a new enumerator starts: flush the previous one
            if name is not None:
                if self.comment is not None:
                    comment = self.comment.strip()
                    self.comment = None
                self.enums.append((name, value, comment))
            name = token[1]
            comment = ""
            token = self.token()
            if token is not None and token[0] == "op" and \
               token[1][0:1] == "=":
                # the operator token may carry more than the '=' itself
                value = token[1][1:]
                token = self.token()
                while token is not None and (token[0] != "sep" or
                      (token[1] != ',' and token[1] != '}')):
                    value = value + token[1]
                    token = self.token()
            else:
                try:
                    value = "%d" % (int(value) + 1)
                except ValueError:
                    self.warning("Failed to compute value of enum %s" % (name))
                    value = ""
            if token is not None and token[0] == "sep" and token[1] == ",":
                token = self.token()
        else:
            token = self.token()
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001238
1239 #
# Parse a C definition block, used for structs; it parses up to
# the balancing }
1242 #
def parseTypeBlock(self, token):
    """Skip a brace delimited definition block.

    token is the first token inside the block; nested '{' ... '}'
    pairs are skipped recursively.  Returns the token following the
    balancing '}', or None when the input ends first.
    """
    while token is not None:
        is_sep = token[0] == "sep"
        if is_sep and token[1] == "}":
            return self.token()
        if is_sep and token[1] == "{":
            # nested block: resume after its own closing brace
            token = self.parseTypeBlock(self.token())
        else:
            token = self.token()
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001254
1255 #
1256 # Parse a type: the fact that the type name can either occur after
1257 # the definition or within the definition makes it a little harder
1258 # if inside, the name token is pushed back before returning
1259 #
def parseType(self, token):
    """Parse a C type starting at token and store its text in self.type.

    self.struct_fields and self.signature are reset; they are filled
    again when the type embeds a struct definition or is a function
    type.  Enumerators met in an enum definition are recorded with
    index_add().

    The name being declared can occur after the type or within it
    (function pointers, arrays, bit fields).  When the name had to be
    consumed to look further ahead, the token following it is pushed
    back into the lexer and the name token is returned.  Otherwise the
    token following the type is returned.  After an error was reported
    with self.error() the offending token is returned.
    """
    self.type = ""
    self.struct_fields = []
    self.signature = None
    if token is None:
        return token

    def add_word(word):
        # append one word to self.type, space separated
        if self.type == "":
            self.type = word
        else:
            self.type = self.type + " " + word

    while token is not None and token[0] == "name" and \
          token[1] in ("const", "unsigned", "signed"):
        add_word(token[1])
        token = self.token()

    if token is None:
        # the input ended right after the qualifiers
        self.error("parsing type %s: expecting a name" % (self.type),
                   token)
        return token

    if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
        add_word(token[1])
        # 'long' and 'short' may be followed by 'int': look one token
        # ahead and put it back when it is something else, usually the
        # name being declared.
        lookahead = self.token()
        if lookahead is not None and lookahead[0] == "name" and \
           lookahead[1] == "int":
            add_word(lookahead[1])
        elif lookahead is not None:
            self.lexer.push(lookahead)

    elif token[0] == "name" and token[1] == "struct":
        add_word(token[1])
        token = self.token()
        nametok = None
        if token is not None and token[0] == "name":
            nametok = token
            token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseStruct(token)
        elif token is not None and token[0] == "op" and token[1] == "*":
            self.type = self.type + " " + nametok[1] + " *"
            token = self.token()
            while token is not None and token[0] == "op" and token[1] == "*":
                self.type = self.type + " *"
                token = self.token()
            if token is not None and token[0] == "name":
                nametok = token
                token = self.token()
            else:
                self.error("struct : expecting name", token)
                return token
        elif token is not None and token[0] == "name" and \
             nametok is not None:
            self.type = self.type + " " + nametok[1]
            return token

        if nametok is not None:
            self.lexer.push(token)
            token = nametok
        return token

    elif token[0] == "name" and token[1] == "enum":
        add_word(token[1])
        self.enums = []
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseEnumBlock(token)
        else:
            self.error("parsing enum: expecting '{'", token)
        enum_type = None
        if token is not None:
            if token[0] != "name":
                # anonymous enum: hand back a placeholder name
                self.lexer.push(token)
                token = ("name", "enum")
            else:
                enum_type = token[1]
        for enum in self.enums:
            self.index_add(enum[0], self.filename,
                           not self.is_header, "enum",
                           (enum[1], enum[2], enum_type))
        return token

    elif token[0] == "name":
        add_word(token[1])
    else:
        self.error("parsing type %s: expecting a name" % (self.type),
                   token)
        return token

    token = self.token()
    while token is not None and (token[0] == "op" or
          token[0] == "name" and token[1] == "const"):
        self.type = self.type + " " + token[1]
        token = self.token()

    #
    # if there is a parenthesis here, this means a function type
    #
    if token is not None and token[0] == "sep" and token[1] == '(':
        self.type = self.type + token[1]
        token = self.token()
        while token is not None and token[0] == "op" and token[1] == '*':
            self.type = self.type + token[1]
            token = self.token()
        if token is None or token[0] != "name":
            self.error("parsing function type, name expected", token)
            return token
        self.type = self.type + token[1]
        nametok = token
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == ')':
            self.type = self.type + token[1]
            token = self.token()
            if token is not None and token[0] == "sep" and token[1] == '(':
                token = self.token()
                # parseSignature() parses the argument types and so
                # overwrites self.type: save and restore it
                saved_type = self.type
                token = self.parseSignature(token)
                self.type = saved_type
            else:
                self.error("parsing function type, '(' expected", token)
                return token
        else:
            self.error("parsing function type, ')' expected", token)
            return token
        self.lexer.push(token)
        token = nametok
        return token

    #
    # do some lookahead for arrays
    #
    if token is not None and token[0] == "name":
        nametok = token
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == '[':
            self.type = self.type + nametok[1]
            while token is not None and token[0] == "sep" and \
                  token[1] == '[':
                self.type = self.type + token[1]
                token = self.token()
                while token is not None and token[0] != 'sep' and \
                      token[1] != ']' and token[1] != ';':
                    self.type = self.type + token[1]
                    token = self.token()
                if token is not None and token[0] == 'sep' and \
                   token[1] == ']':
                    self.type = self.type + token[1]
                    token = self.token()
                else:
                    self.error("parsing array type, ']' expected", token)
                    return token
        elif token is not None and token[0] == "sep" and token[1] == ':':
            # remove :12 in case it's a limited int size
            token = self.token()
            token = self.token()
        self.lexer.push(token)
        token = nametok

    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001423
1424 #
1425 # Parse a signature: '(' has been parsed and we scan the type definition
1426 # up to the ')' included
def parseSignature(self, token):
    """Parse the argument list of a function up to the closing ')'.

    token is the first token after the opening '('.  The arguments are
    stored in self.signature as (type, name, None) tuples; an argument
    given by its type only, right before the ')', gets None as name,
    or "..." when the type itself is "...".
    Returns the token following the ')', or None when the input ends
    first.
    """
    def is_sep(tok, text):
        return tok is not None and tok[0] == "sep" and tok[1] == text

    if is_sep(token, ')'):
        # empty argument list
        self.signature = []
        return self.token()

    params = []
    while token is not None:
        token = self.parseType(token)
        if token is None:
            break
        if token[0] == "name":
            params.append((self.type, token[1], None))
            token = self.token()
        elif is_sep(token, ','):
            token = self.token()
            continue
        elif is_sep(token, ')'):
            # only the type was provided
            pname = None
            if self.type == "...":
                pname = "..."
            params.append((self.type, pname, None))

        if is_sep(token, ','):
            token = self.token()
        elif is_sep(token, ')'):
            token = self.token()
            break
    self.signature = params
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001456
1457 #
1458 # Parse a global definition, be it a type, variable or function
1459 # the extern "C" blocks are a bit nasty and require it to recurse.
1460 #
def parseGlobal(self, token):
    """Parse one top level definition: a type, variable(s) or function.

    token is the first token of the definition.  An 'extern "C" {'
    block is handled by recursing over its content.  A typedef is
    delegated to parseTypedef().  Otherwise the type is read with
    parseType() and the declared names are recorded with index_add()
    as "struct", "variable" or "function"; initializers and function
    bodies are skipped.  A function is flagged with 1 when it was
    declared 'static', 0 otherwise.
    Returns the first token which is not part of the definition, or
    None at the end of the input.
    """
    static = 0
    if token[1] == 'extern':
        token = self.token()
        if token is None:
            return token
        if token[0] == 'string':
            if token[1] == 'C':
                token = self.token()
                if token is None:
                    return token
                if token[0] == 'sep' and token[1] == "{":
                    token = self.token()
                    while token is not None and (token[0] != 'sep' or
                          token[1] != "}"):
                        if token[0] != 'name':
                            self.error(
                             "token %s %s unexpected at the top level" % (
                                    token[0], token[1]))
                        token = self.parseGlobal(token)
                    token = self.token()
                    return token
            else:
                return token
    elif token[1] == 'static':
        static = 1
        token = self.token()
        if token is None or token[0] != 'name':
            return token

    if token[1] == 'typedef':
        token = self.token()
        return self.parseTypedef(token)

    token = self.parseType(token)
    type_orig = self.type
    if token is None or token[0] != "name":
        return token
    decl_type = type_orig
    self.name = token[1]
    token = self.token()
    while token is not None and (token[0] == "sep" or token[0] == "op"):
        if token[0] == "sep":
            if token[1] == "[":
                # array: everything up to the ';' is made part of the type
                decl_type = decl_type + token[1]
                token = self.token()
                while token is not None and (token[0] != "sep" or
                      token[1] != ";"):
                    decl_type = decl_type + token[1]
                    token = self.token()

        if token is not None and token[0] == "op" and token[1] == "=":
            #
            # Skip the initialization of the variable
            #
            token = self.token()
            # token is None when the input ends right after the '='
            if token is not None and token[0] == 'sep' and \
               token[1] == '{':
                token = self.token()
                token = self.parseBlock(token)
            else:
                self.comment = None
                while token is not None and (token[0] != "sep" or
                      (token[1] != ';' and token[1] != ',')):
                    token = self.token()
            self.comment = None
            if token is None or token[0] != "sep" or (token[1] != ';' and
               token[1] != ','):
                self.error("missing ';' or ',' after value")

        if token is not None and token[0] == "sep":
            if token[1] == ";":
                self.comment = None
                token = self.token()
                if decl_type == "struct":
                    self.index_add(self.name, self.filename,
                         not self.is_header, "struct", self.struct_fields)
                else:
                    self.index_add(self.name, self.filename,
                         not self.is_header, "variable", decl_type)
                break
            elif token[1] == "(":
                token = self.token()
                token = self.parseSignature(token)
                if token is None:
                    return None
                if token[0] == "sep" and token[1] == ";":
                    # prototype: comment problems are not reported
                    d = self.mergeFunctionComment(self.name,
                            ((decl_type, None), self.signature), 1)
                    self.index_add(self.name, self.filename, static,
                                   "function", d)
                    token = self.token()
                elif token[0] == "sep" and token[1] == "{":
                    # definition: comment problems are reported unless
                    # the function is static
                    d = self.mergeFunctionComment(self.name,
                            ((decl_type, None), self.signature), static)
                    self.index_add(self.name, self.filename, static,
                                   "function", d)
                    token = self.token()
                    token = self.parseBlock(token)
            elif token[1] == ',':
                # several variables declared at once
                self.comment = None
                self.index_add(self.name, self.filename, static,
                               "variable", decl_type)
                decl_type = type_orig
                token = self.token()
                while token is not None and token[0] == "sep":
                    decl_type = decl_type + token[1]
                    token = self.token()
                if token is not None and token[0] == "name":
                    self.name = token[1]
                    token = self.token()
            else:
                break

    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001579
def parse(self):
    """Parse the whole file and return the index built from it.

    Every top level definition is handled by parseGlobal().  A token
    which is not a name is reported with self.error(); in that case,
    should error() return, parseGlobal() is given the token once and
    None is returned instead of the index.
    """
    self.warning("Parsing %s" % (self.filename))
    token = self.token()
    while token is not None:
        if token[0] != 'name':
            self.error("token %s %s unexpected at the top level" % (
                   token[0], token[1]))
            self.parseGlobal(token)
            return
        token = self.parseGlobal(token)
    self.parseTopComment(self.top_comment)
    return self.index
Daniel Veillarddee23482008-04-11 12:58:43 +00001593
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001594
class docBuilder:
    """A documentation builder.

    Collects the ``*.c`` (modules) and ``*.h`` (headers) files found in a
    list of directories, parses each of them with ``CParser``, merges the
    per-file indexes into ``self.idx`` and serializes the result as two
    XML files: ``<name>-api.xml`` and ``<name>-refs.xml``.

    Several parameters are named ``id``, ``str`` or ``file``; those names
    shadow builtins but are kept because they are part of the existing
    method signatures.
    """

    # Types too common to be worth listing in the per-type cross references.
    _COMMON_TYPES = ('', 'void', 'int', 'char *', 'const char *')

    # Characters turned into blanks before a description is split in words.
    _INDEX_SEPARATORS = "'\"/*[]()<>&#,.;"

    def __init__(self, name, directories=None, excludes=None):
        """Create a builder for project `name`.

        directories: directories to scan, defaults to ['.'].
        excludes: substrings of file paths to skip; the keys of the
                  module-level `ignored_files` table are always added.
        """
        # None sentinels instead of mutable default arguments.
        if directories is None:
            directories = ['.']
        if excludes is None:
            excludes = []
        self.name = name
        self.directories = directories
        self.excludes = list(excludes) + list(ignored_files.keys())
        self.modules = {}
        self.headers = {}
        self.idx = index()
        self.xref = {}
        self.index = {}
        if name == 'libxml2':
            self.basename = 'libxml'
        else:
            self.basename = name

    def indexString(self, id, str):
        """Record in self.xref that each significant word of `str` refers
        to the identifier `id`.

        Punctuation is replaced by blanks, then every word which starts
        with an ASCII letter, is at least 3 characters long and is not
        'and' or 'the' (case insensitive) gets `id` appended to its entry.
        A None `str` is ignored.
        """
        if str is None:
            return
        text = str
        for separator in self._INDEX_SEPARATORS:
            text = text.replace(separator, ' ')
        for word in text.split():
            if word[0] not in string.ascii_letters or len(word) < 3:
                continue
            # TODO: generalize this a bit
            if word.lower() in ('and', 'the'):
                continue
            self.xref.setdefault(word, []).append(id)

    def analyze(self):
        """Print a summary of the scanned files and analyze the index."""
        print("Project %s : %d headers, %d modules" % (self.name, len(self.headers), len(self.modules)))
        self.idx.analyze()

    def scanHeaders(self):
        """Parse every registered header and merge its index."""
        # Iterate over a copy of the keys, the values are replaced in place.
        for header in list(self.headers.keys()):
            idx = CParser(header).parse()
            self.headers[header] = idx
            self.idx.merge(idx)

    def scanModules(self):
        """Parse every registered module and merge its public symbols."""
        for module in list(self.modules.keys()):
            idx = CParser(module).parse()
            # idx.analyze()
            self.modules[module] = idx
            self.idx.merge_public(idx)

    def _is_excluded(self, path):
        """Tell whether `path` contains one of the excluded substrings."""
        for excl in self.excludes:
            if path.find(excl) != -1:
                return True
        return False

    def _register(self, pattern, table):
        """Add to `table` every non-excluded file matching glob `pattern`."""
        for path in glob.glob(pattern):
            if not self._is_excluded(path):
                table[path] = None

    def scan(self):
        """Collect the C modules and headers of all directories, then
        parse the headers followed by the modules."""
        for directory in self.directories:
            self._register(directory + "/*.c", self.modules)
            self._register(directory + "/*.h", self.headers)
        self.scanHeaders()
        self.scanModules()

    def modulename_file(self, file):
        """Return the base name of `file` without a '.h' or '.c' suffix."""
        module = os.path.basename(file)
        if module[-2:] in ('.h', '.c'):
            module = module[:-2]
        return module

    def serialize_enum(self, output, name):
        """Write the <enum> element describing enum value `name`."""
        id = self.idx.enums[name]
        output.write(" <enum name='%s' file='%s'" % (name,
                     self.modulename_file(id.header)))
        if id.info is not None:
            info = id.info
            if info[0] is not None and info[0] != '':
                # NOTE(review): eval() runs text extracted from the parsed
                # C sources; only use this tool on trusted source trees.
                try:
                    val = eval(info[0])
                except Exception:
                    val = info[0]
                # (val,) so that a tuple result is formatted, not unpacked.
                output.write(" value='%s'" % (val,))
            if info[2] is not None and info[2] != '':
                output.write(" type='%s'" % info[2])
            if info[1] is not None and info[1] != '':
                output.write(" info='%s'" % escape(info[1]))
        output.write("/>\n")

    def serialize_macro(self, output, name):
        """Write the <macro> element for macro `name` and index the words
        of its description and of its arguments descriptions."""
        id = self.idx.macros[name]
        output.write(" <macro name='%s' file='%s'>\n" % (name,
                     self.modulename_file(id.header)))
        if id.info is not None:
            try:
                (args, desc) = id.info
                if desc is not None and desc != "":
                    output.write(" <info>%s</info>\n" % (escape(desc)))
                    self.indexString(name, desc)
                for (arg_name, arg_desc) in args:
                    if arg_desc is not None and arg_desc != "":
                        output.write(" <arg name='%s' info='%s'/>\n" % (
                                     arg_name, escape(arg_desc)))
                        # Index under the macro, not under its argument:
                        # the cross references must point to a symbol.
                        self.indexString(name, arg_desc)
                    else:
                        output.write(" <arg name='%s'/>\n" % (arg_name))
            except Exception:
                # Best effort: info which does not have the expected
                # (args, desc) layout is silently left out.
                pass
        output.write(" </macro>\n")

    def serialize_typedef(self, output, name):
        """Write the <struct> or <typedef> element for typedef `name`.

        A typedef whose type starts with 'struct ' is written as a
        <struct>, with one <field> per field when the structure content
        is known as a tuple or a list.
        """
        id = self.idx.typedefs[name]
        if id.info[0:7] != 'struct ':
            output.write(" <typedef name='%s' file='%s' type='%s'" % (
                         name, self.modulename_file(id.header), id.info))
            try:
                desc = id.extra
                if desc is not None and desc != "":
                    output.write(">\n <info>%s</info>\n" % (escape(desc)))
                    output.write(" </typedef>\n")
                else:
                    output.write("/>\n")
            except Exception:
                output.write("/>\n")
            return

        output.write(" <struct name='%s' file='%s' type='%s'" % (
                     name, self.modulename_file(id.header), id.info))
        # From here on the fields are indexed under the structure name.
        struct_name = id.info[7:]
        if struct_name in self.idx.structs and \
           isinstance(self.idx.structs[struct_name].info, (tuple, list)):
            output.write(">\n")
            try:
                for field in self.idx.structs[struct_name].info:
                    desc = field[2]
                    self.indexString(struct_name, desc)
                    if desc is None:
                        desc = ''
                    else:
                        desc = escape(desc)
                    output.write(" <field name='%s' type='%s' info='%s'/>\n" % (field[1], field[0], desc))
            except Exception:
                print("Failed to serialize struct %s" % (struct_name))
            output.write(" </struct>\n")
        else:
            output.write("/>\n")

    def serialize_variable(self, output, name):
        """Write the <variable> element for variable `name`."""
        id = self.idx.variables[name]
        if id.info is not None:
            output.write(" <variable name='%s' file='%s' type='%s'/>\n" % (
                         name, self.modulename_file(id.header), id.info))
        else:
            output.write(" <variable name='%s' file='%s'/>\n" % (
                         name, self.modulename_file(id.header)))

    def serialize_function(self, output, name):
        """Write the element (named after id.type) for function `name`,
        with its conditionals, description, return value and arguments,
        and index the words of the descriptions."""
        id = self.idx.functions[name]
        if name == debugsym:
            print("=> %s" % (id,))

        output.write(" <%s name='%s' file='%s' module='%s'>\n" % (id.type,
                     name, self.modulename_file(id.header),
                     self.modulename_file(id.module)))
        #
        # Processing of conditionals modified by Bill 1/1/05
        #
        if id.conditionals is not None:
            # The separator is the XML-escaped form of " && ".
            apstr = " &amp;&amp; ".join(id.conditionals)
            output.write(" <cond>%s</cond>\n"% (apstr))
        try:
            (ret, params, desc) = id.info
            if (desc is None or desc == '') and \
               name[0:9] != "xmlThrDef" and name != "xmlDllMain":
                print("%s %s from %s has no description" % (id.type, name,
                      self.modulename_file(id.module)))

            output.write(" <info>%s</info>\n" % (escape(desc)))
            self.indexString(name, desc)
            if ret[0] is not None:
                if ret[0] == "void":
                    output.write(" <return type='void'/>\n")
                else:
                    output.write(" <return type='%s' info='%s'/>\n" % (
                                 ret[0], escape(ret[1])))
                    self.indexString(name, ret[1])
            for param in params:
                if param[0] == 'void':
                    continue
                if param[2] is None:
                    output.write(" <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
                else:
                    output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
                    self.indexString(name, param[2])
        except Exception:
            print("Failed to save function %s info:  %s" % (name, repr(id.info)))
        output.write(" </%s>\n" % (id.type))

    def serialize_exports(self, output, file):
        """Write the <file> element listing what header `file` exports."""
        module = self.modulename_file(file)
        output.write(" <file name='%s'>\n" % (module))
        hdr = self.headers[file]
        if hdr.info is not None:
            for data in ('Summary', 'Description', 'Author'):
                try:
                    output.write(" <%s>%s</%s>\n" % (
                                 data.lower(),
                                 escape(hdr.info[data]),
                                 data.lower()))
                except Exception:
                    print("Header %s lacks a %s description" % (module, data))
            if 'Description' in hdr.info:
                desc = hdr.info['Description']
                if desc.find("DEPRECATED") != -1:
                    output.write(" <deprecated/>\n")

        other_tables = (hdr.functions, hdr.variables, hdr.typedefs,
                        hdr.structs, hdr.enums)
        for id in uniq(sorted(hdr.macros.keys())):
            # Macros are sometime used to masquerade other types.
            masquerade = False
            for table in other_tables:
                if id in table:
                    masquerade = True
                    break
            if masquerade:
                continue
            output.write(" <exports symbol='%s' type='macro'/>\n" % (id))
        for (table, kind) in ((hdr.enums, 'enum'),
                              (hdr.typedefs, 'typedef'),
                              (hdr.structs, 'struct'),
                              (hdr.variables, 'variable'),
                              (hdr.functions, 'function')):
            for id in uniq(sorted(table.keys())):
                output.write(" <exports symbol='%s' type='%s'/>\n" % (id, kind))
        output.write(" </file>\n")

    def serialize_xrefs_files(self, output):
        """Write, for each header, the sorted list of symbols it defines."""
        for file in sorted(self.headers.keys()):
            module = self.modulename_file(file)
            output.write(" <file name='%s'>\n" % (module))
            hdr = self.headers[file]
            ids = uniq(list(hdr.functions.keys()) + list(hdr.variables.keys()) +
                       list(hdr.macros.keys()) + list(hdr.typedefs.keys()) +
                       list(hdr.structs.keys()) + list(hdr.enums.keys()))
            for id in sorted(ids):
                output.write(" <ref name='%s'/>\n" % (id))
            output.write(" </file>\n")

    def _serialize_type_refs(self, output, funcs):
        """Write one <type> element per key of `funcs` (a map from a type
        name to the list of functions using it), skipping the very common
        types and listing each function only once."""
        for type_name in sorted(funcs.keys()):
            if type_name in self._COMMON_TYPES:
                continue
            output.write(" <type name='%s'>\n" % (type_name))
            previous = ''
            # A function is listed once per matching parameter, hence the
            # removal of consecutive duplicates in the sorted list.
            for id in sorted(funcs[type_name]):
                if id != previous:
                    output.write(" <ref name='%s'/>\n" % (id))
                    previous = id
            output.write(" </type>\n")

    def serialize_xrefs_functions(self, output):
        """Write the functions grouped by the type of their parameters."""
        funcs = {}
        for name in self.idx.functions.keys():
            id = self.idx.functions[name]
            try:
                (ret, params, desc) = id.info
                for param in params:
                    if param[0] == 'void':
                        continue
                    funcs.setdefault(param[0], []).append(name)
            except Exception:
                # Best effort: skip functions lacking the expected info.
                pass
        self._serialize_type_refs(output, funcs)

    def serialize_xrefs_constructors(self, output):
        """Write the functions grouped by the type they return."""
        funcs = {}
        for name in self.idx.functions.keys():
            id = self.idx.functions[name]
            try:
                (ret, params, desc) = id.info
                if ret[0] == "void":
                    continue
                funcs.setdefault(ret[0], []).append(name)
            except Exception:
                # Best effort: skip functions lacking the expected info.
                pass
        self._serialize_type_refs(output, funcs)

    def serialize_xrefs_alpha(self, output):
        """Write all identifiers sorted and grouped by first character."""
        letter = None
        for id in sorted(self.idx.identifiers.keys()):
            if id[0] != letter:
                if letter is not None:
                    output.write(" </letter>\n")
                letter = id[0]
                output.write(" <letter name='%s'>\n" % (letter))
            output.write(" <ref name='%s'/>\n" % (id))
        if letter is not None:
            output.write(" </letter>\n")

    def serialize_xrefs_references(self, output):
        """Write, for each identifier, the link to its HTML documentation."""
        for id in sorted(self.idx.identifiers.keys()):
            module = self.idx.identifiers[id].header
            output.write(" <reference name='%s' href='%s'/>\n" % (id,
                         'html/' + self.basename + '-' +
                         self.modulename_file(module) + '.html#' +
                         id))

    def serialize_xrefs_index(self, output):
        """Write the word index built by indexString().

        Words referenced by more than 30 identifiers are left out.  The
        words are grouped by first character in <letter> elements, and the
        letters are packed in <chunk> elements: a new chunk is started on
        a letter change once the current one holds more than 200
        references.  A final <chunks> element gives the first and last
        letter of each chunk.
        """
        index = self.xref
        letter = None
        first_letter = None
        count = 0
        chunk = 0
        chunks = []
        for word in sorted(index.keys()):
            if len(index[word]) > 30:
                continue
            if word[0] != letter:
                if letter is None or count > 200:
                    if letter is not None:
                        # Close the chunk which just filled up.
                        output.write(" </letter>\n")
                        output.write(" </chunk>\n")
                        count = 0
                        chunks.append(["chunk%s" % (chunk - 1), first_letter, letter])
                    output.write(" <chunk name='chunk%s'>\n" % (chunk))
                    first_letter = word[0]
                    chunk = chunk + 1
                else:
                    output.write(" </letter>\n")
                letter = word[0]
                output.write(" <letter name='%s'>\n" % (letter))
            output.write(" <word name='%s'>\n" % (word))
            previous = None
            for token in sorted(index[word]):
                if token == previous:
                    continue
                previous = token
                output.write(" <ref name='%s'/>\n" % (token))
                count = count + 1
            output.write(" </word>\n")
        if letter is not None:
            output.write(" </letter>\n")
            output.write(" </chunk>\n")
            if count != 0:
                chunks.append(["chunk%s" % (chunk - 1), first_letter, letter])
            output.write(" <chunks>\n")
            for ch in chunks:
                output.write(" <chunk name='%s' start='%s' end='%s'/>\n" % (
                             ch[0], ch[1], ch[2]))
            output.write(" </chunks>\n")

    def serialize_xrefs(self, output):
        """Write all the cross reference sections, each in its element."""
        for (tag, writer) in (
                ('references', self.serialize_xrefs_references),
                ('alpha', self.serialize_xrefs_alpha),
                ('constructors', self.serialize_xrefs_constructors),
                ('functions', self.serialize_xrefs_functions),
                ('files', self.serialize_xrefs_files),
                ('index', self.serialize_xrefs_index)):
            output.write(" <%s>\n" % (tag))
            writer(output)
            output.write(" </%s>\n" % (tag))

    def _serialize_api(self, output):
        """Write the whole API description document to `output`."""
        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
        output.write("<api name='%s'>\n" % self.name)
        output.write(" <files>\n")
        for file in sorted(self.headers.keys()):
            self.serialize_exports(output, file)
        output.write(" </files>\n")
        output.write(" <symbols>\n")
        for (table, writer) in ((self.idx.macros, self.serialize_macro),
                                (self.idx.enums, self.serialize_enum),
                                (self.idx.typedefs, self.serialize_typedef),
                                (self.idx.variables, self.serialize_variable),
                                (self.idx.functions, self.serialize_function)):
            for symbol in sorted(table.keys()):
                writer(output, symbol)
        output.write(" </symbols>\n")
        output.write("</api>\n")

    def serialize(self):
        """Save the API description in <name>-api.xml and the cross
        references in <name>-refs.xml, in the current directory.

        The API file must be written first: serializing the symbols is
        what fills the word index saved in the cross references.
        """
        filename = "%s-api.xml" % self.name
        print("Saving XML description %s" % (filename))
        output = open(filename, "w")
        # try/finally so the file is closed even if a serializer fails.
        try:
            self._serialize_api(output)
        finally:
            output.close()

        filename = "%s-refs.xml" % self.name
        print("Saving XML Cross References %s" % (filename))
        output = open(filename, "w")
        try:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<apirefs name='%s'>\n" % self.name)
            self.serialize_xrefs(output)
            output.write("</apirefs>\n")
        finally:
            output.close()
Daniel Veillarda9b66d02002-12-11 14:23:49 +00002097
2098
def rebuild():
    """Rebuild the API description of the project found around the
    current directory.

    The project is guessed from the first marker file which exists:
    parser.c or ../parser.c for libxml2, ../libxslt/transform.c for
    libxslt.  The matching sources are scanned, analyzed and serialized.
    When ../libexslt/exslt.c exists too, libexslt is processed the same
    way with a separate builder.

    Returns the builder of the main project, or None when no project
    could be guessed.
    """
    # (marker file, message, project, directories, excludes)
    candidates = (
        ("parser.c",
         "Rebuilding API description for libxml2",
         "libxml2", [".", "."],
         ["xmlwin32version.h", "tst.c"]),
        ("../parser.c",
         "Rebuilding API description for libxml2",
         "libxml2", ["..", "../include/libxml"],
         ["xmlwin32version.h", "tst.c"]),
        ("../libxslt/transform.c",
         "Rebuilding API description for libxslt",
         "libxslt", ["../libxslt"],
         ["win32config.h", "libxslt.h", "tst.c"]),
    )
    for (marker, message, project, directories, excludes) in candidates:
        if glob.glob(marker):
            print(message)
            builder = docBuilder(project, directories, excludes)
            break
    else:
        print("rebuild() failed, unable to guess the module")
        return None

    builder.scan()
    builder.analyze()
    builder.serialize()
    if glob.glob("../libexslt/exslt.c"):
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize()
    return builder
2125
2126#
2127# for debugging the parser
2128#
def parse(filename):
    """Run the C parser on a single file and return the resulting index
    (debugging helper)."""
    return CParser(filename).parse()
2133
if __name__ == "__main__":
    # Without argument rebuild the whole API description, otherwise
    # parse only the given file with debugging turned on.
    if len(sys.argv) <= 1:
        rebuild()
    else:
        debug = 1
        parse(sys.argv[1])