blob: a5275f1fb7cb22e346e616377c7eddd0a7e74d8c [file] [log] [blame]
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001#!/usr/bin/python -u
2#
# This is the API builder: it parses the C sources and builds the
# formal API description in XML.
5#
6# See Copyright for the status of this software.
7#
8# daniel@veillard.com
9#
Daniel Veillard540a31a2003-01-21 11:21:07 +000010import os, sys
Daniel Veillarda9b66d02002-12-11 14:23:49 +000011import string
12import glob
13
# Debug flag; it is not read anywhere in this part of the file.
debug = 0

# Name of one symbol to trace: identifier() and index.add()/add_ref()
# print a line on stdout whenever they handle a symbol with this name.
# None disables the tracing.  Example:
#debugsym = 'ignorableWhitespaceSAXFunc'
debugsym = None
Daniel Veillarde8ba84e2003-11-18 13:54:15 +000017
Daniel Veillardde0a0a52003-04-24 17:12:57 +000018#
19# C parser analysis code
20#
# Table of file names mapped to the reason each one is ignored.
# NOTE(review): the code consulting this table is outside this excerpt.
# The entries are grouped by reason; the resulting dictionary is the same
# flat name -> reason mapping.
ignored_files = {}
for _reason, _names in (
    ("too many non standard macros",
     ("trio", "trio.c", "trionan.c", "triostr.c")),
    ("generated portability layer",
     ("acconfig.h", "config.h")),
    ("internal only",
     ("libxml.h",)),
    ("out of memory tester",
     ("testOOM.c", "testOOMlib.h", "testOOMlib.c")),
    ("not yet integrated",
     ("rngparser.c", "rngparser.h")),
    ("not a normal header",
     ("elfgcchack.h",)),
    ("test tool",
     ("testHTML.c", "testReader.c", "testSchemas.c", "testXPath.c",
      "testAutomata.c", "testModule.c", "testRegexp.c", "testThreads.c",
      "testC14N.c", "testRelax.c", "testThreadsWin32.c", "testSAX.c",
      "testURI.c")),
    ("generated regression tests",
     ("testapi.c",)),
    ("regression tests program",
     ("runtest.c", "runsuite.c")),
    ("not part of the library",
     ("tst.c",)),
    ("test for dynamid shared libraries",
     ("testdso.c",)),
):
    for _name in _names:
        ignored_files[_name] = _reason
# Do not leave the loop helpers behind in the module namespace.
del _reason, _names, _name
54
# Table of words mapped to a (count, reason) pair.
# NOTE(review): the count is presumably the number of tokens following the
# word that must be dropped as well -- confirm against the parser code that
# reads this table, which is outside this excerpt.
ignored_words = {}
for _count, _reason, _words in (
    (0, "Windows keyword",
     ("WINAPI", "__stdcall")),
    (0, "Special macro to flag external keywords",
     ("LIBXML_DLL_IMPORT",)),
    (0, "Special macro for extern vars for win32",
     ("XMLPUBVAR", "XSLTPUBVAR", "EXSLTPUBVAR")),
    (0, "Special macro for extern funcs for win32",
     ("XMLPUBFUN", "XSLTPUBFUN", "EXSLTPUBFUN")),
    (0, "Special macro for win32 calls",
     ("XMLCALL", "XSLTCALL", "EXSLTCALL")),
    (3, "Windows keyword",
     ("__declspec",)),
    (0, "macro keyword",
     ("ATTRIBUTE_UNUSED", "LIBEXSLT_PUBLIC")),
    (5, "macro function builder",
     ("X_IN_Y",)),
):
    for _word in _words:
        ignored_words[_word] = (_count, _reason)
# Do not leave the loop helpers behind in the module namespace.
del _count, _reason, _words, _word
73
def escape(raw):
    """Return raw with the XML special characters replaced by entities.

    The characters & < > ' " become &amp; &lt; &gt; &apos; &quot;.
    """
    # The ampersand has to go first, otherwise the '&' introduced by the
    # other entities would be escaped a second time.
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
81
def uniq(items):
    """Return the distinct elements of items, in no particular order.

    The elements are used as dictionary keys, so they must be hashable.
    """
    return dict.fromkeys(items).keys()
87
class identifier:
    """Description of one symbol collected while parsing.

    Attributes:
      name         -- the symbol name
      header       -- header value given by the caller, or None
      module       -- module value given by the caller, or None
      type         -- kind of symbol as a string, or None
      info, extra  -- opaque payloads supplied by the caller
      lineno       -- line number recorded for the symbol
      static       -- 0 by default, changed through set_static()
      conditionals -- private copy of the conditionals list, or None when
                      there are none
    """
    def __init__(self, name, header=None, module=None, type=None, lineno = 0,
                 info=None, extra=None, conditionals = None):
        self.name = name
        self.header = header
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.lineno = lineno
        self.static = 0
        self.set_conditionals(conditionals)
        if self.name == debugsym:
            print("=> define %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))

    def __repr__(self):
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module is not None:
            r = r + " from %s" % (self.module)
        if self.info is not None:
            r = r + " " + repr(self.info)
        if self.extra is not None:
            r = r + " " + repr(self.extra)
        if self.conditionals is not None:
            r = r + " " + repr(self.conditionals)
        return r

    def set_header(self, header):
        self.header = header
    def set_module(self, module):
        self.module = module
    def set_type(self, type):
        self.type = type
    def set_info(self, info):
        self.info = info
    def set_extra(self, extra):
        self.extra = extra
    def set_lineno(self, lineno):
        self.lineno = lineno
    def set_static(self, static):
        self.static = static
    def set_conditionals(self, conditionals):
        """Store a copy of conditionals; None or an empty list is stored
        as None."""
        if conditionals is None or len(conditionals) == 0:
            self.conditionals = None
        else:
            # copy, so later changes to the caller's list do not leak in
            self.conditionals = conditionals[:]

    def get_name(self):
        return self.name
    def get_header(self):
        # used to return self.module by mistake
        return self.header
    def get_module(self):
        return self.module
    def get_type(self):
        return self.type
    def get_info(self):
        return self.info
    def get_lineno(self):
        return self.lineno
    def get_extra(self):
        return self.extra
    def get_static(self):
        return self.static
    def get_conditionals(self):
        return self.conditionals

    def update(self, header, module, type = None, info = None, extra=None,
               conditionals=None):
        """Merge new information into this identifier.

        header and type only fill a slot which is still None.  module
        fills an empty slot or replaces a module equal to the header.
        info, extra and conditionals overwrite the stored value whenever
        the new value is not None.
        """
        if self.name == debugsym:
            print("=> update %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))
        if header is not None and self.header is None:
            # used to store `module` in the header slot by mistake
            self.set_header(header)
        if module is not None and (self.module is None or
                                   self.header == self.module):
            self.set_module(module)
        if type is not None and self.type is None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
        if conditionals is not None:
            self.set_conditionals(conditionals)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000178
class index:
    """Symbol tables built while parsing.

    identifiers maps every registered name to its identifier; functions,
    variables, includes, structs, enums, typedefs and macros hold the
    same objects split by kind; references is filled by add_ref(); info
    is a free-form dictionary assigned by the parser.
    """
    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}
        self.info = {}

    def _new_identifier(self, name, header, module, static, type, lineno,
                        info, extra, conditionals):
        """Create the identifier for name and store it in self.identifiers,
        replacing any previous entry of the same name.

        The historical code first tried identifier.update() on an existing
        entry, but passed it one argument too many (lineno); the resulting
        TypeError was swallowed by a bare except, so a fresh identifier was
        always created.  That effective behaviour is kept, made explicit.
        """
        d = identifier(name, header, module, type, lineno, info, extra,
                       conditionals)
        self.identifiers[name] = d
        if static == 1:
            d.set_static(1)
        return d

    def _table_for(self, type):
        """Return the per-kind dictionary for type, or None when the type
        is unknown."""
        if type == "function" or type == "functype":
            return self.functions
        if type == "variable":
            return self.variables
        if type == "include":
            return self.includes
        if type == "struct":
            return self.structs
        if type == "enum":
            return self.enums
        if type == "typedef":
            return self.typedefs
        if type == "macro":
            return self.macros
        return None

    def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a reference to name.

        Names starting with '__' are ignored and None is returned.
        Otherwise the new identifier is returned; it is also stored in
        self.references when type is not None.
        """
        if name[0:2] == '__':
            return None
        d = self._new_identifier(name, header, module, static, type, lineno,
                                 info, extra, conditionals)

        if type is not None:
            self.references[name] = d

        if name == debugsym:
            print("New ref: %s" % (d))

        return d

    def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Register the symbol name.

        Names starting with '__' are ignored and None is returned.
        Otherwise the new identifier is returned; it is also filed in the
        table matching type ("functype" goes with the functions).  An
        unknown type is reported on stdout.
        """
        if name[0:2] == '__':
            return None
        d = self._new_identifier(name, header, module, static, type, lineno,
                                 info, extra, conditionals)

        if type is not None:
            table = self._table_for(type)
            if table is not None:
                table[name] = d
            else:
                # same output as the Python 2 statement
                #   print "Unable to register type ", type
                print("Unable to register type " + " " + str(type))

        if name == debugsym:
            print("New symbol: %s" % (d))

        return d

    def _merge_table(self, kind, mine, theirs, overrides_macro):
        """Copy the entries of theirs missing from mine, and record them in
        self.identifiers; names already present are reported on stdout.

        When overrides_macro is true, a macro of the same name is dropped
        first: macro might be used to override functions or variables
        definitions.
        """
        for sym in list(theirs.keys()):
            if overrides_macro and sym in self.macros:
                del self.macros[sym]
            if sym in mine:
                print("%s %s from %s redeclared in %s" % (
                      kind, sym, mine[sym].header, theirs[sym].header))
            else:
                mine[sym] = theirs[sym]
                self.identifiers[sym] = theirs[sym]

    def merge(self, idx):
        """Merge the symbols of the index idx into this one.

        The order matters: functions and variables are merged before the
        macros, and the enums after them.
        """
        self._merge_table("function", self.functions, idx.functions, 1)
        self._merge_table("variable", self.variables, idx.variables, 1)
        self._merge_table("struct", self.structs, idx.structs, 0)
        self._merge_table("typedef", self.typedefs, idx.typedefs, 0)
        for sym in list(idx.macros.keys()):
            #
            # macro might be used to override functions or variables
            # definitions
            #
            if sym in self.variables:
                continue
            if sym in self.functions:
                continue
            if sym in self.enums:
                continue
            if sym in self.macros:
                print("macro %s from %s redeclared in %s" % (
                      sym, self.macros[sym].header, idx.macros[sym].header))
            else:
                self.macros[sym] = idx.macros[sym]
                self.identifiers[sym] = idx.macros[sym]
        self._merge_table("enum", self.enums, idx.enums, 0)

    def merge_public(self, idx):
        """Update the functions already known here with the information
        idx holds for them; functions only present in idx are ignored."""
        for sym in list(idx.functions.keys()):
            if sym in self.functions:
                up = idx.functions[sym]
                known = self.functions[sym]
                # check that function condition agrees with header
                if up.conditionals != known.conditionals:
                    print("Header condition differs from Function for %s:"
                          % sym)
                    print("  H: %s" % known.conditionals)
                    print("  C: %s" % up.conditionals)
                known.update(None, up.module, up.type, up.info, up.extra)
        #     else:
        #         print "Function %s from %s is not declared in headers" % (
        #                id, idx.functions[id].module)
        # TODO: do the same for variables.

    def analyze_dict(self, type, dict):
        """Print how many entries dict holds and how many of them are
        public (static == 0); type is the label used in the message."""
        count = len(dict)
        public = 0
        for name in dict.keys():
            if dict[name].static == 0:
                public = public + 1
        if count != public:
            print("  %d %s , %d public" % (count, type, public))
        elif count != 0:
            print("  %d public %s" % (count, type))

    def analyze(self):
        """Print the statistics of the main symbol tables."""
        self.analyze_dict("functions", self.functions)
        self.analyze_dict("variables", self.variables)
        self.analyze_dict("structs", self.structs)
        self.analyze_dict("typedefs", self.typedefs)
        self.analyze_dict("macros", self.macros)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000357
class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line.

       Tokens are (kind, text) tuples where kind is one of 'preproc',
       'string', 'comment', 'name', 'sep' or 'op'."""

    # characters ending a name
    _NAME_STOP = " \t(){}:;,+-*/%&!|[]=><"
    # single character separators
    _SEPARATORS = "(){}:;,[]"
    # characters starting an operator ('.' included) ...
    _OP_START = "+-*><=/%&!|."
    # ... and characters allowed as the second one of an operator
    _OP_NEXT = "+-*><=/%&!|"

    def __init__(self, input):
        self.input = input      # object providing readline()
        self.tokens = []        # tokens already split, not yet returned
        self.line = ""          # unprocessed remainder of the current line
        self.lineno = 0         # number of lines read so far
        # last token returned; set here so that debug() is usable before
        # the first call to token()
        self.last = None

    def getline(self):
        """Return the next non blank line stripped of its surrounding
        whitespace, or None at the end of the input.  Lines ending with a
        backslash are joined with the following ones."""
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = line.strip()
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = n.strip()
                if not n:
                    break
                line = line + n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        """Put token back in front of the queue, it will be the next one
        returned by token()."""
        self.tokens.insert(0, token)

    def debug(self):
        """Print the lexer state on stdout."""
        # The Python 2 print statement put a space between two items
        # separated by a comma, hence the explicit " ".
        print("Last token: " + " " + str(self.last))
        print("Token queue: " + " " + str(self.tokens))
        print("Line %d end: " % (self.lineno) + " " + str(self.line))

    def token(self):
        """Return the next token, or None at the end of the input (also
        when the input ends inside a string or a comment)."""
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line is None:
                return None

            if line[0] == '#':
                # preprocessor line: every blank separated word is a token
                self.tokens = [('preproc', word) for word in line.split()]
                break
            l = len(line)
            if line[0] == '"' or line[0] == "'":
                # string or character literal, may span several lines
                end = line[0]
                line = line[1:]
                found = 0
                tok = ""
                while found == 0:
                    i = 0
                    l = len(line)
                    while i < l:
                        if line[i] == end:
                            self.line = line[i+1:]
                            line = line[:i]
                            found = 1
                            break
                        if line[i] == '\\':
                            # skip the escaped character
                            i = i + 1
                        i = i + 1
                    tok = tok + line
                    if found == 0:
                        line = self.getline()
                        if line is None:
                            return None
                self.last = ('string', tok)
                return self.last

            if l >= 2 and line[0] == '/' and line[1] == '*':
                # C comment, the lines are joined with "\n"
                line = line[2:]
                found = 0
                tok = ""
                while found == 0:
                    i = 0
                    l = len(line)
                    while i < l:
                        if line[i] == '*' and i+1 < l and line[i+1] == '/':
                            self.line = line[i+2:]
                            # NOTE(review): the slice stops at i-1, so the
                            # character right before the closing "*/" is
                            # dropped too, and a line made only of "*/"
                            # yields "*".  Kept as is, the comment parsers
                            # may rely on it.
                            line = line[:i-1]
                            found = 1
                            break
                        i = i + 1
                    if tok != "":
                        tok = tok + "\n"
                    tok = tok + line
                    if found == 0:
                        line = self.getline()
                        if line is None:
                            return None
                self.last = ('comment', tok)
                return self.last
            if l >= 2 and line[0] == '/' and line[1] == '/':
                # C++ comment, runs to the end of the line
                line = line[2:]
                self.last = ('comment', line)
                return self.last

            # plain code: cut the line at the first comment or literal,
            # the remainder is processed by the next iteration
            i = 0
            while i < l:
                c = line[i]
                if (c == '/' and i+1 < l and
                    (line[i+1] == '/' or line[i+1] == '*')) or \
                   c == '"' or c == "'":
                    self.line = line[i:]
                    line = line[:i]
                    break
                i = i + 1

            l = len(line)
            i = 0
            while i < l:
                c = line[i]
                if c == ' ' or c == '\t':
                    i = i + 1
                    continue
                if ('a' <= c and c <= 'z') or ('A' <= c and c <= 'Z') or \
                   ('0' <= c and c <= '9'):
                    # no letter or digit is part of _NAME_STOP, so testing
                    # against it is enough to find the end of the name
                    s = i
                    while i < l and line[i] not in self._NAME_STOP:
                        i = i + 1
                    self.tokens.append(('name', line[s:i]))
                    continue
                if c in self._SEPARATORS:
                    self.tokens.append(('sep', c))
                    i = i + 1
                    continue
                if c in self._OP_START:
                    if c == '.' and i + 2 < l and \
                       line[i+1] == '.' and line[i+2] == '.':
                        self.tokens.append(('name', '...'))
                        i = i + 3
                        continue

                    j = i + 1
                    if j < l and line[j] in self._OP_NEXT:
                        # two characters operator
                        self.tokens.append(('op', line[i:j+1]))
                        i = j + 1
                    else:
                        self.tokens.append(('op', c))
                        i = i + 1
                    continue
                # anything else starts a name too
                s = i
                while i < l and line[i] not in self._NAME_STOP:
                    i = i + 1
                self.tokens.append(('name', line[s:i]))

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000563
564class CParser:
Daniel Veillardbe586972003-11-18 20:56:51 +0000565 """The C module parser"""
def __init__(self, filename, idx = None):
    """Open filename and prepare to parse it.

    filename -- path of the C source or header, opened for reading here
    idx      -- index receiving the symbols; a new one is created when
                it is None
    """
    self.filename = filename
    # a header is a name longer than two characters ending with ".h"
    self.is_header = 0
    if len(filename) > 2 and filename.endswith('.h'):
        self.is_header = 1
    self.input = open(filename)
    self.lexer = CLexer(self.input)
    if idx is None:
        idx = index()
    self.index = idx
    self.top_comment = ""
    self.last_comment = ""
    self.comment = None
    self.collect_ref = 0
    self.no_error = 0
    self.conditionals = []
    self.defines = []
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000585
def collect_references(self):
    """Raise the collect_ref flag of the parser."""
    self.collect_ref = 1
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000588
def stop_error(self):
    """Raise the no_error flag: warning() and error() then return
    without doing anything."""
    self.no_error = 1
591
def start_error(self):
    """Clear the no_error flag: warning() and error() report again."""
    self.no_error = 0
594
def lineno(self):
    """Return the line number currently reported by the lexer."""
    lexer = self.lexer
    return lexer.getlineno()
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000597
def index_add(self, name, module, static, type, info=None, extra = None):
    """Register a symbol in the index, with the current line number and
    the current conditionals.

    When the parsed file is a header, module is passed as the header
    too; otherwise the header is None.
    """
    header = None
    if self.is_header == 1:
        header = module
    self.index.add(name, header, module, static, type, self.lineno(),
                   info, extra, self.conditionals)
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000605
def index_add_ref(self, name, module, static, type, info=None,
                  extra = None):
    """Register a reference in the index, with the current line number
    and the current conditionals.

    When the parsed file is a header, module is passed as the header
    too; otherwise the header is None.
    """
    header = None
    if self.is_header == 1:
        header = module
    self.index.add_ref(name, header, module, static, type, self.lineno(),
                       info, extra, self.conditionals)
Daniel Veillardd8cf9062003-11-11 21:12:36 +0000614
def warning(self, msg):
    """Print msg on stdout, unless the no_error flag is raised."""
    if not self.no_error:
        print(msg)
619
def error(self, msg, token=-1):
    """Report a parse error and exit with status 1.

    Nothing happens when the no_error flag is raised.  Otherwise msg is
    printed, followed by token unless it is left to -1, the lexer state
    is dumped and sys.exit(1) is called.
    """
    if self.no_error:
        return

    print("Parse Error: " + msg)
    if token != -1:
        # The Python 2 print statement put a space between two items
        # separated by a comma, hence the explicit " ".
        print("Got token " + " " + str(token))
    self.lexer.debug()
    sys.exit(1)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000629
def debug(self, msg, token=-1):
    """Print a debug message, followed by token unless it is left to -1,
    then dump the lexer state."""
    print("Debug: " + msg)
    if token != -1:
        # The Python 2 print statement put a space between two items
        # separated by a comma, hence the explicit " ".
        print("Got token " + " " + str(token))
    self.lexer.debug()
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000635
def parseTopComment(self, comment):
    """Split the comment into "Item: value" entries, stored as a
    dictionary in self.index.info.

    Each line first loses its leading blanks, its leading '*' and the
    blanks after them.  A line holding a ':' starts (or extends) the item
    named before the first ':'.  A line without ':' extends the last item
    seen; it is dropped when no item was seen yet.  The pieces of an item
    are joined with a single space.
    """
    res = {}
    item = None
    for line in comment.split("\n"):
        line = line.lstrip(' \t').lstrip('*').lstrip(' \t')
        pos = line.find(':')
        if pos != -1:
            item = line[:pos]
            line = line[pos + 1:].lstrip(' \t')
        elif item is None:
            # continuation line with nothing to continue
            continue
        if item in res:
            res[item] = res[item] + " " + line
        else:
            res[item] = line
    self.index.info = res
663
def parseComment(self, token):
    """Record the text of a comment token and return the token that
    follows it.

    The first comment seen is kept in self.top_comment.  The text
    replaces self.comment when none is pending or when it starts with
    '*'; otherwise it is appended to it.  DOC_DISABLE / DOC_ENABLE in
    the resulting comment switch the reporting off / on.
    """
    text = token[1]
    if self.top_comment == "":
        self.top_comment = text
    # text[:1] rather than text[0]: the comment may be empty
    if self.comment is None or text[:1] == '*':
        self.comment = text
    else:
        self.comment = self.comment + text
    token = self.lexer.token()

    if self.comment.find("DOC_DISABLE") != -1:
        self.stop_error()

    if self.comment.find("DOC_ENABLE") != -1:
        self.start_error()

    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000680
681 #
# Parse the comment block associated with a macro
683 #
Daniel Veillardbe586972003-11-18 20:56:51 +0000684 def parseMacroComment(self, name, quiet = 0):
685 if name[0:2] == '__':
686 quiet = 1
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000687
Daniel Veillardbe586972003-11-18 20:56:51 +0000688 args = []
689 desc = ""
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000690
Daniel Veillardbe586972003-11-18 20:56:51 +0000691 if self.comment == None:
692 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000693 self.warning("Missing comment for macro %s" % (name))
Daniel Veillardbe586972003-11-18 20:56:51 +0000694 return((args, desc))
695 if self.comment[0] != '*':
696 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000697 self.warning("Missing * in macro comment for %s" % (name))
Daniel Veillardbe586972003-11-18 20:56:51 +0000698 return((args, desc))
699 lines = string.split(self.comment, '\n')
700 if lines[0] == '*':
701 del lines[0]
702 if lines[0] != "* %s:" % (name):
703 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000704 self.warning("Misformatted macro comment for %s" % (name))
705 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
Daniel Veillardbe586972003-11-18 20:56:51 +0000706 return((args, desc))
707 del lines[0]
708 while lines[0] == '*':
709 del lines[0]
710 while len(lines) > 0 and lines[0][0:3] == '* @':
711 l = lines[0][3:]
712 try:
713 (arg, desc) = string.split(l, ':', 1)
714 desc=string.strip(desc)
715 arg=string.strip(arg)
716 except:
717 if not quiet:
Daniel Veillard1e906612003-12-05 14:57:46 +0000718 self.warning("Misformatted macro comment for %s" % (name))
719 self.warning(" problem with '%s'" % (lines[0]))
Daniel Veillardbe586972003-11-18 20:56:51 +0000720 del lines[0]
721 continue
722 del lines[0]
723 l = string.strip(lines[0])
724 while len(l) > 2 and l[0:3] != '* @':
725 while l[0] == '*':
726 l = l[1:]
727 desc = desc + ' ' + string.strip(l)
728 del lines[0]
729 if len(lines) == 0:
730 break
731 l = lines[0]
732 args.append((arg, desc))
733 while len(lines) > 0 and lines[0] == '*':
734 del lines[0]
735 desc = ""
736 while len(lines) > 0:
737 l = lines[0]
738 while len(l) > 0 and l[0] == '*':
739 l = l[1:]
740 l = string.strip(l)
741 desc = desc + " " + l
742 del lines[0]
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000743
Daniel Veillardbe586972003-11-18 20:56:51 +0000744 desc = string.strip(desc)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000745
Daniel Veillardbe586972003-11-18 20:56:51 +0000746 if quiet == 0:
747 if desc == "":
Daniel Veillard1e906612003-12-05 14:57:46 +0000748 self.warning("Macro comment for %s lack description of the macro" % (name))
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000749
Daniel Veillardbe586972003-11-18 20:56:51 +0000750 return((args, desc))
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000751
752 #
753 # Parse a comment block and merge the informations found in the
754 # parameters descriptions, finally returns a block as complete
755 # as possible
756 #
def mergeFunctionComment(self, name, description, quiet = 0):
    """Merge the pending comment block into a function description.

    name is the function name and description a (ret, args) pair where
    ret[0] is the return type and args a list of (type, name, info)
    tuples.  The text is taken from self.comment, laid out as:

        *
        * name:
        * @arg: description of the argument
        *
        * description of the function
        *
        * Returns description of the return value

    Returns ((return type, return description), args, description).
    args is updated in place with the argument descriptions found.
    Problems are reported through self.warning() unless quiet is set;
    'main' and names starting with '__' are always handled quietly.
    """
    if name == 'main' or name[0:2] == '__':
        quiet = 1

    (ret, args) = description
    desc = ""
    retdesc = ""

    if self.comment is None:
        if not quiet:
            self.warning("Missing comment for function %s" % (name))
        return ((ret[0], retdesc), args, desc)
    # slice rather than index so that an empty comment is reported
    # instead of raising IndexError
    if self.comment[0:1] != '*':
        if not quiet:
            self.warning("Missing * in function comment for %s" % (name))
        return ((ret[0], retdesc), args, desc)

    lines = self.comment.split('\n')
    if lines[0] == '*':
        del lines[0]
    header = ""
    if len(lines) > 0:
        header = lines[0]
    if header != "* %s:" % (name):
        if not quiet:
            self.warning("Misformatted function comment for %s" % (name))
            self.warning(" Expecting '* %s:' got '%s'" % (name, header))
        return ((ret[0], retdesc), args, desc)
    del lines[0]
    while len(lines) > 0 and lines[0] == '*':
        del lines[0]

    #
    # the "* @arg: description" entries, possibly continued on the
    # following lines
    #
    nbargs = len(args)
    while len(lines) > 0 and lines[0][0:3] == '* @':
        l = lines[0][3:]
        try:
            (arg, desc) = l.split(':', 1)
        except ValueError:
            # no ':' separating the argument name from its description
            if not quiet:
                self.warning("Misformatted function comment for %s" % (name))
                self.warning(" problem with '%s'" % (lines[0]))
            del lines[0]
            continue
        desc = desc.strip()
        arg = arg.strip()
        del lines[0]
        # the comment may stop right after the @arg line
        if len(lines) > 0:
            l = lines[0].strip()
            while len(l) > 2 and l[0:3] != '* @':
                desc = desc + ' ' + l.lstrip('*').strip()
                del lines[0]
                if len(lines) == 0:
                    break
                l = lines[0]
        for i in range(nbargs):
            if args[i][1] == arg:
                args[i] = (args[i][0], arg, desc)
                break
        else:
            if not quiet:
                self.warning("Unable to find arg %s from function comment for %s" % (
                   arg, name))

    #
    # the function description, then the return value description
    #
    while len(lines) > 0 and lines[0] == '*':
        del lines[0]
    desc = ""
    while len(lines) > 0:
        l = lines[0].lstrip('*').strip()
        del lines[0]
        if l[0:6] in ("return", "Return"):
            try:
                l = l.split(' ', 1)[1]
            except IndexError:
                # the line holds nothing but the "Returns" word
                l = ""
            retdesc = l.strip()
            # everything left belongs to the return description
            while len(lines) > 0:
                retdesc = retdesc + " " + lines[0].lstrip('*').strip()
                del lines[0]
        else:
            desc = desc + " " + l

    retdesc = retdesc.strip()
    desc = desc.strip()

    if quiet == 0:
        #
        # report missing comments
        #
        for entry in args:
            if entry[2] is None and entry[0] != "void" and entry[1] is not None:
                self.warning("Function comment for %s lacks description of arg %s" % (name, entry[1]))
        if retdesc == "" and ret[0] != "void":
            self.warning("Function comment for %s lacks description of return value" % (name))
        if desc == "":
            self.warning("Function comment for %s lacks description of the function" % (name))

    return ((ret[0], retdesc), args, desc)
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000864
def parsePreproc(self, token):
    """Handle one preprocessor directive.

    token is the ('preproc', '#directive') token starting the line.
    #include and #define are recorded through self.index_add();
    #ifdef, #ifndef, #if, #else and #endif maintain the self.defines
    and self.conditionals stacks.  The remaining 'preproc' tokens of
    the directive are skipped.

    Returns the first token following the directive, or None when the
    lexer has nothing left.
    """
    if debug:
        print("%s %s %s" % ("=> preproc ", token, self.lexer.tokens))
    name = token[1]
    if name == "#include":
        token = self.lexer.token()
        if token is None:
            return None
        if token[0] == 'preproc':
            self.index_add(token[1], self.filename, not self.is_header,
                           "include")
            return self.lexer.token()
        return token
    if name == "#define":
        token = self.lexer.token()
        if token is None:
            return None
        if token[0] == 'preproc':
            # TODO macros with arguments
            name = token[1].split('(')[0]
            # skip the macro body
            token = self.lexer.token()
            while token is not None and token[0] == 'preproc' and \
                  token[1][0:1] != '#':
                token = self.lexer.token()
            info = self.parseMacroComment(name, not self.is_header)
            self.index_add(name, self.filename, not self.is_header,
                           "macro", info)
        # like for #include, a lookahead token which does not belong
        # to the directive is handed back to the caller
        return token

    #
    # Conditionals: every #ifdef/#ifndef/#if pushes its expression on
    # self.defines so that #else and #endif can find it again.  Only
    # the expressions containing the string 'ENABLED' are also pushed
    # on self.conditionals, the other ones (e.g. the '__XML_PARSER_H__'
    # include guards) do not enable or disable library functions.
    #
    if name == "#ifdef" or name == "#ifndef":
        # a directive without operand still pushes an entry to keep
        # the stack balanced with the matching #endif
        apstr = ""
        if len(self.lexer.tokens) > 0:
            apstr = self.lexer.tokens[0][1]
        self.defines.append(apstr)
        if 'ENABLED' in apstr:
            if name == "#ifdef":
                self.conditionals.append("defined(%s)" % apstr)
            else:
                self.conditionals.append("!defined(%s)" % apstr)
    elif name == "#if":
        apstr = " ".join([tok[1] for tok in self.lexer.tokens])
        self.defines.append(apstr)
        if 'ENABLED' in apstr:
            self.conditionals.append(apstr)
    elif name == "#else":
        if self.conditionals != [] and self.defines != [] and \
           'ENABLED' in self.defines[-1]:
            self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
    elif name == "#endif":
        if self.conditionals != [] and self.defines != [] and \
           'ENABLED' in self.defines[-1]:
            self.conditionals = self.conditionals[:-1]
        self.defines = self.defines[:-1]

    # skip what is left of the directive
    token = self.lexer.token()
    while token is not None and token[0] == 'preproc' and \
          token[1][0:1] != '#':
        token = self.lexer.token()
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000959
960 #
# Token acquisition on top of the lexer: it handles the preprocessor
# directives and the comments internally since they are logically not
# part of the program structure.
964 #
def token(self):
    """Return the next token relevant for the parser, None at the end.

    Comments and preprocessor directives are handed to
    self.parseComment() and self.parsePreproc(), '__const' is reported
    as 'const', everything from '__attribute' up to the next ';' is
    dropped, and a name listed in ignored_words is skipped together
    with the number of following tokens registered for it.
    """
    tok = self.lexer.token()
    while tok is not None:
        kind = tok[0]
        if kind == 'comment':
            tok = self.parseComment(tok)
        elif kind == 'preproc':
            tok = self.parsePreproc(tok)
        elif kind == "name" and tok[1] == "__const":
            return ("name", "const")
        elif kind == "name" and tok[1] == "__attribute":
            # drop the attribute, the ';' closing it is returned
            tok = self.lexer.token()
            while tok is not None and tok[1] != ";":
                tok = self.lexer.token()
            return tok
        elif kind == "name" and tok[1] in ignored_words:
            (count, info) = ignored_words[tok[1]]
            for skipped in range(count):
                self.lexer.token()
            tok = self.lexer.token()
        else:
            if debug:
                print("%s %s" % ("=> ", tok))
            return tok
    return None
Daniel Veillarda9b66d02002-12-11 14:23:49 +0000997
998 #
999 # Parse a typedef, it records the type and its name.
1000 #
def parseTypedef(self, token):
    """Parse what follows the 'typedef' keyword and record the names.

    The type is read with self.parseType(), then each declared name is
    registered through self.index_add() as a "functype" when a
    signature was parsed, as a "struct" when the base type is exactly
    "struct", and as a "typedef" otherwise.

    Returns the token following the closing ';', the offending token
    after a parse error, or None when the input is exhausted.
    """
    if token is None:
        return None
    token = self.parseType(token)
    if token is None:
        self.error("parsing typedef")
        return None
    base_type = self.type
    cur_type = base_type
    while token is not None:
        if token[0] != "name":
            self.error("parsing typedef: expecting a name")
            return token

        name = token[1]
        signature = self.signature
        if signature is not None:
            # keep only the return type of a function type
            cur_type = cur_type.split('(')[0]
            info = self.mergeFunctionComment(name,
                    ((cur_type, None), signature), 1)
            self.index_add(name, self.filename, not self.is_header,
                           "functype", info)
        elif base_type == "struct":
            self.index_add(name, self.filename, not self.is_header,
                           "struct", cur_type)
            base_type = "struct " + name
        else:
            self.index_add(name, self.filename, not self.is_header,
                           "typedef", cur_type)
        token = self.token()

        is_sep = token is not None and token[0] == 'sep'
        if is_sep and token[1] == ',':
            # another name for the same base type, possibly with
            # pointer operators in front
            cur_type = base_type
            token = self.token()
            while token is not None and token[0] == "op":
                cur_type = cur_type + token[1]
                token = self.token()
        elif is_sep and token[1] == ';':
            break
        elif token is not None and token[0] == 'name':
            cur_type = base_type
        else:
            self.error("parsing typedef: expecting ';'", token)
            return token
    token = self.token()
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001050
1051 #
# Parse a C code block, used for function bodies: it parses up to and
# including the balancing }
1054 #
def parseBlock(self, token):
    """Skip a C code block up to and including the balancing '}'.

    token is the first token after the opening '{'.  When
    self.collect_ref is 1 the names met in the block are reported
    through self.index_add_ref(): 'xml...' names followed by '(' as
    "function", 'xml...' names followed by another name and then one
    of ';' ',' '=' as "type", 'XML_...' and 'LIBXML_...' names as
    "typedef".

    Returns the token following the closing '}', or None when the
    input ends before it.
    """
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            self.comment = None
            token = self.token()
            return token
        elif self.collect_ref == 1:
            oldtok = token
            token = self.token()
            if oldtok[0] != "name":
                continue
            ident = oldtok[1]
            if ident[0:3] == "xml":
                # the lookahead token is None when the input ends
                # right after the identifier
                if token is None:
                    pass
                elif token[0] == "sep" and token[1] == "(":
                    self.index_add_ref(ident, self.filename,
                                       0, "function")
                    token = self.token()
                elif token[0] == "name":
                    token = self.token()
                    if token is not None and token[0] == "sep" and \
                       token[1] in (";", ",", "="):
                        self.index_add_ref(ident, self.filename,
                                           0, "type")
            elif ident[0:4] == "XML_" or ident[0:7] == "LIBXML_":
                self.index_add_ref(ident, self.filename,
                                   0, "typedef")
        else:
            token = self.token()
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001089
1090 #
1091 # Parse a C struct definition till the balancing }
1092 #
def parseStruct(self, token):
    """Parse the fields of a C struct up to the balancing '}'.

    token is the first token after the opening '{'.  Each
    "type name;" field is collected as a (type, name, comment) tuple
    and the list is stored in self.struct_fields.  Nested '{' blocks
    are skipped with self.parseTypeBlock().  self.type is restored
    after each field since self.parseType() overwrites it.

    Returns the token following the closing '}', or None when the
    input ends before it.
    """
    fields = []
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseTypeBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            self.struct_fields = fields
            token = self.token()
            return token
        else:
            base_type = self.type
            token = self.parseType(token)
            if token is not None and token[0] == "name":
                fname = token[1]
                token = self.token()
                # token is None when the input ends after the name
                if token is not None and token[0] == "sep" and \
                   token[1] == ";":
                    # self.comment is cleared before the next token is
                    # read, so what it holds afterwards was stored
                    # while fetching that token
                    self.comment = None
                    token = self.token()
                    fields.append((self.type, fname, self.comment))
                    self.comment = None
                else:
                    self.error("parseStruct: expecting ;", token)
            elif token is not None and token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseTypeBlock(token)
                if token is not None and token[0] == "name":
                    token = self.token()
                if token is not None and token[0] == "sep" and token[1] == ";":
                    token = self.token()
                else:
                    self.error("parseStruct: expecting ;", token)
            else:
                self.error("parseStruct: name", token)
                token = self.token()
            self.type = base_type
    self.struct_fields = fields
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001138
1139 #
1140 # Parse a C enum block, parse till the balancing }
1141 #
def parseEnumBlock(self, token):
    """Parse the enumerators of a C enum up to the balancing '}'.

    token is the first token after the opening '{'.  self.enums is
    filled with (name, value, comment) tuples.  An explicit value is
    the text found between the '=' and the next ',' or '}'; an
    implicit one is the previous value plus one, starting from 0 as
    in C.  When the previous value cannot be read as an integer a
    warning is issued and the value is left empty.

    Returns the token following the closing '}', or None when the
    input ends before it.
    """
    self.enums = []
    name = None
    self.comment = None
    comment = ""
    # so that a first enumerator without explicit value gets 0
    value = "-1"
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseTypeBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            if name is not None:
                if self.comment is not None:
                    comment = self.comment.strip()
                    self.comment = None
                self.enums.append((name, value, comment))
            token = self.token()
            return token
        elif token[0] == "name":
            # the previous enumerator is complete, its comment has
            # been seen by now
            if name is not None:
                if self.comment is not None:
                    comment = self.comment.strip()
                    self.comment = None
                self.enums.append((name, value, comment))
            name = token[1]
            comment = ""
            token = self.token()
            if token is not None and token[0] == "op" and \
               token[1][0:1] == "=":
                # the operator token may carry the start of the value
                value = token[1][1:]
                token = self.token()
                while token is not None and (token[0] != "sep" or
                      (token[1] != ',' and token[1] != '}')):
                    value = value + token[1]
                    token = self.token()
            else:
                try:
                    # base 0 also accepts the 0x... notation
                    value = "%d" % (int(value, 0) + 1)
                except ValueError:
                    self.warning("Failed to compute value of enum %s" % (name))
                    value = ""
            if token is not None and token[0] == "sep" and token[1] == ",":
                token = self.token()
        else:
            token = self.token()
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001189
1190 #
1191 # Parse a C definition block, used for structs it parse till
1192 # the balancing }
1193 #
def parseTypeBlock(self, token):
    """Skip a definition block up to and including the balancing '}'.

    token is the first token after the opening '{'; nested blocks are
    skipped too.  Returns the token following the closing '}', or
    None when the input ends before it.
    """
    # number of blocks currently open, the one being skipped included
    depth = 1
    while token is not None:
        if token[0] == "sep":
            if token[1] == "{":
                depth = depth + 1
            elif token[1] == "}":
                depth = depth - 1
                if depth == 0:
                    return self.token()
        token = self.token()
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001205
1206 #
1207 # Parse a type: the fact that the type name can either occur after
1208 # the definition or within the definition makes it a little harder
1209 # if inside, the name token is pushed back before returning
1210 #
def parseType(self, token):
    """Parse a C type starting at token.

    The textual type is accumulated in self.type; self.struct_fields
    and self.signature are reset, the latter being set again when a
    function type is parsed.  Struct and enum definitions are handed
    to self.parseStruct() and self.parseEnumBlock(), and the
    enumerators found are registered through self.index_add().

    When the declared name is met while parsing (function types,
    arrays, bit-fields, struct and enum definitions) the lookahead
    token is pushed back on the lexer and the name token is returned.
    Otherwise the first token following the type is returned.  None
    is returned when the input is exhausted.
    """
    self.type = ""
    self.struct_fields = []
    self.signature = None
    if token is None:
        return token

    def extend_type(word):
        # append one word to the type being built
        if self.type == "":
            self.type = word
        else:
            self.type = self.type + " " + word

    while token is not None and token[0] == "name" and \
          token[1] in ("const", "unsigned", "signed"):
        extend_type(token[1])
        token = self.token()
    if token is None:
        # the input ended right after the qualifiers
        return token

    if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
        extend_type(token[1])
        # "long int", "short int", "long long": the following words
        # belong to the type, they are not the declared name
        ahead = self.token()
        while ahead is not None and ahead[0] == "name" and \
              ahead[1] in ("long", "int"):
            extend_type(ahead[1])
            ahead = self.token()
        if ahead is not None:
            self.lexer.push(ahead)

    elif token[0] == "name" and token[1] == "struct":
        extend_type(token[1])
        token = self.token()
        nametok = None
        if token is not None and token[0] == "name":
            nametok = token
            token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseStruct(token)
        elif token is not None and token[0] == "op" and token[1] == "*":
            if nametok is None:
                # "struct *" without a struct name
                self.error("struct : expecting name", token)
                return token
            self.type = self.type + " " + nametok[1] + " *"
            token = self.token()
            while token is not None and token[0] == "op" and token[1] == "*":
                self.type = self.type + " *"
                token = self.token()
            if token is not None and token[0] == "name":
                nametok = token
                token = self.token()
            else:
                self.error("struct : expecting name", token)
                return token
        elif token is not None and token[0] == "name" and nametok is not None:
            self.type = self.type + " " + nametok[1]
            return token

        if nametok is not None:
            self.lexer.push(token)
            token = nametok
        return token

    elif token[0] == "name" and token[1] == "enum":
        extend_type(token[1])
        self.enums = []
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseEnumBlock(token)
        else:
            self.error("parsing enum: expecting '{'", token)
        enum_type = None
        if token is not None and token[0] != "name":
            self.lexer.push(token)
            token = ("name", "enum")
        elif token is not None:
            enum_type = token[1]
        for enum in self.enums:
            self.index_add(enum[0], self.filename,
                           not self.is_header, "enum",
                           (enum[1], enum[2], enum_type))
        return token

    elif token[0] == "name":
        extend_type(token[1])
    else:
        self.error("parsing type %s: expecting a name" % (self.type),
                   token)
        return token
    token = self.token()
    while token is not None and (token[0] == "op" or
          token[0] == "name" and token[1] == "const"):
        self.type = self.type + " " + token[1]
        token = self.token()

    #
    # if there is a parenthesis here, this means a function type
    #
    if token is not None and token[0] == "sep" and token[1] == '(':
        self.type = self.type + token[1]
        token = self.token()
        while token is not None and token[0] == "op" and token[1] == '*':
            self.type = self.type + token[1]
            token = self.token()
        if token is None or token[0] != "name":
            self.error("parsing function type, name expected", token)
            return token
        self.type = self.type + token[1]
        nametok = token
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == ')':
            self.type = self.type + token[1]
            token = self.token()
            if token is not None and token[0] == "sep" and token[1] == '(':
                token = self.token()
                # parsing the signature overwrites self.type
                saved_type = self.type
                token = self.parseSignature(token)
                self.type = saved_type
            else:
                self.error("parsing function type, '(' expected", token)
                return token
        else:
            self.error("parsing function type, ')' expected", token)
            return token
        self.lexer.push(token)
        token = nametok
        return token

    #
    # do some lookahead for arrays
    #
    if token is not None and token[0] == "name":
        nametok = token
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == '[':
            self.type = self.type + nametok[1]
            while token is not None and token[0] == "sep" and token[1] == '[':
                self.type = self.type + token[1]
                token = self.token()
                while token is not None and token[0] != 'sep' and \
                      token[1] != ']' and token[1] != ';':
                    self.type = self.type + token[1]
                    token = self.token()
                if token is not None and token[0] == 'sep' and token[1] == ']':
                    self.type = self.type + token[1]
                    token = self.token()
                else:
                    self.error("parsing array type, ']' expected", token)
                    return token
        elif token is not None and token[0] == "sep" and token[1] == ':':
            # remove :12 in case it's a limited int size
            token = self.token()
            token = self.token()
        self.lexer.push(token)
        token = nametok

    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001374
1375 #
1376 # Parse a signature: '(' has been parsed and we scan the type definition
1377 # up to the ')' included
def parseSignature(self, token):
    """Parse the parameters of a function up to the closing ')'.

    token is the first token after the opening '('.  The parameters
    are stored in self.signature as (type, name, None) tuples, name
    being None when only the type was provided and "..." for a
    variable argument list.

    Returns the token following the ')', or the token on which the
    parsing stopped.
    """
    signature = []
    if token is not None and token[0] == "sep" and token[1] == ')':
        self.signature = []
        token = self.token()
        return token
    while token is not None:
        token = self.parseType(token)
        if token is None:
            break
        if token[0] == "name":
            signature.append((self.type, token[1], None))
            token = self.token()
        elif token[0] == "sep" and (token[1] == ',' or token[1] == ')'):
            # only the type was provided
            if self.type == "...":
                signature.append((self.type, "...", None))
            elif token[1] == ')' or self.type != "":
                signature.append((self.type, None, None))
        else:
            # nothing usable was parsed, parsing the same token again
            # would loop forever
            break
        if token is not None and token[0] == "sep":
            if token[1] == ',':
                token = self.token()
                continue
            elif token[1] == ')':
                token = self.token()
                break
    self.signature = signature
    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001407
1408 #
1409 # Parse a global definition, be it a type, variable or function
1410 # the extern "C" blocks are a bit nasty and require it to recurse.
1411 #
def parseGlobal(self, token):
    """Parse one global definition: a type, a variable or a function.

    token is the first token of the definition.  An extern "C" { ... }
    block is handled by recursing on its content.  Typedefs are handed
    to self.parseTypedef(); variables, structs and functions are
    registered through self.index_add(), the function descriptions
    being built by self.mergeFunctionComment().  Variable
    initializers and function bodies are skipped.

    Returns the first token which does not belong to the definition,
    or None when the input is exhausted.
    """
    static = 0
    if token[1] == 'extern':
        token = self.token()
        if token is None:
            return token
        if token[0] == 'string':
            if token[1] == 'C':
                token = self.token()
                if token is None:
                    return token
                if token[0] == 'sep' and token[1] == "{":
                    token = self.token()
                    while token is not None and (token[0] != 'sep' or
                          token[1] != "}"):
                        if token[0] != 'name':
                            self.error(
                             "token %s %s unexpected at the top level" % (
                                    token[0], token[1]))
                        token = self.parseGlobal(token)
                    token = self.token()
                    return token
            else:
                return token
    elif token[1] == 'static':
        static = 1
        token = self.token()
        if token is None or token[0] != 'name':
            return token

    if token[1] == 'typedef':
        token = self.token()
        return self.parseTypedef(token)

    token = self.parseType(token)
    type_orig = self.type
    if token is None or token[0] != "name":
        return token
    cur_type = type_orig
    self.name = token[1]
    token = self.token()
    while token is not None and (token[0] == "sep" or token[0] == "op"):
        if token[0] == "sep" and token[1] == "[":
            # array: everything up to the ';' is added to the type
            cur_type = cur_type + token[1]
            token = self.token()
            while token is not None and (token[0] != "sep" or
                  token[1] != ";"):
                cur_type = cur_type + token[1]
                token = self.token()

        if token is not None and token[0] == "op":
            if token[1][0:1] != "=":
                # no branch below consumes any other operator, give
                # the token back instead of looping on it forever
                break
            #
            # Skip the initialization of the variable
            #
            token = self.token()
            if token is not None and token[0] == 'sep' and token[1] == '{':
                token = self.token()
                token = self.parseBlock(token)
            else:
                self.comment = None
                while token is not None and (token[0] != "sep" or
                      (token[1] != ';' and token[1] != ',')):
                    token = self.token()
            self.comment = None
            if token is None or token[0] != "sep" or (token[1] != ';' and
               token[1] != ','):
                self.error("missing ';' or ',' after value")

        if token is not None and token[0] == "sep":
            if token[1] == ";":
                self.comment = None
                token = self.token()
                if cur_type == "struct":
                    self.index_add(self.name, self.filename,
                         not self.is_header, "struct", self.struct_fields)
                else:
                    self.index_add(self.name, self.filename,
                         not self.is_header, "variable", cur_type)
                break
            elif token[1] == "(":
                token = self.token()
                token = self.parseSignature(token)
                if token is None:
                    return None
                if token[0] == "sep" and token[1] == ";":
                    # prototype: a missing comment is not reported
                    d = self.mergeFunctionComment(self.name,
                            ((cur_type, None), self.signature), 1)
                    self.index_add(self.name, self.filename, static,
                                   "function", d)
                    token = self.token()
                elif token[0] == "sep" and token[1] == "{":
                    # definition: comment problems are reported,
                    # except for static functions
                    d = self.mergeFunctionComment(self.name,
                            ((cur_type, None), self.signature), static)
                    self.index_add(self.name, self.filename, static,
                                   "function", d)
                    token = self.token()
                    token = self.parseBlock(token)
            elif token[1] == ',':
                # several variables declared with the same base type
                self.comment = None
                self.index_add(self.name, self.filename, static,
                               "variable", cur_type)
                cur_type = type_orig
                token = self.token()
                while token is not None and token[0] == "sep":
                    cur_type = cur_type + token[1]
                    token = self.token()
                if token is not None and token[0] == "name":
                    self.name = token[1]
                    token = self.token()
            else:
                break

    return token
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001530
def parse(self):
    """Parse the whole file and return the index which was built.

    The global definitions are handed one after the other to
    self.parseGlobal().  A token which is not a name at the top level
    is reported with self.error(); if that call returns, nothing is
    returned to the caller.
    """
    self.warning("Parsing %s" % (self.filename))
    token = self.token()
    while token is not None:
        if token[0] != 'name':
            self.error("token %s %s unexpected at the top level" % (
                   token[0], token[1]))
            self.parseGlobal(token)
            return None
        token = self.parseGlobal(token)
    self.parseTopComment(self.top_comment)
    return self.index
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001544
1545
1546class docBuilder:
Daniel Veillarde8ba84e2003-11-18 13:54:15 +00001547 """A documentation builder"""
def __init__(self, name, directories=['.'], excludes=[]):
    """Set up an empty documentation builder for the project name.

    directories lists the directories to scan, excludes the file name
    fragments to skip in addition to the keys of ignored_files.
    """
    self.name = name
    # the lists are copied: the default values are shared by all the
    # calls and must not be modified through the instance
    self.directories = list(directories)
    self.excludes = list(excludes) + list(ignored_files.keys())
    self.modules = {}
    self.headers = {}
    self.idx = index()
    self.xref = {}
    self.index = {}
    if name == 'libxml2':
        self.basename = 'libxml'
    else:
        self.basename = name
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001561
def indexString(self, id, str):
    """Record in self.xref the words of str as references to id.

    The punctuation characters are turned into spaces and the text is
    split on whitespace.  A word is indexed when it starts with an
    ASCII letter, is at least 3 characters long and is neither 'and'
    nor 'the' (whatever the case).  Nothing is done when str is None.
    """
    if str is None:
        return
    for char in "'\"/*[]()<>&#,.;":
        str = str.replace(char, ' ')
    for token in str.split():
        if token[0] not in string.ascii_letters:
            continue
        if len(token) < 3:
            continue
        # TODO: generalize this a bit
        if token.lower() in ('and', 'the'):
            continue
        self.xref.setdefault(token, []).append(id)
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001599
def analyze(self):
    """Print the number of headers and modules, then analyze the index."""
    nb_headers = len(self.headers.keys())
    nb_modules = len(self.modules.keys())
    print("Project %s : %d headers, %d modules" % (self.name, nb_headers, nb_modules))
    self.idx.analyze()
1603
def scanHeaders(self):
    """Parse every registered header and merge its index in self.idx.

    The index built for a header is also kept in self.headers.
    """
    for header in list(self.headers.keys()):
        idx = CParser(header).parse()
        self.headers[header] = idx
        self.idx.merge(idx)
1610
def scanModules(self):
    """Parse every registered module and merge it with the public index.

    The index built for a module is also kept in self.modules.
    """
    for module in list(self.modules.keys()):
        idx = CParser(module).parse()
        # idx.analyze()
        self.modules[module] = idx
        self.idx.merge_public(idx)
1618
def scan(self):
    """Register the C files of the directories, then parse them.

    The *.c files are registered in self.modules and the *.h files in
    self.headers, except the ones whose path contains one of the
    self.excludes strings.  The headers are parsed first, then the
    modules.
    """
    def excluded(path):
        # true when one of the excluded strings appears in the path
        for excl in self.excludes:
            if excl in path:
                return 1
        return 0

    for directory in self.directories:
        for (pattern, found) in (("/*.c", self.modules),
                                 ("/*.h", self.headers)):
            for path in glob.glob(directory + pattern):
                if not excluded(path):
                    found[path] = None
    self.scanHeaders()
    self.scanModules()
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001641
def modulename_file(self, file):
    """Return the base name of *file* without a trailing ``.h`` or ``.c``.

    Any directory part is dropped first; names with another (or no)
    two-character suffix are returned unchanged.
    """
    base = os.path.basename(file)
    if base[-2:] in ('.h', '.c'):
        return base[:-2]
    return base
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001649
def serialize_enum(self, output, name):
    """Write the ``<enum .../>`` element for the enum value *name*.

    The entry is looked up in ``self.idx.enums``.  Its ``info``, when not
    ``None``, is indexed as ``info[0]`` (value expression), ``info[1]``
    (description) and ``info[2]`` (type); each non-empty item adds the
    matching ``value``, ``info`` or ``type`` attribute.

    output: object with a ``write()`` method receiving the XML text.
    name:   key of the enum value in ``self.idx.enums``.
    """
    entry = self.idx.enums[name]
    output.write(" <enum name='%s' file='%s'" % (
        name, self.modulename_file(entry.header)))
    info = entry.info
    if info is not None:
        if info[0] is not None and info[0] != '':
            # NOTE(review): eval() executes text that comes from the
            # parsed sources; only run this tool on trusted code.
            try:
                val = eval(info[0])
            except Exception:
                # Not a Python expression: keep the raw text.
                val = info[0]
            # One-element tuple so that a tuple-valued result is
            # formatted as a single value instead of raising TypeError.
            output.write(" value='%s'" % (val,))
        if info[2] is not None and info[2] != '':
            output.write(" type='%s'" % (info[2],))
        if info[1] is not None and info[1] != '':
            output.write(" info='%s'" % (escape(info[1]),))
    output.write("/>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001667
def serialize_macro(self, output, name):
    """Write the ``<macro>`` element for the macro *name*.

    The entry is looked up in ``self.idx.macros``.  When its ``info`` is
    not ``None`` it is unpacked as ``(args, desc)``: a non-empty ``desc``
    becomes an ``<info>`` child and every ``(arg_name, arg_desc)`` pair
    becomes an ``<arg>`` child.  Descriptions are also passed to
    ``self.indexString()`` keyed by the macro name.

    output: object with a ``write()`` method receiving the XML text.
    name:   key of the macro in ``self.idx.macros``.
    """
    entry = self.idx.macros[name]
    output.write(" <macro name='%s' file='%s'>\n" % (
        name, self.modulename_file(entry.header)))
    if entry.info is not None:
        try:
            (args, desc) = entry.info
            if desc is not None and desc != "":
                output.write(" <info>%s</info>\n" % (escape(desc),))
                self.indexString(name, desc)
            # Separate loop names: the original rebound ``name`` here, so
            # argument descriptions were indexed under the argument name
            # rather than under the macro they document.
            for (arg_name, arg_desc) in args:
                if arg_desc is not None and arg_desc != "":
                    output.write(" <arg name='%s' info='%s'/>\n" % (
                        arg_name, escape(arg_desc)))
                    self.indexString(name, arg_desc)
                else:
                    output.write(" <arg name='%s'/>\n" % (arg_name,))
        except Exception:
            # Best effort: a malformed info record yields a macro element
            # with whatever children were written before the failure.
            pass
    output.write(" </macro>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001689
def serialize_typedef(self, output, name):
    """Write the ``<typedef>`` or ``<struct>`` element for typedef *name*.

    The entry is looked up in ``self.idx.typedefs``.  When its ``info``
    text starts with ``'struct '`` a ``<struct>`` element is written; if
    the struct named after that prefix is present in ``self.idx.structs``
    with a tuple or list ``info``, one ``<field>`` child is emitted per
    item, indexed as ``field[0]`` (type), ``field[1]`` (name) and
    ``field[2]`` (description).  Any other typedef is written as a
    self-closing ``<typedef>`` element.

    output: object with a ``write()`` method receiving the XML text.
    name:   key of the typedef in ``self.idx.typedefs``.
    """
    entry = self.idx.typedefs[name]
    if entry.info[0:7] != 'struct ':
        output.write(" <typedef name='%s' file='%s' type='%s'/>\n" % (
            name, self.modulename_file(entry.header), entry.info))
        return

    output.write(" <struct name='%s' file='%s' type='%s'" % (
        name, self.modulename_file(entry.header), entry.info))
    # From here on ``name`` is the struct tag following 'struct '.
    name = entry.info[7:]
    structs = self.idx.structs
    if name in structs and isinstance(structs[name].info, (tuple, list)):
        output.write(">\n")
        try:
            for field in structs[name].info:
                desc = field[2]
                self.indexString(name, desc)
                if desc is None:
                    desc = ''
                else:
                    desc = escape(desc)
                output.write(" <field name='%s' type='%s' info='%s'/>\n" % (
                    field[1], field[0], desc))
        except Exception:
            print("Failed to serialize struct %s" % (name,))
        output.write(" </struct>\n")
    else:
        output.write("/>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001717
def serialize_variable(self, output, name):
    """Write the ``<variable .../>`` element for the variable *name*.

    The entry is looked up in ``self.idx.variables``; a ``type``
    attribute is added only when the entry's ``info`` is not ``None``.
    """
    entry = self.idx.variables[name]
    module = self.modulename_file(entry.header)
    if entry.info is None:
        output.write(" <variable name='%s' file='%s'/>\n" % (name, module))
        return
    output.write(" <variable name='%s' file='%s' type='%s'/>\n" % (
        name, module, entry.info))
Daniel Veillardc1eed322002-12-12 11:01:32 +00001726
def serialize_function(self, output, name):
    """Write the XML element describing the function-like symbol *name*.

    The entry is looked up in ``self.idx.functions`` and its ``type``
    attribute is used as the element tag.  The element carries the header
    and module names, an optional ``<cond>`` child joining the entry's
    conditionals with ``&amp;&amp;``, and, when ``info`` unpacks as
    ``(ret, params, desc)``, the ``<info>``, ``<return>`` and ``<arg>``
    children.  Descriptions are also fed to ``self.indexString()``.
    If the info record cannot be processed a diagnostic is printed and
    the element is still closed.

    output: object with a ``write()`` method receiving the XML text.
    name:   key of the symbol in ``self.idx.functions``.
    """
    entry = self.idx.functions[name]
    if name == debugsym:
        # Same text as the Python 2 statement ``print "=>", id``.
        print("=> %s" % (entry,))

    output.write(" <%s name='%s' file='%s' module='%s'>\n" % (
        entry.type, name, self.modulename_file(entry.header),
        self.modulename_file(entry.module)))
    # All conditionals guarding the symbol go into one <cond> element.
    if entry.conditionals is not None:
        apstr = ""
        for cond in entry.conditionals:
            if apstr != "":
                apstr = apstr + " &amp;&amp; "
            apstr = apstr + cond
        output.write(" <cond>%s</cond>\n" % (apstr,))
    try:
        (ret, params, desc) = entry.info
        output.write(" <info>%s</info>\n" % (escape(desc),))
        self.indexString(name, desc)
        if ret[0] is not None:
            if ret[0] == "void":
                output.write(" <return type='void'/>\n")
            else:
                output.write(" <return type='%s' info='%s'/>\n" % (
                    ret[0], escape(ret[1])))
                self.indexString(name, ret[1])
        for param in params:
            # A lone 'void' parameter list produces no <arg>.
            if param[0] == 'void':
                continue
            if param[2] is None:
                output.write(" <arg name='%s' type='%s' info=''/>\n" % (
                    param[1], param[0]))
            else:
                output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (
                    param[1], param[0], escape(param[2])))
                self.indexString(name, param[2])
    except Exception:
        # Two spaces after the colon reproduce the output of the original
        # Python 2 ``print "... info: " % name, `id.info``` statement.
        print("Failed to save function %s info:  %s" % (
            name, repr(entry.info)))
    output.write(" </%s>\n" % (entry.type,))
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001767
def serialize_exports(self, output, file):
    """Write the ``<file>`` element listing what header *file* exports.

    The header's index is read from ``self.headers[file]``.  When its
    ``info`` is not ``None`` the 'Summary', 'Description' and 'Author'
    entries are written as lower-cased elements (a missing one is
    reported on stdout) and ``<deprecated/>`` is added if the description
    contains "DEPRECATED".  Then one ``<exports>`` element is written per
    symbol, grouped as macros, enums, typedefs, structs, variables and
    functions, each group in sorted order passed through ``uniq()``.

    output: object with a ``write()`` method receiving the XML text.
    file:   key of the header in ``self.headers``.
    """
    module = self.modulename_file(file)
    output.write(" <file name='%s'>\n" % (module,))
    header_idx = self.headers[file]
    info = header_idx.info
    if info is not None:
        for data in ('Summary', 'Description', 'Author'):
            tag = data.lower()
            try:
                output.write(" <%s>%s</%s>\n" % (
                    tag, escape(info[data]), tag))
            except Exception:
                print("Header %s lacks a %s description" % (module, data))
        if 'Description' in info:
            if info['Description'].find("DEPRECATED") != -1:
                output.write(" <deprecated/>\n")

    # Macros are sometimes used to masquerade other types: a macro whose
    # name is also known as another kind of symbol is not listed here.
    other_kinds = (header_idx.functions, header_idx.variables,
                   header_idx.typedefs, header_idx.structs,
                   header_idx.enums)
    for symbol in uniq(sorted(header_idx.macros.keys())):
        if any(symbol in table for table in other_kinds):
            continue
        output.write(" <exports symbol='%s' type='macro'/>\n" % (symbol,))

    remaining = (
        ('enum', header_idx.enums),
        ('typedef', header_idx.typedefs),
        ('struct', header_idx.structs),
        ('variable', header_idx.variables),
        ('function', header_idx.functions),
    )
    for kind, table in remaining:
        for symbol in uniq(sorted(table.keys())):
            output.write(" <exports symbol='%s' type='%s'/>\n" % (
                symbol, kind))
    output.write(" </file>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001822
def serialize_xrefs_files(self, output):
    """Write one ``<file>`` element per header with a ``<ref>`` per symbol.

    Headers are visited in sorted key order.  For each one the names of
    its functions, variables, macros, typedefs, structs and enums are
    gathered, passed through ``uniq()``, sorted and written out.

    output: object with a ``write()`` method receiving the XML text.
    """
    for path in sorted(self.headers.keys()):
        module = self.modulename_file(path)
        output.write(" <file name='%s'>\n" % (module,))
        header_idx = self.headers[path]
        # Build a real list: Python 3 dict views cannot be added together.
        names = []
        for table in (header_idx.functions, header_idx.variables,
                      header_idx.macros, header_idx.typedefs,
                      header_idx.structs, header_idx.enums):
            names.extend(table.keys())
        for symbol in sorted(uniq(names)):
            output.write(" <ref name='%s'/>\n" % (symbol,))
        output.write(" </file>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00001838
def serialize_xrefs_functions(self, output):
    """Write, per parameter type, the functions taking that type.

    Every entry of ``self.idx.functions`` whose ``info`` unpacks as
    ``(ret, params, desc)`` contributes its name under the type
    (``param[0]``) of each non-``void`` parameter; entries that cannot be
    processed are ignored.  One ``<type>`` element is then written per
    type in sorted order, skipping '', 'void', 'int', 'char *' and
    'const char *', with the function names sorted and de-duplicated.

    output: object with a ``write()`` method receiving the XML text.
    """
    funcs = {}
    for name in self.idx.functions.keys():
        entry = self.idx.functions[name]
        try:
            (ret, params, desc) = entry.info
            for param in params:
                if param[0] == 'void':
                    continue
                funcs.setdefault(param[0], []).append(name)
        except Exception:
            # Best effort: entries without usable info are left out.
            pass

    too_common = ('', 'void', 'int', 'char *', 'const char *')
    for type_name in sorted(funcs.keys()):
        if type_name in too_common:
            continue
        output.write(" <type name='%s'>\n" % (type_name,))
        # A function with several parameters of this type was appended
        # once per parameter; sorting makes the repeats adjacent.
        previous = ''
        for func in sorted(funcs[type_name]):
            if func != previous:
                output.write(" <ref name='%s'/>\n" % (func,))
                previous = func
        output.write(" </type>\n")
1869
def serialize_xrefs_constructors(self, output):
    """Write, per return type, the functions returning that type.

    Every entry of ``self.idx.functions`` whose ``info`` unpacks as
    ``(ret, params, desc)`` contributes its name under ``ret[0]`` unless
    that is 'void'; entries that cannot be processed are ignored.  One
    ``<type>`` element is then written per type in sorted order, skipping
    '', 'void', 'int', 'char *' and 'const char *', with the function
    names sorted.

    output: object with a ``write()`` method receiving the XML text.
    """
    funcs = {}
    for name in self.idx.functions.keys():
        entry = self.idx.functions[name]
        try:
            (ret, params, desc) = entry.info
            if ret[0] == "void":
                continue
            funcs.setdefault(ret[0], []).append(name)
        except Exception:
            # Best effort: entries without usable info are left out.
            pass

    too_common = ('', 'void', 'int', 'char *', 'const char *')
    for type_name in sorted(funcs.keys()):
        if type_name in too_common:
            continue
        output.write(" <type name='%s'>\n" % (type_name,))
        for func in sorted(funcs[type_name]):
            output.write(" <ref name='%s'/>\n" % (func,))
        output.write(" </type>\n")
1896
def serialize_xrefs_alpha(self, output):
    """Write all identifiers grouped by their first character.

    The keys of ``self.idx.identifiers`` are sorted; each run of names
    sharing the same first character is wrapped in a ``<letter>`` element
    containing one ``<ref>`` per name.  Nothing is written when there are
    no identifiers.

    output: object with a ``write()`` method receiving the XML text.
    """
    letter = None
    # sorted() replaces keys() + in-place sort(), which fails on Python 3
    # where keys() returns a view without a sort() method.
    for ident in sorted(self.idx.identifiers.keys()):
        if ident[0] != letter:
            if letter is not None:
                output.write(" </letter>\n")
            letter = ident[0]
            output.write(" <letter name='%s'>\n" % (letter,))
        output.write(" <ref name='%s'/>\n" % (ident,))
    if letter is not None:
        output.write(" </letter>\n")
1910
def serialize_xrefs_references(self, output):
    """Write one ``<reference>`` element per identifier, in sorted order.

    The ``href`` is built as ``html/<basename>-<module>.html#<name>``
    where ``<basename>`` is ``self.basename`` and ``<module>`` is derived
    from the identifier's ``header`` through ``modulename_file()``.

    output: object with a ``write()`` method receiving the XML text.
    """
    # sorted() replaces keys() + in-place sort(), which fails on Python 3.
    for ident in sorted(self.idx.identifiers.keys()):
        header = self.idx.identifiers[ident].header
        href = ('html/' + self.basename + '-' +
                self.modulename_file(header) + '.html#' + ident)
        output.write(" <reference name='%s' href='%s'/>\n" % (ident, href))
1921
def serialize_xrefs_index(self, output):
    """Write the word index held in ``self.xref``, split into chunks.

    Words are visited in sorted order; a word referenced by more than 30
    entries is skipped.  Words are grouped in ``<letter>`` elements by
    first character, and letters are grouped in ``<chunk>`` elements: a
    new chunk is started on a letter change once more than 200 ``<ref>``
    elements have been written to the current one.  Each ``<word>`` lists
    its references sorted, with adjacent duplicates dropped.  If at least
    one word was written, a closing ``<chunks>`` summary gives the name,
    first letter and last letter of every chunk.

    Note that the reference lists stored in ``self.xref`` are sorted in
    place.

    output: object with a ``write()`` method receiving the XML text.
    """
    index = self.xref
    letter = None
    count = 0
    chunk = 0
    chunks = []
    # sorted() replaces keys() + in-place sort(), which fails on Python 3.
    for word in sorted(index.keys()):
        if len(index[word]) > 30:
            continue
        if word[0] != letter:
            if letter is None or count > 200:
                if letter is not None:
                    # Close the chunk that just filled up and record it.
                    output.write(" </letter>\n")
                    output.write(" </chunk>\n")
                    count = 0
                    chunks.append(["chunk%s" % (chunk - 1),
                                   first_letter, letter])
                output.write(" <chunk name='chunk%s'>\n" % (chunk,))
                first_letter = word[0]
                chunk = chunk + 1
            else:
                output.write(" </letter>\n")
            letter = word[0]
            output.write(" <letter name='%s'>\n" % (letter,))
        output.write(" <word name='%s'>\n" % (word,))
        tokens = index[word]
        tokens.sort()
        previous = None
        for token in tokens:
            if previous == token:
                continue
            previous = token
            output.write(" <ref name='%s'/>\n" % (token,))
            count = count + 1
        output.write(" </word>\n")
    if letter is not None:
        output.write(" </letter>\n")
        output.write(" </chunk>\n")
        if count != 0:
            chunks.append(["chunk%s" % (chunk - 1), first_letter, letter])
        output.write(" <chunks>\n")
        for ch in chunks:
            output.write(" <chunk name='%s' start='%s' end='%s'/>\n" % (
                ch[0], ch[1], ch[2]))
        output.write(" </chunks>\n")
1968
def serialize_xrefs(self, output):
    """Write every cross-reference section, each wrapped in its own tag.

    The sections are emitted in the order references, alpha,
    constructors, functions, files, index; the content of section
    ``<tag>`` is produced by the method ``serialize_xrefs_<tag>``.
    """
    section_tags = ("references", "alpha", "constructors",
                    "functions", "files", "index")
    for tag in section_tags:
        output.write(" <%s>\n" % tag)
        # Resolve the writer only when its section is reached.
        getattr(self, "serialize_xrefs_" + tag)(output)
        output.write(" </%s>\n" % tag)
1988
def serialize(self):
    """Write ``<name>-api.xml`` and ``<name>-refs.xml`` in the current directory.

    The API file lists the exports of every header in ``self.headers``
    (sorted), followed by all macros, enums, typedefs, variables and
    functions of ``self.idx``, each group sorted by name.  The refs file
    wraps the output of ``serialize_xrefs()``.  A progress line is
    printed before each file is written.
    """
    filename = "%s-api.xml" % self.name
    print("Saving XML description %s" % (filename,))
    # The with-block closes the file even if a serializer raises.
    with open(filename, "w") as output:
        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
        output.write("<api name='%s'>\n" % self.name)
        output.write(" <files>\n")
        for header in sorted(self.headers.keys()):
            self.serialize_exports(output, header)
        output.write(" </files>\n")
        output.write(" <symbols>\n")
        for macro in sorted(self.idx.macros.keys()):
            self.serialize_macro(output, macro)
        for enum in sorted(self.idx.enums.keys()):
            self.serialize_enum(output, enum)
        for typedef in sorted(self.idx.typedefs.keys()):
            self.serialize_typedef(output, typedef)
        for variable in sorted(self.idx.variables.keys()):
            self.serialize_variable(output, variable)
        for function in sorted(self.idx.functions.keys()):
            self.serialize_function(output, function)
        output.write(" </symbols>\n")
        output.write("</api>\n")

    filename = "%s-refs.xml" % self.name
    print("Saving XML Cross References %s" % (filename,))
    with open(filename, "w") as output:
        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
        output.write("<apirefs name='%s'>\n" % self.name)
        self.serialize_xrefs(output)
        output.write("</apirefs>\n")
Daniel Veillarda9b66d02002-12-11 14:23:49 +00002034
2035
def rebuild():
    """Guess which project is being documented and regenerate its XML.

    The project is chosen from the files found relative to the current
    directory: ``parser.c`` or ``../parser.c`` selects libxml2,
    ``../libxslt/transform.c`` selects libxslt.  The matching
    ``docBuilder`` is scanned, analyzed and serialized; if
    ``../libexslt/exslt.c`` also exists, a second builder does the same
    for libexslt.

    Returns the main builder, or ``None`` (after printing a diagnostic)
    when no known layout is recognized.
    """
    if glob.glob("parser.c"):
        print("Rebuilding API description for libxml2")
        builder = docBuilder("libxml2", [".", "."],
                             ["xmlwin32version.h", "tst.c"])
    elif glob.glob("../parser.c"):
        print("Rebuilding API description for libxml2")
        builder = docBuilder("libxml2", ["..", "../include/libxml"],
                             ["xmlwin32version.h", "tst.c"])
    elif glob.glob("../libxslt/transform.c"):
        print("Rebuilding API description for libxslt")
        builder = docBuilder("libxslt", ["../libxslt"],
                             ["win32config.h", "libxslt.h", "tst.c"])
    else:
        print("rebuild() failed, unable to guess the module")
        return None
    builder.scan()
    builder.analyze()
    builder.serialize()
    if glob.glob("../libexslt/exslt.c"):
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize()
    return builder
2062
2063#
2064# for debugging the parser
2065#
def parse(filename):
    """Run ``CParser`` on *filename* and return the result of its ``parse()``."""
    return CParser(filename).parse()
2070
if __name__ == "__main__":
    # Without arguments the whole API description is rebuilt; with at
    # least one, debug is switched on and only the first argument is parsed.
    if len(sys.argv) <= 1:
        rebuild()
    else:
        debug = 1
        parse(sys.argv[1])