Blame - doc/apibuild.py - platform/external/libxml2

blob: 8d1b204a9e18974a7c3751942e4e9af6c08ee6b9 [file] [log] [blame]

Daniel Veillard	a9b66d0	2002-12-11 14:23:49 +0000	[diff] [blame^]	1	#!/usr/bin/python -u
				2	#
# This is the API builder: it parses the C sources and builds the
# formal API description in XML.
				5	#
				6	# See Copyright for the status of this software.
				7	#
				8	# daniel@veillard.com
				9	#
				10	import sys
				11	import string
				12	import glob
				13
				14	def escape(raw):
				15	raw = string.replace(raw, '&', '&')
				16	raw = string.replace(raw, '<', '<')
				17	raw = string.replace(raw, '>', '>')
				18	raw = string.replace(raw, "'", ''')
				19	raw = string.replace(raw, '"', '"')
				20	return raw
				21
				22	class identifier:
				23	def __init__(self, name, module=None, type=None, info=None, extra=None):
				24	self.name = name
				25	self.module = module
				26	self.type = type
				27	self.info = info
				28	self.extra = extra
				29	self.static = 0
				30
				31	def __repr__(self):
				32	r = "%s %s:" % (self.type, self.name)
				33	if self.static:
				34	r = r + " static"
				35	if self.module != None:
				36	r = r + " from %s" % (self.module)
				37	if self.info != None:
				38	r = r + " " + `self.info`
				39	if self.extra != None:
				40	r = r + " " + `self.extra`
				41	return r
				42
				43
				44	def set_module(self, module):
				45	self.module = module
				46	def set_type(self, type):
				47	self.type = type
				48	def set_info(self, info):
				49	self.info = info
				50	def set_extra(self, extra):
				51	self.extra = extra
				52	def set_static(self, static):
				53	self.static = static
				54
				55	def update(self, module, type = None, info = None, extra=None):
				56	if module != None and self.module == None:
				57	self.set_module(module)
				58	if type != None and self.type == None:
				59	self.set_type(type)
				60	if info != None:
				61	self.set_info(info)
				62	if extra != None:
				63	self.set_extra(extra)
				64
				65
				66	class index:
				67	def __init__(self, name = "noname"):
				68	self.name = name;
				69	self.identifiers = {}
				70	self.functions = {}
				71	self.variables = {}
				72	self.includes = {}
				73	self.structs = {}
				74	self.enums = {}
				75	self.typedefs = {}
				76	self.macros = {}
				77	self.references = {}
				78
				79	def add(self, name, module, static, type, info=None, extra=None):
				80	if name[0:2] == '__':
				81	return None
				82	d = None
				83	try:
				84	d = self.identifiers[name]
				85	d.update(module, type, info, extra)
				86	except:
				87	d = identifier(name, module, type, info, extra)
				88	self.identifiers[name] = d
				89
				90	if d != None and static == 1:
				91	d.set_static(1)
				92
				93	if d != None and name != None and type != None:
				94	if type == "function":
				95	self.functions[name] = d
				96	elif type == "functype":
				97	self.functions[name] = d
				98	elif type == "variable":
				99	self.variables[name] = d
				100	elif type == "include":
				101	self.includes[name] = d
				102	elif type == "struct":
				103	self.structs[name] = d
				104	elif type == "enum":
				105	self.enums[name] = d
				106	elif type == "typedef":
				107	self.typedefs[name] = d
				108	elif type == "macro":
				109	self.macros[name] = d
				110	else:
				111	print "Unable to register type ", type
				112	return d
				113
				114	def merge(self, idx):
				115	for id in idx.functions.keys():
				116	#
				117	# macro might be used to override functions or variables
				118	# definitions
				119	#
				120	if self.macros.has_key(id):
				121	del self.macros[id]
				122	if self.functions.has_key(id):
				123	print "function %s from %s redeclared in %s" % (
				124	id, self.functions[id].module, idx.functions[id].module)
				125	else:
				126	self.functions[id] = idx.functions[id]
				127	self.identifiers[id] = idx.functions[id]
				128	for id in idx.variables.keys():
				129	#
				130	# macro might be used to override functions or variables
				131	# definitions
				132	#
				133	if self.macros.has_key(id):
				134	del self.macros[id]
				135	if self.variables.has_key(id):
				136	print "variable %s from %s redeclared in %s" % (
				137	id, self.variables[id].module, idx.variables[id].module)
				138	else:
				139	self.variables[id] = idx.variables[id]
				140	self.identifiers[id] = idx.variables[id]
				141	for id in idx.structs.keys():
				142	if self.structs.has_key(id):
				143	print "struct %s from %s redeclared in %s" % (
				144	id, self.structs[id].module, idx.structs[id].module)
				145	else:
				146	self.structs[id] = idx.structs[id]
				147	self.identifiers[id] = idx.structs[id]
				148	for id in idx.typedefs.keys():
				149	if self.typedefs.has_key(id):
				150	print "typedef %s from %s redeclared in %s" % (
				151	id, self.typedefs[id].module, idx.typedefs[id].module)
				152	else:
				153	self.typedefs[id] = idx.typedefs[id]
				154	self.identifiers[id] = idx.typedefs[id]
				155	for id in idx.macros.keys():
				156	#
				157	# macro might be used to override functions or variables
				158	# definitions
				159	#
				160	if self.variables.has_key(id):
				161	continue
				162	if self.functions.has_key(id):
				163	continue
				164	if self.enums.has_key(id):
				165	continue
				166	if self.macros.has_key(id):
				167	print "macro %s from %s redeclared in %s" % (
				168	id, self.macros[id].module, idx.macros[id].module)
				169	else:
				170	self.macros[id] = idx.macros[id]
				171	self.identifiers[id] = idx.macros[id]
				172	for id in idx.enums.keys():
				173	if self.enums.has_key(id):
				174	print "enum %s from %s redeclared in %s" % (
				175	id, self.enums[id].module, idx.enums[id].module)
				176	else:
				177	self.enums[id] = idx.enums[id]
				178	self.identifiers[id] = idx.enums[id]
				179
				180	def merge_public(self, idx):
				181	for id in idx.functions.keys():
				182	if self.functions.has_key(id):
				183	up = idx.functions[id]
				184	self.functions[id].update(None, up.type, up.info, up.extra)
				185	else:
				186	if idx.functions[id].static == 0:
				187	self.functions[id] = idx.functions[id]
				188
				189	def analyze_dict(self, type, dict):
				190	count = 0
				191	public = 0
				192	for name in dict.keys():
				193	id = dict[name]
				194	count = count + 1
				195	if id.static == 0:
				196	public = public + 1
				197	if count != public:
				198	print " %d %s , %d public" % (count, type, public)
				199	elif count != 0:
				200	print " %d public %s" % (count, type)
				201
				202
				203	def analyze(self):
				204	self.analyze_dict("functions", self.functions)
				205	self.analyze_dict("variables", self.variables)
				206	self.analyze_dict("structs", self.structs)
				207	self.analyze_dict("typedefs", self.typedefs)
				208	self.analyze_dict("macros", self.macros)
				209
				210	#
				211	# C parser analysis code
				212	#
# Files excluded from the analysis, each mapped to the reason why.
# NOTE(review): the code consulting this table is not visible in this part
# of the file -- presumably the keys are matched against source file
# names; confirm against the caller.
ignored_files = {
    "trio": "too many non standard macros",
    "trio.c": "too many non standard macros",
    "trionan.c": "too many non standard macros",
    "triostr.c": "too many non standard macros",
    "acconfig.h": "generated portability layer",
    "config.h": "generated portability layer",
    "libxml.h": "internal only",
}

# Names that CParser.token() drops from the token stream.  Each value is
# (n, reason): n is the number of tokens following the name that are
# discarded together with it, reason is a human readable explanation.
ignored_words = {
    "WINAPI": (0, "Windows keyword"),
    "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
    "__declspec": (3, "Windows keyword"),
    "ATTRIBUTE_UNUSED": (0, "macro keyword"),
}
				229
				230	class CLexer:
				231	"""A lexer for the C language, tokenize the input by reading and
				232	analyzing it line by line"""
				233	def __init__(self, input):
				234	self.input = input
				235	self.tokens = []
				236	self.line = ""
				237	self.lineno = 0
				238
				239	def getline(self):
				240	line = ''
				241	while line == '':
				242	line = self.input.readline()
				243	if not line:
				244	return None
				245	self.lineno = self.lineno + 1
				246	line = string.lstrip(line)
				247	line = string.rstrip(line)
				248	if line == '':
				249	continue
				250	while line[-1] == '\\':
				251	line = line[:-1]
				252	n = self.input.readline()
				253	self.lineno = self.lineno + 1
				254	n = string.lstrip(n)
				255	n = string.rstrip(n)
				256	if not n:
				257	break
				258	else:
				259	line = line + n
				260	return line
				261
				262	def getlineno(self):
				263	return self.lineno
				264
				265	def push(self, token):
				266	self.tokens.insert(0, token);
				267
				268	def debug(self):
				269	print "Last token: ", self.last
				270	print "Token queue: ", self.tokens
				271	print "Line %d end: " % (self.lineno), self.line
				272
				273	def token(self):
				274	while self.tokens == []:
				275	if self.line == "":
				276	line = self.getline()
				277	else:
				278	line = self.line
				279	self.line = ""
				280	if line == None:
				281	return None
				282
				283	if line[0] == '#':
				284	self.tokens = map((lambda x: ('preproc', x)),
				285	string.split(line))
				286	break;
				287	l = len(line)
				288	if line[0] == '"' or line[0] == "'":
				289	end = line[0]
				290	line = line[1:]
				291	found = 0
				292	tok = ""
				293	while found == 0:
				294	i = 0
				295	l = len(line)
				296	while i < l:
				297	if line[i] == end:
				298	self.line = line[i+1:]
				299	line = line[:i]
				300	l = i
				301	found = 1
				302	break
				303	if line[i] == '\\':
				304	i = i + 1
				305	i = i + 1
				306	tok = tok + line
				307	if found == 0:
				308	line = self.getline()
				309	if line == None:
				310	return None
				311	self.last = ('string', tok)
				312	return self.last
				313
				314	if l >= 2 and line[0] == '/' and line[1] == '*':
				315	line = line[2:]
				316	found = 0
				317	tok = ""
				318	while found == 0:
				319	i = 0
				320	l = len(line)
				321	while i < l:
				322	if line[i] == '*' and i+1 < l and line[i+1] == '/':
				323	self.line = line[i+2:]
				324	line = line[:i-1]
				325	l = i
				326	found = 1
				327	break
				328	i = i + 1
				329	if tok != "":
				330	tok = tok + "\n"
				331	tok = tok + line
				332	if found == 0:
				333	line = self.getline()
				334	if line == None:
				335	return None
				336	self.last = ('comment', tok)
				337	return self.last
				338	if l >= 2 and line[0] == '/' and line[1] == '/':
				339	line = line[2:]
				340	self.last = ('comment', line)
				341	return self.last
				342	i = 0
				343	while i < l:
				344	if line[i] == '/' and i+1 < l and line[i+1] == '/':
				345	self.line = line[i:]
				346	line = line[:i]
				347	break
				348	if line[i] == '/' and i+1 < l and line[i+1] == '*':
				349	self.line = line[i:]
				350	line = line[:i]
				351	break
				352	if line[i] == '"' or line[i] == "'":
				353	self.line = line[i:]
				354	line = line[:i]
				355	break
				356	i = i + 1
				357	l = len(line)
				358	i = 0
				359	while i < l:
				360	if line[i] == ' ' or line[i] == '\t':
				361	i = i + 1
				362	continue
				363	o = ord(line[i])
				364	if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
				365	(o >= 48 and o <= 57):
				366	s = i
				367	while i < l:
				368	o = ord(line[i])
				369	if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
				370	(o >= 48 and o <= 57) or string.find(
				371	" \t(){}:;,+-*/%&!\|[]=><", line[i]) == -1:
				372	i = i + 1
				373	else:
				374	break
				375	self.tokens.append(('name', line[s:i]))
				376	continue
				377	if string.find("(){}:;,[]", line[i]) != -1:
				378	# if line[i] == '(' or line[i] == ')' or line[i] == '{' or \
				379	# line[i] == '}' or line[i] == ':' or line[i] == ';' or \
				380	# line[i] == ',' or line[i] == '[' or line[i] == ']':
				381	self.tokens.append(('sep', line[i]))
				382	i = i + 1
				383	continue
				384	if string.find("+-*><=/%&!\|.", line[i]) != -1:
				385	# if line[i] == '+' or line[i] == '-' or line[i] == '*' or \
				386	# line[i] == '>' or line[i] == '<' or line[i] == '=' or \
				387	# line[i] == '/' or line[i] == '%' or line[i] == '&' or \
				388	# line[i] == '!' or line[i] == '\|' or line[i] == '.':
				389	if line[i] == '.' and i + 2 < l and \
				390	line[i+1] == '.' and line[i+2] == '.':
				391	self.tokens.append(('name', '...'))
				392	i = i + 3
				393	continue
				394
				395	j = i + 1
				396	if j < l and (
				397	string.find("+-*><=/%&!\|", line[j]) != -1):
				398	# line[j] == '+' or line[j] == '-' or line[j] == '*' or \
				399	# line[j] == '>' or line[j] == '<' or line[j] == '=' or \
				400	# line[j] == '/' or line[j] == '%' or line[j] == '&' or \
				401	# line[j] == '!' or line[j] == '\|'):
				402	self.tokens.append(('op', line[i:j+1]))
				403	i = j + 1
				404	else:
				405	self.tokens.append(('op', line[i]))
				406	i = i + 1
				407	continue
				408	s = i
				409	while i < l:
				410	o = ord(line[i])
				411	if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
				412	(o >= 48 and o <= 57) or (
				413	string.find(" \t(){}:;,+-*/%&!\|[]=><", line[i]) == -1):
				414	# line[i] != ' ' and line[i] != '\t' and
				415	# line[i] != '(' and line[i] != ')' and
				416	# line[i] != '{' and line[i] != '}' and
				417	# line[i] != ':' and line[i] != ';' and
				418	# line[i] != ',' and line[i] != '+' and
				419	# line[i] != '-' and line[i] != '*' and
				420	# line[i] != '/' and line[i] != '%' and
				421	# line[i] != '&' and line[i] != '!' and
				422	# line[i] != '\|' and line[i] != '[' and
				423	# line[i] != ']' and line[i] != '=' and
				424	# line[i] != '*' and line[i] != '>' and
				425	# line[i] != '<'):
				426	i = i + 1
				427	else:
				428	break
				429	self.tokens.append(('name', line[s:i]))
				430
				431	tok = self.tokens[0]
				432	self.tokens = self.tokens[1:]
				433	self.last = tok
				434	return tok
				435
				436	class CParser:
				437	"""The C module parser"""
    def __init__(self, filename, idx = None):
        """Open filename for parsing.

        Symbols are recorded in idx when given, in a fresh index
        otherwise.  Files whose name ends with '.h' are flagged as
        headers.
        """
        self.filename = filename
        # a bare ".h" is too short to count as a header name
        self.is_header = int(len(filename) > 2 and filename[-2:] == '.h')
        self.input = open(filename)
        self.lexer = CLexer(self.input)
        if idx is None:
            idx = index()
        self.index = idx
        self.top_comment = ""
        self.last_comment = ""
        self.comment = None
				453
    def lineno(self):
        """Return the current line number as reported by the lexer."""
        return self.lexer.getlineno()
				456
				457	def error(self, msg, token=-1):
				458	print "Parse Error: " + msg
				459	if token != -1:
				460	print "Got token ", token
				461	self.lexer.debug()
				462	sys.exit(1)
				463
				464	def debug(self, msg, token=-1):
				465	print "Debug: " + msg
				466	if token != -1:
				467	print "Got token ", token
				468	self.lexer.debug()
				469
				470	def parseComment(self, token):
				471	if self.top_comment == "":
				472	self.top_comment = token[1]
				473	if self.comment == None or token[1][0] == '*':
				474	self.comment = token[1];
				475	else:
				476	self.comment = self.comment + token[1]
				477	token = self.lexer.token()
				478	return token
				479
				480	#
    # Parse the comment block associated with a macro
				482	#
				483	def parseMacroComment(self, name, quiet = 0):
				484	if name[0:2] == '__':
				485	quiet = 1
				486
				487	args = []
				488	desc = ""
				489
				490	if self.comment == None:
				491	if not quiet:
				492	print "Missing comment for macro %s" % (name)
				493	return((args, desc))
				494	if self.comment[0] != '*':
				495	if not quiet:
				496	print "Missing * in macro comment for %s" % (name)
				497	return((args, desc))
				498	lines = string.split(self.comment, '\n')
				499	if lines[0] == '*':
				500	del lines[0]
				501	if lines[0] != "* %s:" % (name):
				502	if not quiet:
				503	print "Misformatted macro comment for %s" % (name)
				504	print " Expecting '* %s:' got '%s'" % (name, lines[0])
				505	return((args, desc))
				506	del lines[0]
				507	while lines[0] == '*':
				508	del lines[0]
				509	while len(lines) > 0 and lines[0][0:3] == '* @':
				510	l = lines[0][3:]
				511	try:
				512	(arg, desc) = string.split(l, ':', 1)
				513	desc=string.strip(desc)
				514	arg=string.strip(arg)
				515	except:
				516	if not quiet:
				517	print "Misformatted macro comment for %s" % (name)
				518	print " problem with '%s'" % (lines[0])
				519	del lines[0]
				520	continue
				521	del lines[0]
				522	l = string.strip(lines[0])
				523	while len(l) > 2 and l[0:3] != '* @':
				524	while l[0] == '*':
				525	l = l[1:]
				526	desc = desc + ' ' + string.strip(l)
				527	del lines[0]
				528	if len(lines) == 0:
				529	break
				530	l = lines[0]
				531	args.append((arg, desc))
				532	while len(lines) > 0 and lines[0] == '*':
				533	del lines[0]
				534	desc = ""
				535	while len(lines) > 0:
				536	l = lines[0]
				537	while len(l) > 0 and l[0] == '*':
				538	l = l[1:]
				539	l = string.strip(l)
				540	desc = desc + " " + l
				541	del lines[0]
				542
				543	desc = string.strip(desc)
				544
				545	if quiet == 0:
				546	if desc == "":
				547	print "Macro comment for %s lack description of the macro" % (name)
				548
				549	return((args, desc))
				550
				551	#
    # Parse a comment block and merge the information found in the
    # parameter descriptions; finally return a block as complete
    # as possible
				555	#
				556	def mergeFunctionComment(self, name, description, quiet = 0):
				557	if name == 'main':
				558	quiet = 1
				559	if name[0:2] == '__':
				560	quiet = 1
				561
				562	(ret, args) = description
				563	desc = ""
				564	retdesc = ""
				565
				566	if self.comment == None:
				567	if not quiet:
				568	print "Missing comment for function %s" % (name)
				569	return(((ret[0], retdesc), args, desc))
				570	if self.comment[0] != '*':
				571	if not quiet:
				572	print "Missing * in function comment for %s" % (name)
				573	return(((ret[0], retdesc), args, desc))
				574	lines = string.split(self.comment, '\n')
				575	if lines[0] == '*':
				576	del lines[0]
				577	if lines[0] != "* %s:" % (name):
				578	if not quiet:
				579	print "Misformatted function comment for %s" % (name)
				580	print " Expecting '* %s:' got '%s'" % (name, lines[0])
				581	return(((ret[0], retdesc), args, desc))
				582	del lines[0]
				583	while lines[0] == '*':
				584	del lines[0]
				585	nbargs = len(args)
				586	while len(lines) > 0 and lines[0][0:3] == '* @':
				587	l = lines[0][3:]
				588	try:
				589	(arg, desc) = string.split(l, ':', 1)
				590	desc=string.strip(desc)
				591	arg=string.strip(arg)
				592	except:
				593	if not quiet:
				594	print "Misformatted function comment for %s" % (name)
				595	print " problem with '%s'" % (lines[0])
				596	del lines[0]
				597	continue
				598	del lines[0]
				599	l = string.strip(lines[0])
				600	while len(l) > 2 and l[0:3] != '* @':
				601	while l[0] == '*':
				602	l = l[1:]
				603	desc = desc + ' ' + string.strip(l)
				604	del lines[0]
				605	if len(lines) == 0:
				606	break
				607	l = lines[0]
				608	i = 0
				609	while i < nbargs:
				610	if args[i][1] == arg:
				611	args[i] = (args[i][0], arg, desc)
				612	break;
				613	i = i + 1
				614	if i >= nbargs:
				615	if not quiet:
				616	print "Uname to find arg %s from function comment for %s" % (
				617	arg, name)
				618	while len(lines) > 0 and lines[0] == '*':
				619	del lines[0]
				620	desc = ""
				621	while len(lines) > 0:
				622	l = lines[0]
				623	while len(l) > 0 and l[0] == '*':
				624	l = l[1:]
				625	l = string.strip(l)
				626	if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return":
				627	try:
				628	l = string.split(l, ' ', 1)[1]
				629	except:
				630	l = ""
				631	retdesc = string.strip(l)
				632	del lines[0]
				633	while len(lines) > 0:
				634	l = lines[0]
				635	while len(l) > 0 and l[0] == '*':
				636	l = l[1:]
				637	l = string.strip(l)
				638	retdesc = retdesc + " " + l
				639	del lines[0]
				640	else:
				641	desc = desc + " " + l
				642	del lines[0]
				643
				644	retdesc = string.strip(retdesc)
				645	desc = string.strip(desc)
				646
				647	if quiet == 0:
				648	#
				649	# report missing comments
				650	#
				651	i = 0
				652	while i < nbargs:
				653	if args[i][2] == None and args[i][0] != "void" and args[i][1] != None:
				654	print "Function comment for %s lack description of arg %s" % (name, args[i][1])
				655	i = i + 1
				656	if retdesc == "" and ret[0] != "void":
				657	print "Function comment for %s lack description of return value" % (name)
				658	if desc == "":
				659	print "Function comment for %s lack description of the function" % (name)
				660
				661
				662	return(((ret[0], retdesc), args, desc))
				663
				664	def parsePreproc(self, token):
				665	name = token[1]
				666	if name == "#include":
				667	token = self.lexer.token()
				668	if token == None:
				669	return None
				670	if token[0] == 'preproc':
				671	self.index.add(token[1], self.filename, not self.is_header,
				672	"include")
				673	return self.lexer.token()
				674	return token
				675	if name == "#define":
				676	token = self.lexer.token()
				677	if token == None:
				678	return None
				679	if token[0] == 'preproc':
				680	# TODO macros with arguments
				681	name = token[1]
				682	lst = []
				683	token = self.lexer.token()
				684	while token != None and token[0] == 'preproc' and \
				685	token[1][0] != '#':
				686	lst.append(token[1])
				687	token = self.lexer.token()
				688	try:
				689	name = string.split(name, '(') [0]
				690	except:
				691	pass
				692	info = self.parseMacroComment(name, not self.is_header)
				693	self.index.add(name, self.filename, not self.is_header,
				694	"macro", info)
				695	return token
				696	token = self.lexer.token()
				697	while token != None and token[0] == 'preproc' and \
				698	token[1][0] != '#':
				699	token = self.lexer.token()
				700	return token
				701
				702	#
				703	# token acquisition on top of the lexer, it handle internally
				704	# preprocessor and comments since they are logically not part of
				705	# the program structure.
				706	#
    def token(self):
        """Return the next token that is part of the program structure,
        or None at end of input.

        Comments and preprocessor directives are handed to parseComment
        and parsePreproc; names listed in ignored_words are dropped
        together with the number of following tokens given there.
        """
        token = self.lexer.token()
        while token is not None:
            if token[0] == 'comment':
                token = self.parseComment(token)
            elif token[0] == 'preproc':
                token = self.parsePreproc(token)
            elif token[0] == "name" and token[1] in ignored_words:
                skip = ignored_words[token[1]][0]
                # drop the tokens attached to the ignored word, then
                # fetch the one following them
                while skip > 0:
                    self.lexer.token()
                    skip = skip - 1
                token = self.lexer.token()
            else:
                #print "=> ", token
                return token
        return None
				730
				731	#
				732	# Parse a typedef, it records the type and its name.
				733	#
    def parseTypedef(self, token):
        """Parse the declaration following a 'typedef' keyword.

        token is the first token of the type.  Every name declared is
        added to the index, as a "functype" when parseType found a
        signature, as a "struct" when the base type is exactly "struct",
        as a "typedef" otherwise.  Returns the token following the
        closing ';', the offending token after an error, or None when
        token is None.
        """
        if token == None:
            return None
        token = self.parseType(token)
        if token == None:
            self.error("parsing typedef")
            return None
        # parseType leaves its results in self.type and self.signature
        base_type = self.type
        type = base_type
        #self.debug("end typedef type", token)
        while token != None:
            if token[0] == "name":
                name = token[1]
                signature = self.signature
                if signature != None:
                    # function type: document it from the pending comment
                    d = self.mergeFunctionComment(name,
                            ((type, None), signature), 1)
                    self.index.add(name, self.filename, not self.is_header,
                                   "functype", d)
                else:
                    if base_type == "struct":
                        self.index.add(name, self.filename, not self.is_header,
                                       "struct", type)
                        # names declared after this one refer to that struct
                        base_type = "struct " + name
                    else:
                        self.index.add(name, self.filename, not self.is_header,
                                       "typedef", type)
                token = self.token()
            else:
                self.error("parsing typedef: expecting a name")
                return token
            #self.debug("end typedef", token)
            if token != None and token[0] == 'sep' and token[1] == ',':
                # another declarator: restart from the base type and
                # collect its operator tokens
                type = base_type
                token = self.token()
                while token != None and token[0] == "op":
                    type = type + token[1]
                    token = self.token()
            elif token != None and token[0] == 'sep' and token[1] == ';':
                break;
            elif token != None and token[0] == 'name':
                type = base_type
                continue;
            else:
                self.error("parsing typedef: expecting ';'", token)
                return token
        token = self.token()
        return token
				782
				783	#
				784	# Parse a C code block, used for functions it parse till
				785	# the balancing } included
				786	#
				787	def parseBlock(self, token):
				788	while token != None:
				789	if token[0] == "sep" and token[1] == "{":
				790	token = self.token()
				791	token = self.parseBlock(token)
				792	elif token[0] == "sep" and token[1] == "}":
				793	self.comment = None
				794	token = self.token()
				795	return token
				796	else:
				797	token = self.token()
				798	return token
				799
				800	#
				801	# Parse a C struct definition till the balancing }
				802	#
    def parseStruct(self, token):
        """Parse the fields of a C struct up to the balancing '}'.

        token is the first token inside the struct.  The fields found
        are stored in self.struct_fields as a list of
        (type, name, comment) tuples.  Returns the token following the
        closing '}', or None if the input ends first.
        """
        fields = []
        #self.debug("start parseStruct", token)
        while token != None:
            if token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseTypeBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                self.struct_fields = fields
                #self.debug("end parseStruct", token)
                #print fields
                token = self.token()
                return token
            else:
                # parseType overwrites self.type, restored after the field
                base_type = self.type
                #self.debug("before parseType", token)
                token = self.parseType(token)
                #self.debug("after parseType", token)
                if token != None and token[0] == "name":
                    fname = token[1]
                    token = self.token()
                    if token[0] == "sep" and token[1] == ";":
                        # the comment recorded for the field is the one
                        # met while fetching the token after the ';'
                        self.comment = None
                        token = self.token()
                        fields.append((self.type, fname, self.comment))
                        self.comment = None
                    else:
                        self.error("parseStruct: expecting ;", token)
                elif token != None and token[0] == "sep" and token[1] == "{":
                    # nested definition: skipped, no field is recorded
                    token = self.token()
                    token = self.parseTypeBlock(token)
                    if token != None and token[0] == "name":
                        token = self.token()
                    if token != None and token[0] == "sep" and token[1] == ";":
                        token = self.token()
                    else:
                        self.error("parseStruct: expecting ;", token)
                else:
                    self.error("parseStruct: name", token)
                    token = self.token()
                self.type = base_type;
        self.struct_fields = fields
        #self.debug("end parseStruct", token)
        #print fields
        return token
				848
				849	#
				850	# Parse a C enum block, parse till the balancing }
				851	#
				852	def parseEnumBlock(self, token):
				853	self.enums = []
				854	name = None
				855	self.comment = None
				856	comment = ""
				857	value = ""
				858	while token != None:
				859	if token[0] == "sep" and token[1] == "{":
				860	token = self.token()
				861	token = self.parseTypeBlock(token)
				862	elif token[0] == "sep" and token[1] == "}":
				863	if name != None:
				864	if self.comment != None:
				865	comment = self.comment
				866	self.comment = None
				867	self.enums.append((name, value, comment))
				868	token = self.token()
				869	return token
				870	elif token[0] == "name":
				871	if name != None:
				872	if self.comment != None:
				873	comment = string.strip(self.comment)
				874	self.comment = None
				875	self.enums.append((name, value, comment))
				876	name = token[1]
				877	comment = ""
				878	value = ""
				879	token = self.token()
				880	if token[0] == "op" and token[1][0] == "=":
				881	if len(token[1]) > 1:
				882	value = token[1][1:]
				883	token = self.token()
				884	while token[0] != "sep" or (token[1] != ',' and
				885	token[1] != '}'):
				886	value = value + token[1]
				887	token = self.token()
				888	if token[0] == "sep" and token[1] == ",":
				889	token = self.token()
				890	else:
				891	token = self.token()
				892	return token
				893
				894	#
				895	# Parse a C definition block, used for structs it parse till
				896	# the balancing }
				897	#
				898	def parseTypeBlock(self, token):
				899	while token != None:
				900	if token[0] == "sep" and token[1] == "{":
				901	token = self.token()
				902	token = self.parseTypeBlock(token)
				903	elif token[0] == "sep" and token[1] == "}":
				904	token = self.token()
				905	return token
				906	else:
				907	token = self.token()
				908	return token
				909
				910	#
				911	# Parse a type: the fact that the type name can either occur after
				912	# the definition or within the definition makes it a little harder
				913	# if inside, the name token is pushed back before returning
				914	#
				915	def parseType(self, token):
				916	self.type = ""
				917	self.struct_fields = []
				918	self.signature = None
				919	if token == None:
				920	return token
				921
				922	while token[0] == "name" and (
				923	token[1] == "const" or token[1] == "unsigned"):
				924	if self.type == "":
				925	self.type = token[1]
				926	else:
				927	self.type = self.type + " " + token[1]
				928	token = self.token()
				929
				930	if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
				931	if self.type == "":
				932	self.type = token[1]
				933	else:
				934	self.type = self.type + " " + token[1]
				935	if token[0] == "name" and token[1] == "int":
				936	if self.type == "":
				937	self.type = tmp[1]
				938	else:
				939	self.type = self.type + " " + tmp[1]
				940
				941	elif token[0] == "name" and token[1] == "struct":
				942	if self.type == "":
				943	self.type = token[1]
				944	else:
				945	self.type = self.type + " " + token[1]
				946	token = self.token()
				947	nametok = None
				948	if token[0] == "name":
				949	nametok = token
				950	token = self.token()
				951	if token != None and token[0] == "sep" and token[1] == "{":
				952	token = self.token()
				953	token = self.parseStruct(token)
				954	elif token != None and token[0] == "op" and token[1] == "*":
				955	self.type = self.type + " " + nametok[1] + " *"
				956	token = self.token()
				957	while token != None and token[0] == "op" and token[1] == "*":
				958	self.type = self.type + " *"
				959	token = self.token()
				960	if token[0] == "name":
				961	nametok = token
				962	token = self.token()
				963	else:
				964	self.error("struct : expecting name", token)
				965	return token
				966	elif token != None and token[0] == "name" and nametok != None:
				967	self.type = self.type + " " + nametok[1]
				968	return token
				969
				970	if nametok != None:
				971	self.lexer.push(token)
				972	token = nametok
				973	return token
				974
				975	elif token[0] == "name" and token[1] == "enum":
				976	if self.type == "":
				977	self.type = token[1]
				978	else:
				979	self.type = self.type + " " + token[1]
				980	self.enums = []
				981	token = self.token()
				982	if token != None and token[0] == "sep" and token[1] == "{":
				983	token = self.token()
				984	token = self.parseEnumBlock(token)
				985	else:
				986	self.error("parsing enum: expecting '{'", token)
				987	enum_type = None
				988	if token != None and token[0] != "name":
				989	self.lexer.push(token)
				990	token = ("name", "enum")
				991	else:
				992	enum_type = token[1]
				993	for enum in self.enums:
				994	self.index.add(enum[0], self.filename,
				995	not self.is_header, "enum",
				996	(enum[1], enum[2], enum_type))
				997	return token
				998
				999	elif token[0] == "name":
				1000	if self.type == "":
				1001	self.type = token[1]
				1002	else:
				1003	self.type = self.type + " " + token[1]
				1004	else:
				1005	self.error("parsing type %s: expecting a name" % (self.type),
				1006	token)
				1007	return token
				1008	token = self.token()
				1009	while token != None and (token[0] == "op" or
				1010	token[0] == "name" and token[1] == "const"):
				1011	self.type = self.type + " " + token[1]
				1012	token = self.token()
				1013
				1014	#
				1015	# if there is a parenthesis here, this means a function type
				1016	#
				1017	if token != None and token[0] == "sep" and token[1] == '(':
				1018	self.type = self.type + token[1]
				1019	token = self.token()
				1020	while token != None and token[0] == "op" and token[1] == '*':
				1021	self.type = self.type + token[1]
				1022	token = self.token()
				1023	if token == None or token[0] != "name" :
				1024	self.error("parsing function type, name expected", token);
				1025	return token
				1026	self.type = self.type + token[1]
				1027	nametok = token
				1028	token = self.token()
				1029	if token != None and token[0] == "sep" and token[1] == ')':
				1030	self.type = self.type + token[1]
				1031	token = self.token()
				1032	if token != None and token[0] == "sep" and token[1] == '(':
				1033	token = self.token()
				1034	type = self.type;
				1035	token = self.parseSignature(token);
				1036	self.type = type;
				1037	else:
				1038	self.error("parsing function type, '(' expected", token);
				1039	return token
				1040	else:
				1041	self.error("parsing function type, ')' expected", token);
				1042	return token
				1043	self.lexer.push(token)
				1044	token = nametok
				1045	return token
				1046
				1047	#
				1048	# do some lookahead for arrays
				1049	#
				1050	if token != None and token[0] == "name":
				1051	nametok = token
				1052	token = self.token()
				1053	if token != None and token[0] == "sep" and token[1] == '[':
				1054	self.type = self.type + nametok[1]
				1055	while token != None and token[0] == "sep" and token[1] == '[':
				1056	self.type = self.type + token[1]
				1057	token = self.token()
				1058	while token != None and token[0] != 'sep' and \
				1059	token[1] != ']' and token[1] != ';':
				1060	self.type = self.type + token[1]
				1061	token = self.token()
				1062	if token != None and token[0] == 'sep' and token[1] == ']':
				1063	self.type = self.type + token[1]
				1064	token = self.token()
				1065	else:
				1066	self.error("parsing array type, ']' expected", token);
				1067	return token
				1068	elif token != None and token[0] == "sep" and token[1] == ':':
				1069	# remove :12 in case it's a limited int size
				1070	token = self.token()
				1071	token = self.token()
				1072	self.lexer.push(token)
				1073	token = nametok
				1074
				1075	return token
				1076
				1077	#
				1078	# Parse a signature: '(' has been parsed and we scan the type definition
				1079	# up to the ')' included
    def parseSignature(self, token):
        """Parse a parameter list whose opening '(' was already consumed.

        Parameters are read up to and including the closing ')' and stored
        in self.signature as a list of (type, name, None) tuples.  The name
        is None when only a type was given, and "..." when the type itself
        is "...".

        Returns the token following the ')', or None if the token stream
        ran out first.
        """
        def at_sep(tok, char):
            # True when tok is the separator token `char`.
            return tok is not None and tok[0] == "sep" and tok[1] == char

        # Empty parameter list: "()".
        if at_sep(token, ')'):
            self.signature = []
            return self.token()

        params = []
        while token is not None:
            # parseType() leaves the type text in self.type and hands back
            # the token that comes after it.
            token = self.parseType(token)
            if token is not None and token[0] == "name":
                params.append((self.type, token[1], None))
                token = self.token()
            elif at_sep(token, ','):
                token = self.token()
                continue
            elif at_sep(token, ')'):
                # only the type was provided
                if self.type == "...":
                    params.append((self.type, "...", None))
                else:
                    params.append((self.type, None, None))

            # Step over the separator that ends this parameter.
            if at_sep(token, ','):
                token = self.token()
            elif at_sep(token, ')'):
                token = self.token()
                break
        self.signature = params
        return token
				1109
				1110	#
				1111	# Parse a global definition, be it a type, variable or function
				1112	# the extern "C" blocks are a bit nasty and require it to recurse.
				1113	#
    def parseGlobal(self, token):
        """Parse one top-level declaration and record it in the index.

        Starting from `token` this handles:
          - extern "C" { ... } blocks, by recursing on their content;
          - typedefs, which are handed over to parseTypedef();
          - variables, function prototypes and function definitions, which
            are registered through self.index.add().

        token must not be None.  Returns the token following what was
        consumed, or None when the token stream ran out.
        """
        static = 0
        if token[1] == 'extern':
            token = self.token()
            if token is None:
                return token
            if token[0] == 'string':
                if token[1] != 'C':
                    return token
                token = self.token()
                if token is None:
                    return token
                if token[0] == 'sep' and token[1] == "{":
                    token = self.token()
                    # Everything up to the closing '}' is parsed like
                    # ordinary top-level content.
                    while token is not None and (token[0] != 'sep' or
                                                 token[1] != "}"):
                        if token[0] != 'name':
                            self.error(
                                "token %s %s unexpected at the top level" % (
                                    token[0], token[1]))
                        token = self.parseGlobal(token)
                    token = self.token()
                    return token
        elif token[1] == 'static':
            static = 1
            token = self.token()
            if token is None or token[0] != 'name':
                return token

        if token[1] == 'typedef':
            token = self.token()
            return self.parseTypedef(token)

        token = self.parseType(token)
        base_type = self.type
        if token is None or token[0] != "name":
            return token
        decl_type = base_type
        self.name = token[1]
        token = self.token()
        while token is not None and (token[0] == "sep" or token[0] == "op"):
            if token[0] == "sep" and token[1] == "[":
                # Array declarator: fold everything up to the ';' into
                # the type text.
                decl_type = decl_type + token[1]
                token = self.token()
                while token is not None and (token[0] != "sep" or
                                             token[1] != ";"):
                    decl_type = decl_type + token[1]
                    token = self.token()

            if token is not None and token[0] == "op" and token[1] == "=":
                #
                # Skip the initialization of the variable
                #
                token = self.token()
                # The stream may end right after '='; guard before
                # indexing so this is reported through self.error()
                # below instead of failing on None[0].
                if (token is not None and token[0] == 'sep' and
                        token[1] == '{'):
                    token = self.token()
                    token = self.parseBlock(token)
                else:
                    self.comment = None
                    while token is not None and (
                            token[0] != "sep" or
                            (token[1] != ';' and token[1] != ',')):
                        token = self.token()
                self.comment = None
                if token is None or token[0] != "sep" or (
                        token[1] != ';' and token[1] != ','):
                    self.error("missing ';' or ',' after value")

            if token is not None and token[0] == "sep":
                if token[1] == ";":
                    # End of a variable (or struct) declaration.
                    self.comment = None
                    token = self.token()
                    if decl_type == "struct":
                        self.index.add(self.name, self.filename,
                                       not self.is_header, "struct",
                                       self.struct_fields)
                    else:
                        self.index.add(self.name, self.filename,
                                       not self.is_header, "variable",
                                       decl_type)
                    break
                elif token[1] == "(":
                    token = self.token()
                    token = self.parseSignature(token)
                    if token is None:
                        return None
                    if token[0] == "sep" and token[1] == ";":
                        # Function prototype.
                        d = self.mergeFunctionComment(
                            self.name,
                            ((decl_type, None), self.signature), 1)
                        self.index.add(self.name, self.filename, static,
                                       "function", d)
                        token = self.token()
                    # 'elif' rather than 'if': the token fetched after a
                    # prototype's ';' may be None at the end of the input
                    # and must not be examined as a function body.
                    elif token[0] == "sep" and token[1] == "{":
                        # Function definition: register it, then skip
                        # its body.
                        d = self.mergeFunctionComment(
                            self.name,
                            ((decl_type, None), self.signature), static)
                        self.index.add(self.name, self.filename, static,
                                       "function", d)
                        token = self.token()
                        token = self.parseBlock(token)
                elif token[1] == ',':
                    # Several variables declared in one statement:
                    # record this one and restart from the base type for
                    # the next declarator.
                    self.comment = None
                    self.index.add(self.name, self.filename, static,
                                   "variable", decl_type)
                    decl_type = base_type
                    token = self.token()
                    while token is not None and token[0] == "sep":
                        decl_type = decl_type + token[1]
                        token = self.token()
                    if token is not None and token[0] == "name":
                        self.name = token[1]
                        token = self.token()
                else:
                    break

        return token
				1232
    def parse(self):
        """Parse the whole file and return the index that was filled in.

        Top-level tokens are handed to parseGlobal() one declaration at a
        time.  A token that is not a name is reported through self.error();
        parsing of the file then stops after one more parseGlobal() call.

        Always returns self.index, including after such an error: callers
        use the result as an index without checking it for None.
        """
        print("Parsing %s" % (self.filename))
        token = self.token()
        while token is not None:
            if token[0] != 'name':
                self.error("token %s %s unexpected at the top level" % (
                    token[0], token[1]))
                token = self.parseGlobal(token)
                # Give up on the rest of the file but keep what was
                # indexed so far.
                return self.index
            token = self.parseGlobal(token)
        return self.index
				1245
				1246
				1247	class docBuilder:
				1248	"""A documentation builder"""
				1249	def __init__(self, name, directories=['.'], excludes=[]):
				1250	self.name = name
				1251	self.directories = directories
				1252	self.excludes = excludes + ignored_files.keys()
				1253	self.modules = {}
				1254	self.headers = {}
				1255	self.idx = index()
				1256
				1257	def analyze(self):
				1258	print "Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys()))
				1259	self.idx.analyze()
				1260
				1261	def scanHeaders(self):
				1262	for header in self.headers.keys():
				1263	parser = CParser(header)
				1264	idx = parser.parse()
				1265	self.headers[header] = idx;
				1266	self.idx.merge(idx)
				1267
				1268	def scanModules(self):
				1269	for module in self.modules.keys():
				1270	parser = CParser(module)
				1271	idx = parser.parse()
				1272	# idx.analyze()
				1273	self.modules[module] = idx
				1274	self.idx.merge_public(idx)
				1275
				1276	def scan(self):
				1277	for directory in self.directories:
				1278	files = glob.glob(directory + "/*.c")
				1279	for file in files:
				1280	skip = 0
				1281	for excl in self.excludes:
				1282	if string.find(file, excl) != -1:
				1283	skip = 1;
				1284	break
				1285	if skip == 0:
				1286	self.modules[file] = None;
				1287	files = glob.glob(directory + "/*.h")
				1288	for file in files:
				1289	skip = 0
				1290	for excl in self.excludes:
				1291	if string.find(file, excl) != -1:
				1292	skip = 1;
				1293	break
				1294	if skip == 0:
				1295	self.headers[file] = None;
				1296	self.scanHeaders()
				1297	self.scanModules()
				1298
				1299	def modulename_file(self, file):
				1300	module = string.split(file, '/')[-1]
				1301	if module[-2:] == '.h':
				1302	module = module[:-2]
				1303	return module
				1304
				1305	def serialize_enum(self, output, name):
				1306	id = self.idx.enums[name]
				1307	output.write(" <enum name='%s' file='%s'" % (name,
				1308	self.modulename_file(id.module)))
				1309	if id.info != None:
				1310	info = id.info
				1311	if info[0] != None and info[0] != '':
				1312	output.write(" value='%s'" % info[0]);
				1313	if info[2] != None and info[2] != '':
				1314	output.write(" type='%s'" % info[2]);
				1315	if info[1] != None and info[1] != '':
				1316	output.write(" info='%s'" % escape(info[1]));
				1317	output.write("/>\n")
				1318
				1319	def serialize_macro(self, output, name):
				1320	id = self.idx.macros[name]
				1321	output.write(" <macro name='%s' file='%s'>\n" % (name,
				1322	self.modulename_file(id.module)))
				1323	if id.info != None:
				1324	try:
				1325	(args, desc) = id.info
				1326	if desc != None and desc != "":
				1327	output.write(" <info>%s</info>\n" % (escape(desc)))
				1328	for arg in args:
				1329	(name, desc) = arg
				1330	if desc != None and desc != "":
				1331	output.write(" <arg name='%s' info='%s'/>\n" % (
				1332	name, escape(desc)))
				1333	else:
				1334	output.write(" <arg name='%s'/>\n" % (name))
				1335	except:
				1336	pass
				1337	output.write(" </macro>\n")
				1338
				1339	def serialize_typedef(self, output, name):
				1340	id = self.idx.typedefs[name]
				1341	if id.info[0:7] == 'struct ':
				1342	output.write(" <struct name='%s' file='%s' type='%s'" % (
				1343	name, self.modulename_file(id.module), id.info))
				1344	name = id.info[7:]
				1345	if self.idx.structs.has_key(name):
				1346	output.write(">\n");
				1347	for field in self.idx.structs[name].info:
				1348	desc = field[2]
				1349	if desc == None:
				1350	desc = ''
				1351	else:
				1352	desc = escape(desc)
				1353	output.write(" <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc))
				1354	output.write(" </struct>\n")
				1355	else:
				1356	output.write("/>\n");
				1357	else :
				1358	output.write(" <typedef name='%s' file='%s' type='%s'/>\n" % (
				1359	name, self.modulename_file(id.module), id.info))
				1360
				1361	def serialize_function(self, output, name):
				1362	id = self.idx.functions[name]
				1363	output.write(" <%s name='%s' file='%s'>\n" % (id.type, name,
				1364	self.modulename_file(id.module)))
				1365	try:
				1366	(ret, params, desc) = id.info
				1367	output.write(" <info>%s</info>\n" % (escape(desc)))
				1368	if ret[0] != None:
				1369	if ret[0] == "void":
				1370	output.write(" <return type='void'/>\n")
				1371	else:
				1372	output.write(" <return type='%s' info='%s'/>\n" % (
				1373	ret[0], escape(ret[1])))
				1374	for param in params:
				1375	if param[0] == 'void':
				1376	continue
				1377	if param[2] == None:
				1378	output.write(" <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
				1379	else:
				1380	output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
				1381	except:
				1382	print "Failed to save function %s info: " % name, `id.info`
				1383	output.write(" </%s>\n" % (id.type))
				1384
				1385	def serialize_exports(self, output, file):
				1386	module = self.modulename_file(file)
				1387	output.write(" <file name='%s'>\n" % (module))
				1388	dict = self.headers[file]
				1389	ids = dict.functions.keys() + dict.variables.keys() + \
				1390	dict.macros.keys() + dict.typedefs.keys() + \
				1391	dict.structs.keys() + dict.enums.keys()
				1392	ids.sort()
				1393	for id in ids:
				1394	output.write(" <exports symbol='%s'/>\n" % (id))
				1395	output.write(" </file>\n")
				1396
				1397
				1398	def serialize(self, filename = None):
				1399	if filename == None:
				1400	filename = "%s-api.xml" % self.name
				1401	print "Saving XML description %s" % (filename)
				1402	output = open(filename, "w")
				1403	output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
				1404	output.write("<api name='%s'>\n" % self.name)
				1405	output.write(" <files>\n")
				1406	for file in self.headers.keys():
				1407	self.serialize_exports(output, file)
				1408	output.write(" </files>\n")
				1409	output.write(" <symbols>\n")
				1410	macros = self.idx.macros.keys()
				1411	macros.sort()
				1412	for macro in macros:
				1413	self.serialize_macro(output, macro)
				1414	enums = self.idx.enums.keys()
				1415	enums.sort()
				1416	for enum in enums:
				1417	self.serialize_enum(output, enum)
				1418	typedefs = self.idx.typedefs.keys()
				1419	typedefs.sort()
				1420	for typedef in typedefs:
				1421	self.serialize_typedef(output, typedef)
				1422	functions = self.idx.functions.keys()
				1423	functions.sort()
				1424	for function in functions:
				1425	self.serialize_function(output, function)
				1426	output.write(" </symbols>\n")
				1427	output.write("</api>\n")
				1428	output.close()
				1429
				1430
				1431	def rebuild():
				1432	builder = None
				1433	if glob.glob("../parser.c") != [] :
				1434	print "Rebuilding API description for libxml2"
				1435	builder = docBuilder("libxml2", ["..", "../include/libxml"],
				1436	["xmlwin32version.h", "tst.c"])
				1437	elif glob.glob("../libxslt/transform.c") != [] :
				1438	print "Rebuilding API description for libxslt"
				1439	builder = docBuilder("libxslt", ["../libxslt"],
				1440	["win32config.h", "tst.c"])
				1441	else:
				1442	print "rebuild() failed, unable to guess the module"
				1443	return None
				1444	builder.scan()
				1445	builder.analyze()
				1446	builder.serialize()
				1447	return builder
				1448
				1449	#
				1450	# for debugging the parser
				1451	#
				1452	def parse(filename):
				1453	parser = CParser(filename)
				1454	idx = parser.parse()
				1455	return idx
				1456
# Script entry point: rebuild the API description when this file is run
# directly rather than imported.
if __name__ == "__main__":
    rebuild()