Blame - doc/parsedecl.py - fp2-dev/platform/external/libxml2

blob: 1811b4846150f1c54f90bd2c795af7744196c883 [file] [log] [blame]

Daniel Veillard	6100647	2002-01-21 17:31:47 +0000	[diff] [blame]	1	#!/usr/bin/python -u
				2	#
				3	# tries to parse the output of gtk-doc declaration files and make
				4	# an XML reusable description from them
				5	#
				6	# TODO: try to extracts comments from the DocBook output of
				7
				8	import sys
				9	import string
				10
# Symbol tables filled in while reading libxml-decl.txt.
macros = {}          # macro name -> comment text ('' until one is found)
variables = {}       # variable name -> comment text
structs = {}         # struct name -> comment text
typedefs = {}        # typedef name -> comment text
enums = {}           # enum name -> [list of constant names, comment text]
functions = {}       # function name -> [return type, [(type, name), ...], comment text]
user_functions = {}  # function-type name -> [return type, args, comment text]

# Reverse indexes from a normalized C type string to the functions using it
# (one list entry per use).
ret_types = {}       # return type -> function names
types = {}           # argument type -> function names

# Layout information read from libxml-decl-list.txt.
sections = []           # file names, in the order their sections were read
files = {}              # file name -> identifiers listed in that section
identifiers_file = {}   # identifier -> file name of the section listing it
identifiers_type = {}   # identifier -> kind ("macro", "struct", "function", ...)
				25
Daniel Veillard	2d1464f	2002-01-21 23:16:56 +0000	[diff] [blame^]	26	##################################################################
				27	#
				28	# Parsing: libxml-decl.txt
				29	#
				30	##################################################################
def mormalizeTypeSpaces(raw, function):
    """Normalize the spacing of an argument type and index its user.

    The whitespace-separated tokens of `raw` are re-joined with single
    spaces, `function` is appended to the global `types` entry for the
    resulting string (the entry is created on first use), and the
    normalized string is returned.
    """
    global types

    normalized = ' '.join(raw.split())
    types.setdefault(normalized, []).append(function)
    return normalized
				46
				47	def removeComments(raw):
				48	while string.find(raw, '/*') > 0:
				49	e = string.find(raw, '/*')
				50	tmp = raw[0:e]
				51	raw = raw[e:]
				52	e = string.find(raw, '*/')
				53	if e > 0:
				54	raw = tmp + raw[e + 2:]
				55	else:
				56	raw = tmp
				57	return raw
				58
				59	def extractArgs(raw, function):
				60	raw = removeComments(raw)
				61	list = string.split(raw, ",")
				62	ret = []
				63	for arg in list:
				64	i = len(arg)
				65	if i == 0:
				66	continue
				67	i = i - 1
				68	c = arg[i]
				69	while string.find(string.letters, c) >= 0 or \
				70	string.find(string.digits, c) >= 0:
				71	i = i - 1
				72	if i < 0:
				73	break
				74	c = arg[i]
				75	name = arg[i+1:]
				76	while string.find(string.whitespace, c) >= 0:
				77	i = i - 1
				78	if i < 0:
				79	break
				80	c = arg[i]
				81	type = mormalizeTypeSpaces(arg[0:i+1], function)
				82	# print "list: %s -> %s, %s" % (list, type, name)
				83	ret.append((type, name))
				84	return ret
				85
				86	def extractTypes(raw, function):
				87	global ret_types
				88
				89	tokens = string.split(raw)
				90	type = ''
				91	for token in tokens:
				92	if type != '':
				93	type = type + ' ' + token
				94	else:
				95	type = token
				96	if ret_types.has_key(type):
				97	ret_types[type].append(function)
				98	else:
				99	ret_types[type] = [function]
				100	return type
				101
				102	def parseMacro():
				103	global input
				104	global macros
Daniel Veillard	2d1464f	2002-01-21 23:16:56 +0000	[diff] [blame^]	105	global variables
Daniel Veillard	6100647	2002-01-21 17:31:47 +0000	[diff] [blame]	106
Daniel Veillard	2d1464f	2002-01-21 23:16:56 +0000	[diff] [blame^]	107	var = 1
Daniel Veillard	6100647	2002-01-21 17:31:47 +0000	[diff] [blame]	108	line = input.readline()[:-1]
				109	while line != "</MACRO>":
				110	if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
				111	name = line[6:-7]
Daniel Veillard	2d1464f	2002-01-21 23:16:56 +0000	[diff] [blame^]	112	elif string.find(line, "#define") >= 0:
				113	var = 0
Daniel Veillard	6100647	2002-01-21 17:31:47 +0000	[diff] [blame]	114	line = input.readline()[:-1]
				115
Daniel Veillard	2d1464f	2002-01-21 23:16:56 +0000	[diff] [blame^]	116	if var == 1:
				117	variables[name] = ''
				118	identifiers_type[name] = "variable"
				119	else:
				120	macros[name] = ''
				121	identifiers_type[name] = "macro"
Daniel Veillard	6100647	2002-01-21 17:31:47 +0000	[diff] [blame]	122
				123	def parseStruct():
				124	global input
				125	global structs
				126
				127	line = input.readline()[:-1]
				128	while line != "</STRUCT>":
				129	if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
				130	name = line[6:-7]
				131	line = input.readline()[:-1]
				132
Daniel Veillard	2d1464f	2002-01-21 23:16:56 +0000	[diff] [blame^]	133	structs[name] = ''
Daniel Veillard	6100647	2002-01-21 17:31:47 +0000	[diff] [blame]	134	identifiers_type[name] = "struct"
				135
				136	def parseTypedef():
				137	global input
				138	global typedefs
				139
				140	line = input.readline()[:-1]
				141	while line != "</TYPEDEF>":
				142	if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
				143	name = line[6:-7]
				144	line = input.readline()[:-1]
				145
Daniel Veillard	2d1464f	2002-01-21 23:16:56 +0000	[diff] [blame^]	146	typedefs[name] = ''
Daniel Veillard	6100647	2002-01-21 17:31:47 +0000	[diff] [blame]	147	identifiers_type[name] = "typedef"
				148
				149	def parseEnum():
				150	global input
				151	global enums
				152
				153	line = input.readline()[:-1]
				154	consts = []
				155	while line != "</ENUM>":
				156	if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
				157	name = line[6:-7]
				158	elif string.find(line, 'enum') >= 0:
				159	pass
				160	elif string.find(line, '{') >= 0:
				161	pass
				162	elif string.find(line, '}') >= 0:
				163	pass
				164	elif string.find(line, ';') >= 0:
				165	pass
				166	else:
				167	comment = string.find(line, '/*')
				168	if comment >= 0:
				169	line = line[0:comment]
				170	decls = string.split(line, ",")
				171	for decl in decls:
				172	val = string.split(decl, "=")[0]
				173	tokens = string.split(val)
				174	if len(tokens) >= 1:
				175	token = tokens[0]
				176	if string.find(string.letters, token[0]) >= 0:
				177	consts.append(token)
				178	identifiers_type[token] = "const"
				179	line = input.readline()[:-1]
				180
Daniel Veillard	2d1464f	2002-01-21 23:16:56 +0000	[diff] [blame^]	181	enums[name] = [consts, '']
Daniel Veillard	6100647	2002-01-21 17:31:47 +0000	[diff] [blame]	182	identifiers_type[name] = "enum"
				183
				184	def parseStaticFunction():
				185	global input
Daniel Veillard	2d1464f	2002-01-21 23:16:56 +0000	[diff] [blame^]	186	global user_functions
Daniel Veillard	6100647	2002-01-21 17:31:47 +0000	[diff] [blame]	187
				188	line = input.readline()[:-1]
				189	type = None
				190	signature = None
				191	while line != "</USER_FUNCTION>":
				192	if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
				193	name = line[6:-7]
				194	elif line[0:9] == "<RETURNS>" and line[-10:] == "</RETURNS>":
				195	type = extractTypes(line[9:-10], name)
				196	else:
				197	signature = line
				198	line = input.readline()[:-1]
				199
				200	args = extractArgs(signature, name)
Daniel Veillard	2d1464f	2002-01-21 23:16:56 +0000	[diff] [blame^]	201	user_functions[name] = [type , args, '']
				202	identifiers_type[name] = "functype"
Daniel Veillard	6100647	2002-01-21 17:31:47 +0000	[diff] [blame]	203
				204	def parseFunction():
				205	global input
				206	global functions
				207
				208	line = input.readline()[:-1]
				209	type = None
				210	signature = None
				211	while line != "</FUNCTION>":
				212	if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
				213	name = line[6:-7]
				214	elif line[0:9] == "<RETURNS>" and line[-10:] == "</RETURNS>":
				215	type = extractTypes(line[9:-10], name)
				216	else:
				217	signature = line
				218	line = input.readline()[:-1]
				219
				220	args = extractArgs(signature, name)
Daniel Veillard	2d1464f	2002-01-21 23:16:56 +0000	[diff] [blame^]	221	functions[name] = [type , args, '']
Daniel Veillard	6100647	2002-01-21 17:31:47 +0000	[diff] [blame]	222	identifiers_type[name] = "function"
				223
# Read libxml-decl.txt: every opening tag on a line of its own hands the
# record over to the matching parse function, which reads from the same
# global `input` file up to the closing tag.
print("Parsing: libxml-decl.txt")
input = open('libxml-decl.txt')
try:
    handlers = {
        "<MACRO>": parseMacro,
        "<ENUM>": parseEnum,
        "<FUNCTION>": parseFunction,
        "<STRUCT>": parseStruct,
        "<TYPEDEF>": parseTypedef,
        "<USER_FUNCTION>": parseStaticFunction,
    }
    while 1:
        line = input.readline()
        if not line:
            break
        # strip the newline only if there is one, so an unterminated
        # last line keeps its final character
        if line[-1:] == '\n':
            line = line[:-1]
        handler = handlers.get(line)
        if handler is not None:
            handler()
        elif len(line) >= 1 and line[0] == "<":
            print("unhandled %s" % (line))
finally:
    # the handle used to be left open until `input` was rebound
    input.close()

print("Parsed: %d macros. %d structs, %d typedefs, %d enums" % (
    len(macros), len(structs), len(typedefs), len(enums)))
c = 0
for enum in enums.keys():
    consts = enums[enum][0]
    c = c + len(consts)
print(" %d variables, %d constants, %d functions and %d functypes" % (
    len(variables), c, len(functions), len(user_functions)))
print("The functions manipulates %d different types" % (len(types)))
print("The functions returns %d different types" % (len(ret_types)))
				258
Daniel Veillard	2d1464f	2002-01-21 23:16:56 +0000	[diff] [blame^]	259	##################################################################
				260	#
				261	# Parsing: libxml-decl-list.txt
				262	#
				263	##################################################################
				264	def parseSection():
				265	global input
				266	global sections
				267	global files
				268	global identifiers_file
				269
				270	tokens = []
				271	line = input.readline()[:-1]
				272	while line != "</SECTION>":
				273	if line[0:6] == "<FILE>" and line[-7:] == "</FILE>":
				274	name = line[6:-7]
				275	elif len(line) > 0:
				276	tokens.append(line)
				277	line = input.readline()[:-1]
				278
				279	sections.append(name)
				280	files[name] = tokens
				281	for token in tokens:
				282	identifiers_file[token] = name
				283	#
				284	# Small transitivity for enum values
				285	#
				286	if enums.has_key(token):
				287	for const in enums[token][0]:
				288	identifiers_file[const] = name
				289
# Read libxml-decl-list.txt: each <SECTION> record is handed to
# parseSection(), which reads from the same global `input` file.
print("Parsing: libxml-decl-list.txt")
input = open('libxml-decl-list.txt')
try:
    while 1:
        line = input.readline()
        if not line:
            break
        # strip the newline only if there is one, so an unterminated
        # last line keeps its final character
        if line[-1:] == '\n':
            line = line[:-1]
        if line == "<SECTION>":
            parseSection()
        elif len(line) >= 1 and line[0] == "<":
            print("unhandled %s" % (line))
finally:
    # the handle used to be left open for the rest of the run
    input.close()

print("Parsed: %d files %d identifiers" % (len(files), len(identifiers_file)))
Daniel Veillard	2d1464f	2002-01-21 23:16:56 +0000	[diff] [blame^]	303	##################################################################
				304	#
				305	# Parsing: xml/*.xml
				306	# To enrich the existing info with extracted comments
				307	#
				308	##################################################################
				309
				310	nbcomments = 0
				311
				312	def insertComment(name, title, value):
				313	global nbcomments
				314
				315	if functions.has_key(name):
				316	functions[name][2] = value
				317	elif typedefs.has_key(name):
				318	typedefs[name] = value
				319	elif macros.has_key(name):
				320	macros[name] = value
				321	elif variables.has_key(name):
				322	variables[name] = value
				323	elif structs.has_key(name):
				324	structs[name] = value
				325	elif enums.has_key(name):
				326	enums[name][1] = value
				327	elif user_functions.has_key(name):
				328	user_functions[name] = value
				329	else:
				330	print "lost comment %s: %s" % (name, value)
				331	return
				332	nbcomments = nbcomments + 1
				333
				334	import os
				335	import xmllib
				336	try:
				337	import sgmlop
				338	except ImportError:
				339	sgmlop = None # accelerator not available
				340
				341	debug = 0
				342
				343	if sgmlop:
				344	class FastParser:
				345	"""sgmlop based XML parser. this is typically 15x faster
				346	than SlowParser..."""
				347
				348	def __init__(self, target):
				349
				350	# setup callbacks
				351	self.finish_starttag = target.start
				352	self.finish_endtag = target.end
				353	self.handle_data = target.data
				354
				355	# activate parser
				356	self.parser = sgmlop.XMLParser()
				357	self.parser.register(self)
				358	self.feed = self.parser.feed
				359	self.entity = {
				360	"amp": "&", "gt": ">", "lt": "<",
				361	"apos": "'", "quot": '"'
				362	}
				363
				364	def close(self):
				365	try:
				366	self.parser.close()
				367	finally:
				368	self.parser = self.feed = None # nuke circular reference
				369
				370	def handle_entityref(self, entity):
				371	# <string> entity
				372	try:
				373	self.handle_data(self.entity[entity])
				374	except KeyError:
				375	self.handle_data("&%s;" % entity)
				376
				377	else:
				378	FastParser = None
				379
				380
				381	class SlowParser(xmllib.XMLParser):
				382	"""slow but safe standard parser, based on the XML parser in
				383	Python's standard library."""
				384
				385	def __init__(self, target):
				386	self.unknown_starttag = target.start
				387	self.handle_data = target.data
				388	self.unknown_endtag = target.end
				389	xmllib.XMLParser.__init__(self)
				390
				391	def getparser(target = None):
				392	# get the fastest available parser, and attach it to an
				393	# unmarshalling object. return both objects.
				394	if target == None:
				395	target = docParser()
				396	if FastParser:
				397	return FastParser(target), target
				398	return SlowParser(target), target
				399
				400	class docParser:
				401	def __init__(self):
				402	self._methodname = None
				403	self._data = []
				404	self.id = None
				405	self.title = None
				406	self.descr = None
				407	self.string = None
				408
				409	def close(self):
				410	if debug:
				411	print "close"
				412
				413	def getmethodname(self):
				414	return self._methodname
				415
				416	def data(self, text):
				417	if debug:
				418	print "data %s" % text
				419	self._data.append(text)
				420
				421	def start(self, tag, attrs):
				422	if debug:
				423	print "start %s, %s" % (tag, attrs)
				424	if tag == 'refsect2':
				425	self.id = None
				426	self.title = None
				427	self.descr = None
				428	self.string = None
				429	elif tag == 'para':
				430	self._data = []
				431	elif tag == 'title':
				432	self._data = []
				433	elif tag == 'anchor' and self.id == None:
				434	if attrs.has_key('id'):
				435	self.id = attrs['id']
				436	self.id = string.replace(self.id, '-CAPS', '')
				437	self.id = string.replace(self.id, '-', '_')
				438
				439	def end(self, tag):
				440	if debug:
				441	print "end %s" % tag
				442	if tag == 'refsect2':
				443	insertComment(self.id, self.title, self.string)
				444	elif tag == 'para':
				445	if self.string == None:
				446	str = ''
				447	for c in self._data:
				448	str = str + c
				449	str = string.replace(str, '\n', ' ')
				450	str = string.replace(str, '\r', ' ')
				451	str = string.replace(str, ' ', ' ')
				452	str = string.replace(str, ' ', ' ')
				453	str = string.replace(str, ' ', ' ')
				454	while len(str) >= 1 and str[0] == ' ':
				455	str=str[1:]
				456	self.string = str
				457	self._data = []
				458	elif tag == 'title':
				459	str = ''
				460	for c in self._data:
				461	str = str + c
				462	str = string.replace(str, '\n', ' ')
				463	str = string.replace(str, '\r', ' ')
				464	str = string.replace(str, ' ', ' ')
				465	str = string.replace(str, ' ', ' ')
				466	str = string.replace(str, ' ', ' ')
				467	while len(str) >= 1 and str[0] == ' ':
				468	str=str[1:]
				469	self.title = str
				470
				471	xmlfiles = 0
				472	filenames = os.listdir("xml")
				473	for filename in filenames:
				474	try:
				475	f = open("xml/" + filename, 'r')
				476	except IOError, msg:
				477	print file, ":", msg
				478	continue
				479	data = f.read()
				480	(parser, target) = getparser()
				481	parser.feed(data)
				482	parser.close()
				483	xmlfiles = xmlfiles + 1
				484
				485	print "Parsed: %d XML files collexting %d comments" % (xmlfiles, nbcomments)
				486
				487	##################################################################
				488	#
				489	# Saving: libxml2-api.xml
				490	#
				491	##################################################################
				492
				493	def escape(raw):
				494	raw = string.replace(raw, '<', '<')
				495	raw = string.replace(raw, '>', '>')
				496	return raw
Daniel Veillard	6100647	2002-01-21 17:31:47 +0000	[diff] [blame]	497
				498	print "Saving XML description libxml2-api.xml"
				499	output = open("libxml2-api.xml", "w")
				500	output.write("<api name='libxml2'>\n")
				501	output.write(" <files>\n")
				502	for file in files.keys():
				503	output.write(" <file name='%s'>\n" % file)
				504	for symbol in files[file]:
				505	output.write(" <exports symbol='%s'/>\n" % (symbol))
				506	output.write(" </file>\n")
				507	output.write(" </files>\n")
				508
				509	output.write(" <symbols>\n")
Daniel Veillard	2d1464f	2002-01-21 23:16:56 +0000	[diff] [blame^]	510	symbols=macros.keys()
				511	for i in structs.keys(): symbols.append(i)
				512	for i in variables.keys(): variables.append(i)
				513	for i in typedefs.keys(): symbols.append(i)
Daniel Veillard	6100647	2002-01-21 17:31:47 +0000	[diff] [blame]	514	for i in enums.keys():
				515	symbols.append(i)
Daniel Veillard	2d1464f	2002-01-21 23:16:56 +0000	[diff] [blame^]	516	for j in enums[i][0]:
Daniel Veillard	6100647	2002-01-21 17:31:47 +0000	[diff] [blame]	517	symbols.append(j)
				518	for i in functions.keys(): symbols.append(i)
Daniel Veillard	2d1464f	2002-01-21 23:16:56 +0000	[diff] [blame^]	519	for i in user_functions.keys(): symbols.append(i)
Daniel Veillard	6100647	2002-01-21 17:31:47 +0000	[diff] [blame]	520	symbols.sort()
				521	prev = None
				522	for i in symbols:
				523	if i == prev:
				524	# print "Symbol %s redefined" % (i)
				525	continue
				526	else:
				527	prev = i
				528	if identifiers_type.has_key(i):
				529	type = identifiers_type[i]
				530
				531	if identifiers_file.has_key(i):
				532	file = identifiers_file[i]
				533	else:
				534	file = None
				535
				536	output.write(" <%s name='%s'" % (type, i))
				537	if file != None:
				538	output.write(" file='%s'" % (file))
				539	if type == "function":
				540	output.write(">\n");
Daniel Veillard	2d1464f	2002-01-21 23:16:56 +0000	[diff] [blame^]	541	(ret, args, doc) = functions[i]
				542	if doc != None and doc != '':
				543	output.write(" <info>%s</info>\n" % (escape(doc)))
Daniel Veillard	6100647	2002-01-21 17:31:47 +0000	[diff] [blame]	544	output.write(" <return type='%s'/>\n" % (ret))
				545	for arg in args:
				546	output.write(" <arg name='%s' type='%s'/>\n" % (
				547	arg[1], arg[0]))
				548	output.write(" </%s>\n" % (type));
Daniel Veillard	2d1464f	2002-01-21 23:16:56 +0000	[diff] [blame^]	549	elif type == 'macro':
				550	if macros[i] != None and macros[i] != '':
				551	output.write(" info='%s'/>\n" % (escape(macros[i])))
				552	else:
				553	output.write("/>\n");
				554	elif type == 'struct':
				555	if structs[i] != None and structs[i] != '':
				556	output.write(" info='%s'/>\n" % (escape(structs[i])))
				557	else:
				558	output.write("/>\n");
				559	elif type == 'functype':
				560	if user_functions[i] != None and user_functions[i] != '':
				561	output.write(" info='%s'/>\n" % (escape(user_functions[i])))
				562	else:
				563	output.write("/>\n");
				564	elif type == 'variable':
				565	if variables[i] != None and variables[i] != '':
				566	output.write(" info='%s'/>\n" % (escape(variables[i])))
				567	else:
				568	output.write("/>\n");
				569	elif type == 'typedef':
				570	if typedefs[i] != None and typedefs[i] != '':
				571	output.write(" info='%s'/>\n" % (escape(typedefs[i])))
				572	else:
				573	output.write("/>\n");
Daniel Veillard	6100647	2002-01-21 17:31:47 +0000	[diff] [blame]	574	else:
				575	output.write("/>\n");
				576	else:
				577	print "Symbol %s not found in identifiers list" % (i)
				578	output.write(" </symbols>\n")
				579	output.write("</api>\n")
				580	print "generated XML for %d symbols" % (len(symbols))