Blame - doc/index.py - fp2-dev/platform/external/libxml2

blob: f42e7b2f9c883f50487fd349167b284c7e621e7e [file] [log] [blame]

Daniel Veillard	3371ff8	2002-10-01 13:37:48 +0000	[diff] [blame]	1	#!/usr/bin/python -u
				2	#
				3	# imports the API description and fills up a database with
				4	# name relevance to modules, functions or web pages
				5	#
Daniel Veillard	2c77cd7	2002-10-01 13:54:14 +0000	[diff] [blame^]	6	# Operation needed:
				7	# =================
				8	#
				9	# install mysqld, the python wrappers for mysql and libxml2, start mysqld
				10	# Change the root passwd of mysql:
				11	# mysqladmin -u root password new_password
				12	# Create the new database xmlsoft
				13	# mysqladmin -p create xmlsoft
				14	# Create a database user 'veillard' and give him password access
				15	# change veillard and abcde with the right user name and passwd
				16	# mysql -p
				17	# password:
				18	# mysql> GRANT ALL PRIVILEGES ON xmlsoft TO veillard@localhost
				19	# IDENTIFIED BY 'abcde' WITH GRANT OPTION;
				20	#
				21	# As the user check the access:
				22	# mysql -p xmlsoft
				23	# Enter password:
				24	# Welcome to the MySQL monitor....
				25	# mysql> use xmlsoft
				26	# Database changed
				27	# mysql> quit
				28	# Bye
				29	#
				30	# Then run the script in the doc subdir; it will create the symbols and
				31	# words tables and populate them with information extracted from
				32	# the libxml2-api.xml API description, and make them accessible read-only
				33	# to nobody@localhost, the user Apache is expected to run as
				34	#
				35	# On the Apache configuration, make sure you have php support enabled
				36	#
				37
Daniel Veillard	3371ff8	2002-10-01 13:37:48 +0000	[diff] [blame]	38	import MySQLdb
				39	import libxml2
				40	import sys
				41	import string
				42	import os
				43
#
# The dictionary of tables required and the SQL command needed
# to create them. Keys are table names, values are the CREATE TABLE
# statements executed when a table has to be (re)built.
#
TABLES={
# symbols: one row per API symbol (unique on name), also indexed by module
"symbols" : """CREATE TABLE symbols (
name varchar(255) NOT NULL,
module varchar(255) NOT NULL,
type varchar(25) NOT NULL,
descr varchar(255),
UNIQUE KEY name (name),
KEY module (module))""",
# words: relevance of a word for a symbol, unique per (name, symbol) pair
"words" : """CREATE TABLE words (
name varchar(50) NOT NULL,
symbol varchar(255) NOT NULL,
relevance int,
KEY name (name),
KEY symbol (symbol),
UNIQUE KEY ID (name, symbol))""",
}
				64
#
# The XML API description file to parse
#
API="libxml2-api.xml"
# Shared database connection; stays None until openMySQL() assigns it
DB=None
				70
				71	#########################################################################
				72	# #
				73	# MySQL database interfaces #
				74	# #
				75	#########################################################################
				76	def createTable(db, name):
				77	global TABLES
				78
				79	if db == None:
				80	return -1
				81	if name == None:
				82	return -1
				83	c = db.cursor()
				84
				85	ret = c.execute("DROP TABLE IF EXISTS %s" % (name))
				86	if ret == 1:
				87	print "Removed table %s" % (name)
				88	print "Creating table %s" % (name)
				89	try:
				90	ret = c.execute(TABLES[name])
				91	except:
				92	print "Failed to create table %s" % (name)
				93	return -1
				94	return ret
				95
				96	def checkTables(db):
				97	global TABLES
				98
				99	if db == None:
				100	return -1
				101	c = db.cursor()
				102	nbtables = c.execute("show tables")
				103	print "Found %d tables" % (nbtables)
				104	tables = {}
				105	i = 0
				106	while i < nbtables:
				107	l = c.fetchone()
				108	name = l[0]
				109	tables[name] = {}
				110	i = i + 1
				111
				112	for table in TABLES.keys():
				113	if not tables.has_key(table):
				114	print "table %s missing" % (table)
				115	createTable(db, table)
				116	print "checkTables finished"
				117
				118	# make sure apache can access the tables read-only
				119	try:
				120	ret = c.execute("GRANT SELECT ON xmlsoft.* TO nobody@localhost")
				121	except:
				122	pass
				123	return 0
				124
				125	def openMySQL(db="xmlsoft", passwd=None):
				126	global DB
				127
				128	if passwd == None:
				129	passwd = "ducon"
				130	DB = MySQLdb.connect(passwd=passwd, db=db)
				131	if DB == None:
				132	return -1
				133	ret = checkTables(DB)
				134	return ret
				135
				136	def updateWord(name, symbol, relevance):
				137	global DB
				138
				139	if DB == None:
				140	openMySQL()
				141	if DB == None:
				142	return -1
				143	if name == None:
				144	return -1
				145	if symbol == None:
				146	return -1
				147
				148	c = DB.cursor()
				149	try:
				150	ret = c.execute(
				151	"""INSERT INTO words (name, symbol, relevance) VALUES ('%s','%s', %d)""" %
				152	(name, symbol, relevance))
				153	except:
				154	try:
				155	ret = c.execute(
				156	"""UPDATE words SET relevance = %d where name = '%s' and symbol = '%s'""" %
				157	(relevance, name, symbol))
				158	except:
				159	print "Update word (%s, %s, %s) failed command" % (name, symbol, relevance)
				160	print "UPDATE words SET relevance = %d where name = '%s' and symbol = '%s'" % (relevance, name, symbol)
				161	print sys.exc_type, sys.exc_value
				162	return -1
				163
				164	return ret
				165
				166	def updateSymbol(name, module, type, desc):
				167	global DB
				168
				169	updateWord(name, name, 50)
				170	if DB == None:
				171	openMySQL()
				172	if DB == None:
				173	return -1
				174	if name == None:
				175	return -1
				176	if module == None:
				177	return -1
				178	if type == None:
				179	return -1
				180
				181	try:
				182	desc = string.replace(desc, "'", " ")
				183	l = string.split(desc, ".")
				184	desc = l[0]
				185	desc = desc[0:99]
				186	except:
				187	desc = ""
				188
				189	c = DB.cursor()
				190	try:
				191	ret = c.execute(
				192	"""INSERT INTO symbols (name, module, type, descr) VALUES ('%s','%s', '%s', '%s')""" %
				193	(name, module, type, desc))
				194	except:
				195	try:
				196	ret = c.execute(
				197	"""UPDATE symbols SET module='%s', type='%s', descr='%s' where name='%s'""" %
				198	(module, type, desc, name))
				199	except:
				200	print "Update symbol (%s, %s, %s) failed command" % (name, module, type)
				201	print """UPDATE symbols SET module='%s', type='%s', descr='%s' where name='%s'""" % (module, type, desc, name)
				202	print sys.exc_type, sys.exc_value
				203	return -1
				204
				205	return ret
				206
				207	def addFunction(name, module, desc = ""):
				208	return updateSymbol(name, module, 'function', desc)
				209
				210	def addMacro(name, module, desc = ""):
				211	return updateSymbol(name, module, 'macro', desc)
				212
				213	def addEnum(name, module, desc = ""):
				214	return updateSymbol(name, module, 'enum', desc)
				215
				216	def addStruct(name, module, desc = ""):
				217	return updateSymbol(name, module, 'struct', desc)
				218
				219	def addConst(name, module, desc = ""):
				220	return updateSymbol(name, module, 'const', desc)
				221
				222	def addType(name, module, desc = ""):
				223	return updateSymbol(name, module, 'type', desc)
				224
				225	def addFunctype(name, module, desc = ""):
				226	return updateSymbol(name, module, 'functype', desc)
				227
				228	#########################################################################
				229	# #
				230	# Word dictionary and analysis routines #
				231	# #
				232	#########################################################################
				233
# Maps each indexed word to a dictionary {(module, symbol): relevance}.
# addWord() replaces that dictionary with None when it finds more than
# 500 entries for a word; the final export loop skips such words.
wordsDict = {}
				235
				236	def splitIdentifier(str):
				237	ret = []
				238	while str != "":
				239	cur = string.lower(str[0])
				240	str = str[1:]
				241	if ((cur < 'a') or (cur > 'z')):
				242	continue
				243	while (str != "") and (str[0] >= 'A') and (str[0] <= 'Z'):
				244	cur = cur + string.lower(str[0])
				245	str = str[1:]
				246	while (str != "") and (str[0] >= 'a') and (str[0] <= 'z'):
				247	cur = cur + str[0]
				248	str = str[1:]
				249	while (str != "") and (str[0] >= '0') and (str[0] <= '9'):
				250	str = str[1:]
				251	ret.append(cur)
				252	return ret
				253
				254	def addWord(word, module, symbol, relevance):
				255	global wordsDict
				256
				257	if word == None or len(word) < 3:
				258	return -1
				259	if module == None or symbol == None:
				260	return -1
				261	if wordsDict.has_key(word):
				262	d = wordsDict[word]
				263	if d == None:
				264	return 0
				265	if len(d) > 500:
				266	wordsDict[word] = None
				267	return 0
				268	try:
				269	relevance = relevance + d[(module, symbol)]
				270	except:
				271	pass
				272	else:
				273	wordsDict[word] = {}
				274	wordsDict[word][(module, symbol)] = relevance
				275	return relevance
				276
				277	def addString(str, module, symbol, relevance):
				278	if str == None or len(str) < 3:
				279	return -1
				280	ret = 0
				281	str = string.replace(str, ".", " ")
				282	str = string.replace(str, ",", " ")
				283	str = string.replace(str, "'", " ")
				284	str = string.replace(str, '"', " ")
				285	str = string.replace(str, ";", " ")
				286	str = string.replace(str, "-", " ")
				287	l = string.split(str)
				288	for word in l:
				289	if len(word) > 2:
				290	ret = ret + addWord(word, module, symbol, 5)
				291
				292	return ret
				293
				294
				295	#########################################################################
				296	# #
				297	# XML API description analysis #
				298	# #
				299	#########################################################################
				300
				301	def loadAPI(filename):
				302	doc = libxml2.parseFile(filename)
				303	print "loaded %s" % (filename)
				304	return doc
				305
				306	def foundExport(file, symbol):
				307	if file == None:
				308	return 0
				309	if symbol == None:
				310	return 0
				311	addFunction(symbol, file)
				312	l = splitIdentifier(symbol)
				313	for word in l:
				314	addWord(word, file, symbol, 10)
				315	return 1
				316
				317	def analyzeAPIFile(top):
				318	count = 0
				319	name = top.prop("name")
				320	cur = top.children
				321	while cur != None:
				322	if cur.type == 'text':
				323	cur = cur.next
				324	continue
				325	if cur.name == "exports":
				326	count = count + foundExport(name, cur.prop("symbol"))
				327	else:
				328	print "unexpected element %s in API doc <file name='%s'>" % (name)
				329	cur = cur.next
				330	return count
				331
				332	def analyzeAPIFiles(top):
				333	count = 0
				334	cur = top.children
				335
				336	while cur != None:
				337	if cur.type == 'text':
				338	cur = cur.next
				339	continue
				340	if cur.name == "file":
				341	count = count + analyzeAPIFile(cur)
				342	else:
				343	print "unexpected element %s in API doc <files>" % (cur.name)
				344	cur = cur.next
				345	return count
				346
				347	def analyzeAPIEnum(top):
				348	file = top.prop("file")
				349	if file == None:
				350	return 0
				351	symbol = top.prop("name")
				352	if symbol == None:
				353	return 0
				354
				355	addEnum(symbol, file)
				356	l = splitIdentifier(symbol)
				357	for word in l:
				358	addWord(word, file, symbol, 10)
				359
				360	return 1
				361
				362	def analyzeAPIConst(top):
				363	file = top.prop("file")
				364	if file == None:
				365	return 0
				366	symbol = top.prop("name")
				367	if symbol == None:
				368	return 0
				369
				370	addConst(symbol, file)
				371	l = splitIdentifier(symbol)
				372	for word in l:
				373	addWord(word, file, symbol, 10)
				374
				375	return 1
				376
				377	def analyzeAPIType(top):
				378	file = top.prop("file")
				379	if file == None:
				380	return 0
				381	symbol = top.prop("name")
				382	if symbol == None:
				383	return 0
				384
				385	addType(symbol, file)
				386	l = splitIdentifier(symbol)
				387	for word in l:
				388	addWord(word, file, symbol, 10)
				389	return 1
				390
				391	def analyzeAPIFunctype(top):
				392	file = top.prop("file")
				393	if file == None:
				394	return 0
				395	symbol = top.prop("name")
				396	if symbol == None:
				397	return 0
				398
				399	addFunctype(symbol, file)
				400	l = splitIdentifier(symbol)
				401	for word in l:
				402	addWord(word, file, symbol, 10)
				403	return 1
				404
				405	def analyzeAPIStruct(top):
				406	file = top.prop("file")
				407	if file == None:
				408	return 0
				409	symbol = top.prop("name")
				410	if symbol == None:
				411	return 0
				412
				413	addStruct(symbol, file)
				414	l = splitIdentifier(symbol)
				415	for word in l:
				416	addWord(word, file, symbol, 10)
				417
				418	info = top.prop("info")
				419	if info != None:
				420	l = string.split(info)
				421	for word in l:
				422	if len(word) > 2:
				423	addWord(word, file, symbol, 5)
				424	return 1
				425
				426	def analyzeAPIMacro(top):
				427	file = top.prop("file")
				428	if file == None:
				429	return 0
				430	symbol = top.prop("name")
				431	if symbol == None:
				432	return 0
				433
				434	info = None
				435	cur = top.children
				436	while cur != None:
				437	if cur.type == 'text':
				438	cur = cur.next
				439	continue
				440	if cur.name == "info":
				441	info = cur.content
				442	break
				443	cur = cur.next
				444
				445	l = splitIdentifier(symbol)
				446	for word in l:
				447	addWord(word, file, symbol, 10)
				448
				449	if info == None:
				450	addMacro(symbol, file)
				451	print "Macro %s description has no <info>" % (symbol)
				452	return 0
				453
				454	addMacro(symbol, file, info)
				455	l = string.split(info)
				456	for word in l:
				457	if len(word) > 2:
				458	addWord(word, file, symbol, 5)
				459	return 1
				460
				461	def analyzeAPIFunction(top):
				462	file = top.prop("file")
				463	if file == None:
				464	return 0
				465	symbol = top.prop("name")
				466	if symbol == None:
				467	return 0
				468
				469	info = None
				470	cur = top.children
				471	while cur != None:
				472	if cur.type == 'text':
				473	cur = cur.next
				474	continue
				475	if cur.name == "info":
				476	info = cur.content
				477	elif cur.name == "return":
				478	rinfo = cur.prop("info")
				479	if rinfo != None:
				480	addString(rinfo, file, symbol, 7)
				481	elif cur.name == "arg":
				482	ainfo = cur.prop("info")
				483	if rinfo != None:
				484	addString(ainfo, file, symbol, 5)
				485	name = cur.prop("name")
				486	if name != None:
				487	addWord(name, file, symbol, 7)
				488	cur = cur.next
				489	if info == None:
				490	print "Function %s description has no <info>" % (symbol)
				491	addFunction(symbol, file, "")
				492	else:
				493	addFunction(symbol, file, info)
				494	addString(info, file, symbol, 5)
				495
				496	l = splitIdentifier(symbol)
				497	for word in l:
				498	addWord(word, file, symbol, 10)
				499
				500	return 1
				501
				502	def analyzeAPISymbols(top):
				503	count = 0
				504	cur = top.children
				505
				506	while cur != None:
				507	if cur.type == 'text':
				508	cur = cur.next
				509	continue
				510	if cur.name == "macro":
				511	count = count + analyzeAPIMacro(cur)
				512	elif cur.name == "function":
				513	count = count + analyzeAPIFunction(cur)
				514	elif cur.name == "const":
				515	count = count + analyzeAPIConst(cur)
				516	elif cur.name == "typedef":
				517	count = count + analyzeAPIType(cur)
				518	elif cur.name == "struct":
				519	count = count + analyzeAPIStruct(cur)
				520	elif cur.name == "enum":
				521	count = count + analyzeAPIEnum(cur)
				522	elif cur.name == "functype":
				523	count = count + analyzeAPIFunctype(cur)
				524	else:
				525	print "unexpected element %s in API doc <files>" % (cur.name)
				526	cur = cur.next
				527	return count
				528
				529	def analyzeAPI(doc):
				530	count = 0
				531	if doc == None:
				532	return -1
				533	root = doc.getRootElement()
				534	if root.name != "api":
				535	print "Unexpected root name"
				536	return -1
				537	cur = root.children
				538	while cur != None:
				539	if cur.type == 'text':
				540	cur = cur.next
				541	continue
				542	if cur.name == "files":
				543	pass
				544	# count = count + analyzeAPIFiles(cur)
				545	elif cur.name == "symbols":
				546	count = count + analyzeAPISymbols(cur)
				547	else:
				548	print "unexpected element %s in API doc" % (cur.name)
				549	cur = cur.next
				550	return count
				551
				552	#########################################################################
				553	# #
				554	# Main code: open the DB, the API XML and analyze it #
				555	# #
				556	#########################################################################
				557	try:
				558	openMySQL()
				559	except:
				560	print "Failed to open the database"
				561	print sys.exc_type, sys.exc_value
				562	sys.exit(1)
				563
				564	try:
				565	doc = loadAPI(API)
				566	ret = analyzeAPI(doc)
				567	print "Analyzed %d blocs" % (ret)
				568	doc.freeDoc()
				569	except:
				570	print "Failed to parse and analyze %s" % (API)
				571	print sys.exc_type, sys.exc_value
				572	sys.exit(1)
				573
				574	print "Indexed %d words" % (len(wordsDict))
				575	i = 0
				576	skipped = 0
				577	for word in wordsDict.keys():
				578	refs = wordsDict[word]
				579	if refs == None:
				580	skipped = skipped + 1
				581	continue;
				582	for (module, symbol) in refs.keys():
				583	updateWord(word, symbol, refs[(module, symbol)])
				584	i = i + 1
				585
				586	print "Found %d associations, skipped %d words" % (i, skipped)