blob: 4e32419bda4b0a0039532e825359f45bcd9bd377 [file] [log] [blame]
Daniel Veillard3371ff82002-10-01 13:37:48 +00001#!/usr/bin/python -u
2#
3# imports the API description and fills up a database with
4# name relevance to modules, functions or web pages
5#
Daniel Veillard2c77cd72002-10-01 13:54:14 +00006# Operation needed:
7# =================
8#
9# install mysqld, the python wrappers for mysql and libxml2, start mysqld
10# Change the root passwd of mysql:
11# mysqladmin -u root password new_password
12# Create the new database xmlsoft
13# mysqladmin -p create xmlsoft
# Create a database user 'veillard' and give him password access;
# replace veillard and abcde with the right user name and password
16# mysql -p
17# password:
18# mysql> GRANT ALL PRIVILEGES ON xmlsoft TO veillard@localhost
19# IDENTIFIED BY 'abcde' WITH GRANT OPTION;
20#
21# As the user check the access:
22# mysql -p xmlsoft
23# Enter password:
24# Welcome to the MySQL monitor....
25# mysql> use xmlsoft
26# Database changed
27# mysql> quit
28# Bye
29#
# Then run the script in the doc subdir; it will create the symbols and
# words tables, populate them with information extracted from
# the libxml2-api.xml API description, and make them accessible read-only
# to nobody@localhost, the user Apache is expected to run as
34#
35# On the Apache configuration, make sure you have php support enabled
36#
37
Daniel Veillard3371ff82002-10-01 13:37:48 +000038import MySQLdb
39import libxml2
40import sys
41import string
42import os
43
#
# The dictionary of required tables and the SQL commands needed
# to create them
#
# Maps the name of each required table to the SQL statement creating it.
TABLES = {}

# One row per API symbol: where it lives, what kind it is, short description.
TABLES["symbols"] = """CREATE TABLE symbols (
    name varchar(255) NOT NULL,
    module varchar(255) NOT NULL,
    type varchar(25) NOT NULL,
    descr varchar(255),
    UNIQUE KEY name (name),
    KEY module (module))"""

# One row per (word, symbol) association with its relevance score.
TABLES["words"] = """CREATE TABLE words (
    name varchar(50) NOT NULL,
    symbol varchar(255) NOT NULL,
    relevance int,
    KEY name (name),
    KEY symbol (symbol),
    UNIQUE KEY ID (name, symbol))"""

# Not filled by this script; checkTables() grants nobody@localhost
# INSERT/SELECT/UPDATE on it.
TABLES["Queries"] = """CREATE TABLE Queries (
    ID int(11) NOT NULL auto_increment,
    Value varchar(50) NOT NULL,
    Count int(11) NOT NULL,
    UNIQUE KEY id (ID,Value(35)),
    INDEX (ID))"""
70
71#
72# The XML API description file to parse
73#
# Path of the XML API description loaded by the main code below.
API = "libxml2-api.xml"
# Database connection shared by the update routines; set by openMySQL().
DB = None
76
77#########################################################################
78# #
79# MySQL database interfaces #
80# #
81#########################################################################
def createTable(db, name):
    """Drop and re-create one of the tables described in TABLES.

    db is an open database connection (any object providing cursor()),
    name is the table name, expected to be a key of TABLES.

    Returns the value returned by the cursor's execute() for the CREATE
    statement, or -1 when db or name is None, when name has no definition
    in TABLES, or when the CREATE statement fails.
    """
    global TABLES

    if db is None:
        return -1
    if name is None:
        return -1
    # Refuse unknown names before touching the database: the name is pasted
    # into the DROP statement (identifiers cannot be bound as parameters)
    # and dropping a table that cannot be re-created would only lose data.
    if name not in TABLES:
        print("Failed to create table %s" % (name))
        return -1
    c = db.cursor()

    ret = c.execute("DROP TABLE IF EXISTS %s" % (name))
    if ret == 1:
        print("Removed table %s" % (name))
    print("Creating table %s" % (name))
    try:
        ret = c.execute(TABLES[name])
    except Exception:
        print("Failed to create table %s" % (name))
        return -1
    return ret
101
def checkTables(db):
    """Create every table of TABLES that the database does not have yet.

    After the check, tries to give nobody@localhost read access to the
    xmlsoft tables and insert/update access to xmlsoft.Queries; a failure
    of those GRANT statements is reported but is not fatal.

    Returns 0, or -1 when db is None.
    """
    global TABLES

    if db is None:
        return -1
    c = db.cursor()
    c.execute("show tables")
    # Count the rows actually fetched instead of trusting the value
    # returned by execute(), so a short result set cannot yield None rows.
    rows = c.fetchall()
    print("Found %d tables" % (len(rows)))
    tables = {}
    for row in rows:
        tables[row[0]] = {}

    for table in TABLES:
        if table not in tables:
            print("table %s missing" % (table))
            createTable(db, table)
    print("checkTables finished")

    # make sure apache can access the tables read-only
    try:
        c.execute("GRANT SELECT ON xmlsoft.* TO nobody@localhost")
        c.execute("GRANT INSERT,SELECT,UPDATE ON xmlsoft.Queries TO nobody@localhost")
    except Exception:
        # Best effort only: the script's user may lack the GRANT privilege.
        print("Failed to grant access to nobody@localhost: %s" %
              (sys.exc_info()[1],))
    return 0
131
def openMySQL(db="xmlsoft", passwd=None):
    """Connect to the MySQL database and store the connection in DB.

    db is the database name. passwd is the password; when None it is read
    from the MySQL_PASS environment variable, and the script exits with
    status 1 if that variable is not set.

    Returns the result of checkTables() on the new connection, or -1 if
    the connect call returned None.
    """
    global DB

    if passwd is None:
        try:
            passwd = os.environ["MySQL_PASS"]
        except KeyError:
            print("No password available, set environment MySQL_PASS")
            sys.exit(1)

    DB = MySQLdb.connect(passwd=passwd, db=db)
    if DB is None:
        return -1
    ret = checkTables(DB)
    return ret
147
def updateWord(name, symbol, relevance):
    """Store the relevance of the word `name` for `symbol` in table words.

    Opens the database through openMySQL() if DB is not set yet. An INSERT
    is tried first; if it fails (presumably because the (name, symbol)
    pair already exists) the existing row is updated instead.

    Returns the value returned by the cursor's execute(), or -1 when no
    database is available, when name or symbol is None, or when both
    statements fail.
    """
    global DB

    if DB is None:
        openMySQL()
    if DB is None:
        return -1
    if name is None:
        return -1
    if symbol is None:
        return -1

    c = DB.cursor()
    # Values are bound by the driver rather than pasted into the SQL text,
    # so words containing quotes can neither break nor alter the statement.
    try:
        ret = c.execute(
            "INSERT INTO words (name, symbol, relevance) VALUES (%s, %s, %s)",
            (name, symbol, relevance))
    except Exception:
        try:
            ret = c.execute(
                "UPDATE words SET relevance = %s where name = %s and symbol = %s",
                (relevance, name, symbol))
        except Exception:
            err = sys.exc_info()
            print("Update word (%s, %s, %s) failed command" % (name, symbol, relevance))
            # %s (not %d) so that reporting a bad relevance cannot itself raise
            print("UPDATE words SET relevance = %s where name = '%s' and symbol = '%s'" % (relevance, name, symbol))
            print("%s %s" % (err[0], err[1]))
            return -1

    return ret
177
def updateSymbol(name, module, type, desc):
    """Store a symbol, its module, its type and a short description.

    The symbol name is first indexed as a word for itself with relevance
    50. The description is reduced to its first sentence (text before the
    first '.'), with single quotes replaced by spaces, cut to 99
    characters; a description that cannot be processed (e.g. None) is
    stored as "". An INSERT is tried first and an UPDATE of the existing
    row is the fallback.

    Returns the value returned by the cursor's execute(), or -1 when no
    database is available, when name, module or type is None, or when
    both statements fail.
    """
    global DB

    updateWord(name, name, 50)
    if DB is None:
        openMySQL()
    if DB is None:
        return -1
    if name is None:
        return -1
    if module is None:
        return -1
    if type is None:
        return -1

    try:
        # Quote replacement is kept so stored descriptions stay the same
        # as before, even though bound parameters no longer require it.
        desc = desc.replace("'", " ").split(".")[0][0:99]
    except (AttributeError, TypeError):
        desc = ""

    c = DB.cursor()
    # Values are bound by the driver rather than pasted into the SQL text.
    try:
        ret = c.execute(
            "INSERT INTO symbols (name, module, type, descr) VALUES (%s, %s, %s, %s)",
            (name, module, type, desc))
    except Exception:
        try:
            ret = c.execute(
                "UPDATE symbols SET module=%s, type=%s, descr=%s where name=%s",
                (module, type, desc, name))
        except Exception:
            err = sys.exc_info()
            print("Update symbol (%s, %s, %s) failed command" % (name, module, type))
            print("""UPDATE symbols SET module='%s', type='%s', descr='%s' where name='%s'""" % (module, type, desc, name))
            print("%s %s" % (err[0], err[1]))
            return -1

    return ret
218
def addFunction(name, module, desc = ""):
    """Register `name` from `module` as a symbol of type 'function'.

    Returns whatever updateSymbol() returns.
    """
    kind = 'function'
    return updateSymbol(name, module, kind, desc)
221
def addMacro(name, module, desc = ""):
    """Register `name` from `module` as a symbol of type 'macro'.

    Returns whatever updateSymbol() returns.
    """
    kind = 'macro'
    return updateSymbol(name, module, kind, desc)
224
def addEnum(name, module, desc = ""):
    """Register `name` from `module` as a symbol of type 'enum'.

    Returns whatever updateSymbol() returns.
    """
    kind = 'enum'
    return updateSymbol(name, module, kind, desc)
227
def addStruct(name, module, desc = ""):
    """Register `name` from `module` as a symbol of type 'struct'.

    Returns whatever updateSymbol() returns.
    """
    kind = 'struct'
    return updateSymbol(name, module, kind, desc)
230
def addConst(name, module, desc = ""):
    """Register `name` from `module` as a symbol of type 'const'.

    Returns whatever updateSymbol() returns.
    """
    kind = 'const'
    return updateSymbol(name, module, kind, desc)
233
def addType(name, module, desc = ""):
    """Register `name` from `module` as a symbol of type 'type'.

    Returns whatever updateSymbol() returns.
    """
    kind = 'type'
    return updateSymbol(name, module, kind, desc)
236
def addFunctype(name, module, desc = ""):
    """Register `name` from `module` as a symbol of type 'functype'.

    Returns whatever updateSymbol() returns.
    """
    kind = 'functype'
    return updateSymbol(name, module, kind, desc)
239
240#########################################################################
241# #
# Word dictionary and analysis routines #
243# #
244#########################################################################
245
# Maps each indexed word to a dictionary {(module, symbol): relevance};
# addWord() replaces that dictionary with None once a word has collected
# more than 500 references, and such words are skipped afterwards.
wordsDict = dict()
247
def splitIdentifier(str):
    """Split an identifier into its lower-cased component words.

    A word is one ASCII letter, followed by a run of upper-case letters,
    followed by a run of lower-case letters. Digits directly following a
    word are dropped, and any other character is skipped.

    Example: "xmlParseFile" gives ['xml', 'parse', 'file'].

    Returns the list of words (empty for an empty string).
    """
    words = []
    pos = 0
    end = len(str)
    # Scan by index instead of re-slicing the string for every character,
    # which copied the remaining text each time.
    while pos < end:
        first = str[pos]
        pos = pos + 1
        if not (('a' <= first <= 'z') or ('A' <= first <= 'Z')):
            continue
        start = pos - 1
        while pos < end and 'A' <= str[pos] <= 'Z':
            pos = pos + 1
        while pos < end and 'a' <= str[pos] <= 'z':
            pos = pos + 1
        words.append(str[start:pos].lower())
        # digits glued to the end of a word are not part of it
        while pos < end and '0' <= str[pos] <= '9':
            pos = pos + 1
    return words
265
def addWord(word, module, symbol, relevance):
    """Add `relevance` to the score of `word` for (module, symbol).

    Scores accumulate in wordsDict. A word that has gathered more than 500
    references is replaced by None there and ignored from then on.

    Returns the accumulated relevance for the pair, 0 when the word is
    being ignored, or -1 when word is None or shorter than 3 characters,
    or when module or symbol is None.
    """
    global wordsDict

    if word is None or len(word) < 3:
        return -1
    if module is None or symbol is None:
        return -1
    if word in wordsDict:
        refs = wordsDict[word]
        if refs is None:
            return 0
        if len(refs) > 500:
            wordsDict[word] = None
            return 0
        # a pair not seen yet simply starts from the given relevance
        relevance = relevance + refs.get((module, symbol), 0)
    else:
        wordsDict[word] = {}
    wordsDict[word][(module, symbol)] = relevance
    return relevance
288
def addString(str, module, symbol, relevance):
    """Index every word of a piece of text for (module, symbol).

    The characters . , ' " ; - are treated as separators, the text is then
    split on whitespace, and each word longer than 2 characters is passed
    to addWord() with the given relevance.

    Returns the sum of the addWord() results, or -1 when str is None or
    shorter than 3 characters.
    """
    if str is None or len(str) < 3:
        return -1
    ret = 0
    for separator in (".", ",", "'", '"', ";", "-"):
        str = str.replace(separator, " ")
    for word in str.split():
        if len(word) > 2:
            # honour the caller's relevance instead of a hard-coded 5
            ret = ret + addWord(word, module, symbol, relevance)

    return ret
305
306
307#########################################################################
308# #
309# XML API description analysis #
310# #
311#########################################################################
312
def loadAPI(filename):
    """Parse `filename` with libxml2.parseFile() and return the document.

    A "loaded ..." message is printed once the parse call has returned.
    """
    parsed = libxml2.parseFile(filename)
    print("loaded %s" % (filename))
    return parsed
317
def foundExport(file, symbol):
    """Record `symbol` as a function exported by the module `file`.

    The symbol is stored through addFunction() and every word of its
    identifier is indexed with relevance 10.

    Returns 1, or 0 when file or symbol is None.
    """
    if file is None or symbol is None:
        return 0
    addFunction(symbol, file)
    for piece in splitIdentifier(symbol):
        addWord(piece, file, symbol, 10)
    return 1
328
def analyzeAPIFile(top):
    """Process the children of one <file> element of the API description.

    Every <exports> child is handed to foundExport() together with the
    value of the file's "name" attribute; text nodes are skipped and any
    other element is reported.

    Returns the sum of the foundExport() results.
    """
    count = 0
    name = top.prop("name")
    cur = top.children
    while cur is not None:
        if cur.type != 'text':
            if cur.name == "exports":
                count = count + foundExport(name, cur.prop("symbol"))
            else:
                # both placeholders need a value: the offending element
                # first, then the name of the enclosing file
                print("unexpected element %s in API doc <file name='%s'>" % (cur.name, name))
        cur = cur.next
    return count
343
def analyzeAPIFiles(top):
    """Process the children of the <files> element of the API description.

    Each <file> child goes through analyzeAPIFile(); text nodes are
    skipped and any other element is reported.

    Returns the sum of the analyzeAPIFile() results.
    """
    total = 0
    node = top.children
    while node is not None:
        if node.type != 'text':
            if node.name == "file":
                total = total + analyzeAPIFile(node)
            else:
                print("unexpected element %s in API doc <files>" % (node.name))
        node = node.next
    return total
358
def analyzeAPIEnum(top):
    """Record one <enum> element: store the symbol and index its name.

    The module comes from the "file" attribute and the symbol from the
    "name" attribute; identifier words get relevance 10.

    Returns 1, or 0 when either attribute is missing.
    """
    module = top.prop("file")
    symbol = None
    if module is not None:
        symbol = top.prop("name")
    if symbol is None:
        return 0

    addEnum(symbol, module)
    for piece in splitIdentifier(symbol):
        addWord(piece, module, symbol, 10)

    return 1
373
def analyzeAPIConst(top):
    """Record one <const> element: store the symbol and index its name.

    The module comes from the "file" attribute and the symbol from the
    "name" attribute; identifier words get relevance 10.

    Returns 1, or 0 when either attribute is missing.
    """
    module = top.prop("file")
    symbol = None
    if module is not None:
        symbol = top.prop("name")
    if symbol is None:
        return 0

    addConst(symbol, module)
    for piece in splitIdentifier(symbol):
        addWord(piece, module, symbol, 10)

    return 1
388
def analyzeAPIType(top):
    """Record one <typedef> element: store the symbol and index its name.

    The module comes from the "file" attribute and the symbol from the
    "name" attribute; identifier words get relevance 10.

    Returns 1, or 0 when either attribute is missing.
    """
    module = top.prop("file")
    symbol = None
    if module is not None:
        symbol = top.prop("name")
    if symbol is None:
        return 0

    addType(symbol, module)
    for piece in splitIdentifier(symbol):
        addWord(piece, module, symbol, 10)
    return 1
402
def analyzeAPIFunctype(top):
    """Record one <functype> element: store the symbol and index its name.

    The module comes from the "file" attribute and the symbol from the
    "name" attribute; identifier words get relevance 10.

    Returns 1, or 0 when either attribute is missing.
    """
    module = top.prop("file")
    symbol = None
    if module is not None:
        symbol = top.prop("name")
    if symbol is None:
        return 0

    addFunctype(symbol, module)
    for piece in splitIdentifier(symbol):
        addWord(piece, module, symbol, 10)
    return 1
416
def analyzeAPIStruct(top):
    """Record one <struct> element and index its name and description.

    The module comes from the "file" attribute and the symbol from the
    "name" attribute. Identifier words get relevance 10; words of the
    optional "info" attribute get relevance 5.

    Returns 1, or 0 when the "file" or "name" attribute is missing.
    """
    file = top.prop("file")
    if file is None:
        return 0
    symbol = top.prop("name")
    if symbol is None:
        return 0

    addStruct(symbol, file)
    for word in splitIdentifier(symbol):
        addWord(word, file, symbol, 10)

    info = top.prop("info")
    if info is not None:
        # Go through addString() like the function descriptions do, so
        # that punctuation and quotes are stripped from the indexed words.
        addString(info, file, symbol, 5)
    return 1
437
def analyzeAPIMacro(top):
    """Record one <macro> element and index its name and description.

    The module comes from the "file" attribute and the symbol from the
    "name" attribute. The description is the content of the first <info>
    child. Identifier words get relevance 10, description words 5.

    Returns 1 when the macro has a description. Returns 0 when the "file"
    or "name" attribute is missing, and also when there is no <info>
    child (the macro is still stored in that case, without description).
    """
    file = top.prop("file")
    if file is None:
        return 0
    symbol = top.prop("name")
    if symbol is None:
        return 0

    info = None
    cur = top.children
    while cur is not None:
        if cur.type != 'text' and cur.name == "info":
            info = cur.content
            break
        cur = cur.next

    for word in splitIdentifier(symbol):
        addWord(word, file, symbol, 10)

    if info is None:
        addMacro(symbol, file)
        print("Macro %s description has no <info>" % (symbol))
        return 0

    addMacro(symbol, file, info)
    # Go through addString() like the function descriptions do, so that
    # punctuation and quotes are stripped from the indexed words.
    addString(info, file, symbol, 5)
    return 1
472
def analyzeAPIFunction(top):
    """Record one <function> element and index everything describing it.

    The module comes from the "file" attribute and the symbol from the
    "name" attribute. Indexed with the symbol are: the "info" attribute
    of <return> (relevance 7), the "info" attribute of each <arg>
    (relevance 5), each <arg> name (relevance 7), the content of <info>
    (relevance 5) and the words of the identifier (relevance 10).

    Returns 1, or 0 when the "file" or "name" attribute is missing.
    """
    file = top.prop("file")
    if file is None:
        return 0
    symbol = top.prop("name")
    if symbol is None:
        return 0

    info = None
    cur = top.children
    while cur is not None:
        if cur.type == 'text':
            cur = cur.next
            continue
        if cur.name == "info":
            info = cur.content
        elif cur.name == "return":
            rinfo = cur.prop("info")
            if rinfo is not None:
                addString(rinfo, file, symbol, 7)
        elif cur.name == "arg":
            ainfo = cur.prop("info")
            # test the argument's own description, not the one of <return>
            # (which may not even have been seen yet)
            if ainfo is not None:
                addString(ainfo, file, symbol, 5)
            name = cur.prop("name")
            if name is not None:
                addWord(name, file, symbol, 7)
        cur = cur.next
    if info is None:
        print("Function %s description has no <info>" % (symbol))
        addFunction(symbol, file, "")
    else:
        addFunction(symbol, file, info)
        addString(info, file, symbol, 5)

    for word in splitIdentifier(symbol):
        addWord(word, file, symbol, 10)

    return 1
513
def analyzeAPISymbols(top):
    """Process the children of the <symbols> element of the API description.

    Each child is dispatched on its element name to the matching
    analyzeAPI*() routine; text nodes are skipped and unknown elements
    are reported.

    Returns the sum of the values returned by those routines.
    """
    count = 0
    cur = top.children

    while cur is not None:
        if cur.type == 'text':
            cur = cur.next
            continue
        if cur.name == "macro":
            count = count + analyzeAPIMacro(cur)
        elif cur.name == "function":
            count = count + analyzeAPIFunction(cur)
        elif cur.name == "const":
            count = count + analyzeAPIConst(cur)
        elif cur.name == "typedef":
            count = count + analyzeAPIType(cur)
        elif cur.name == "struct":
            count = count + analyzeAPIStruct(cur)
        elif cur.name == "enum":
            count = count + analyzeAPIEnum(cur)
        elif cur.name == "functype":
            count = count + analyzeAPIFunctype(cur)
        else:
            # name the element really being scanned, i.e. <symbols>
            print("unexpected element %s in API doc <symbols>" % (cur.name))
        cur = cur.next
    return count
540
def analyzeAPI(doc):
    """Walk the children of the <api> root element of a parsed document.

    <symbols> children are processed by analyzeAPISymbols(). <files>
    children are accepted but ignored (the call to analyzeAPIFiles() is
    disabled). Text nodes are skipped and other elements are reported.

    Returns the sum of the analyzeAPISymbols() results, or -1 when doc is
    None or when the root element is not named "api".
    """
    if doc is None:
        return -1
    root = doc.getRootElement()
    if root.name != "api":
        print("Unexpected root name")
        return -1

    total = 0
    node = root.children
    while node is not None:
        if node.type != 'text':
            if node.name == "symbols":
                total = total + analyzeAPISymbols(node)
            elif node.name != "files":
                print("unexpected element %s in API doc" % (node.name))
        node = node.next
    return total
563
564#########################################################################
565# #
566# Main code: open the DB, the API XML and analyze it #
567# #
568#########################################################################
# Open the database; any failure is reported and ends the script.
try:
    openMySQL()
except Exception:
    err = sys.exc_info()
    print("Failed to open the database")
    print("%s %s" % (err[0], err[1]))
    sys.exit(1)

# Parse the API description; symbols are stored while it is analyzed,
# words are only accumulated in wordsDict.
try:
    doc = loadAPI(API)
    ret = analyzeAPI(doc)
    print("Analyzed %d blocs" % (ret))
    doc.freeDoc()
except Exception:
    err = sys.exc_info()
    print("Failed to parse and analyze %s" % (API))
    print("%s %s" % (err[0], err[1]))
    sys.exit(1)

# Store every accumulated (word, symbol) association; words set to None
# by addWord() (too many references) are skipped.
print("Indexed %d words" % (len(wordsDict)))
i = 0
skipped = 0
for word in wordsDict.keys():
    refs = wordsDict[word]
    if refs is None:
        skipped = skipped + 1
        continue
    for (module, symbol) in refs.keys():
        updateWord(word, symbol, refs[(module, symbol)])
        i = i + 1

# Make the inserts permanent when the tables are transactional; this is
# expected to be a no-op otherwise -- TODO confirm with the MySQLdb
# version in use.
if DB is not None:
    DB.commit()

print("Found %d associations, skipped %d words" % (i, skipped))