blob: f42e7b2f9c883f50487fd349167b284c7e621e7e [file] [log] [blame]
#!/usr/bin/python -u
#
# imports the API description and fills up a database with
# name relevance to modules, functions or web pages
#
# Operation needed:
# =================
#
# install mysqld, the python wrappers for mysql and libxml2, start mysqld
# Change the root passwd of mysql:
#    mysqladmin -u root password new_password
# Create the new database xmlsoft
#    mysqladmin -p create xmlsoft
# Create a database user 'veillard' and give him password access
# change veillard and abcde with the right user name and passwd
#    mysql -p
#    password:
#    mysql> GRANT ALL PRIVILEGES ON xmlsoft TO veillard@localhost
#           IDENTIFIED BY 'abcde' WITH GRANT OPTION;
20#
21# As the user check the access:
22# mysql -p xmlsoft
23# Enter password:
24# Welcome to the MySQL monitor....
25# mysql> use xmlsoft
26# Database changed
27# mysql> quit
28# Bye
29#
# Then run the script in the doc subdir, it will create the symbols and
# words tables and populate them with information extracted from
# the libxml2-api.xml API description, and make them accessible read-only
# by nobody@localhost, the user Apache is expected to run as
#
# On the Apache configuration, make sure you have php support enabled
36#
37
Daniel Veillard3371ff82002-10-01 13:37:48 +000038import MySQLdb
39import libxml2
40import sys
41import string
42import os
43
#
# The dictionary of tables required and the SQL command needed
# to create them.  Keys are table names; createTable() executes the
# matching statement and checkTables() creates every table listed here
# that "show tables" does not report.
#
TABLES={
  # one row per API symbol; 'name' is unique
  "symbols" : """CREATE TABLE symbols (
 name varchar(255) NOT NULL,
 module varchar(255) NOT NULL,
 type varchar(25) NOT NULL,
 descr varchar(255),
 UNIQUE KEY name (name),
 KEY module (module))""",
  # one row per (word, symbol) association; the pair is unique
  "words" : """CREATE TABLE words (
 name varchar(50) NOT NULL,
 symbol varchar(255) NOT NULL,
 relevance int,
 KEY name (name),
 KEY symbol (symbol),
 UNIQUE KEY ID (name, symbol))""",
}
64
#
# The XML API description file to parse
#
API="libxml2-api.xml"
# Database connection shared by the helpers below; it stays None until
# openMySQL() stores the result of MySQLdb.connect() in it.
DB=None
70
71#########################################################################
72# #
73# MySQL database interfaces #
74# #
75#########################################################################
def createTable(db, name):
    """(Re)create one of the tables described in TABLES.

    Any existing table called `name` is dropped, then the CREATE
    statement registered for it in TABLES is executed.

    db   -- an open database connection (provides cursor())
    name -- key of the table in TABLES

    Returns the value of the CREATE execute() call, or -1 when an
    argument is None, the table is not described in TABLES, or the
    creation fails.
    """
    if db is None or name is None:
        return -1
    # Look the statement up before touching the database: dropping first
    # and only then discovering there is no CREATE statement would lose
    # the table.  It also keeps arbitrary text out of the DROP statement,
    # whose identifier cannot be passed as a query parameter.
    if name not in TABLES:
        print("Failed to create table %s" % (name))
        return -1
    statement = TABLES[name]
    c = db.cursor()

    ret = c.execute("DROP TABLE IF EXISTS %s" % (name))
    if ret == 1:
        print("Removed table %s" % (name))
    print("Creating table %s" % (name))
    try:
        ret = c.execute(statement)
    except (MySQLdb.Error, MySQLdb.Warning):
        print("Failed to create table %s" % (name))
        return -1
    return ret
95
def checkTables(db):
    """Make sure every table listed in TABLES exists in the database.

    Lists the existing tables with "show tables", calls createTable()
    for each entry of TABLES that is missing, then tries to grant
    read-only access on xmlsoft.* to nobody@localhost.

    db -- an open database connection (provides cursor())

    Returns 0, or -1 when `db` is None.
    """
    if db is None:
        return -1
    c = db.cursor()
    nbtables = c.execute("show tables")
    print("Found %d tables" % (nbtables))
    # "show tables" yields one row per table, the name in first column
    existing = [row[0] for row in c.fetchall()]

    for table in TABLES.keys():
        if table not in existing:
            print("table %s missing" % (table))
            createTable(db, table)
    print("checkTables finished")

    # make sure apache can access the tables read-only
    # NOTE(review): the database name is hard-coded here although
    # openMySQL() accepts another one -- confirm this is intended.
    try:
        c.execute("GRANT SELECT ON xmlsoft.* TO nobody@localhost")
    except (MySQLdb.Error, MySQLdb.Warning):
        # Still best effort (the connecting user may lack GRANT OPTION),
        # but say so instead of hiding the failure.
        print("Failed to grant read-only access to nobody@localhost: %s" %
              (sys.exc_info()[1],))
    return 0
124
def openMySQL(db="xmlsoft", passwd=None):
    """Connect to MySQL and check that the expected tables exist.

    The connection returned by MySQLdb.connect() is stored in the
    global DB.

    db     -- name of the database to open
    passwd -- password to use; a built-in one is used when None

    Returns the result of checkTables(), or -1 if no connection object
    was obtained.
    """
    global DB

    # NOTE(review): the fallback password is hard-coded in the source
    if passwd is None:
        secret = "ducon"
    else:
        secret = passwd
    DB = MySQLdb.connect(passwd=secret, db=db)
    if DB is not None:
        return checkTables(DB)
    return -1
135
def updateWord(name, symbol, relevance):
    """Store the relevance of word `name` for `symbol` in the words table.

    The database is opened with openMySQL() if it is not open yet.  An
    INSERT is attempted first; when it fails (typically because the
    (name, symbol) pair already exists) the row is UPDATEd instead.

    name      -- the word
    symbol    -- the symbol the word is associated with
    relevance -- integer weight of the association

    Returns the value of the successful execute() call, or -1 when the
    database is unavailable, an argument is None or both statements fail.
    """
    global DB

    if DB is None:
        openMySQL()
        if DB is None:
            return -1
    if name is None or symbol is None:
        return -1

    c = DB.cursor()
    # Values are handed to the driver as parameters so that quotes or
    # backslashes in a word cannot break (or alter) the statement.
    try:
        ret = c.execute(
            "INSERT INTO words (name, symbol, relevance) VALUES (%s, %s, %s)",
            (name, symbol, relevance))
    except (MySQLdb.Error, MySQLdb.Warning):
        try:
            ret = c.execute(
                "UPDATE words SET relevance = %s where name = %s and symbol = %s",
                (relevance, name, symbol))
        except (MySQLdb.Error, MySQLdb.Warning):
            exc_type, exc_value = sys.exc_info()[:2]
            print("Update word (%s, %s, %s) failed command" %
                  (name, symbol, relevance))
            print("UPDATE words SET relevance = %s where name = '%s' and symbol = '%s'" %
                  (relevance, name, symbol))
            print("%s %s" % (exc_type, exc_value))
            return -1

    return ret
165
def updateSymbol(name, module, type, desc):
    """Store symbol `name` in the symbols table.

    The symbol name is first indexed as a word for itself with a
    relevance of 50.  The database is opened with openMySQL() if needed.
    An INSERT is attempted first; when it fails (typically because the
    symbol already exists) the row is UPDATEd instead.

    name   -- the symbol name
    module -- the module providing it
    type   -- the kind of symbol ('function', 'macro', ...)
    desc   -- description; only its first sentence, cut to 99
              characters, is stored

    Returns the value of the successful execute() call, or -1 when the
    database is unavailable, name/module/type is None or both
    statements fail.
    """
    global DB

    updateWord(name, name, 50)
    if DB is None:
        openMySQL()
        if DB is None:
            return -1
    if name is None or module is None or type is None:
        return -1

    # Keep the text up to the first '.', at most 99 characters.  The
    # quote replacement dates from the string-built SQL; it is kept so
    # the stored descriptions do not change.
    try:
        desc = desc.replace("'", " ").split(".")[0][:99]
    except AttributeError:
        # desc is None (or not a string at all): store an empty text
        desc = ""

    c = DB.cursor()
    # Values are handed to the driver as parameters so that quotes or
    # backslashes cannot break (or alter) the statement.
    try:
        ret = c.execute(
            "INSERT INTO symbols (name, module, type, descr) VALUES (%s, %s, %s, %s)",
            (name, module, type, desc))
    except (MySQLdb.Error, MySQLdb.Warning):
        try:
            ret = c.execute(
                "UPDATE symbols SET module=%s, type=%s, descr=%s where name=%s",
                (module, type, desc, name))
        except (MySQLdb.Error, MySQLdb.Warning):
            exc_type, exc_value = sys.exc_info()[:2]
            print("Update symbol (%s, %s, %s) failed command" %
                  (name, module, type))
            print("UPDATE symbols SET module='%s', type='%s', descr='%s' where name='%s'" %
                  (module, type, desc, name))
            print("%s %s" % (exc_type, exc_value))
            return -1

    return ret
206
def addFunction(name, module, desc = ""):
    """Record `name` from `module` as a symbol of type 'function'.

    Returns whatever updateSymbol() returns.
    """
    kind = 'function'
    return updateSymbol(name, module, kind, desc)
209
def addMacro(name, module, desc = ""):
    """Record `name` from `module` as a symbol of type 'macro'.

    Returns whatever updateSymbol() returns.
    """
    kind = 'macro'
    return updateSymbol(name, module, kind, desc)
212
def addEnum(name, module, desc = ""):
    """Record `name` from `module` as a symbol of type 'enum'.

    Returns whatever updateSymbol() returns.
    """
    kind = 'enum'
    return updateSymbol(name, module, kind, desc)
215
def addStruct(name, module, desc = ""):
    """Record `name` from `module` as a symbol of type 'struct'.

    Returns whatever updateSymbol() returns.
    """
    kind = 'struct'
    return updateSymbol(name, module, kind, desc)
218
def addConst(name, module, desc = ""):
    """Record `name` from `module` as a symbol of type 'const'.

    Returns whatever updateSymbol() returns.
    """
    kind = 'const'
    return updateSymbol(name, module, kind, desc)
221
def addType(name, module, desc = ""):
    """Record `name` from `module` as a symbol of type 'type'.

    Returns whatever updateSymbol() returns.
    """
    kind = 'type'
    return updateSymbol(name, module, kind, desc)
224
def addFunctype(name, module, desc = ""):
    """Record `name` from `module` as a symbol of type 'functype'.

    Returns whatever updateSymbol() returns.
    """
    kind = 'functype'
    return updateSymbol(name, module, kind, desc)
227
#########################################################################
#                                                                       #
#                Word dictionary and analysis routines                  #
#                                                                       #
#########################################################################

# Maps each indexed word to a dictionary {(module, symbol): relevance}.
# addWord() replaces that dictionary by None once a word has more than
# 500 entries; such words are counted as skipped by the main code.
wordsDict = {}
235
def splitIdentifier(str):
    """Cut an identifier into its lower-cased component words.

    A word starts at any ASCII letter and extends over the run of upper
    case letters that follows it, then over the run of lower case
    letters after that.  Digits right after a word are dropped and any
    other character is skipped, e.g. "xmlParseFile" gives
    ['xml', 'parse', 'file'].

    Returns the list of words, in order of appearance.
    """
    words = []
    pos = 0
    end = len(str)
    while pos < end:
        head = str[pos].lower()
        pos = pos + 1
        if not ('a' <= head <= 'z'):
            # not a letter: cannot start a word
            continue
        start = pos
        while pos < end and 'A' <= str[pos] <= 'Z':
            pos = pos + 1
        while pos < end and 'a' <= str[pos] <= 'z':
            pos = pos + 1
        word = head + str[start:pos].lower()
        # trailing digits belong to no word
        while pos < end and '0' <= str[pos] <= '9':
            pos = pos + 1
        words.append(word)
    return words
253
def addWord(word, module, symbol, relevance):
    """Add `relevance` to the score of `word` for (module, symbol).

    Scores are accumulated in the global wordsDict.  A word that already
    has more than 500 associations is considered too common: its entry
    is replaced by None and it is ignored from then on.

    Returns the accumulated relevance, 0 when the word is ignored as too
    common, or -1 when `word` is None or shorter than 3 characters or
    when `module` or `symbol` is None.
    """
    if word is None or len(word) < 3:
        return -1
    if module is None or symbol is None:
        return -1
    if word in wordsDict:
        refs = wordsDict[word]
        if refs is None:
            return 0
        if len(refs) > 500:
            wordsDict[word] = None
            return 0
        # an association seen for the first time starts from 0; looked
        # up explicitly so that unrelated errors are not swallowed
        relevance = relevance + refs.get((module, symbol), 0)
    else:
        refs = wordsDict[word] = {}
    refs[(module, symbol)] = relevance
    return relevance
276
def addString(str, module, symbol, relevance):
    """Index every word of the text `str` for (module, symbol).

    The characters . , ' " ; and - are treated as separators, the text
    is split on whitespace and each word longer than 2 characters is
    passed to addWord() with the given `relevance`.

    Returns the sum of the addWord() results, or -1 when `str` is None
    or shorter than 3 characters.
    """
    if str is None or len(str) < 3:
        return -1
    for separator in (".", ",", "'", '"', ";", "-"):
        str = str.replace(separator, " ")
    ret = 0
    for word in str.split():
        if len(word) > 2:
            # use the caller's weight; it used to be ignored in favour
            # of a hard-coded 5
            ret = ret + addWord(word, module, symbol, relevance)

    return ret
293
294
295#########################################################################
296# #
297# XML API description analysis #
298# #
299#########################################################################
300
def loadAPI(filename):
    """Parse `filename` with libxml2.parseFile() and return the result.

    A "loaded ..." line is printed once the parse call has returned.
    """
    parsed = libxml2.parseFile(filename)
    print("loaded %s" % (filename))
    return parsed
305
def foundExport(file, symbol):
    """Register `symbol`, exported by `file`, as a function.

    The symbol is stored with addFunction() and each word of its name is
    indexed with a relevance of 10.

    Returns 1, or 0 when `file` or `symbol` is None.
    """
    if file is None or symbol is None:
        return 0
    addFunction(symbol, file)
    for part in splitIdentifier(symbol):
        addWord(part, file, symbol, 10)
    return 1
316
def analyzeAPIFile(top):
    """Process the children of one <file> element.

    Each <exports> child is handed to foundExport() together with the
    value of the file's "name" attribute; text nodes are skipped and
    any other element is reported.

    top -- the <file> node

    Returns the sum of the foundExport() results.
    """
    count = 0
    name = top.prop("name")
    cur = top.children
    while cur is not None:
        if cur.type != 'text':
            if cur.name == "exports":
                count = count + foundExport(name, cur.prop("symbol"))
            else:
                # the message takes both the offending element and the
                # file name; passing only one raised a TypeError
                print("unexpected element %s in API doc <file name='%s'>" %
                      (cur.name, name))
        cur = cur.next
    return count
331
def analyzeAPIFiles(top):
    """Process the children of the <files> element.

    Each <file> child is handed to analyzeAPIFile(); text nodes are
    skipped and any other element is reported.

    Returns the sum of the analyzeAPIFile() results.
    """
    total = 0
    node = top.children
    while node is not None:
        if node.type != 'text':
            if node.name == "file":
                total = total + analyzeAPIFile(node)
            else:
                print("unexpected element %s in API doc <files>" % (node.name))
        node = node.next
    return total
346
def analyzeAPIEnum(top):
    """Register the enum described by node `top`.

    The "file" and "name" attributes give the module and the symbol;
    the symbol is stored with addEnum() and each word of its name is
    indexed with a relevance of 10.

    Returns 1, or 0 when either attribute is missing.
    """
    module = top.prop("file")
    if module is None:
        return 0
    symbol = top.prop("name")
    if symbol is None:
        return 0

    addEnum(symbol, module)
    for part in splitIdentifier(symbol):
        addWord(part, module, symbol, 10)

    return 1
361
def analyzeAPIConst(top):
    """Register the constant described by node `top`.

    The "file" and "name" attributes give the module and the symbol;
    the symbol is stored with addConst() and each word of its name is
    indexed with a relevance of 10.

    Returns 1, or 0 when either attribute is missing.
    """
    module = top.prop("file")
    if module is None:
        return 0
    symbol = top.prop("name")
    if symbol is None:
        return 0

    addConst(symbol, module)
    for part in splitIdentifier(symbol):
        addWord(part, module, symbol, 10)

    return 1
376
def analyzeAPIType(top):
    """Register the type described by node `top`.

    The "file" and "name" attributes give the module and the symbol;
    the symbol is stored with addType() and each word of its name is
    indexed with a relevance of 10.

    Returns 1, or 0 when either attribute is missing.
    """
    module = top.prop("file")
    if module is None:
        return 0
    symbol = top.prop("name")
    if symbol is None:
        return 0

    addType(symbol, module)
    for part in splitIdentifier(symbol):
        addWord(part, module, symbol, 10)
    return 1
390
def analyzeAPIFunctype(top):
    """Register the function type described by node `top`.

    The "file" and "name" attributes give the module and the symbol;
    the symbol is stored with addFunctype() and each word of its name
    is indexed with a relevance of 10.

    Returns 1, or 0 when either attribute is missing.
    """
    module = top.prop("file")
    if module is None:
        return 0
    symbol = top.prop("name")
    if symbol is None:
        return 0

    addFunctype(symbol, module)
    for part in splitIdentifier(symbol):
        addWord(part, module, symbol, 10)
    return 1
404
def analyzeAPIStruct(top):
    """Register the structure described by node `top`.

    The "file" and "name" attributes give the module and the symbol;
    the symbol is stored with addStruct() and each word of its name is
    indexed with a relevance of 10.  The words of the optional "info"
    attribute are indexed with a relevance of 5.

    Returns 1, or 0 when the "file" or "name" attribute is missing.
    """
    file = top.prop("file")
    if file is None:
        return 0
    symbol = top.prop("name")
    if symbol is None:
        return 0

    addStruct(symbol, file)
    for word in splitIdentifier(symbol):
        addWord(word, file, symbol, 10)

    info = top.prop("info")
    if info is not None:
        # Go through addString() like function descriptions do, so that
        # punctuation and quotes are stripped instead of being indexed
        # as part of the words.
        addString(info, file, symbol, 5)
    return 1
425
def analyzeAPIMacro(top):
    """Register the macro described by node `top`.

    The "file" and "name" attributes give the module and the symbol.
    Each word of the symbol name is indexed with a relevance of 10.  The
    content of the first <info> child, when there is one, is stored as
    the description and its words are indexed with a relevance of 5.

    Returns 1 when the macro has an <info> child, 0 when it has none
    (the macro is still stored, without description) or when the "file"
    or "name" attribute is missing.
    """
    file = top.prop("file")
    if file is None:
        return 0
    symbol = top.prop("name")
    if symbol is None:
        return 0

    # look for the first <info> child
    info = None
    cur = top.children
    while cur is not None:
        if cur.type != 'text' and cur.name == "info":
            info = cur.content
            break
        cur = cur.next

    for word in splitIdentifier(symbol):
        addWord(word, file, symbol, 10)

    if info is None:
        addMacro(symbol, file)
        print("Macro %s description has no <info>" % (symbol))
        return 0

    addMacro(symbol, file, info)
    # Go through addString() like function descriptions do, so that
    # punctuation and quotes are stripped instead of being indexed as
    # part of the words.
    addString(info, file, symbol, 5)
    return 1
460
def analyzeAPIFunction(top):
    """Register the function described by node `top`.

    The "file" and "name" attributes give the module and the symbol.
    The children are used as follows:
      <info>   its content becomes the description; its words are
               indexed through addString() called with relevance 5
      <return> the words of its "info" attribute are indexed through
               addString() called with relevance 7
      <arg>    the words of its "info" attribute are indexed through
               addString() called with relevance 5, and its "name"
               attribute through addWord() with relevance 7
    Each word of the symbol name is indexed with a relevance of 10.

    Returns 1, or 0 when the "file" or "name" attribute is missing.
    """
    file = top.prop("file")
    if file is None:
        return 0
    symbol = top.prop("name")
    if symbol is None:
        return 0

    info = None
    cur = top.children
    while cur is not None:
        if cur.type == 'text':
            cur = cur.next
            continue
        if cur.name == "info":
            info = cur.content
        elif cur.name == "return":
            rinfo = cur.prop("info")
            if rinfo is not None:
                addString(rinfo, file, symbol, 7)
        elif cur.name == "arg":
            ainfo = cur.prop("info")
            # test the argument's own description; the <return> one was
            # tested here by mistake (and is unbound when an <arg> comes
            # before any <return>)
            if ainfo is not None:
                addString(ainfo, file, symbol, 5)
            name = cur.prop("name")
            if name is not None:
                addWord(name, file, symbol, 7)
        cur = cur.next
    if info is None:
        print("Function %s description has no <info>" % (symbol))
        addFunction(symbol, file, "")
    else:
        addFunction(symbol, file, info)
        addString(info, file, symbol, 5)

    for word in splitIdentifier(symbol):
        addWord(word, file, symbol, 10)

    return 1
501
def analyzeAPISymbols(top):
    """Process the children of the <symbols> element.

    Each child is dispatched on its element name to the matching
    analyzeAPI*() routine (macro, function, const, typedef, struct,
    enum, functype); text nodes are skipped and any other element is
    reported.

    Returns the sum of the values returned by those routines.
    """
    count = 0
    cur = top.children

    while cur is not None:
        if cur.type == 'text':
            cur = cur.next
            continue
        if cur.name == "macro":
            count = count + analyzeAPIMacro(cur)
        elif cur.name == "function":
            count = count + analyzeAPIFunction(cur)
        elif cur.name == "const":
            count = count + analyzeAPIConst(cur)
        elif cur.name == "typedef":
            count = count + analyzeAPIType(cur)
        elif cur.name == "struct":
            count = count + analyzeAPIStruct(cur)
        elif cur.name == "enum":
            count = count + analyzeAPIEnum(cur)
        elif cur.name == "functype":
            count = count + analyzeAPIFunctype(cur)
        else:
            # name the section actually being parsed; the message used
            # to say <files>
            print("unexpected element %s in API doc <symbols>" % (cur.name))
        cur = cur.next
    return count
528
def analyzeAPI(doc):
    """Walk the API description document and index its symbols.

    The root element must be named "api".  Its <symbols> children are
    handed to analyzeAPISymbols(); <files> children are deliberately
    ignored (analyzeAPIFiles() is not called), text nodes are skipped
    and any other element is reported.

    Returns the sum of the analyzeAPISymbols() results, or -1 when
    `doc` is None or the root element has another name.
    """
    if doc is None:
        return -1
    root = doc.getRootElement()
    if root.name != "api":
        print("Unexpected root name")
        return -1
    total = 0
    node = root.children
    while node is not None:
        if node.type != 'text':
            if node.name == "symbols":
                total = total + analyzeAPISymbols(node)
            elif node.name != "files":
                print("unexpected element %s in API doc" % (node.name))
        node = node.next
    return total
551
552#########################################################################
553# #
554# Main code: open the DB, the API XML and analyze it #
555# #
556#########################################################################
# Step 1: open the database; this also creates any missing table.
try:
    openMySQL()
except Exception:
    print("Failed to open the database")
    # sys.exc_info() replaces the long-deprecated sys.exc_type/exc_value
    print("%s %s" % sys.exc_info()[:2])
    sys.exit(1)

# Step 2: parse the API description; symbols are written to the
# database as they are found, words are accumulated in wordsDict.
try:
    doc = loadAPI(API)
    ret = analyzeAPI(doc)
    print("Analyzed %d blocs" % (ret))
    doc.freeDoc()
except Exception:
    print("Failed to parse and analyze %s" % (API))
    print("%s %s" % sys.exc_info()[:2])
    sys.exit(1)

# Step 3: write the accumulated word/symbol associations.
print("Indexed %d words" % (len(wordsDict)))
i = 0
skipped = 0
for word, refs in wordsDict.items():
    if refs is None:
        # addWord() dropped this word as too common
        skipped = skipped + 1
        continue
    for (module, symbol), relevance in refs.items():
        updateWord(word, symbol, relevance)
        i = i + 1

print("Found %d associations, skipped %d words" % (i, skipped))
586print "Found %d associations, skipped %d words" % (i, skipped)