blob: 17479786a6e8b7bcadfd3480ed62d76cf4ae68df [file] [log] [blame]
Daniel Veillard3371ff82002-10-01 13:37:48 +00001#!/usr/bin/python -u
2#
3# imports the API description and fills up a database with
4# name relevance to modules, functions or web pages
5#
Daniel Veillard2c77cd72002-10-01 13:54:14 +00006# Operation needed:
7# =================
8#
9# install mysqld, the python wrappers for mysql and libxml2, start mysqld
10# Change the root passwd of mysql:
11# mysqladmin -u root password new_password
12# Create the new database xmlsoft
13# mysqladmin -p create xmlsoft
# Create a database user 'veillard' and give him password access;
# replace veillard and abcde with the right user name and password
16# mysql -p
17# password:
18# mysql> GRANT ALL PRIVILEGES ON xmlsoft TO veillard@localhost
19# IDENTIFIED BY 'abcde' WITH GRANT OPTION;
20#
21# As the user check the access:
22# mysql -p xmlsoft
23# Enter password:
24# Welcome to the MySQL monitor....
25# mysql> use xmlsoft
26# Database changed
27# mysql> quit
28# Bye
29#
# Then run the script in the doc subdir; it will create the symbols and
# words tables and populate them with information extracted from
# the libxml2-api.xml API description, and make them accessible read-only
# by nobody@localhost, the user Apache is expected to run as
34#
35# On the Apache configuration, make sure you have php support enabled
36#
37
Daniel Veillard3371ff82002-10-01 13:37:48 +000038import MySQLdb
39import libxml2
40import sys
41import string
42import os
43
44#
# The dictionary of required tables and the SQL command needed
# to create each of them
47#
# Table name -> CREATE TABLE statement.  createTable() executes the statement
# registered here, and checkTables() creates every table that is missing.
#   symbols: one row per API symbol, unique on name and indexed by module.
#   words:   associates a word with a symbol and a relevance value, unique
#            on the (name, symbol) pair.
TABLES={
  "symbols" : """CREATE TABLE symbols (
           name varchar(255) NOT NULL,
           module varchar(255) NOT NULL,
           type varchar(25) NOT NULL,
           descr varchar(255),
           UNIQUE KEY name (name),
           KEY module (module))""",
  "words" : """CREATE TABLE words (
           name varchar(50) NOT NULL,
           symbol varchar(255) NOT NULL,
           relevance int,
           KEY name (name),
           KEY symbol (symbol),
           UNIQUE KEY ID (name, symbol))""",
}
64
65#
66# The XML API description file to parse
67#
# File name of the XML API description handed to loadAPI() by the main code.
API="libxml2-api.xml"
# Shared MySQL connection; stays None until openMySQL() assigns it.
DB=None
70
71#########################################################################
72# #
73# MySQL database interfaces #
74# #
75#########################################################################
def createTable(db, name):
    """(Re)create one of the tables described in TABLES.

    Any existing table called `name` is dropped first, then the CREATE
    statement registered for it in TABLES is executed.

    db: an open database connection providing cursor().
    name: key of the table in TABLES.

    Returns the value of the CREATE execute() call, or -1 when `db` or
    `name` is None, when `name` is not a known table, or when the creation
    fails.
    """
    global TABLES

    if db is None or name is None:
        return -1
    # Refuse unknown names before touching the database: otherwise an
    # existing table would be dropped and never recreated.  It also keeps
    # arbitrary text out of the DROP statement, which has to be built by
    # string formatting because an identifier cannot be bound as a parameter.
    if name not in TABLES:
        print("Failed to create table %s" % (name))
        return -1
    c = db.cursor()

    ret = c.execute("DROP TABLE IF EXISTS %s" % (name))
    if ret == 1:
        print("Removed table %s" % (name))
    print("Creating table %s" % (name))
    try:
        ret = c.execute(TABLES[name])
    except Exception:
        print("Failed to create table %s" % (name))
        return -1
    return ret
95
def checkTables(db):
    """Make sure every table listed in TABLES exists, creating missing ones.

    The existing tables are listed with "show tables"; each TABLES entry
    that is absent is built through createTable().  Finally SELECT access on
    xmlsoft.* is granted to nobody@localhost.

    db: an open database connection providing cursor().

    Returns 0 once the check has run, or -1 when `db` is None.
    """
    global TABLES

    if db is None:
        return -1
    c = db.cursor()
    nbtables = c.execute("show tables")
    print("Found %d tables" % (nbtables))
    # execute() reported the number of rows: fetch exactly that many and
    # keep the first column of each, the table name
    existing = {}
    for _ in range(nbtables):
        row = c.fetchone()
        existing[row[0]] = {}

    for table in TABLES.keys():
        if table not in existing:
            print("table %s missing" % (table))
            createTable(db, table)
    print("checkTables finished")

    # make sure apache can access the tables read-only
    try:
        c.execute("GRANT SELECT ON xmlsoft.* TO nobody@localhost")
    except Exception:
        # Still best effort (the return value is unchanged), but report the
        # failure instead of hiding it so the operator knows why the tables
        # may not be readable.
        print("Could not grant read-only access to nobody@localhost")
        print("%s %s" % sys.exc_info()[:2])
    return 0
124
def openMySQL(db="xmlsoft", passwd=None):
    """Connect to MySQL, keep the connection in DB and verify the tables.

    db: database name handed to MySQLdb.connect().
    passwd: password to connect with; when None it is taken from the
        MySQL_PASS environment variable, and the script exits with status 1
        if that variable is not set.

    Returns the result of checkTables(), or -1 if no connection object was
    obtained.
    """
    global DB

    if passwd is None:
        passwd = os.environ.get("MySQL_PASS")
        if passwd is None:
            print("No password available, set environment MySQL_PASS")
            sys.exit(1)

    DB = MySQLdb.connect(passwd=passwd, db=db)
    if DB is None:
        return -1
    return checkTables(DB)
140
def updateWord(name, symbol, relevance):
    """Store the relevance of word `name` for `symbol` in the words table.

    The connection is opened through openMySQL() when DB is still None.  An
    INSERT is attempted first; if it fails the existing row is updated
    instead.

    name: the indexed word.
    symbol: the symbol the word is associated with.
    relevance: integer weight of the association.

    Returns the value of the successful execute() call, or -1 when no
    connection is available, when `name` or `symbol` is None, or when both
    statements fail.
    """
    global DB

    if DB is None:
        openMySQL()
    if DB is None:
        return -1
    if name is None or symbol is None:
        return -1

    c = DB.cursor()
    # The values are bound by the driver instead of being pasted into the
    # SQL text, so a word containing a quote can neither break nor alter the
    # statement.
    try:
        ret = c.execute(
            "INSERT INTO words (name, symbol, relevance) VALUES (%s, %s, %s)",
            (name, symbol, relevance))
    except Exception:
        # the insert failed, presumably on the unique (name, symbol) key:
        # refresh the relevance of the existing row
        update = "UPDATE words SET relevance = %s where name = %s and symbol = %s"
        try:
            ret = c.execute(update, (relevance, name, symbol))
        except Exception:
            print("Update word (%s, %s, %s) failed command" % (name, symbol, relevance))
            print("%s %r" % (update, (relevance, name, symbol)))
            print("%s %s" % sys.exc_info()[:2])
            return -1

    return ret
170
def updateSymbol(name, module, type, desc):
    """Store a symbol and its short description in the symbols table.

    The symbol's own name is first recorded as a word with relevance 50
    through updateWord().  The description is reduced to the text before its
    first '.', with single quotes replaced by spaces, and capped at 99
    characters.  An INSERT is attempted first; if it fails the existing row
    is updated instead.

    name: the symbol name.
    module: the module the symbol belongs to.
    type: kind of symbol, stored as given ('function', 'macro', ...).
    desc: free-text description; anything that is not a string is stored
        as "".

    Returns the value of the successful execute() call, or -1 when no
    connection is available, when `name`, `module` or `type` is None, or
    when both statements fail.
    """
    global DB

    updateWord(name, name, 50)
    if DB is None:
        openMySQL()
    if DB is None:
        return -1
    if name is None or module is None or type is None:
        return -1

    # keep only the first sentence; the quote replacement is retained so the
    # stored descriptions stay what they have always been
    try:
        desc = desc.replace("'", " ").split(".")[0][0:99]
    except (AttributeError, TypeError):
        desc = ""

    c = DB.cursor()
    # The values are bound by the driver rather than interpolated into the
    # SQL text, so quotes in any field cannot break the statement.
    try:
        ret = c.execute(
            "INSERT INTO symbols (name, module, type, descr) VALUES (%s, %s, %s, %s)",
            (name, module, type, desc))
    except Exception:
        # the insert failed, presumably because the name already exists:
        # refresh the existing row
        update = "UPDATE symbols SET module=%s, type=%s, descr=%s where name=%s"
        try:
            ret = c.execute(update, (module, type, desc, name))
        except Exception:
            print("Update symbol (%s, %s, %s) failed command" % (name, module, type))
            print("%s %r" % (update, (module, type, desc, name)))
            print("%s %s" % sys.exc_info()[:2])
            return -1

    return ret
211
def addFunction(name, module, desc = ""):
    """Record `name` from `module` as a symbol of type 'function'.

    Returns whatever updateSymbol() returns.
    """
    kind = 'function'
    return updateSymbol(name, module, kind, desc)
214
def addMacro(name, module, desc = ""):
    """Record `name` from `module` as a symbol of type 'macro'.

    Returns whatever updateSymbol() returns.
    """
    kind = 'macro'
    return updateSymbol(name, module, kind, desc)
217
def addEnum(name, module, desc = ""):
    """Record `name` from `module` as a symbol of type 'enum'.

    Returns whatever updateSymbol() returns.
    """
    kind = 'enum'
    return updateSymbol(name, module, kind, desc)
220
def addStruct(name, module, desc = ""):
    """Record `name` from `module` as a symbol of type 'struct'.

    Returns whatever updateSymbol() returns.
    """
    kind = 'struct'
    return updateSymbol(name, module, kind, desc)
223
def addConst(name, module, desc = ""):
    """Record `name` from `module` as a symbol of type 'const'.

    Returns whatever updateSymbol() returns.
    """
    kind = 'const'
    return updateSymbol(name, module, kind, desc)
226
def addType(name, module, desc = ""):
    """Record `name` from `module` as a symbol of type 'type'.

    Returns whatever updateSymbol() returns.
    """
    kind = 'type'
    return updateSymbol(name, module, kind, desc)
229
def addFunctype(name, module, desc = ""):
    """Record `name` from `module` as a symbol of type 'functype'.

    Returns whatever updateSymbol() returns.
    """
    kind = 'functype'
    return updateSymbol(name, module, kind, desc)
232
233#########################################################################
234# #
235# Word dictionnary and analysis routines #
236# #
237#########################################################################
238
# In-memory index filled by addWord(): word -> {(module, symbol): relevance}.
# addWord() replaces a word's entry with None once it holds more than 500
# associations; the main code counts such words as skipped.
wordsDict = {}
240
def splitIdentifier(str):
    """Split an identifier into its lower-cased component words.

    A word starts at any letter; it then absorbs the run of upper-case
    letters that follows, then the run of lower-case letters after that.
    Digits directly following a word are dropped, and characters that are
    not letters are skipped between words.

    Returns the list of words in order of appearance.
    """
    words = []
    pos = 0
    end = len(str)
    while pos < end:
        head = str[pos].lower()
        pos += 1
        if not ('a' <= head <= 'z'):
            continue

        # run of capitals right after the first letter (e.g. the ML of XML)
        start = pos
        while pos < end and 'A' <= str[pos] <= 'Z':
            pos += 1
        capitals = str[start:pos].lower()

        # lower-case tail of the word
        start = pos
        while pos < end and 'a' <= str[pos] <= 'z':
            pos += 1
        tail = str[start:pos]

        # trailing digits are not part of any word
        while pos < end and '0' <= str[pos] <= '9':
            pos += 1

        words.append(head + capitals + tail)
    return words
258
def addWord(word, module, symbol, relevance):
    """Add `relevance` to the (module, symbol) association of `word`.

    Associations are accumulated in wordsDict.  A word whose entry already
    holds more than 500 associations is dropped for good: its entry becomes
    None and later additions are ignored.

    Returns the accumulated relevance of the association, 0 when the word is
    (or has just been) dropped, or -1 when `word` is None or shorter than 3
    characters, or when `module` or `symbol` is None.
    """
    global wordsDict

    if word is None or len(word) < 3:
        return -1
    if module is None or symbol is None:
        return -1

    key = (module, symbol)
    if word not in wordsDict:
        # first time this word is seen
        wordsDict[word] = {key: relevance}
        return relevance

    refs = wordsDict[word]
    if refs is None:
        return 0
    if len(refs) > 500:
        wordsDict[word] = None
        return 0
    if key in refs:
        relevance = relevance + refs[key]
    refs[key] = relevance
    return relevance
281
def addString(str, module, symbol, relevance):
    """Index every word of a free-text string for (module, symbol).

    The characters . , ' " ; - are treated as separators, the text is split
    on whitespace, and each word longer than 2 characters is handed to
    addWord().

    str: the text to index.
    module, symbol: the association the words are recorded under.
    relevance: weight given to each word.

    Returns the sum of the addWord() results, or -1 when `str` is None or
    shorter than 3 characters.
    """
    if str is None or len(str) < 3:
        return -1
    ret = 0
    for separator in ".,'\";-":
        str = str.replace(separator, " ")
    for word in str.split():
        if len(word) > 2:
            # use the caller's weight; it used to be ignored in favour of a
            # hard-coded 5
            ret = ret + addWord(word, module, symbol, relevance)

    return ret
298
299
300#########################################################################
301# #
302# XML API description analysis #
303# #
304#########################################################################
305
def loadAPI(filename):
    """Parse the XML file `filename` with libxml2 and return the document.

    A "loaded ..." line is printed once parsing has returned.
    """
    parsed = libxml2.parseFile(filename)
    print("loaded %s" % filename)
    return parsed
310
def foundExport(file, symbol):
    """Record an exported symbol of `file` as a function and index its name.

    Each word of the identifier is indexed with relevance 10.

    Returns 1 when the symbol was recorded, 0 when `file` or `symbol` is
    None.
    """
    if file is None or symbol is None:
        return 0
    addFunction(symbol, file)
    for part in splitIdentifier(symbol):
        addWord(part, file, symbol, 10)
    return 1
321
def analyzeAPIFile(top):
    """Process the <exports> children of one <file> element.

    top: the <file> node; its "name" property is used as the module name.

    Text nodes are ignored, every <exports> child is passed to
    foundExport(), and any other element is reported.

    Returns the number of exports recorded.
    """
    count = 0
    name = top.prop("name")
    cur = top.children
    while cur is not None:
        if cur.type != 'text':
            if cur.name == "exports":
                count = count + foundExport(name, cur.prop("symbol"))
            else:
                # the message has two placeholders: the offending element
                # and the name of the enclosing file (only the latter used
                # to be supplied, which made the formatting itself fail)
                print("unexpected element %s in API doc <file name='%s'>" % (cur.name, name))
        cur = cur.next
    return count
336
def analyzeAPIFiles(top):
    """Process every <file> child of the <files> element `top`.

    Text nodes are ignored and any element other than <file> is reported.

    Returns the sum of the analyzeAPIFile() results.
    """
    total = 0
    node = top.children

    while node is not None:
        if node.type != 'text':
            if node.name == "file":
                total = total + analyzeAPIFile(node)
            else:
                print("unexpected element %s in API doc <files>" % (node.name))
        node = node.next
    return total
351
def analyzeAPIEnum(top):
    """Record the <enum> element `top` and index the words of its name.

    The module comes from the "file" property and the symbol from the
    "name" property; each word of the name gets relevance 10.

    Returns 1 when the enum was recorded, 0 when either property is missing.
    """
    module = top.prop("file")
    if module is None:
        return 0
    ident = top.prop("name")
    if ident is None:
        return 0

    addEnum(ident, module)
    for part in splitIdentifier(ident):
        addWord(part, module, ident, 10)

    return 1
366
def analyzeAPIConst(top):
    """Record the <const> element `top` and index the words of its name.

    The module comes from the "file" property and the symbol from the
    "name" property; each word of the name gets relevance 10.

    Returns 1 when the constant was recorded, 0 when either property is
    missing.
    """
    module = top.prop("file")
    if module is None:
        return 0
    ident = top.prop("name")
    if ident is None:
        return 0

    addConst(ident, module)
    for part in splitIdentifier(ident):
        addWord(part, module, ident, 10)

    return 1
381
def analyzeAPIType(top):
    """Record the type described by element `top` and index its name.

    The module comes from the "file" property and the symbol from the
    "name" property; each word of the name gets relevance 10.

    Returns 1 when the type was recorded, 0 when either property is missing.
    """
    module = top.prop("file")
    if module is None:
        return 0
    ident = top.prop("name")
    if ident is None:
        return 0

    addType(ident, module)
    for part in splitIdentifier(ident):
        addWord(part, module, ident, 10)
    return 1
395
def analyzeAPIFunctype(top):
    """Record the <functype> element `top` and index the words of its name.

    The module comes from the "file" property and the symbol from the
    "name" property; each word of the name gets relevance 10.

    Returns 1 when the function type was recorded, 0 when either property
    is missing.
    """
    module = top.prop("file")
    if module is None:
        return 0
    ident = top.prop("name")
    if ident is None:
        return 0

    addFunctype(ident, module)
    for part in splitIdentifier(ident):
        addWord(part, module, ident, 10)
    return 1
409
def analyzeAPIStruct(top):
    """Record the <struct> element `top`, index its name and its info text.

    The module comes from the "file" property and the symbol from the
    "name" property.  Each word of the name gets relevance 10; each
    whitespace-separated word of the "info" property that is longer than 2
    characters gets relevance 5.

    Returns 1 when the struct was recorded, 0 when "file" or "name" is
    missing.
    """
    module = top.prop("file")
    if module is None:
        return 0
    ident = top.prop("name")
    if ident is None:
        return 0

    addStruct(ident, module)
    for part in splitIdentifier(ident):
        addWord(part, module, ident, 10)

    info = top.prop("info")
    if info is not None:
        for word in info.split():
            if len(word) > 2:
                addWord(word, module, ident, 5)
    return 1
430
def analyzeAPIMacro(top):
    """Record the <macro> element `top`, index its name and its description.

    The module comes from the "file" property and the symbol from the
    "name" property.  The description is the content of the first <info>
    child.  Each word of the name gets relevance 10; each
    whitespace-separated word of the description longer than 2 characters
    gets relevance 5.

    Returns 1 when the macro was recorded with a description, 0 when "file"
    or "name" is missing and also when the macro has no <info> child (the
    macro is still recorded in that case, without description).
    """
    module = top.prop("file")
    if module is None:
        return 0
    ident = top.prop("name")
    if ident is None:
        return 0

    # look for the first non-text child called <info>
    info = None
    child = top.children
    while child is not None:
        if child.type != 'text' and child.name == "info":
            info = child.content
            break
        child = child.next

    for part in splitIdentifier(ident):
        addWord(part, module, ident, 10)

    if info is None:
        addMacro(ident, module)
        print("Macro %s description has no <info>" % (ident))
        return 0

    addMacro(ident, module, info)
    for word in info.split():
        if len(word) > 2:
            addWord(word, module, ident, 5)
    return 1
465
def analyzeAPIFunction(top):
    """Record the <function> element `top` and index everything it describes.

    The module comes from the "file" property and the symbol from the
    "name" property.  Among the children:
      - <info> supplies the description (the last one wins),
      - <return> has its "info" property indexed with addString(..., 7),
      - <arg> has its "info" property indexed with addString(..., 5) and
        its "name" property indexed as a word with relevance 7.
    The description is then indexed with addString(..., 5) and each word of
    the function name gets relevance 10.

    Returns 1 when the function was recorded, 0 when "file" or "name" is
    missing.
    """
    file = top.prop("file")
    if file is None:
        return 0
    symbol = top.prop("name")
    if symbol is None:
        return 0

    info = None
    cur = top.children
    while cur is not None:
        if cur.type == 'text':
            cur = cur.next
            continue
        if cur.name == "info":
            info = cur.content
        elif cur.name == "return":
            rinfo = cur.prop("info")
            if rinfo is not None:
                addString(rinfo, file, symbol, 7)
        elif cur.name == "arg":
            ainfo = cur.prop("info")
            # test the argument's own description: checking the return
            # description here failed when no <return> had been seen yet and
            # dropped the argument text when the return had no info
            if ainfo is not None:
                addString(ainfo, file, symbol, 5)
            name = cur.prop("name")
            if name is not None:
                addWord(name, file, symbol, 7)
        cur = cur.next
    if info is None:
        print("Function %s description has no <info>" % (symbol))
        addFunction(symbol, file, "")
    else:
        addFunction(symbol, file, info)
        addString(info, file, symbol, 5)

    for word in splitIdentifier(symbol):
        addWord(word, file, symbol, 10)

    return 1
506
def analyzeAPISymbols(top):
    """Dispatch every child of the <symbols> element `top` to its analyzer.

    Text nodes are ignored; <macro>, <function>, <const>, <typedef>,
    <struct>, <enum> and <functype> children go to the matching
    analyzeAPI* routine; any other element is reported.

    Returns the sum of the analyzer results.
    """
    # element name -> routine handling it
    handlers = {
        "macro": analyzeAPIMacro,
        "function": analyzeAPIFunction,
        "const": analyzeAPIConst,
        "typedef": analyzeAPIType,
        "struct": analyzeAPIStruct,
        "enum": analyzeAPIEnum,
        "functype": analyzeAPIFunctype,
    }
    count = 0
    cur = top.children

    while cur is not None:
        if cur.type != 'text':
            handler = handlers.get(cur.name)
            if handler is not None:
                count = count + handler(cur)
            else:
                # name the section actually being parsed (the message used
                # to say <files>, copied from analyzeAPIFiles)
                print("unexpected element %s in API doc <symbols>" % (cur.name))
        cur = cur.next
    return count
533
def analyzeAPI(doc):
    """Walk the parsed API document and analyze its <symbols> sections.

    doc: the parsed document; its root element must be called "api".

    <files> children are recognised but not analyzed (the call to
    analyzeAPIFiles is disabled); any other unknown element is reported.

    Returns the sum of the analyzeAPISymbols() results, or -1 when `doc` is
    None or the root element is not "api".
    """
    if doc is None:
        return -1
    root = doc.getRootElement()
    if root.name != "api":
        print("Unexpected root name")
        return -1

    total = 0
    node = root.children
    while node is not None:
        if node.type != 'text':
            if node.name == "symbols":
                total = total + analyzeAPISymbols(node)
            elif node.name != "files":
                print("unexpected element %s in API doc" % (node.name))
        node = node.next
    return total
556
557#########################################################################
558# #
559# Main code: open the DB, the API XML and analyze it #
560# #
561#########################################################################
# Step 1: connect to the database.  The bare except is deliberate: it also
# reports the exit raised by openMySQL() when no password is available.
try:
    openMySQL()
except:
    print("Failed to open the database")
    print("%s %s" % sys.exc_info()[:2])
    sys.exit(1)

# Step 2: parse the API description and build the in-memory word index.
try:
    doc = loadAPI(API)
    ret = analyzeAPI(doc)
    print("Analyzed %d blocs" % (ret))
    doc.freeDoc()
except:
    print("Failed to parse and analyze %s" % (API))
    print("%s %s" % sys.exc_info()[:2])
    sys.exit(1)

# Step 3: store every word association; words whose entry was dropped
# (set to None by addWord) are only counted.
print("Indexed %d words" % (len(wordsDict)))
i = 0
skipped = 0
for word, refs in wordsDict.items():
    if refs is None:
        skipped = skipped + 1
        continue
    for (module, symbol) in refs:
        updateWord(word, symbol, refs[(module, symbol)])
        i = i + 1

print("Found %d associations, skipped %d words" % (i, skipped))