#!/usr/bin/python -u
2#
3# imports the API description and fills up a database with
4# name relevance to modules, functions or web pages
5#
6import MySQLdb
7import libxml2
8import sys
9import string
10import os
11
12#
# The dictionary of tables required and the SQL commands needed
14# to create them
15#
# Maps each table name this script relies on to the CREATE TABLE
# statement that createTable() executes to (re)build it:
#   symbols -- one row per symbol: name, module, type and description
#   words   -- one row per (word, symbol) pair with its relevance
TABLES={
  "symbols" : """CREATE TABLE symbols (
           name varchar(255) NOT NULL,
           module varchar(255) NOT NULL,
           type varchar(25) NOT NULL,
           descr varchar(255),
           UNIQUE KEY name (name),
           KEY module (module))""",
  "words" : """CREATE TABLE words (
           name varchar(50) NOT NULL,
           symbol varchar(255) NOT NULL,
           relevance int,
           KEY name (name),
           KEY symbol (symbol),
           UNIQUE KEY ID (name, symbol))""",
}
32
#
# The XML API description file to parse
#
API="libxml2-api.xml"
# Shared database connection; stays None until openMySQL() assigns the
# object returned by MySQLdb.connect().
DB=None
38
39#########################################################################
40# #
41# MySQL database interfaces #
42# #
43#########################################################################
def createTable(db, name):
    """Drop and re-create one of the tables described in TABLES.

    db is an open database connection and name a key of TABLES.
    Returns the result of executing the CREATE TABLE statement, or -1
    when an argument is None, the name is not a known table, or the
    creation fails.
    """
    if db is None or name is None:
        return -1
    # Validate before dropping: an unknown name must neither destroy an
    # existing table nor be interpolated into the DROP statement below.
    if name not in TABLES:
        print("Failed to create table %s" % (name))
        return -1
    c = db.cursor()

    ret = c.execute("DROP TABLE IF EXISTS %s" % (name))
    if ret == 1:
        print("Removed table %s" % (name))
    print("Creating table %s" % (name))
    try:
        ret = c.execute(TABLES[name])
    except Exception:
        print("Failed to create table %s" % (name))
        return -1
    return ret
63
def checkTables(db):
    """Make sure every table listed in TABLES exists in the database.

    Lists the existing tables with "show tables", calls createTable()
    for each TABLES entry that is missing, then tries to grant read-only
    access to nobody@localhost.
    Returns -1 when db is None, 0 otherwise.
    """
    if db is None:
        return -1
    c = db.cursor()
    nbtables = c.execute("show tables")
    print("Found %d tables" % (nbtables))
    # The first column of each returned row is a table name.
    existing = {}
    for row in c.fetchall():
        existing[row[0]] = {}

    for table in TABLES.keys():
        if table not in existing:
            print("table %s missing" % (table))
            createTable(db, table)
    print("checkTables finished")

    # make sure apache can access the tables read-only
    try:
        c.execute("GRANT SELECT ON xmlsoft.* TO nobody@localhost")
    except Exception:
        # Best effort only: report the problem instead of hiding it, but
        # do not fail the whole run because of it.
        print("Could not grant read-only access: %s" % (sys.exc_info()[1]))
    return 0
92
def openMySQL(db="xmlsoft", passwd=None):
    """Connect to MySQL, store the connection in DB and check the tables.

    db is the database name handed to MySQLdb.connect(); passwd is the
    password, replaced by a built-in default when None.
    Returns the result of checkTables(), or -1 if no connection object
    was obtained.
    """
    global DB

    # NOTE(review): the fallback password is hard-coded in the source.
    password = passwd
    if password is None:
        password = "ducon"
    DB = MySQLdb.connect(passwd=password, db=db)
    if DB is None:
        return -1
    return checkTables(DB)
103
def updateWord(name, symbol, relevance):
    """Store the relevance of word `name` for `symbol` in the words table.

    Opens the database through openMySQL() if DB is still None.  An
    INSERT is tried first; if it fails (typically because the
    (name, symbol) pair already exists) the row is UPDATEd instead.
    Returns the result of the executed statement, or -1 when the
    database is unavailable, an argument is None, or both statements
    fail.
    """
    global DB

    if DB is None:
        openMySQL()
    if DB is None:
        return -1
    if name is None or symbol is None:
        return -1

    c = DB.cursor()
    # Values are passed as query parameters so the driver quotes them;
    # words containing quotes or backslashes no longer break the SQL.
    try:
        ret = c.execute(
            "INSERT INTO words (name, symbol, relevance) VALUES (%s, %s, %s)",
            (name, symbol, relevance))
    except Exception:
        try:
            ret = c.execute(
                "UPDATE words SET relevance = %s where name = %s and symbol = %s",
                (relevance, name, symbol))
        except Exception:
            print("Update word (%s, %s, %s) failed command" % (name, symbol, relevance))
            print("UPDATE words SET relevance = %s where name = '%s' and symbol = '%s'" % (relevance, name, symbol))
            exc_type, exc_value = sys.exc_info()[:2]
            print("%s %s" % (exc_type, exc_value))
            return -1

    return ret
133
def updateSymbol(name, module, type, desc):
    """Record a symbol in the symbols table and index its own name.

    name, module and type are required; desc is reduced to its first
    sentence (text before the first '.'), at most 99 characters, with
    single quotes replaced by spaces.  The symbol name is also stored as
    a word of relevance 50 through updateWord().
    An INSERT is tried first, then an UPDATE of the existing row.
    Returns the result of the executed statement, or -1 when an argument
    is None, the database is unavailable, or both statements fail.
    """
    global DB

    # Validate first so a rejected symbol leaves no row behind in words.
    if name is None or module is None or type is None:
        return -1

    updateWord(name, name, 50)
    if DB is None:
        openMySQL()
    if DB is None:
        return -1

    try:
        # Quote replacement is kept so stored descriptions stay identical
        # to what earlier runs produced.
        desc = desc.replace("'", " ").split(".")[0][0:99]
    except (AttributeError, TypeError):
        # desc is None or not a string: store an empty description.
        desc = ""

    c = DB.cursor()
    # Values are passed as query parameters so the driver quotes them.
    try:
        ret = c.execute(
            "INSERT INTO symbols (name, module, type, descr) VALUES (%s, %s, %s, %s)",
            (name, module, type, desc))
    except Exception:
        try:
            ret = c.execute(
                "UPDATE symbols SET module=%s, type=%s, descr=%s where name=%s",
                (module, type, desc, name))
        except Exception:
            print("Update symbol (%s, %s, %s) failed command" % (name, module, type))
            print("""UPDATE symbols SET module='%s', type='%s', descr='%s' where name='%s'""" % (module, type, desc, name))
            exc_type, exc_value = sys.exc_info()[:2]
            print("%s %s" % (exc_type, exc_value))
            return -1

    return ret
174
def addFunction(name, module, desc=""):
    """Register `name` from `module` as a symbol of type 'function'."""
    return updateSymbol(name, module, 'function', desc)


def addMacro(name, module, desc=""):
    """Register `name` from `module` as a symbol of type 'macro'."""
    return updateSymbol(name, module, 'macro', desc)


def addEnum(name, module, desc=""):
    """Register `name` from `module` as a symbol of type 'enum'."""
    return updateSymbol(name, module, 'enum', desc)


def addStruct(name, module, desc=""):
    """Register `name` from `module` as a symbol of type 'struct'."""
    return updateSymbol(name, module, 'struct', desc)


def addConst(name, module, desc=""):
    """Register `name` from `module` as a symbol of type 'const'."""
    return updateSymbol(name, module, 'const', desc)


def addType(name, module, desc=""):
    """Register `name` from `module` as a symbol of type 'type'."""
    return updateSymbol(name, module, 'type', desc)


def addFunctype(name, module, desc=""):
    """Register `name` from `module` as a symbol of type 'functype'."""
    return updateSymbol(name, module, 'functype', desc)
195
196#########################################################################
197# #
#                 Word dictionary and analysis routines                 #
199# #
200#########################################################################
201
# In-memory index filled by addWord(): maps each word to a dictionary of
# {(module, symbol): relevance}, or to None once addWord() has discarded
# the word because it was attached to more than 500 symbols.
wordsDict = {}
203
def splitIdentifier(str):
    """Split an identifier into a list of lowercase words.

    A word starts at a letter, absorbs the uppercase letters that follow
    it, then the lowercase letters that follow those; digits directly
    after a word are dropped.  Any other character is skipped.
    For example "xmlParseFile" gives ["xml", "parse", "file"].
    Returns an empty list for None or an empty string.
    """
    if not str:
        return []
    words = []
    text = str
    size = len(text)
    pos = 0
    # Walk the string by index instead of re-slicing it at every step.
    while pos < size:
        first = text[pos].lower()
        pos = pos + 1
        if first < 'a' or first > 'z':
            continue
        start = pos
        while pos < size and 'A' <= text[pos] <= 'Z':
            pos = pos + 1
        while pos < size and 'a' <= text[pos] <= 'z':
            pos = pos + 1
        word = first + text[start:pos].lower()
        # Digits glued to the end of a word are not part of it.
        while pos < size and '0' <= text[pos] <= '9':
            pos = pos + 1
        words.append(word)
    return words
221
def addWord(word, module, symbol, relevance):
    """Add `relevance` for `word` on (module, symbol) in wordsDict.

    Words shorter than 3 characters and None arguments are rejected with
    -1.  A word already attached to more than 500 symbols is discarded
    (its entry becomes None) and 0 is returned for it from then on.
    Otherwise the relevance is accumulated onto any value already stored
    for the same (module, symbol) and the new total is returned.
    """
    global wordsDict

    if word is None or len(word) < 3:
        return -1
    if module is None or symbol is None:
        return -1
    if word in wordsDict:
        refs = wordsDict[word]
        if refs is None:
            return 0
        if len(refs) > 500:
            wordsDict[word] = None
            return 0
        # First occurrence for this symbol simply counts from zero.
        relevance = relevance + refs.get((module, symbol), 0)
    else:
        wordsDict[word] = {}
    wordsDict[word][(module, symbol)] = relevance
    return relevance
244
def addString(str, module, symbol, relevance):
    """Index every word of a piece of text for (module, symbol).

    The characters . , ' " ; - are treated as separators; each resulting
    word longer than 2 characters is handed to addWord() with the given
    relevance.
    Returns the sum of the addWord() results, or -1 when the text is
    None or shorter than 3 characters.
    """
    if str is None or len(str) < 3:
        return -1
    ret = 0
    text = str
    for separator in (".", ",", "'", '"', ";", "-"):
        text = text.replace(separator, " ")
    for word in text.split():
        if len(word) > 2:
            # Use the caller's relevance rather than a fixed value.
            ret = ret + addWord(word, module, symbol, relevance)

    return ret
261
262
263#########################################################################
264# #
265# XML API description analysis #
266# #
267#########################################################################
268
def loadAPI(filename):
    """Parse `filename` with libxml2.parseFile() and return the document."""
    document = libxml2.parseFile(filename)
    print("loaded %s" % (filename))
    return document
273
def foundExport(file, symbol):
    """Register a symbol exported by `file` as a function and index it.

    The symbol is added through addFunction() and each word of its name
    is indexed with relevance 10.
    Returns 1 on success, 0 when file or symbol is None.
    """
    if file is None or symbol is None:
        return 0
    addFunction(symbol, file)
    for part in splitIdentifier(symbol):
        addWord(part, file, symbol, 10)
    return 1
284
def analyzeAPIFile(top):
    """Process the children of one <file> element of the API description.

    Each <exports> child is passed to foundExport() together with the
    file's "name" property; text nodes are skipped and any other element
    is reported.
    Returns the number of exports registered.
    """
    count = 0
    name = top.prop("name")
    cur = top.children
    while cur is not None:
        if cur.type != 'text':
            if cur.name == "exports":
                count = count + foundExport(name, cur.prop("symbol"))
            else:
                # Report the offending element and the file it belongs to.
                print("unexpected element %s in API doc <file name='%s'>" % (cur.name, name))
        cur = cur.next
    return count
299
def analyzeAPIFiles(top):
    """Process the <files> section of the API description.

    Every <file> child goes through analyzeAPIFile(); text nodes are
    skipped and other elements are reported.
    Returns the sum of the analyzeAPIFile() results.
    """
    total = 0
    node = top.children
    while node is not None:
        if node.type != 'text':
            if node.name == "file":
                total = total + analyzeAPIFile(node)
            else:
                print("unexpected element %s in API doc <files>" % (node.name))
        node = node.next
    return total
314
def analyzeAPIEnum(top):
    """Register an <enum> element and index the words of its name.

    Returns 1 on success, 0 when the "file" or "name" property is
    missing.
    """
    module = top.prop("file")
    symbol = top.prop("name")
    if module is None or symbol is None:
        return 0

    addEnum(symbol, module)
    for part in splitIdentifier(symbol):
        addWord(part, module, symbol, 10)

    return 1
329
def analyzeAPIConst(top):
    """Register a <const> element and index the words of its name.

    Returns 1 on success, 0 when the "file" or "name" property is
    missing.
    """
    module = top.prop("file")
    symbol = top.prop("name")
    if module is None or symbol is None:
        return 0

    addConst(symbol, module)
    for part in splitIdentifier(symbol):
        addWord(part, module, symbol, 10)

    return 1
344
def analyzeAPIType(top):
    """Register a type definition element and index the words of its name.

    Returns 1 on success, 0 when the "file" or "name" property is
    missing.
    """
    module = top.prop("file")
    symbol = top.prop("name")
    if module is None or symbol is None:
        return 0

    addType(symbol, module)
    for part in splitIdentifier(symbol):
        addWord(part, module, symbol, 10)
    return 1
358
def analyzeAPIFunctype(top):
    """Register a <functype> element and index the words of its name.

    Returns 1 on success, 0 when the "file" or "name" property is
    missing.
    """
    module = top.prop("file")
    symbol = top.prop("name")
    if module is None or symbol is None:
        return 0

    addFunctype(symbol, module)
    for part in splitIdentifier(symbol):
        addWord(part, module, symbol, 10)
    return 1
372
def analyzeAPIStruct(top):
    """Register a <struct> element and index its name and description.

    The words of the symbol name get relevance 10; the words of the
    optional "info" property get relevance 5.
    Returns 1 on success, 0 when the "file" or "name" property is
    missing.
    """
    file = top.prop("file")
    if file is None:
        return 0
    symbol = top.prop("name")
    if symbol is None:
        return 0

    addStruct(symbol, file)
    for word in splitIdentifier(symbol):
        addWord(word, file, symbol, 10)

    info = top.prop("info")
    if info is not None:
        # Go through addString() like analyzeAPIFunction() does, so that
        # punctuation and quotes are stripped before the words are indexed.
        addString(info, file, symbol, 5)
    return 1
393
def analyzeAPIMacro(top):
    """Register a <macro> element and index its name and description.

    The description is the content of the first <info> child.  The words
    of the symbol name get relevance 10, those of the description 5.
    Returns 1 when the macro has a description; returns 0 when the
    "file" or "name" property is missing, and also when there is no
    <info> child (the macro is still registered in that case).
    """
    file = top.prop("file")
    if file is None:
        return 0
    symbol = top.prop("name")
    if symbol is None:
        return 0

    info = None
    cur = top.children
    while cur is not None:
        if cur.type != 'text' and cur.name == "info":
            info = cur.content
            break
        cur = cur.next

    for word in splitIdentifier(symbol):
        addWord(word, file, symbol, 10)

    if info is None:
        addMacro(symbol, file)
        print("Macro %s description has no <info>" % (symbol))
        return 0

    addMacro(symbol, file, info)
    # Go through addString() like analyzeAPIFunction() does, so that
    # punctuation and quotes are stripped before the words are indexed.
    addString(info, file, symbol, 5)
    return 1
428
def analyzeAPIFunction(top):
    """Register a <function> element and index everything describing it.

    Indexed text: the <info> child content (relevance 5), the "info"
    property of <return> (7), the "info" property of each <arg> (5), each
    <arg> name (7) and the words of the symbol name (10).
    Returns 1 on success, 0 when the "file" or "name" property is
    missing.
    """
    file = top.prop("file")
    if file is None:
        return 0
    symbol = top.prop("name")
    if symbol is None:
        return 0

    info = None
    cur = top.children
    while cur is not None:
        if cur.type == 'text':
            cur = cur.next
            continue
        if cur.name == "info":
            info = cur.content
        elif cur.name == "return":
            rinfo = cur.prop("info")
            if rinfo is not None:
                addString(rinfo, file, symbol, 7)
        elif cur.name == "arg":
            ainfo = cur.prop("info")
            # Test the argument's own description, not the return one.
            if ainfo is not None:
                addString(ainfo, file, symbol, 5)
            name = cur.prop("name")
            if name is not None:
                addWord(name, file, symbol, 7)
        cur = cur.next
    if info is None:
        print("Function %s description has no <info>" % (symbol))
        addFunction(symbol, file, "")
    else:
        addFunction(symbol, file, info)
        addString(info, file, symbol, 5)

    for word in splitIdentifier(symbol):
        addWord(word, file, symbol, 10)

    return 1
469
def analyzeAPISymbols(top):
    """Process the <symbols> section of the API description.

    Each child element is dispatched on its name to the matching
    analyzeAPI* routine; text nodes are skipped and unknown elements are
    reported.
    Returns the sum of the values returned by those routines.
    """
    count = 0
    cur = top.children

    while cur is not None:
        if cur.type == 'text':
            cur = cur.next
            continue
        if cur.name == "macro":
            count = count + analyzeAPIMacro(cur)
        elif cur.name == "function":
            count = count + analyzeAPIFunction(cur)
        elif cur.name == "const":
            count = count + analyzeAPIConst(cur)
        elif cur.name == "typedef":
            count = count + analyzeAPIType(cur)
        elif cur.name == "struct":
            count = count + analyzeAPIStruct(cur)
        elif cur.name == "enum":
            count = count + analyzeAPIEnum(cur)
        elif cur.name == "functype":
            count = count + analyzeAPIFunctype(cur)
        else:
            # Name the section actually being processed in the diagnostic.
            print("unexpected element %s in API doc <symbols>" % (cur.name))
        cur = cur.next
    return count
496
def analyzeAPI(doc):
    """Walk the parsed API document and index its <symbols> section.

    Returns the value accumulated from analyzeAPISymbols(), or -1 when
    doc is None or the root element is not <api>.
    """
    if doc is None:
        return -1
    root = doc.getRootElement()
    if root.name != "api":
        print("Unexpected root name")
        return -1

    total = 0
    node = root.children
    while node is not None:
        if node.type != 'text':
            if node.name == "symbols":
                total = total + analyzeAPISymbols(node)
            elif node.name != "files":
                # <files> is recognised but not processed: the call to
                # analyzeAPIFiles() is disabled.
                print("unexpected element %s in API doc" % (node.name))
        node = node.next
    return total
519
520#########################################################################
521# #
522# Main code: open the DB, the API XML and analyze it #
523# #
524#########################################################################
# Step 1: connect to the database (this also checks/creates the tables).
try:
    openMySQL()
except Exception:
    print("Failed to open the database")
    exc_type, exc_value = sys.exc_info()[:2]
    print("%s %s" % (exc_type, exc_value))
    sys.exit(1)

# Step 2: parse the API description; symbols are written to the database
# as they are met while the words are collected in wordsDict.
try:
    doc = loadAPI(API)
    ret = analyzeAPI(doc)
    print("Analyzed %d blocs" % (ret))
    doc.freeDoc()
except Exception:
    print("Failed to parse and analyze %s" % (API))
    exc_type, exc_value = sys.exc_info()[:2]
    print("%s %s" % (exc_type, exc_value))
    sys.exit(1)

# Step 3: store every collected (word, symbol) association.  Words whose
# entry is None were discarded by addWord() and are only counted.
print("Indexed %d words" % (len(wordsDict)))
i = 0
skipped = 0
for word in wordsDict.keys():
    refs = wordsDict[word]
    if refs is None:
        skipped = skipped + 1
        continue
    for (module, symbol) in refs.keys():
        updateWord(word, symbol, refs[(module, symbol)])
        i = i + 1

# Make the inserted rows permanent when the tables are transactional;
# commit() does nothing otherwise.
DB.commit()

print("Found %d associations, skipped %d words" % (i, skipped))