blob: e3b8588abe0c18da919181de61393dd7efdfa723 [file] [log] [blame]
Daniel Veillard3371ff82002-10-01 13:37:48 +00001#!/usr/bin/python -u
2#
3# imports the API description and fills up a database with
4# name relevance to modules, functions or web pages
5#
Daniel Veillard2c77cd72002-10-01 13:54:14 +00006# Operation needed:
7# =================
8#
9# install mysqld, the python wrappers for mysql and libxml2, start mysqld
10# Change the root passwd of mysql:
11# mysqladmin -u root password new_password
12# Create the new database xmlsoft
13# mysqladmin -p create xmlsoft
14# Create a database user 'veillard' and give him password access
15# change veillard and abcde with the right user name and passwd
16# mysql -p
17# password:
18# mysql> GRANT ALL PRIVILEGES ON xmlsoft TO veillard@localhost
19# IDENTIFIED BY 'abcde' WITH GRANT OPTION;
20#
21# As the user check the access:
22# mysql -p xmlsoft
23# Enter password:
24# Welcome to the MySQL monitor....
25# mysql> use xmlsoft
26# Database changed
27# mysql> quit
28# Bye
29#
30# Then run the script in the doc subdir, it will create the symbols and
31# word tables and populate them with information extracted from
32# the libxml2-api.xml API description, and make them accessible read-only
33# by nobody@localhost, the user Apache is expected to run as
34#
35# On the Apache configuration, make sure you have php support enabled
36#
37
Daniel Veillard3371ff82002-10-01 13:37:48 +000038import MySQLdb
39import libxml2
40import sys
41import string
42import os
43
44#
Daniel Veillard141d04b2002-10-06 21:51:18 +000045# We are not interested in parsing errors here
46#
def callback(ctx, str):
    """Discard a libxml2 error report.

    Both arguments are ignored; the function exists only so that it can
    be installed as the libxml2 error handler.
    """
    return None
# Route every libxml2 error report to the no-op handler defined above.
libxml2.registerErrorHandler(callback, None)
50
51#
Daniel Veillard3371ff82002-10-01 13:37:48 +000052# The dictionary of tables required and the SQL command needed
53# to create them
54#
# Maps the name of every table the indexer needs to the SQL statement
# that creates it.
TABLES = {
    # one row per API symbol
    "symbols": """CREATE TABLE symbols (
           name varchar(255) NOT NULL,
           module varchar(255) NOT NULL,
           type varchar(25) NOT NULL,
           descr varchar(255),
           UNIQUE KEY name (name),
           KEY module (module))""",
    # word -> API symbol associations
    "words": """CREATE TABLE words (
           name varchar(50) NOT NULL,
           symbol varchar(255) NOT NULL,
           relevance int,
           KEY name (name),
           KEY symbol (symbol),
           UNIQUE KEY ID (name, symbol))""",
    # word -> HTML page associations
    "wordsHTML": """CREATE TABLE wordsHTML (
           name varchar(50) NOT NULL,
           resource varchar(255) NOT NULL,
           section varchar(255),
           id varchar(50),
           relevance int,
           KEY name (name),
           KEY resource (resource),
           UNIQUE KEY ref (name, resource))""",
    # one row per indexed HTML page
    "pages": """CREATE TABLE pages (
           resource varchar(255) NOT NULL,
           title varchar(255) NOT NULL,
           UNIQUE KEY name (resource))""",
    # the only table the web user is granted write access to
    "Queries": """CREATE TABLE Queries (
           ID int(11) NOT NULL auto_increment,
           Value varchar(50) NOT NULL,
           Count int(11) NOT NULL,
           UNIQUE KEY id (ID,Value(35)),
           INDEX (ID))""",
}
90
91#
92# The XML API description file to parse
93#
# File name of the XML API description that gets indexed.
API = "libxml2-api.xml"
# Shared database connection; remains None until openMySQL() has connected.
DB = None
96
97#########################################################################
98# #
99# MySQL database interfaces #
100# #
101#########################################################################
def createTable(db, name):
    """Drop and re-create one of the tables described in TABLES.

    Arguments:
      db:   an open database connection (only its cursor() method is used)
      name: the table name; it must be a key of TABLES

    Returns the value of the cursor's execute() for the CREATE statement,
    or -1 when an argument is missing, the table is unknown, or the
    creation fails.
    """
    if db is None or name is None:
        return -1

    # Look the statement up before touching the database: an unknown name
    # must never cause an existing table to be dropped.
    try:
        statement = TABLES[name]
    except KeyError:
        print("Failed to create table %s" % (name))
        return -1

    c = db.cursor()
    # Table names cannot be bound as query parameters; name is known to
    # be one of the TABLES keys at this point.
    ret = c.execute("DROP TABLE IF EXISTS %s" % (name))
    if ret == 1:
        print("Removed table %s" % (name))
    print("Creating table %s" % (name))
    try:
        ret = c.execute(statement)
    except Exception:
        print("Failed to create table %s" % (name))
        return -1
    return ret
121
def checkTables(db):
    """Create the tables of TABLES missing from the database and grant
    the web server user access to them.

    Arguments:
      db: an open database connection (only its cursor() method is used)

    Returns -1 if db is None, 0 otherwise.  A failure of the GRANT
    statements is reported but does not change the return value.
    """
    if db is None:
        return -1
    c = db.cursor()
    nbtables = c.execute("show tables")
    print("Found %d tables" % (nbtables))
    existing = {}
    for row in c.fetchall():
        existing[row[0]] = {}

    for table in TABLES:
        if table not in existing:
            print("table %s missing" % (table))
            createTable(db, table)
    print("checkTables finished")

    # make sure apache can access the tables read-only
    try:
        c.execute("GRANT SELECT ON xmlsoft.* TO nobody@localhost")
        c.execute("GRANT INSERT,SELECT,UPDATE ON xmlsoft.Queries TO nobody@localhost")
    except Exception as exc:
        # Still best effort (the indexing can proceed without it), but
        # say so instead of hiding the failure.
        print("Failed to grant access to nobody@localhost: %s" % (exc))
    return 0
151
def openMySQL(db="xmlsoft", passwd=None):
    """Connect to the MySQL database and make sure its tables exist.

    Arguments:
      db:     name of the database to open
      passwd: the password; when None it is read from the MySQL_PASS
              environment variable

    The connection is stored in the global DB.  Returns the result of
    checkTables() on the new connection, or -1 if no connection object
    was obtained.  Exits the program with status 1 when no password is
    available.
    """
    global DB

    if passwd is None:
        passwd = os.environ.get("MySQL_PASS")
        if passwd is None:
            print("No password available, set environment MySQL_PASS")
            sys.exit(1)

    DB = MySQLdb.connect(passwd=passwd, db=db)
    if DB is None:
        return -1
    return checkTables(DB)
167
def updateWord(name, symbol, relevance):
    """Store the relevance of a word for an API symbol in the words table.

    Arguments:
      name:      the word
      symbol:    the API symbol the word relates to
      relevance: the relevance score to store

    An INSERT is tried first; if it fails (presumably because the
    (name, symbol) pair is already present) the existing row is updated.
    Opens the database connection on first use.

    Returns the value of the cursor's execute(), or -1 if an argument is
    missing, no connection is available, or both statements fail.
    """
    if DB is None:
        openMySQL()
        if DB is None:
            return -1
    if name is None or symbol is None:
        return -1

    c = DB.cursor()
    # The values are bound as query parameters so that the driver quotes
    # them; interpolating them into the statement breaks on any quote
    # character in the indexed text.
    try:
        ret = c.execute(
            "INSERT INTO words (name, symbol, relevance) VALUES (%s, %s, %s)",
            (name, symbol, relevance))
    except Exception:
        try:
            ret = c.execute(
                "UPDATE words SET relevance = %s where name = %s and symbol = %s",
                (relevance, name, symbol))
        except Exception:
            print("Update word (%s, %s, %s) failed command" % (name, symbol, relevance))
            print("UPDATE words SET relevance = %s where name = '%s' and symbol = '%s'" % (relevance, name, symbol))
            print("%s %s" % sys.exc_info()[:2])
            return -1

    return ret
197
def updateSymbol(name, module, type, desc):
    """Store an API symbol in the symbols table and index its own name.

    Arguments:
      name:   the symbol name
      module: the module the symbol belongs to
      type:   the kind of symbol ('function', 'macro', ...)
      desc:   a description, or None; only its first sentence, cut to 99
              characters and with apostrophes replaced by spaces, is kept

    The symbol name is also registered as a word for itself with a
    relevance of 50.  An INSERT is tried first and an UPDATE of the
    existing row if it fails.

    Returns the value of the cursor's execute(), or -1 if name, module or
    type is missing, no connection is available, or both statements fail.
    """
    # Validate before indexing anything so that a rejected symbol leaves
    # no word entry behind.
    if name is None or module is None or type is None:
        return -1

    updateWord(name, name, 50)
    if DB is None:
        openMySQL()
        if DB is None:
            return -1

    try:
        desc = desc.replace("'", " ").split(".")[0][0:99]
    except (AttributeError, TypeError):
        # desc is None or not a string
        desc = ""

    c = DB.cursor()
    # Values are bound as query parameters so that the driver quotes them.
    try:
        ret = c.execute(
            "INSERT INTO symbols (name, module, type, descr) VALUES (%s, %s, %s, %s)",
            (name, module, type, desc))
    except Exception:
        try:
            ret = c.execute(
                "UPDATE symbols SET module=%s, type=%s, descr=%s where name=%s",
                (module, type, desc, name))
        except Exception:
            print("Update symbol (%s, %s, %s) failed command" % (name, module, type))
            print("""UPDATE symbols SET module='%s', type='%s', descr='%s' where name='%s'""" % (module, type, desc, name))
            print("%s %s" % sys.exc_info()[:2])
            return -1

    return ret
238
def addFunction(name, module, desc=""):
    """Record name as a function of module; returns updateSymbol()'s result."""
    kind = 'function'
    return updateSymbol(name, module, kind, desc)
241
def addMacro(name, module, desc=""):
    """Record name as a macro of module; returns updateSymbol()'s result."""
    kind = 'macro'
    return updateSymbol(name, module, kind, desc)
244
def addEnum(name, module, desc=""):
    """Record name as an enum of module; returns updateSymbol()'s result."""
    kind = 'enum'
    return updateSymbol(name, module, kind, desc)
247
def addStruct(name, module, desc=""):
    """Record name as a struct of module; returns updateSymbol()'s result."""
    kind = 'struct'
    return updateSymbol(name, module, kind, desc)
250
def addConst(name, module, desc=""):
    """Record name as a const of module; returns updateSymbol()'s result."""
    kind = 'const'
    return updateSymbol(name, module, kind, desc)
253
def addType(name, module, desc=""):
    """Record name as a type of module; returns updateSymbol()'s result."""
    kind = 'type'
    return updateSymbol(name, module, kind, desc)
256
def addFunctype(name, module, desc=""):
    """Record name as a functype of module; returns updateSymbol()'s result."""
    kind = 'functype'
    return updateSymbol(name, module, kind, desc)
259
def addPage(resource, title):
    """Store the title of an HTML page in the pages table.

    Arguments:
      resource: the page file name
      title:    the page title

    An INSERT is tried first and an UPDATE of the existing row if it
    fails.  Opens the database connection on first use.

    Returns the value of the cursor's execute(), or -1 if resource is
    None, no connection is available, or both statements fail.
    """
    if DB is None:
        openMySQL()
        if DB is None:
            return -1
    if resource is None:
        return -1

    c = DB.cursor()
    # Values are bound as query parameters so that the driver quotes them.
    try:
        ret = c.execute(
            "INSERT INTO pages (resource, title) VALUES (%s, %s)",
            (resource, title))
    except Exception:
        try:
            ret = c.execute(
                "UPDATE pages SET title=%s WHERE resource=%s",
                (title, resource))
        except Exception:
            # Report this function's own arguments; the previous message
            # formatted names that do not exist here and raised NameError.
            print("Update page (%s, %s) failed command" % (resource, title))
            print("""UPDATE pages SET title='%s' WHERE resource='%s'""" % (title, resource))
            print("%s %s" % sys.exc_info()[:2])
            return -1

    return ret
287
def updateWordHTML(name, resource, desc, id, relevance):
    """Store the relevance of a word for an HTML page in wordsHTML.

    Arguments:
      name:      the word
      resource:  the page file name
      desc:      the section title, or None; apostrophes are replaced by
                 spaces and the text is cut to 99 characters
      id:        the anchor of the section, or None for no anchor
      relevance: the relevance score to store

    An INSERT is tried first and an UPDATE of the existing row if it
    fails.  Opens the database connection on first use.

    Returns the value of the cursor's execute(), or -1 if name or
    resource is missing, no connection is available, or both statements
    fail.
    """
    if DB is None:
        openMySQL()
        if DB is None:
            return -1
    if name is None or resource is None:
        return -1
    if id is None:
        id = ""
    if desc is None:
        desc = ""
    else:
        try:
            desc = desc.replace("'", " ")[0:99]
        except (AttributeError, TypeError):
            # not a string
            desc = ""

    c = DB.cursor()
    # Values are bound as query parameters so that the driver quotes them.
    try:
        ret = c.execute(
            "INSERT INTO wordsHTML (name, resource, section, id, relevance) VALUES (%s, %s, %s, %s, %s)",
            (name, resource, desc, id, relevance))
    except Exception:
        try:
            ret = c.execute(
                "UPDATE wordsHTML SET section=%s, id=%s, relevance=%s where name=%s and resource=%s",
                (desc, id, relevance, name, resource))
        except Exception:
            print("Update symbol (%s, %s, %s) failed command" % (name, resource, relevance))
            print("""UPDATE wordsHTML SET section='%s', id='%s', relevance='%s' where name='%s' and resource='%s'""" % (desc, id, relevance, name, resource))
            print("%s %s" % sys.exc_info()[:2])
            return -1

    return ret
327
Daniel Veillard3371ff82002-10-01 13:37:48 +0000328#########################################################################
329# #
330# Word dictionary and analysis routines #
331# #
332#########################################################################
333
# word -> {(module, symbol): relevance}; a word's entry is replaced by
# None once it has accumulated too many references.
wordsDict = {}
# word -> {resource: (relevance, id, section)}; a word's entry is replaced
# by None once it has accumulated too many references.
wordsDictHTML = {}
Daniel Veillard3371ff82002-10-01 13:37:48 +0000336
def splitIdentifier(str):
    """Split an identifier into its lower-cased component words.

    A word starts at any ASCII letter and extends over the run of upper
    case letters that follows it, then over the run of lower case
    letters after that.  Digits directly following a word are dropped, as
    is every other character found between words.

    Returns the list of words, in order of appearance.
    """
    words = []
    end = len(str)
    pos = 0
    while pos < end:
        first = str[pos]
        if not ('a' <= first <= 'z' or 'A' <= first <= 'Z'):
            # not the start of a word
            pos += 1
            continue
        start = pos
        pos += 1
        while pos < end and 'A' <= str[pos] <= 'Z':
            pos += 1
        while pos < end and 'a' <= str[pos] <= 'z':
            pos += 1
        words.append(str[start:pos].lower())
        while pos < end and '0' <= str[pos] <= '9':
            pos += 1
    return words
354
def addWord(word, module, symbol, relevance):
    """Add relevance to the score of word for (module, symbol) in wordsDict.

    Words shorter than 3 characters are rejected.  Once a word already
    has more than 500 references its entry is set to None and the word is
    ignored from then on.

    Returns the accumulated relevance of the pair, 0 for an ignored word,
    or -1 if the word is too short or an argument is None.
    """
    global wordsDict

    if word is None or len(word) < 3 or module is None or symbol is None:
        return -1

    key = (module, symbol)
    if word not in wordsDict:
        wordsDict[word] = {}
    else:
        refs = wordsDict[word]
        if refs is None:
            return 0
        if len(refs) > 500:
            wordsDict[word] = None
            return 0
        if key in refs:
            relevance = relevance + refs[key]
    wordsDict[word][key] = relevance
    return relevance
377
def addString(str, module, symbol, relevance):
    """Index every word of a description string for (module, symbol).

    Arguments:
      str:       the text; . , ' " ; and - are treated as separators
      module:    the module of the symbol being described
      symbol:    the symbol being described
      relevance: the relevance given to each word

    Only words longer than 2 characters are indexed.  The requested
    relevance is forwarded to addWord() (it used to be ignored in favour
    of a fixed 5).

    Returns the sum of the addWord() results, or -1 if str is None or
    shorter than 3 characters.
    """
    if str is None or len(str) < 3:
        return -1
    ret = 0
    for separator in (".", ",", "'", '"', ";", "-"):
        str = str.replace(separator, " ")
    for word in str.split():
        if len(word) > 2:
            ret = ret + addWord(word, module, symbol, relevance)

    return ret
394
def addWordHTML(word, resource, id, section, relevance):
    """Add relevance to the score of word for an HTML page in wordsDictHTML.

    Words shorter than 3 characters are rejected.  When the word is
    already known for the page, the anchor and section recorded first are
    kept (unless they are None) and the relevance is accumulated.  Once a
    word already has more than 15 pages its entry is set to None and the
    word is ignored from then on.

    Returns the accumulated relevance, 0 for an ignored word, or -1 if
    the word is too short or word, resource or section is None.
    """
    global wordsDictHTML

    if word is None or len(word) < 3 or resource is None or section is None:
        return -1

    if word not in wordsDictHTML:
        wordsDictHTML[word] = {}
    else:
        refs = wordsDictHTML[word]
        if refs is None:
            return 0
        if len(refs) > 15:
            wordsDictHTML[word] = None
            return 0
        if resource in refs:
            (known_relevance, known_id, known_section) = refs[resource]
            if known_id is not None:
                id = known_id
            if known_section is not None:
                section = known_section
            relevance = relevance + known_relevance
    wordsDictHTML[word][resource] = (relevance, id, section)
    return relevance
422 return relevance
423
def addStringHTML(str, resource, id, section, relevance):
    """Index every word of a piece of HTML text for a page section.

    Punctuation, brackets, line breaks and the "\\xc2" and "\\xa0"
    characters are turned into spaces before the text is split; words
    longer than 2 characters are then handed to addWordHTML() with the
    given relevance.

    Returns the sum of the addWordHTML() results, or -1 if str is None or
    shorter than 3 characters.
    """
    if str is None or len(str) < 3:
        return -1

    separators = (".", ",", "'", '"', ";", "-", "(", ")", "{", "}",
                  "<", ">", "/", "*", ":", "\n", "\r", "\xc2", "\xa0")
    for separator in separators:
        str = str.replace(separator, " ")

    total = 0
    for word in str.split():
        if len(word) > 2:
            total = total + addWordHTML(word, resource, id, section, relevance)

    return total
453
Daniel Veillard3371ff82002-10-01 13:37:48 +0000454
455#########################################################################
456# #
457# XML API description analysis #
458# #
459#########################################################################
460
def loadAPI(filename):
    """Parse the XML file filename with libxml2 and return the document."""
    parsed = libxml2.parseFile(filename)
    print("loaded %s" % (filename))
    return parsed
465
def foundExport(file, symbol):
    """Register symbol as a function exported by file and index the words
    of its name with a relevance of 10.

    Returns 1, or 0 when file or symbol is None.
    """
    if file is None or symbol is None:
        return 0
    addFunction(symbol, file)
    for part in splitIdentifier(symbol):
        addWord(part, file, symbol, 10)
    return 1
476
def analyzeAPIFile(top):
    """Register the symbols exported by one file element of the API
    description.

    Arguments:
      top: the file element; its "name" attribute is the module name and
           each "exports" child names one symbol in its "symbol" attribute

    Text children are skipped and any other child is reported.

    Returns the number of exports registered.
    """
    count = 0
    name = top.prop("name")
    cur = top.children
    while cur is not None:
        if cur.type != 'text':
            if cur.name == "exports":
                count = count + foundExport(name, cur.prop("symbol"))
            else:
                # Both placeholders need a value: the offending element
                # and the file it was found in.
                print("unexpected element %s in API doc <file name='%s'>" % (cur.name, name))
        cur = cur.next
    return count
491
def analyzeAPIFiles(top):
    """Analyze every file child of the element top.

    Text children are skipped and any other child is reported.

    Returns the sum of the analyzeAPIFile() results.
    """
    total = 0
    node = top.children
    while node is not None:
        if node.type != 'text':
            if node.name != "file":
                print("unexpected element %s in API doc <files>" % (node.name))
            else:
                total = total + analyzeAPIFile(node)
        node = node.next
    return total
506
def analyzeAPIEnum(top):
    """Register the enum described by the element top and index the words
    of its name with a relevance of 10.

    Returns 1, or 0 when the "file" or "name" attribute is missing.
    """
    file = top.prop("file")
    symbol = None
    if file is not None:
        symbol = top.prop("name")
    if symbol is None:
        return 0

    addEnum(symbol, file)
    for part in splitIdentifier(symbol):
        addWord(part, file, symbol, 10)

    return 1
521
def analyzeAPIConst(top):
    """Register the constant described by the element top and index the
    words of its name with a relevance of 10.

    Returns 1, or 0 when the "file" or "name" attribute is missing.
    """
    file = top.prop("file")
    symbol = None
    if file is not None:
        symbol = top.prop("name")
    if symbol is None:
        return 0

    addConst(symbol, file)
    for part in splitIdentifier(symbol):
        addWord(part, file, symbol, 10)

    return 1
536
def analyzeAPIType(top):
    """Register the type described by the element top and index the words
    of its name with a relevance of 10.

    Returns 1, or 0 when the "file" or "name" attribute is missing.
    """
    file = top.prop("file")
    symbol = None
    if file is not None:
        symbol = top.prop("name")
    if symbol is None:
        return 0

    addType(symbol, file)
    for part in splitIdentifier(symbol):
        addWord(part, file, symbol, 10)
    return 1
550
def analyzeAPIFunctype(top):
    """Register the function type described by the element top and index
    the words of its name with a relevance of 10.

    Returns 1, or 0 when the "file" or "name" attribute is missing.
    """
    file = top.prop("file")
    symbol = None
    if file is not None:
        symbol = top.prop("name")
    if symbol is None:
        return 0

    addFunctype(symbol, file)
    for part in splitIdentifier(symbol):
        addWord(part, file, symbol, 10)
    return 1
564
def analyzeAPIStruct(top):
    """Register the struct described by the element top.

    The words of its name are indexed with a relevance of 10 and, when
    the element has an "info" attribute, the words of that text longer
    than 2 characters with a relevance of 5.

    Returns 1, or 0 when the "file" or "name" attribute is missing.
    """
    file = top.prop("file")
    symbol = None
    if file is not None:
        symbol = top.prop("name")
    if symbol is None:
        return 0

    addStruct(symbol, file)
    for part in splitIdentifier(symbol):
        addWord(part, file, symbol, 10)

    info = top.prop("info")
    if info is None:
        return 1
    for word in info.split():
        if len(word) > 2:
            addWord(word, file, symbol, 5)
    return 1
585
def analyzeAPIMacro(top):
    """Register the macro described by the element top.

    The words of its name are indexed with a relevance of 10.  The
    content of its first "info" child, when there is one, becomes the
    description and its words longer than 2 characters are indexed with a
    relevance of 5.

    Returns 1 when a description was found, 0 when it was not (the macro
    is still registered) or when the "file" or "name" attribute is
    missing.
    """
    file = top.prop("file")
    if file is None:
        return 0
    symbol = top.prop("name")
    if symbol is None:
        return 0

    # the description is the content of the first info child
    info = None
    child = top.children
    while child is not None:
        if child.type != 'text' and child.name == "info":
            info = child.content
            break
        child = child.next

    for part in splitIdentifier(symbol):
        addWord(part, file, symbol, 10)

    if info is None:
        addMacro(symbol, file)
        print("Macro %s description has no <info>" % (symbol))
        return 0

    addMacro(symbol, file, info)
    for word in info.split():
        if len(word) > 2:
            addWord(word, file, symbol, 5)
    return 1
620
def analyzeAPIFunction(top):
    """Register the function described by the element top and index the
    words describing it.

    Arguments:
      top: the function element, with "file" and "name" attributes and
           optional "info", "return" and "arg" children

    Relevance given to the words: 10 for the parts of the function name,
    7 for the return description and the argument names, 5 for the
    argument descriptions and the function description itself.

    Returns 1, or 0 when the "file" or "name" attribute is missing.
    """
    file = top.prop("file")
    if file is None:
        return 0
    symbol = top.prop("name")
    if symbol is None:
        return 0

    info = None
    cur = top.children
    while cur is not None:
        if cur.type != 'text':
            if cur.name == "info":
                info = cur.content
            elif cur.name == "return":
                rinfo = cur.prop("info")
                if rinfo is not None:
                    addString(rinfo, file, symbol, 7)
            elif cur.name == "arg":
                ainfo = cur.prop("info")
                # Test the argument's own description, not the one of
                # the return value (which may not even have been seen).
                if ainfo is not None:
                    addString(ainfo, file, symbol, 5)
                name = cur.prop("name")
                if name is not None:
                    addWord(name, file, symbol, 7)
        cur = cur.next

    if info is None:
        print("Function %s description has no <info>" % (symbol))
        addFunction(symbol, file, "")
    else:
        addFunction(symbol, file, info)
        addString(info, file, symbol, 5)

    for word in splitIdentifier(symbol):
        addWord(word, file, symbol, 10)

    return 1
661
def analyzeAPISymbols(top):
    """Analyze every symbol description found under the element top.

    Each child is handed to the analysis routine matching its element
    name (macro, function, const, typedef, struct, enum or functype).
    Text children are skipped and any other child is reported.

    Returns the sum of the results of the analysis routines.
    """
    handlers = {
        "macro": analyzeAPIMacro,
        "function": analyzeAPIFunction,
        "const": analyzeAPIConst,
        "typedef": analyzeAPIType,
        "struct": analyzeAPIStruct,
        "enum": analyzeAPIEnum,
        "functype": analyzeAPIFunctype,
    }
    count = 0
    cur = top.children

    while cur is not None:
        if cur.type != 'text':
            handler = handlers.get(cur.name)
            if handler is None:
                # name the section actually being analyzed
                print("unexpected element %s in API doc <symbols>" % (cur.name))
            else:
                count = count + handler(cur)
        cur = cur.next
    return count
688
def analyzeAPI(doc):
    """Analyze a parsed API description document.

    The root element must be named "api".  Its "symbols" children are
    analyzed, its "files" children are deliberately left alone, text
    children are skipped and any other child is reported.

    Returns the sum of the analyzeAPISymbols() results, or -1 when doc is
    None or the root element has another name.
    """
    if doc is None:
        return -1
    root = doc.getRootElement()
    if root.name != "api":
        print("Unexpected root name")
        return -1

    total = 0
    node = root.children
    while node is not None:
        if node.type != 'text':
            if node.name == "symbols":
                total = total + analyzeAPISymbols(node)
            elif node.name != "files":
                print("unexpected element %s in API doc" % (node.name))
        node = node.next
    return total
711
712#########################################################################
713# #
Daniel Veillard141d04b2002-10-06 21:51:18 +0000714# Web pages parsing and analysis #
715# #
716#########################################################################
717
718import glob
719
def analyzeHTMLText(doc, resource, p, section, id):
    """Index the words of one text node of an HTML page.

    Arguments:
      doc:      the parsed document (unused)
      resource: the page file name
      p:        the node whose content is indexed
      section:  the title of the enclosing section
      id:       the anchor of the enclosing section

    The words are indexed with a relevance of 5.

    Returns the result of addStringHTML(), or -1 if the content cannot be
    read or indexed.  KeyboardInterrupt and SystemExit are not swallowed.
    """
    try:
        return addStringHTML(p.content, resource, id, section, 5)
    except Exception:
        return -1
728
def analyzeHTMLPara(doc, resource, p, section, id):
    """Index the words of one paragraph of an HTML page.

    Arguments:
      doc:      the parsed document (unused)
      resource: the page file name
      p:        the node whose content is indexed
      section:  the title of the enclosing section
      id:       the anchor of the enclosing section

    The words are indexed with a relevance of 5.

    Returns the result of addStringHTML(), or -1 if the content cannot be
    read or indexed.  KeyboardInterrupt and SystemExit are not swallowed.
    """
    try:
        return addStringHTML(p.content, resource, id, section, 5)
    except Exception:
        return -1
737
def analyzeHTMLPre(doc, resource, p, section, id):
    """Index the words of one preformatted block of an HTML page.

    Arguments:
      doc:      the parsed document (unused)
      resource: the page file name
      p:        the node whose content is indexed
      section:  the title of the enclosing section
      id:       the anchor of the enclosing section

    The words are indexed with a relevance of 5.

    Returns the result of addStringHTML(), or -1 if the content cannot be
    read or indexed.  KeyboardInterrupt and SystemExit are not swallowed.
    """
    try:
        return addStringHTML(p.content, resource, id, section, 5)
    except Exception:
        return -1
746
# Note: a five-argument analyzeHTML(doc, resource, p, section, id) used to
# be defined here.  It was rebound by the definition below before anything
# could call it, so only the two-argument function is kept.
def analyzeHTML(doc, resource):
    """Index the content of one parsed HTML page.

    Arguments:
      doc:      the parsed document (its xpathNewContext() is used)
      resource: the page file name

    The page is registered with its title (or "Page <resource>" when no
    title is found).  The h1, h2 and h3 elements and the text nodes are
    then walked in the order returned by the XPath query: a heading
    starts a new section and updates the anchor when it has an "id" or
    "name" attribute; every other item is indexed under the current
    section and anchor.

    Returns the number of items handed to the indexing routines.  A
    failure while walking the page is reported and the count reached so
    far is returned.
    """
    para = 0
    ctxt = doc.xpathNewContext()
    try:
        try:
            res = ctxt.xpathEval("//head/title")
            title = res[0].content
        except Exception:
            title = "Page %s" % (resource)
        addPage(resource, title)
        try:
            items = ctxt.xpathEval("//h1 | //h2 | //h3 | //text()")
            section = title
            id = ""
            for item in items:
                if item.name in ('h1', 'h2', 'h3'):
                    section = item.content
                    if item.prop("id"):
                        id = item.prop("id")
                    elif item.prop("name"):
                        id = item.prop("name")
                elif item.type == 'text':
                    analyzeHTMLText(doc, resource, item, section, id)
                    para = para + 1
                elif item.name == 'text':
                    analyzeHTMLPara(doc, resource, item, section, id)
                    para = para + 1
                elif item.name == 'pre':
                    analyzeHTMLPre(doc, resource, item, section, id)
                    para = para + 1
                else:
                    print("Page %s, unexpected %s element" % (resource, item.name))
        except Exception:
            print("Page %s: problem analyzing" % (resource))
            print("%s %s" % sys.exc_info()[:2])
    finally:
        # XPath contexts of the libxml2 bindings have to be released
        # explicitly.
        ctxt.xpathFreeContext()

    return para
792
def analyzeHTMLPages():
    """Parse and index the HTML pages of the documentation.

    The pages are the *.html files of the current directory and of its
    tutorial subdirectory, except those whose path starts with "API" and
    xml.html.  A page that cannot be parsed or analyzed is reported and
    skipped.

    Returns the number of pages successfully analyzed.
    """
    ret = 0
    HTMLfiles = glob.glob("*.html") + glob.glob("tutorial/*.html")
    for html in HTMLfiles:
        if html.startswith("API") or html == "xml.html":
            continue
        doc = None
        try:
            try:
                doc = libxml2.htmlParseFile(html, None)
                res = analyzeHTML(doc, html)
                print("Parsed %s : %d paragraphs" % (html, res))
                ret = ret + 1
            except Exception:
                print("could not parse %s" % (html))
        finally:
            # libxml2 documents have to be released explicitly.
            if doc is not None:
                doc.freeDoc()
    return ret
809
810#########################################################################
811# #
Daniel Veillard3371ff82002-10-01 13:37:48 +0000812# Main code: open the DB, the API XML and analyze it #
813# #
814#########################################################################
# Open the database; openMySQL() exits by itself when no password is set.
try:
    openMySQL()
except Exception:
    print("Failed to open the database")
    print("%s %s" % sys.exc_info()[:2])
    sys.exit(1)

# First pass: index the HTML pages, then store the word/page associations.
ret = analyzeHTMLPages()
print("Indexed %d words in %d HTML pages" % (len(wordsDictHTML), ret))

i = 0
skipped = 0
for word, refs in wordsDictHTML.items():
    if refs is None:
        # the word was found in too many pages to be useful
        skipped = skipped + 1
        continue
    for resource, (relevance, anchor, section) in refs.items():
        updateWordHTML(word, resource, section, anchor, relevance)
        i = i + 1

print("Found %d associations in HTML pages" % (i))
# DB-API connections do not autocommit by default; without an explicit
# commit the rows are lost on transactional storage engines.
DB.commit()

# Second pass: index the API description, then store the word/symbol
# associations.
try:
    doc = loadAPI(API)
    try:
        ret = analyzeAPI(doc)
        print("Analyzed %d blocs" % (ret))
    finally:
        doc.freeDoc()
except Exception:
    print("Failed to parse and analyze %s" % (API))
    print("%s %s" % sys.exc_info()[:2])
    sys.exit(1)

print("Indexed %d words" % (len(wordsDict)))
i = 0
skipped = 0
for word, refs in wordsDict.items():
    if refs is None:
        # the word was found for too many symbols to be useful
        skipped = skipped + 1
        continue
    for (module, symbol), relevance in refs.items():
        updateWord(word, symbol, relevance)
        i = i + 1

DB.commit()
print("Found %d associations, skipped %d words" % (i, skipped))