improved HTML indexing; make the queries also look up the HTML-based indexes
* doc/index.py: improved HTML indexing
* doc/search.php: make the queries also lookup the HTML based indexes
Daniel
diff --git a/ChangeLog b/ChangeLog
index a1cc3b8..9580528 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+Mon Oct 7 13:12:03 CEST 2002 Daniel Veillard <daniel@veillard.com>
+
+ * doc/index.py: improved HTML indexing
+ * doc/search.php: make the queries also lookup the HTML based indexes
+
Sun Oct 6 23:50:29 CEST 2002 Daniel Veillard <daniel@veillard.com>
* doc/index.py: added HTML page indexing
diff --git a/doc/index.py b/doc/index.py
index 2cca7c0..e3b8588 100755
--- a/doc/index.py
+++ b/doc/index.py
@@ -717,6 +717,15 @@
import glob
+def analyzeHTMLText(doc, resource, p, section, id):
+ words = 0
+ try:
+ content = p.content
+ words = words + addStringHTML(content, resource, id, section, 5)
+ except:
+ return -1
+ return words
+
def analyzeHTMLPara(doc, resource, p, section, id):
words = 0
try:
@@ -735,6 +744,15 @@
return -1
return words
+def analyzeHTML(doc, resource, p, section, id):
+ words = 0
+ try:
+ content = p.content
+ words = words + addStringHTML(content, resource, id, section, 5)
+ except:
+ return -1
+ return words
+
def analyzeHTML(doc, resource):
para = 0;
ctxt = doc.xpathNewContext()
@@ -745,7 +763,7 @@
title = "Page %s" % (resource)
addPage(resource, title)
try:
- items = ctxt.xpathEval("//h1 | //h2 | //h3 | //p | //pre")
+ items = ctxt.xpathEval("//h1 | //h2 | //h3 | //text()")
section = title
id = ""
for item in items:
@@ -755,7 +773,10 @@
id = item.prop("id")
elif item.prop("name"):
id = item.prop("name")
- elif item.name == 'p':
+ elif item.type == 'text':
+ analyzeHTMLText(doc, resource, item, section, id)
+ para = para + 1
+ elif item.name == 'text':
analyzeHTMLPara(doc, resource, item, section, id)
para = para + 1
elif item.name == 'pre':
diff --git a/doc/search.php b/doc/search.php
index 7be0055..5f8e604 100644
--- a/doc/search.php
+++ b/doc/search.php
@@ -135,6 +135,20 @@
}
return array($result, $j);
}
+ function queryHTMLWord($word) {
+ $result = NULL;
+ $j = 0;
+ if ($word) {
+ $result = mysql_query ("SELECT relevance, name, id, resource, section FROM wordsHTML WHERE name='$word' ORDER BY relevance DESC");
+ if ($result) {
+ $j = mysql_num_rows($result);
+ if ($j == 0)
+ mysql_free_result($result);
+ }
+ logQueryWord($word);
+ }
+ return array($result, $j);
+ }
function resSort ($a, $b) {
list($ra,$ta,$ma,$na,$da) = $a;
list($rb,$tb,$mb,$nb,$db) = $b;
@@ -162,17 +176,39 @@
$module = mysql_result($result, $i, 3);
$desc = mysql_result($result, $i, 4);
if (array_key_exists($name, $results)) {
- list($r,$t,$m,$n, $d) = $results[$name];
+ list($r,$t,$m,$d,$w,$u) = $results[$name];
$results[$name] = array($r + $relevance + 40,
- $t,$m,$n,$d);
+ $t,$m,$d,$w,$u);
} else {
+ $id = strtoupper($name);
+ $m = strtolower($module);
+ $url = "html/libxml-$m.html#$id";
$results[$name] = array($relevance,$type,
- $module, $name, $desc);
+ $module, $desc, $name, $url);
}
}
mysql_free_result($result);
- } else {
- echo "<p> No symbol found for $word\n";
+ }
+ list($result, $k) = queryHTMLWord($word);
+ if ($k > 0) {
+ for ($i = 0; $i < $k; $i++) {
+ $relevance = mysql_result($result, $i, 0);
+ $name = mysql_result($result, $i, 1);
+ $id = mysql_result($result, $i, 2);
+ $module = mysql_result($result, $i, 3);
+ $desc = mysql_result($result, $i, 4);
+ $url = $module;
+ if ($id != "") {
+ $url = $url + "#$id";
+ }
+ $results[$name + "_html_" + $number+ "_" + $i ] =
+ array($relevance, "documentation",
+ $module, $desc, $word, $url);
+ }
+ mysql_free_result($result);
+ }
+ if (($j <= 0) && ($k <= 0)) {
+ echo "<p> No result found for $word\n";
}
}
mysql_close($link);
@@ -184,11 +220,8 @@
printf("<table><tbody>\n");
printf("<tr><td>Quality</td><td>Symbol</td><td>Type</td><td>module</td><td>Description</td></tr>\n");
while (list ($name, $val) = each ($results)) {
- list($r,$t,$m,$n,$d) = $val;
- $upper = strtoupper($n);
- $module = strtolower($m);
- $url = "html/libxml-$module.html#$upper";
- echo "<tr><td>$r</td><td><a href='$url'>$n</a></td><td>$t</td><td>$m</td><td>$d</td></tr>";
+ list($r,$t,$m,$d,$s,$u) = $val;
+ echo "<tr><td>$r</td><td><a href='$u'>$s</a></td><td>$t</td><td>$m</td><td>$d</td></tr>";
}
printf("</tbody></table>\n");
}