Fix handling of HTML comments inside script elements; update the test results. Daniel
* HTMLparser.c: fix handling of HTML comments (<!-- ... -->) when parsing script element content.
* result/HTML/doc3*: updated the results.
Daniel
diff --git a/ChangeLog b/ChangeLog
index 153a9ae..dc68cf0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+Sat Nov 10 12:33:38 CET 2001 Daniel Veillard <daniel@veillard.com>
+
+ * HTMLparser.c: fix comment in scripts element parsing.
+ * result/HTML/doc3*: updated the results.
+
+Sat Nov 10 11:18:18 CET 2001 Daniel Veillard <daniel@veillard.com>
+
+ * uri.c: another URI bug fix #63336, using Joel Young patch.
+
Sat Nov 10 11:07:26 CET 2001 Daniel Veillard <daniel@veillard.com>
* debugXML.c include/libxml/debugXML.h: add xmlGetNodePath()
diff --git a/HTMLparser.c b/HTMLparser.c
index af941a0..8ff74e4 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -53,6 +53,7 @@
xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len,
xmlChar end, xmlChar end2, xmlChar end3);
+static void htmlParseComment(htmlParserCtxtPtr ctxt);
/************************************************************************
* *
@@ -2299,7 +2300,21 @@
SHRINK;
cur = CUR;
while (IS_CHAR(cur)) {
- if ((cur == '<') && (NXT(1) == '/')) {
+ if ((cur == '<') && (NXT(1) == '!') && (NXT(2) == '-') &&
+ (NXT(3) == '-')) {
+ if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) {
+ if (ctxt->sax->cdataBlock!= NULL) {
+ /*
+ * Insert as CDATA, which is the same as HTML_PRESERVE_NODE
+ */
+ ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
+ }
+ }
+ nbchar = 0;
+ htmlParseComment(ctxt);
+ cur = CUR;
+ continue;
+ } else if ((cur == '<') && (NXT(1) == '/')) {
/*
* One should break here, the specification is clear:
* Authors should therefore escape "</" within the content.
@@ -3841,6 +3856,7 @@
int base, len;
htmlParserInputPtr in;
const xmlChar *buf;
+ int incomment = 0;
in = ctxt->input;
if (in == NULL) return(-1);
@@ -3859,6 +3875,23 @@
if (third) len -= 2;
else if (next) len --;
for (;base < len;base++) {
+ if (!incomment && (base + 4 < len)) {
+ if ((buf[base] == '<') && (buf[base + 1] == '!') &&
+ (buf[base + 2] == '-') && (buf[base + 3] == '-')) {
+ incomment = 1;
+ }
+ /* do not increment base, some people use <!--> */
+ }
+ if (incomment) {
+ if (base + 3 < len)
+ return(-1);
+ if ((buf[base] == '-') && (buf[base + 1] == '-') &&
+ (buf[base + 2] == '>')) {
+ incomment = 0;
+ base += 2;
+ }
+ continue;
+ }
if (buf[base] == first) {
if (third != 0) {
if ((buf[base + 1] != next) ||
diff --git a/result/HTML/doc3.htm b/result/HTML/doc3.htm
index cdc4dc1..f5c674f 100644
--- a/result/HTML/doc3.htm
+++ b/result/HTML/doc3.htm
@@ -354,7 +354,7 @@
document.write("SRC=http://www.goto.com/d/ssn/dynconsole/?t");
document.write("ype=html&size=100x90&url=http://www.goto.co");
document.write("m/d/search/ssn/&target=_blank&Partner=SSN80");
- document.write("42DF8478957377>");
+ document.write("42DF8478957377></IFRAME>");
} else if ((parseInt(navigator.appVersion) > 3)
&& (navigator.appName == "Netscape")) {
document.write("<SCRIPT language=javascript type=text/javas");
@@ -362,7 +362,7 @@
document.write("SRC=http://www.goto.com/d/ssn/dynconsole/?t");
document.write("ype=js&size=100x90&url=http://www.goto.com/");
document.write("d/search/ssn/&target=_blank&Partner=SSN8042");
- document.write("DF8478957377>");
+ document.write("DF8478957377></SC");
document.write("RIPT>");
} else {
document.write("<A TARGET=_blank ");
@@ -370,7 +370,7 @@
document.write("GIF=true>");
document.write("<IMG ismap ");
document.write("SRC=http://www.goto.com/d/ssn/dynconsole/?t");
- document.write("ype=gif&size=100x90>");
+ document.write("ype=gif&size=100x90></A>");
}
// -->
</script><b><noscript></noscript></b><a href="http://www.goto.com/d/search/ssn/?fromGIF=true" target="_blank"><img align="bottom" border="0" height="90" ismap src="doc3_files/100x90.gif" width="100"></a><b><a href="http://www.goto.com/d/search/ssn/?fromGIF=true" target="_blank"></a></b><b></b><b><!-- END GoTo.com Search Box --></b><!-- Pricewatch Search Box --><form action="http://www.pricewatch.com/search/search.asp" method="get" target="_Blank"><center><p>
diff --git a/result/HTML/doc3.htm.err b/result/HTML/doc3.htm.err
index 949fefb..30688eb 100644
--- a/result/HTML/doc3.htm.err
+++ b/result/HTML/doc3.htm.err
@@ -40,18 +40,6 @@
./test/HTML/doc3.htm:767: error: Unexpected end tag : form
archive</A></FONT> </FORM></CENTER></TD></TR></TBODY></TABLE><!--
^
-./test/HTML/doc3.htm:790: error: Unexpected end tag : iframe
- document.write("42DF8478957377></IFRAME>");
- ^
-./test/HTML/doc3.htm:798: error: End tag : expected '>'
- document.write("DF8478957377></SC");
- ^
-./test/HTML/doc3.htm:798: error: Unexpected end tag : sc
- document.write("DF8478957377></SC");
- ^
-./test/HTML/doc3.htm:806: error: Unexpected end tag : a
- document.write("ype=gif&size=100x90></A>");
- ^
./test/HTML/doc3.htm:815: error: Unexpected end tag : a
</A></A></B><B></NOSCRIPT></B><B><!-- END GoTo.com Search Box --></
^
diff --git a/result/HTML/doc3.htm.sax b/result/HTML/doc3.htm.sax
index 28d6136..93f5a9f 100644
--- a/result/HTML/doc3.htm.sax
+++ b/result/HTML/doc3.htm.sax
Binary files differ