Applied patch from James Bursa fixing an HTML parsing bug in push mode
* HTMLparser.c: applied patch from James Bursa fixing an HTML parsing
bug in push mode
* result/HTML/repeat.html* test/HTML/repeat.html: added the test to the
regression suite
Daniel
diff --git a/ChangeLog b/ChangeLog
index 5d5ab9f..dee3d81 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+Mon Jul 4 00:58:44 CEST 2005 Daniel Veillard <daniel@veillard.com>
+
+ * HTMLparser.c: applied patch from James Bursa fixing an html parsing
+ bug in push mode
+ * result/HTML/repeat.html* test/HTML/repeat.html: added the test to the
+ regression suite
+
Sun Jul 3 23:42:31 CEST 2005 Daniel Veillard <daniel@veillard.com>
* testapi.c tree.c: fixing a leak detected by testapi in
diff --git a/HTMLparser.c b/HTMLparser.c
index 3e1e75a..c6115d0 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -3349,9 +3349,10 @@
*
* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
*
+ * Returns 0 in case of success and -1 in case of error.
*/
-static void
+static int
htmlParseStartTag(htmlParserCtxtPtr ctxt) {
const xmlChar *name;
const xmlChar *attname;
@@ -3365,9 +3366,9 @@
if ((ctxt == NULL) || (ctxt->input == NULL)) {
htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
"htmlParseStartTag: context error\n", NULL, NULL);
- return;
+ return -1;
}
- if (CUR != '<') return;
+ if (CUR != '<') return -1;
NEXT;
GROW;
@@ -3379,7 +3380,7 @@
/* Dump the bogus tag like browsers do */
while ((IS_CHAR_CH(CUR)) && (CUR != '>'))
NEXT;
- return;
+ return -1;
}
if (xmlStrEqual(name, BAD_CAST"meta"))
meta = 1;
@@ -3402,14 +3403,14 @@
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
"htmlParseStartTag: misplaced <html> tag\n",
name, NULL);
- return;
+ return 0;
}
if ((ctxt->nameNr != 1) &&
(xmlStrEqual(name, BAD_CAST"head"))) {
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
"htmlParseStartTag: misplaced <head> tag\n",
name, NULL);
- return;
+ return 0;
}
if (xmlStrEqual(name, BAD_CAST"body")) {
int indx;
@@ -3420,7 +3421,7 @@
name, NULL);
while ((IS_CHAR_CH(CUR)) && (CUR != '>'))
NEXT;
- return;
+ return 0;
}
}
}
@@ -3533,6 +3534,8 @@
xmlFree((xmlChar *) atts[i]);
}
}
+
+ return 0;
}
/**
@@ -3847,16 +3850,15 @@
xmlChar *currentNode = NULL;
const htmlElemDesc * info;
htmlParserNodeInfo node_info;
- const xmlChar *oldname;
+ int failed;
int depth;
const xmlChar *oldptr;
if ((ctxt == NULL) || (ctxt->input == NULL)) {
htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
- "htmlParseStartTag: context error\n", NULL, NULL);
+ "htmlParseElement: context error\n", NULL, NULL);
return;
}
- depth = ctxt->nameNr;
/* Capture start position */
if (ctxt->record_info) {
node_info.begin_pos = ctxt->input->consumed +
@@ -3864,11 +3866,9 @@
node_info.begin_line = ctxt->input->line;
}
- oldname = ctxt->name;
- htmlParseStartTag(ctxt);
+ failed = htmlParseStartTag(ctxt);
name = ctxt->name;
- if (((depth == ctxt->nameNr) && (xmlStrEqual(oldname, ctxt->name))) ||
- (name == NULL)) {
+ if (failed || (name == NULL)) {
if (CUR == '>')
NEXT;
return;
@@ -4577,11 +4577,11 @@
#endif
} else {
ctxt->instate = XML_PARSER_MISC;
- }
#ifdef DEBUG_PUSH
- xmlGenericError(xmlGenericErrorContext,
- "HPP: entering MISC\n");
+ xmlGenericError(xmlGenericErrorContext,
+ "HPP: entering MISC\n");
#endif
+ }
break;
case XML_PARSER_MISC:
SKIP_BLANKS;
@@ -4739,7 +4739,7 @@
break;
case XML_PARSER_START_TAG: {
const xmlChar *name, *oldname;
- int depth = ctxt->nameNr;
+ int failed;
const htmlElemDesc * info;
if (avail < 2)
@@ -4766,11 +4766,9 @@
(htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
goto done;
- oldname = ctxt->name;
- htmlParseStartTag(ctxt);
+ failed = htmlParseStartTag(ctxt);
name = ctxt->name;
- if (((depth == ctxt->nameNr) &&
- (xmlStrEqual(oldname, ctxt->name))) ||
+ if (failed ||
(name == NULL)) {
if (CUR == '>')
NEXT;
diff --git a/result/HTML/repeat.html b/result/HTML/repeat.html
new file mode 100644
index 0000000..550c66f
--- /dev/null
+++ b/result/HTML/repeat.html
@@ -0,0 +1,5 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html><body>
+<td></td>
+<td><!-- <a><b> --></td>
+</body></html>
diff --git a/result/HTML/repeat.html.err b/result/HTML/repeat.html.err
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/result/HTML/repeat.html.err
diff --git a/result/HTML/repeat.html.sax b/result/HTML/repeat.html.sax
new file mode 100644
index 0000000..2dbf35c
--- /dev/null
+++ b/result/HTML/repeat.html.sax
@@ -0,0 +1,14 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.startElement(html)
+SAX.startElement(body)
+SAX.startElement(td)
+SAX.endElement(td)
+SAX.startElement(td)
+SAX.comment( <a><b> )
+SAX.ignorableWhitespace(
+, 1)
+SAX.endElement(td)
+SAX.endElement(body)
+SAX.endElement(html)
+SAX.endDocument()
diff --git a/test/HTML/repeat.html b/test/HTML/repeat.html
new file mode 100644
index 0000000..d6d6f97
--- /dev/null
+++ b/test/HTML/repeat.html
@@ -0,0 +1 @@
+<td><td><!-- <a><b> -->