applied a patch from William Brack about the problem of parsing very large
* HTMLparser.c: applied a patch from William Brack fixing
the problem of parsing very large HTML instances with comments,
as raised by Nick Kew
Daniel
diff --git a/HTMLparser.c b/HTMLparser.c
index 2ee458c..e16102d 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -4317,6 +4317,7 @@
* @first: the first char to lookup
* @next: the next char to lookup or zero
* @third: the next char to lookup or zero
+ * @iscomment: flag to force checking inside comments
*
* Try to find if a sequence (first, next, third) or just (first next) or
* (first) is available in the input stream.
@@ -4330,7 +4331,7 @@
*/
static int
htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
- xmlChar next, xmlChar third) {
+ xmlChar next, xmlChar third, int iscomment) {
int base, len;
htmlParserInputPtr in;
const xmlChar *buf;
@@ -4353,13 +4354,13 @@
if (third) len -= 2;
else if (next) len --;
for (;base < len;base++) {
- if (!incomment && (base + 4 < len)) {
+ if (!incomment && (base + 4 < len) && !iscomment) {
if ((buf[base] == '<') && (buf[base + 1] == '!') &&
(buf[base + 2] == '-') && (buf[base + 3] == '-')) {
incomment = 1;
+ /* do not increment past <! - some people use <!--> */
+ base += 2;
}
- /* do not increment past <!, some people use <!--> */
- base += 2;
}
if (incomment) {
if (base + 3 > len)
@@ -4540,7 +4541,7 @@
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
(UPP(8) == 'E')) {
if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
+ (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -4573,7 +4574,7 @@
if ((cur == '<') && (next == '!') &&
(in->cur[2] == '-') && (in->cur[3] == '-')) {
if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
+ (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -4587,7 +4588,7 @@
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
(UPP(8) == 'E')) {
if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
+ (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -4623,7 +4624,7 @@
if ((cur == '<') && (next == '!') &&
(in->cur[2] == '-') && (in->cur[3] == '-')) {
if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
+ (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -4660,7 +4661,7 @@
if ((cur == '<') && (next == '!') &&
(in->cur[2] == '-') && (in->cur[3] == '-')) {
if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
+ (htmlParseLookupSequence(ctxt, '-', '-', '>', 1) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -4710,7 +4711,7 @@
break;
}
if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
+ (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
goto done;
oldname = xmlStrdup(ctxt->name);
@@ -4879,7 +4880,7 @@
* Handle SCRIPT/STYLE separately
*/
if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '<', '/', 0) < 0))
+ (htmlParseLookupSequence(ctxt, '<', '/', 0, 0) < 0))
goto done;
htmlParseScript(ctxt);
if ((cur == '<') && (next == '/')) {
@@ -4901,7 +4902,7 @@
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
(UPP(8) == 'E')) {
if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
+ (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
goto done;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
@@ -4911,7 +4912,8 @@
} else if ((cur == '<') && (next == '!') &&
(in->cur[2] == '-') && (in->cur[3] == '-')) {
if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
+ (htmlParseLookupSequence(
+ ctxt, '-', '-', '>', 1) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -4939,7 +4941,7 @@
break;
} else if (cur == '&') {
if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
+ (htmlParseLookupSequence(ctxt, ';', 0, 0, 0) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -4957,7 +4959,8 @@
if ((ctxt->inputNr == 1) &&
(avail < HTML_PARSER_BIG_BUFFER_SIZE)) {
if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
+ (htmlParseLookupSequence(
+ ctxt, '<', 0, 0, 0) < 0))
goto done;
}
ctxt->checkIndex = 0;
@@ -4985,7 +4988,7 @@
if (avail < 2)
goto done;
if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
+ (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
goto done;
htmlParseEndTag(ctxt);
if (ctxt->nameNr == 0) {