Applied the last patch from Gary Coady for #304637 changing the behaviour when text nodes are found in body
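* HTMLparser.c: Applied the last patch from Gary Coady for #304637
changing the behaviour when whitespace-only text nodes are found in body:
they are now stripped only when the document declares the HTML 4.01 or
HTML 4 strict DTD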
* result/HTML/*: this changes the output of some tests
Daniel
diff --git a/HTMLparser.c b/HTMLparser.c
index 6b8b562..d11ae08 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -964,7 +964,6 @@
static const char *htmlNoContentElements[] = {
"html",
"head",
- "body",
NULL
};
@@ -2042,6 +2041,7 @@
unsigned int i;
int j;
xmlNodePtr lastChild;
+ xmlDtdPtr dtd;
for (j = 0;j < len;j++)
if (!(IS_BLANK_CH(str[j]))) return(0);
@@ -2054,8 +2054,17 @@
return(1);
if (xmlStrEqual(ctxt->name, BAD_CAST"head"))
return(1);
- if (xmlStrEqual(ctxt->name, BAD_CAST"body"))
- return(1);
+
+ /* Only strip CDATA children of the body tag for strict HTML DTDs */
+ if (xmlStrEqual(ctxt->name, BAD_CAST "body") && ctxt->myDoc != NULL) {
+ dtd = xmlGetIntSubset(ctxt->myDoc);
+ if (dtd != NULL && dtd->ExternalID != NULL) {
+ if (!xmlStrcasecmp(dtd->ExternalID, BAD_CAST "-//W3C//DTD HTML 4.01//EN") ||
+ !xmlStrcasecmp(dtd->ExternalID, BAD_CAST "-//W3C//DTD HTML 4//EN"))
+ return(1);
+ }
+ }
+
if (ctxt->node == NULL) return(0);
lastChild = xmlGetLastChild(ctxt->node);
while ((lastChild) && (lastChild->type == XML_COMMENT_NODE))