- HTMLparser.c: Patch from Jonas Borgström
(htmlGetEndPriority): New function, returns
the priority of a certain element.
(htmlAutoCloseOnClose): Only close inline elements if they
all have lower or equal priority.
- result/HTML: this of course changed a number of tests results.
Daniel
diff --git a/HTMLparser.c b/HTMLparser.c
index 870201d..5e79464 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -565,15 +565,32 @@
};
/*
- * end tags that imply the end of the inside elements
- */
-const char *htmlEndClose[] = {
-"head",
-"body",
-"html",
-NULL
-};
+ * This table is used by the htmlparser to know what to do with
+ * broken html pages. By assigning different priorities to different
+ * elements the parser can decide how to handle extra endtags.
+ * Endtags are only allowed to close elements with lower or equal
+ * priority.
+ */
+typedef struct {
+ const char *name;
+ int priority;
+} elementPriority;
+
+const elementPriority htmlEndPriority[] = {
+ {"div", 150},
+ {"td", 160},
+ {"th", 160},
+ {"tr", 170},
+ {"thead", 180},
+ {"tbody", 180},
+ {"tfoot", 180},
+ {"table", 190},
+ {"head", 200},
+ {"body", 200},
+ {"html", 220},
+ {NULL, 100} /* Default priority */
+};
static const char** htmlStartCloseIndex[100];
static int htmlStartCloseIndexinitialized = 0;
@@ -628,6 +645,23 @@
}
/**
+ * htmlGetEndPriority:
+ * @name: The name of the element to look up the priority for.
+ *
+ * Return value: The "endtag" priority.
+ **/
+static int
+htmlGetEndPriority (const xmlChar *name) {
+ int i = 0;
+
+ while ((htmlEndPriority[i].name != NULL) &&
+ (!xmlStrEqual((const xmlChar *)htmlEndPriority[i].name, name)))
+ i++;
+
+ return(htmlEndPriority[i].priority);
+}
+
+/**
* htmlCheckAutoClose:
* @newtag: The new tag name
* @oldtag: The old tag name
@@ -674,7 +708,7 @@
htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
htmlElemDescPtr info;
xmlChar *oldname;
- int i, endCloses = 0;
+ int i, priority;
#ifdef DEBUG
xmlGenericError(xmlGenericErrorContext,"Close of %s stack: %d elements\n", newtag, ctxt->nameNr);
@@ -682,15 +716,20 @@
xmlGenericError(xmlGenericErrorContext,"%d : %s\n", i, ctxt->nameTab[i]);
#endif
+ priority = htmlGetEndPriority (newtag);
+
for (i = (ctxt->nameNr - 1);i >= 0;i--) {
+
if (xmlStrEqual(newtag, ctxt->nameTab[i])) break;
+ /*
+ * A missplaced endtagad can only close elements with lower
+ * or equal priority, so if we find an element with higher
+ * priority before we find an element with
+ * matching name, we just ignore this endtag
+ */
+ if (htmlGetEndPriority (ctxt->nameTab[i]) > priority) return;
}
if (i < 0) return;
- for (i = 0; (htmlEndClose[i] != NULL);i++)
- if (xmlStrEqual(newtag, (const xmlChar *) htmlEndClose[i])) {
- endCloses = 1;
- break;
- }
while (!xmlStrEqual(newtag, ctxt->name)) {
info = htmlTagLookup(ctxt->name);
@@ -707,8 +746,6 @@
"Opening and ending tag mismatch: %s and %s\n",
newtag, ctxt->name);
ctxt->wellFormed = 0;
- } else if (endCloses == 0) {
- return;
}
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
ctxt->sax->endElement(ctxt->userData, ctxt->name);