- HTMLparser.c: Patch from Jonas Borgström
(htmlGetEndPriority): New function, returns
the priority of a certain element.
(htmlAutoCloseOnClose): Only close inline elements if they
all have lower or equal priority.
- result/HTML: this of course changed a number of test results.
Daniel
diff --git a/ChangeLog b/ChangeLog
index 2a45a17..7a5516b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+Fri May 11 16:07:13 CEST 2001 Daniel Veillard <Daniel.Veillard@imag.fr>
+
+	* HTMLparser.c: Patch from Jonas Borgström
+	(htmlGetEndPriority): New function, returns 
+	the priority of a certain element.
+	(htmlAutoCloseOnClose): Only close inline elements if they 
+	all have lower or equal priority.
+	* result/HTML: this of course changed a number of test results.
+
 Thu May 10 17:30:22 CEST 2001 Daniel Veillard <Daniel.Veillard@imag.fr>
 
 	* xmlIO.c catalog.c: plugged in the default catalog resolution
diff --git a/HTMLparser.c b/HTMLparser.c
index 870201d..5e79464 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -565,15 +565,32 @@
 };
 
 /*
- * end tags that imply the end of the inside elements
- */
-const char *htmlEndClose[] = {
-"head",
-"body",
-"html",
-NULL
-};
+ * This table is used by the HTML parser to decide what to do with
+ * broken HTML pages. By assigning different priorities to different
+ * elements, the parser can decide how to handle extra end tags.
+ * An end tag is only allowed to close elements with lower or equal
+ * priority.
+ */ 
 
+typedef struct {
+    const char *name;
+    int priority;
+} elementPriority;
+
+const elementPriority htmlEndPriority[] = {
+    {"div",   150},
+    {"td",    160},
+    {"th",    160},
+    {"tr",    170},
+    {"thead", 180},
+    {"tbody", 180},
+    {"tfoot", 180},
+    {"table", 190},
+    {"head",  200},
+    {"body",  200},
+    {"html",  220},
+    {NULL,    100} /* Default priority */
+};
 
 static const char** htmlStartCloseIndex[100];
 static int htmlStartCloseIndexinitialized = 0;
@@ -628,6 +645,23 @@
 }
 
 /**
+ * htmlGetEndPriority:
+ * @name: The name of the element to look up the priority for.
+ * 
+ * Return value: The "endtag" priority.
+ **/
+static int
+htmlGetEndPriority (const xmlChar *name) {
+	int i = 0;
+
+	while ((htmlEndPriority[i].name != NULL) &&
+	       (!xmlStrEqual((const xmlChar *)htmlEndPriority[i].name, name)))
+	    i++;
+
+	return(htmlEndPriority[i].priority);
+}
+
+/**
  * htmlCheckAutoClose:
  * @newtag:  The new tag name
  * @oldtag:  The old tag name
@@ -674,7 +708,7 @@
 htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
     htmlElemDescPtr info;
     xmlChar *oldname;
-    int i, endCloses = 0;
+    int i, priority;
 
 #ifdef DEBUG
     xmlGenericError(xmlGenericErrorContext,"Close of %s stack: %d elements\n", newtag, ctxt->nameNr);
@@ -682,15 +716,20 @@
         xmlGenericError(xmlGenericErrorContext,"%d : %s\n", i, ctxt->nameTab[i]);
 #endif
 
+    priority = htmlGetEndPriority (newtag);
+
     for (i = (ctxt->nameNr - 1);i >= 0;i--) {
+
         if (xmlStrEqual(newtag, ctxt->nameTab[i])) break;
+	/*
+	 * A misplaced end tag can only close elements with lower
+	 * or equal priority, so if we find an element with higher
+	 * priority before we find an element with
+	 * matching name, we just ignore this endtag 
+	 */
+	if (htmlGetEndPriority (ctxt->nameTab[i]) > priority) return;
     }
     if (i < 0) return;
-    for (i = 0; (htmlEndClose[i] != NULL);i++)
-	if (xmlStrEqual(newtag, (const xmlChar *) htmlEndClose[i])) {
-	    endCloses = 1;
-	    break;
-	}
 
     while (!xmlStrEqual(newtag, ctxt->name)) {
 	info = htmlTagLookup(ctxt->name);
@@ -707,8 +746,6 @@
 		 "Opening and ending tag mismatch: %s and %s\n",
 				 newtag, ctxt->name);
 	    ctxt->wellFormed = 0;
-	} else if (endCloses == 0) {
-	    return;
 	}
 	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
 	    ctxt->sax->endElement(ctxt->userData, ctxt->name);
diff --git a/result/HTML/doc3.htm b/result/HTML/doc3.htm
index fd9f514..3ed7223 100644
--- a/result/HTML/doc3.htm
+++ b/result/HTML/doc3.htm
@@ -97,8 +97,7 @@
 </td></tr></tbody></table>
 </center></td></tr></tbody></table></td></tr>
 </tbody></table>
-<table bgcolor="#003399" border="0" cellspacing="6" width="80%"><tbody>
-<tr>
+<table bgcolor="#003399" border="0" cellspacing="6" width="80%"><tbody><tr>
 <td bgcolor="black" valign="top" width="10%"><table border="0" cellpadding="3" cellspacing="0" width="100%"><tbody><tr><td width="100%">
 <img height="1" src="doc3_files/spacer.gif" width="111">
 <br>
@@ -784,8 +783,7 @@
 </td></tr></tbody></table>
 </center>
 </td>
-<td bgcolor="silver" valign="top" width="10%">
-<center>
+<td bgcolor="silver" valign="top" width="10%"><center>
 <p>
 <table bgcolor="silver" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td colstart="1"><center>
 <!--	<FORM ACTION="/cgi-bin/subscribe.pl" METHOD="POST" ENCTYPE="application/x-www-form-urlencoded">

@@ -853,11 +851,10 @@
 							}

 							// -->

 							</script>
-<b><noscript>
+<b><noscript></noscript></b>
 <a href="http://www.goto.com/d/search/ssn/?fromGIF=true" target="_blank"><img align="bottom" border="0" height="90" ismap src="doc3_files/100x90.gif" width="100"></a>
 <b><a href="http://www.goto.com/d/search/ssn/?fromGIF=true" target="_blank"></a></b>
 <b></b>
-</noscript></b>
 <b><!-- END GoTo.com Search Box --></b>
 <!-- Pricewatch Search Box --><form action="http://www.pricewatch.com/search/search.asp" method="get" target="_Blank"><center><p>
 <b><font color="white" face="ARIAL, HELVETICA" size="1">PC Price 

@@ -880,10 +877,9 @@
 </a>
 </td></tr></tbody></table>
 <table bgcolor="silver" border="0" cellpadding="0" cellspacing="0" height="100%" width="100%"><tbody><tr><td width="100%"> </td></tr></tbody></table>
-</center>
+</center></td>
+</tr></tbody></table>
 <!--	</TABLE>--><center></center>
-</td>
-</tr>
 <tr><td colspan="3" valign="TOP" height="70"> </td></tr>
 <table border="0" width="780"><tbody>
 <tr><td width="780"><p align="center">
@@ -893,8 +889,7 @@
 </p></td></tr>
 <!--	<TR>		<TD WIDTH="780">			<P ALIGN="CENTER"><FONT SIZE="1" COLOR="#999999" FACE="Verdana,arial">Site design by Tim Brinkley</FONT>		</TD>	</TR> -->
 </tbody></table>
-<script> window.open=NS_ActualOpen; </script>
-</tbody></table>
 </div>
+<script> window.open=NS_ActualOpen; </script>
 </body>
 </html>
diff --git a/result/HTML/doc3.htm.err b/result/HTML/doc3.htm.err
index 86783ea..949fefb 100644
--- a/result/HTML/doc3.htm.err
+++ b/result/HTML/doc3.htm.err
@@ -37,12 +37,9 @@
 ./test/HTML/doc3.htm:742: error: Unexpected end tag : p
 =7708"></a></IFRAME></CENTER></LI></FONT></TD></TR></TBODY></TABLE></CENTER></P
                                                                               ^
-./test/HTML/doc3.htm:767: error: Opening and ending tag mismatch: font and form
+./test/HTML/doc3.htm:767: error: Unexpected end tag : form
             archive</A></FONT> </FORM></CENTER></TD></TR></TBODY></TABLE><!--		
-                              ^
-./test/HTML/doc3.htm:767: error: Opening and ending tag mismatch: center and font
-            archive</A></FONT> </FORM></CENTER></TD></TR></TBODY></TABLE><!--		
-                                               ^
+                                      ^
 ./test/HTML/doc3.htm:790: error: Unexpected end tag : iframe
 							document.write("42DF8478957377></IFRAME>");
                                                ^
@@ -55,13 +52,10 @@
 ./test/HTML/doc3.htm:806: error: Unexpected end tag : a
 							document.write("ype=gif&size=100x90></A>");
                                                ^
-./test/HTML/doc3.htm:810: error: Opening and ending tag mismatch: b and noscript
-            <B><NOSCRIPT></B><A 
-                             ^
 ./test/HTML/doc3.htm:815: error: Unexpected end tag : a
             </A></A></B><B></NOSCRIPT></B><B><!-- END GoTo.com Search Box --></
                     ^
-./test/HTML/doc3.htm:815: error: Opening and ending tag mismatch: noscript and b
+./test/HTML/doc3.htm:815: error: Unexpected end tag : noscript
             </A></A></B><B></NOSCRIPT></B><B><!-- END GoTo.com Search Box --></
                                       ^
 ./test/HTML/doc3.htm:821: error: Opening and ending tag mismatch: form and center
@@ -76,18 +70,12 @@
 ./test/HTML/doc3.htm:834: error: Unexpected end tag : p
 width="100%">&nbsp;</TD></TR></TBODY></TABLE></P></CENTER></TR></TBODY></TABLE>
                                                  ^
-./test/HTML/doc3.htm:834: error: Opening and ending tag mismatch: tr and td
-width="100%">&nbsp;</TD></TR></TBODY></TABLE></P></CENTER></TR></TBODY></TABLE>
-                                                               ^
-./test/HTML/doc3.htm:834: error: Opening and ending tag mismatch: tbody and td
-width="100%">&nbsp;</TD></TR></TBODY></TABLE></P></CENTER></TR></TBODY></TABLE>
-                                                                       ^
-./test/HTML/doc3.htm:834: error: Opening and ending tag mismatch: table and td
-width="100%">&nbsp;</TD></TR></TBODY></TABLE></P></CENTER></TR></TBODY></TABLE>
-                                                                              ^
-./test/HTML/doc3.htm:836: error: Opening and ending tag mismatch: table and tbody
+./test/HTML/doc3.htm:835: error: Unexpected end tag : td
+<CENTER></CENTER></TD></TR><TR><TD COLSPAN="3" VALIGN="TOP" 
+                      ^
+./test/HTML/doc3.htm:835: error: Unexpected end tag : tr
+<CENTER></CENTER></TD></TR><TR><TD COLSPAN="3" VALIGN="TOP" 
+                           ^
+./test/HTML/doc3.htm:836: error: Unexpected end tag : table
 HEIGHT="70">&nbsp;</TD> </TR></TABLE>
                                     ^
-./test/HTML/doc3.htm:844: error: Opening and ending tag mismatch: div and tbody
-,arial">Site design by Tim Brinkley</FONT>		</TD>	</TR> --></TBODY></TABLE></DI
-                                                                              ^
diff --git a/result/HTML/doc3.htm.sax b/result/HTML/doc3.htm.sax
index 7c35661..28d6136 100644
--- a/result/HTML/doc3.htm.sax
+++ b/result/HTML/doc3.htm.sax
Binary files differ
diff --git a/result/HTML/wired.html b/result/HTML/wired.html
index e62d5c5..6653097 100644
--- a/result/HTML/wired.html
+++ b/result/HTML/wired.html
@@ -127,17 +127,17 @@
 <tr><td bgcolor="#CCFFCC"><font size="1" face="Verdana, Arial, Helvetica, sans-serif" color="#000000"><a href="/news/news/reuters/sports/">Sports</a></font></td></tr>
 <tr><td bgcolor="#99FF99"><font size="1" face="Verdana, Arial, Helvetica, sans-serif" color="#000000"><a href="/news/news/reuters/business/">Finance</a></font></td></tr>
 <!-- End upper left nav --><!-- Begin lower Left Nav --><tr><td bgcolor="#FF0000"><font face="Verdana, Arial, Helvetica, sans-serif" color="#FFFFFF"><b><font size="1">FREE DELIVERY</font></b></font></td></tr>
-<tr><td bgcolor="#99FF99"><table cellspacing="0" cellpadding="0" border="0">
-<tr><td bgcolor="#99FF99"><form action="http://r.hotwired.com/r/hw_wm_r_nav_nwsltr/http://perl.hotwired.com/massmail/cgiParser.cgi" method="get" target="_top">
+<tr><td bgcolor="#99FF99"><table cellspacing="0" cellpadding="0" border="0"><tr>
+<td bgcolor="#99FF99"><form action="http://r.hotwired.com/r/hw_wm_r_nav_nwsltr/http://perl.hotwired.com/massmail/cgiParser.cgi" method="get" target="_top">
 <input type="hidden" name="success_page" value="http://www.hotwired.com/email/signup/wirednews-ascii.html">
 <input type="hidden" name="failure_page" value="http://www.hotwired.com/email/signup/wirednews-ascii.html">
 <input type="hidden" name="LIST" value="wn_ascii">
 <input type="hidden" name="SOURCE" value="other">
 <input type="hidden" name="ACTION" value="subscribe">
 <input type="TEXT" name="from" size="10" value="enter email">&nbsp;
- 
-		<td valign="top" bgcolor="#99FF99"><input type="SUBMIT" name="SUBMIT" value="GO"></td>
-</form></td></tr>
+</form></td>
+<td valign="top" bgcolor="#99FF99"><input type="SUBMIT" name="SUBMIT" value="GO"></td>
+</tr></table></td></tr>
 <tr><td bgcolor="#FF0000"><font face="Verdana, Arial, Helvetica, sans-serif" color="#FFFFFF"><b><font size="1">STOCKS</font></b></font></td></tr>
 <tr><td bgcolor="#99FF99"><font face="Verdana, Arial, Helvetica, sans-serif" size="1">Get Quote:</font></td></tr>
 <tr><td bgcolor="#99FF99" marginwidth="0" marginheight="0"><form method="get" action="http://r.wired.com/r/10020/http://stocks.wired.com/stocks_quotes.asp">
@@ -164,8 +164,7 @@
 </font></td></tr>
 <tr><td bgcolor="#99FF99"><font size="1" face="Verdana, Arial, Helvetica, sans-serif" color="#000000"><a href="http://redirect.wired.com/redir/53/http://stocks.wired.com/stocks_portfolios.asp">Portfolios</a></font></td></tr>
 <!-- BEGIN B&N spot --><tr><td bgcolor="#FF0000"><font size="1" face="Verdana, Arial, Helvetica, sans-serif" color="#FFFFFF"><b>FIND A BOOK</b></font></td></tr>
-<tr><td bgcolor="#CCFFCC">
-<table cellspacing="0" cellpadding="0" border="0" width="145">
+<tr><td bgcolor="#CCFFCC"><table cellspacing="0" cellpadding="0" border="0" width="145">
 <tr><td bgcolor="#CCFFCC"><form action="http://r.wired.com/r/wn_nav_c_bn/http://barnesandnoble.bfast.com/booklink/click">
 <input type="hidden" name="sourceid" value="383471">
 <input type="hidden" name="categoryid" value="categorydropdown">
@@ -207,6 +206,7 @@
 
  </option>
 </select></font>
+</form></td></tr>
 <tr align="left" valign="top"><td valign="top" bgcolor="#CCFFCC">
 <input type="submit" value="GO">
 <img src="http://barnesandnoble.bfast.com/booklink/serve?sourceid=383471&amp;is_search=Y" border="0" align="top">
@@ -219,7 +219,7 @@
 </font>
 <br clear="all">
 </p></td></tr>
-</form></td></tr>
+</table></td></tr>
 <!-- END B&N spot --><!-- BEGIN MAGAZINE SPOT --><tr><td bgcolor="#000000"><font color="#FFFFFF" face="Verdana, Arial, Helvetica, sans-serif" size="1"><b>WIRED 
       MAGAZINE </b></font></td></tr>
 <tr><td bgcolor="#FFFF99" align="CENTER"><font face="verdana, arial, helvetica, sans-serif" size="1">
@@ -629,8 +629,5 @@
 </tr>
 </table>
 <br>
-</td></tr>
-</table></td></tr>
-</table>
 </body>
 </html>
diff --git a/result/HTML/wired.html.err b/result/HTML/wired.html.err
index ecf58e8..82415e0 100644
--- a/result/HTML/wired.html.err
+++ b/result/HTML/wired.html.err
@@ -181,27 +181,15 @@
 ./test/HTML/wired.html:97: error: htmlParseEntityRef: expecting ';'
 lue="http://www.hotbot.com/?SM=MC&DV=0&LG=any&RD=RG&DC=10&DE=2&_v=2&OPs=MDRTP&M
                                                                               ^
-./test/HTML/wired.html:165: error: Opening and ending tag mismatch: td and form
-</td> 
-     ^
-./test/HTML/wired.html:170: error: Opening and ending tag mismatch: tr and form
+./test/HTML/wired.html:170: error: Unexpected end tag : form
 	</tr>    </form>
-      ^
-./test/HTML/wired.html:171: error: Opening and ending tag mismatch: table and td
-</table></td>
-        ^
-./test/HTML/wired.html:244: error: Opening and ending tag mismatch: td and form
- </select></font></td></tr>
-                      ^
-./test/HTML/wired.html:244: error: Opening and ending tag mismatch: tr and form
- </select></font></td></tr>
-                          ^
+                ^
 ./test/HTML/wired.html:248: error: htmlParseEntityRef: expecting ';'
 MG SRC="http://barnesandnoble.bfast.com/booklink/serve?sourceid=383471&is_searc
                                                                               ^
-./test/HTML/wired.html:266: error: Opening and ending tag mismatch: table and td
-        </table>
-               ^
+./test/HTML/wired.html:265: error: Unexpected end tag : form
+        </tr>  </form>
+                     ^
 ./test/HTML/wired.html:346: error: Opening and ending tag mismatch: td and font
 </td>
     ^
diff --git a/result/HTML/wired.html.sax b/result/HTML/wired.html.sax
index b90ae2d..0edd57e 100644
--- a/result/HTML/wired.html.sax
+++ b/result/HTML/wired.html.sax
@@ -778,7 +778,8 @@
 SAX.characters(&nbsp;, 2)
 SAX.characters(
 , 1)
-SAX.error: Opening and ending tag mismatch: td and form
+SAX.endElement(form)
+SAX.endElement(td)
 SAX.characters( 
 		, 4)
 SAX.startElement(td, valign='top', bgcolor='#99FF99')
@@ -792,12 +793,12 @@
 SAX.endElement(td)
 SAX.characters(
 	, 2)
-SAX.error: Opening and ending tag mismatch: tr and form
+SAX.endElement(tr)
 SAX.characters(    , 4)
-SAX.endElement(form)
+SAX.error: Unexpected end tag : form
 SAX.characters(
 , 1)
-SAX.error: Opening and ending tag mismatch: table and td
+SAX.endElement(table)
 SAX.endElement(td)
 SAX.characters(
   , 3)
@@ -1074,8 +1075,9 @@
 SAX.endElement(option)
 SAX.endElement(select)
 SAX.endElement(font)
-SAX.error: Opening and ending tag mismatch: td and form
-SAX.error: Opening and ending tag mismatch: tr and form
+SAX.endElement(form)
+SAX.endElement(td)
+SAX.endElement(tr)
 SAX.characters(
  , 2)
 SAX.startElement(tr, align='left', valign='top')
@@ -1137,10 +1139,10 @@
         , 9)
 SAX.endElement(tr)
 SAX.characters(  , 2)
-SAX.endElement(form)
+SAX.error: Unexpected end tag : form
 SAX.characters(
         , 9)
-SAX.error: Opening and ending tag mismatch: table and td
+SAX.endElement(table)
 SAX.characters(
 
 , 2)
@@ -1461,14 +1463,14 @@
 
 , 2)
 SAX.endElement(table)
-SAX.characters(
+SAX.ignorableWhitespace(
 
 , 2)
 SAX.comment( end lower left side Navigation )
-SAX.characters(
+SAX.ignorableWhitespace(
 , 1)
 SAX.comment( CONTENT TABLE )
-SAX.characters(
+SAX.ignorableWhitespace(
 
 , 2)
 SAX.startElement(table, border='0', width='447', cellspacing='0', cellpadding='0', bordercolor='#66FF00')
@@ -2828,20 +2830,14 @@
 SAX.characters(
 , 1)
 SAX.endElement(table)
-SAX.characters(
+SAX.ignorableWhitespace(
 
 
 , 3)
 SAX.startElement(br)
 SAX.endElement(br)
-SAX.characters(
+SAX.ignorableWhitespace(
 , 1)
-SAX.endElement(td)
-SAX.endElement(tr)
-SAX.endElement(table)
-SAX.endElement(td)
-SAX.endElement(tr)
-SAX.endElement(table)
 SAX.endElement(body)
 SAX.ignorableWhitespace(
 , 1)