Upgrade libxml2 to c41bc10da31a4a4e5416270110b6d1b79606cad1

Test: make
Change-Id: Ic025f2f5cd8dbadc8bf3a446b941c403931de3f1

commit: ecdab2a6c48b5cf44e56f0d070512bcc40c21257 [log] [tgz]
author: Elliott Hughes <enh@google.com> Wed Feb 23 14:33:50 2022 -0800
committer: Elliott Hughes <enh@google.com> Wed Feb 23 15:19:49 2022 -0800
tree: 66a82a6eeaf8e48f33da992b008050278fd41be0
parent: 63686df8cf3fdf7d766695f55876a4968c2e74cd [diff] [blame]
diff --git a/HTMLparser.c b/HTMLparser.c
index b56363a..e72f418 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c

@@ -2517,6 +2517,8 @@
     if ((ExternalID != NULL) ||
 	(URI != NULL))
 	xmlCreateIntSubset(cur, BAD_CAST "html", ExternalID, URI);
+    if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue))
+	xmlRegisterNodeDefaultValue((xmlNodePtr)cur);
     return(cur);
 }
 
@@ -3045,7 +3047,7 @@
         NEXT;
     }
 
-    if (CUR != '"') {
+    if (CUR != quote) {
         htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
                      "Unfinished PubidLiteral\n", NULL, NULL);
     } else {
@@ -3958,14 +3960,6 @@
 	htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
 	             "htmlParseStartTag: invalid element name\n",
 		     NULL, NULL);
-	/* if recover preserve text on classic misconstructs */
-	if ((ctxt->recovery) && ((IS_BLANK_CH(CUR)) || (CUR == '<') ||
-	    (CUR == '=') || (CUR == '>') || (((CUR >= '0') && (CUR <= '9'))))) {
-	    htmlParseCharDataInternal(ctxt, '<');
-	    return(-1);
-	}
-
-
 	/* Dump the bogus tag like browsers do */
 	while ((CUR != 0) && (CUR != '>') &&
                (ctxt->instate != XML_PARSER_EOF))
@@ -4418,9 +4412,15 @@
 	    /*
 	     * Third case :  a sub-element.
 	     */
-	    else if (CUR == '<') {
+	    else if ((CUR == '<') && IS_ASCII_LETTER(NXT(1))) {
 		htmlParseElement(ctxt);
 	    }
+	    else if (CUR == '<') {
+                if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
+                    (ctxt->sax->characters != NULL))
+                    ctxt->sax->characters(ctxt->userData, BAD_CAST "<", 1);
+                NEXT;
+	    }
 
 	    /*
 	     * Fourth case : a reference. If if has not been resolved,
@@ -4817,13 +4817,19 @@
 	    /*
 	     * Third case :  a sub-element.
 	     */
-	    else if (CUR == '<') {
+	    else if ((CUR == '<') && IS_ASCII_LETTER(NXT(1))) {
 		htmlParseElementInternal(ctxt);
 		if (currentNode != NULL) xmlFree(currentNode);
 
 		currentNode = xmlStrdup(ctxt->name);
 		depth = ctxt->nameNr;
 	    }
+	    else if (CUR == '<') {
+                if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
+                    (ctxt->sax->characters != NULL))
+                    ctxt->sax->characters(ctxt->userData, BAD_CAST "<", 1);
+                NEXT;
+            }
 
 	    /*
 	     * Fourth case : a reference. If if has not been resolved,
@@ -5104,7 +5110,7 @@
     ctxt->linenumbers = xmlLineNumbersDefaultValue;
     ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
     ctxt->html = 1;
-    ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
+    ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
     ctxt->vctxt.userData = ctxt;
     ctxt->vctxt.error = xmlParserValidityError;
     ctxt->vctxt.warning = xmlParserValidityWarning;
@@ -5185,6 +5191,7 @@
 
     input = xmlNewInputStream(ctxt);
     if (input == NULL) {
+	xmlFreeParserInputBuffer(buf);
 	xmlFreeParserCtxt(ctxt);
 	return(NULL);
     }
@@ -5989,36 +5996,22 @@
 				"HPP: entering END_TAG\n");
 #endif
 			break;
-		    } else if (cur == '<') {
+		    } else if ((cur == '<') && IS_ASCII_LETTER(next)) {
                         if ((!terminate) && (next == 0))
                             goto done;
-                        /*
-                         * Only switch to START_TAG if the next character
-                         * starts a valid name. Otherwise, htmlParseStartTag
-                         * might return without consuming all characters
-                         * up to the final '>'.
-                         */
-                        if ((IS_ASCII_LETTER(next)) ||
-                            (next == '_') || (next == ':') || (next == '.')) {
-                            ctxt->instate = XML_PARSER_START_TAG;
-                            ctxt->checkIndex = 0;
+                        ctxt->instate = XML_PARSER_START_TAG;
+                        ctxt->checkIndex = 0;
 #ifdef DEBUG_PUSH
-                            xmlGenericError(xmlGenericErrorContext,
-                                    "HPP: entering START_TAG\n");
+                        xmlGenericError(xmlGenericErrorContext,
+                                "HPP: entering START_TAG\n");
 #endif
-                        } else {
-                            htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
-                                         "htmlParseTryOrFinish: "
-                                         "invalid element name\n",
-                                         NULL, NULL);
-                            htmlCheckParagraph(ctxt);
-                            if ((ctxt->sax != NULL) &&
-                                (ctxt->sax->characters != NULL))
-                                ctxt->sax->characters(ctxt->userData,
-                                                      in->cur, 1);
-                            NEXT;
-                        }
 			break;
+		    } else if (cur == '<') {
+                        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
+                            (ctxt->sax->characters != NULL))
+			    ctxt->sax->characters(ctxt->userData,
+						  BAD_CAST "<", 1);
+                        NEXT;
 		    } else {
 		        /*
 			 * check that the text sequence is complete
@@ -6999,7 +6992,9 @@
  * @encoding:  the document encoding, or NULL
  * @options:  a combination of htmlParserOption(s)
  *
- * parse an XML from a file descriptor and build a tree.
+ * parse an HTML from a file descriptor and build a tree.
+ * NOTE that the file descriptor will not be closed when the
+ *      reader is closed or reset.
  *
  * Returns the resulting document tree
  */
@@ -7008,17 +7003,17 @@
 {
     htmlParserCtxtPtr ctxt;
     xmlParserInputBufferPtr input;
-    xmlParserInputPtr stream;
+    htmlParserInputPtr stream;
 
     if (fd < 0)
         return (NULL);
-    xmlInitParser();
 
     xmlInitParser();
     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
     if (input == NULL)
         return (NULL);
-    ctxt = xmlNewParserCtxt();
+    input->closecallback = NULL;
+    ctxt = htmlNewParserCtxt();
     if (ctxt == NULL) {
         xmlFreeParserInputBuffer(input);
         return (NULL);
@@ -7026,7 +7021,7 @@
     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
     if (stream == NULL) {
         xmlFreeParserInputBuffer(input);
-	xmlFreeParserCtxt(ctxt);
+	htmlFreeParserCtxt(ctxt);
         return (NULL);
     }
     inputPush(ctxt, stream);
@@ -7282,6 +7277,4 @@
     return (htmlDoRead(ctxt, URL, encoding, options, 1));
 }
 
-#define bottom_HTMLparser
-#include "elfgcchack.h"
 #endif /* LIBXML_HTML_ENABLED */
commit	ecdab2a6c48b5cf44e56f0d070512bcc40c21257	[log] [tgz]
author	Elliott Hughes <enh@google.com>	Wed Feb 23 14:33:50 2022 -0800
committer	Elliott Hughes <enh@google.com>	Wed Feb 23 15:19:49 2022 -0800
tree	66a82a6eeaf8e48f33da992b008050278fd41be0
parent	63686df8cf3fdf7d766695f55876a4968c2e74cd [diff] [blame]