More cleanup of the HTML parser to force it not to bypass SAX. Ready for 2.1.1, it seems. (Daniel)

commit: d83eb8212e0d9f4595b0bd41c200f2ac9b189b1b [log] [tgz]
author: Daniel Veillard <veillard@src.gnome.org> Fri Jun 30 18:39:56 2000 +0000
committer: Daniel Veillard <veillard@src.gnome.org> Fri Jun 30 18:39:56 2000 +0000
tree: 07c87d73dd749ccbc2aa34924a6df956ec1c7aac
parent: 3f6f7f64ce4c8e4d4a9b7dd4750bc24203a78dce [diff]
diff --git a/ChangeLog b/ChangeLog
index 79d16fe..c47bdb0 100644
--- a/ChangeLog
+++ b/ChangeLog

@@ -1,3 +1,8 @@
+Fri Jun 30 20:29:08 MEST 2000
+
+	* HTMLparser.c HTMLtree.c SAX.c valid.c tree.h : more cleanup
+	  of the HTML parser to force it to not bypass SAX
+
 Fri Jun 30 11:19:59 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
 
 	* win32config.h.in: updated

diff --git a/HTMLparser.c b/HTMLparser.c
index 472d2cf..375a038 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c

@@ -618,7 +618,7 @@
  */
 void
 htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *new) {
-    if (!strcmp(new, "html"))
+    if (!xmlStrcmp(new, BAD_CAST"html"))
 	return;
     if (ctxt->nameNr <= 0) {
 #ifdef DEBUG
@@ -628,12 +628,15 @@
 	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
 	    ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL);
     }
-    if ((!strcmp(new, "body")) || (!strcmp(new, "head")))
+    if ((!xmlStrcmp(new, BAD_CAST"body")) || (!xmlStrcmp(new, BAD_CAST"head")))
         return;
     if (ctxt->nameNr <= 1) {
-	if ((!strcmp(new, "script")) || (!strcmp(new, "style")) ||
-	    (!strcmp(new, "meta")) || (!strcmp(new, "link")) ||
-	    (!strcmp(new, "title")) || (!strcmp(new, "base"))) {
+	if ((!xmlStrcmp(new, BAD_CAST"script")) ||
+	    (!xmlStrcmp(new, BAD_CAST"style")) ||
+	    (!xmlStrcmp(new, BAD_CAST"meta")) ||
+	    (!xmlStrcmp(new, BAD_CAST"link")) ||
+	    (!xmlStrcmp(new, BAD_CAST"title")) ||
+	    (!xmlStrcmp(new, BAD_CAST"base"))) {
 	    /* 
 	     * dropped OBJECT ... i you put it first BODY will be
 	     * assumed !
@@ -2152,17 +2155,15 @@
 	    ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
 	ctxt->wellFormed = 0;
         /* We shouldn't try to resynchronize ... */
-    } else {
     }
     NEXT;
 
     /*
-     * Create the document accordingly to the DOCTYPE
+     * Create or update the document accordingly to the DOCTYPE
      */
-    if (ctxt->myDoc != NULL)
-        xmlFreeDoc(ctxt->myDoc);
-    
-    ctxt->myDoc = htmlNewDoc(URI, ExternalID);
+    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
+	(!ctxt->disableSAX))
+	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
 
     /*
      * Cleanup, since we don't use all those identifiers
@@ -2846,13 +2847,6 @@
     SKIP_BLANKS;
 
     /*
-     * Create the document if not done already.
-     */
-    if (ctxt->myDoc == NULL) {
-        ctxt->myDoc = htmlNewDoc(NULL, NULL);
-    }
-
-    /*
      * Time to start parsing the tree itself
      */
     htmlParseContent(ctxt);
@@ -3171,6 +3165,10 @@
 		if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
 		    ctxt->sax->setDocumentLocator(ctxt->userData,
 						  &xmlDefaultSAXLocator);
+		if ((ctxt->sax) && (ctxt->sax->startDocument) &&
+	            (!ctxt->disableSAX))
+		    ctxt->sax->startDocument(ctxt->userData);
+
 		cur = in->cur[0];
 		next = in->cur[1];
 		if ((cur == '<') && (next == '!') &&
@@ -3190,7 +3188,6 @@
 		    fprintf(stderr, "HPP: entering PROLOG\n");
 #endif
                 } else {
-		    ctxt->myDoc = htmlNewDoc(NULL, NULL);
 		    ctxt->instate = XML_PARSER_MISC;
 		}
 #ifdef DEBUG_PUSH

diff --git a/HTMLtree.c b/HTMLtree.c
index 24a90ba..d981ec0 100644
--- a/HTMLtree.c
+++ b/HTMLtree.c

@@ -158,6 +158,8 @@
     /*
      * Special cases.
      */
+    if (cur->type == XML_DTD_NODE)
+	return;
     if (cur->type == XML_HTML_DOCUMENT_NODE) {
 	htmlDocContentDump(buf, (xmlDocPtr) cur);
 	return;

diff --git a/SAX.c b/SAX.c
index 68e2d31..c352a04 100644
--- a/SAX.c
+++ b/SAX.c

@@ -25,6 +25,7 @@
 #include <libxml/xmlIO.h>
 #include <libxml/SAX.h>
 #include <libxml/uri.h>
+#include <libxml/HTMLtree.h>
 
 /* #define DEBUG_SAX */
 /* #define DEBUG_SAX_TREE */
@@ -157,11 +158,22 @@
 	       const xmlChar *ExternalID, const xmlChar *SystemID)
 {
     xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
+    xmlDtdPtr dtd;
 #ifdef DEBUG_SAX
     fprintf(stderr, "SAX.internalSubset(%s, %s, %s)\n",
             name, ExternalID, SystemID);
 #endif
-    xmlCreateIntSubset(ctxt->myDoc, name, ExternalID, SystemID);
+
+    if (ctxt->myDoc == NULL)
+	return;
+    dtd = xmlGetIntSubset(ctxt->myDoc);
+    if (dtd != NULL) {
+	xmlUnlinkNode((xmlNodePtr) dtd);
+	xmlFreeDtd(dtd);
+	ctxt->myDoc->intSubset = NULL;
+    }
+    ctxt->myDoc->intSubset = 
+	xmlCreateIntSubset(ctxt->myDoc, name, ExternalID, SystemID);
 }
 
 /**
@@ -1485,7 +1497,7 @@
  * Default handler for HTML, builds the DOM tree
  */
 xmlSAXHandler htmlDefaultSAXHandler = {
-    NULL,
+    internalSubset,
     NULL,
     NULL,
     NULL,
@@ -1522,7 +1534,7 @@
 void
 htmlDefaultSAXHandlerInit(void)
 {
-    htmlDefaultSAXHandler.internalSubset = NULL;
+    htmlDefaultSAXHandler.internalSubset = internalSubset;
     htmlDefaultSAXHandler.externalSubset = NULL;
     htmlDefaultSAXHandler.isStandalone = NULL;
     htmlDefaultSAXHandler.hasInternalSubset = NULL;

diff --git a/doc/upgrade.html b/doc/upgrade.html
index e7013ba..50aaa6a 100644
--- a/doc/upgrade.html
+++ b/doc/upgrade.html

@@ -48,7 +48,7 @@
     Use <strong>xmlDocGetRootElement(doc)</strong> to get the root element of
     a document. Alternatively if you are sure to not reference Dtds nor have
     PIs or comments before or after the root element s/->root/->children/g
-    will probably do it.    </li>
+    will probably do it.</li>
   <li>The white space issue, this one is more complex, unless special case of
     validating parsing, the line breaks and spaces usually used for indenting
     and formatting the document content becomes significant. So they are
@@ -90,7 +90,7 @@
     <strong>#include&lt;libxml/...></strong> in both cases.</li>
   <li>similar identifiers defined via macros for the child and root fields:
     respectively <strong>xmlChildrenNode</strong> and
-    <strong>xmlRootNode</strong> </li>
+    <strong>xmlRootNode</strong></li>
   <li>a new macro <strong>LIBXML_TEST_VERSION</strong> which should be
     inserted once in the client code</li>
 </ol>
@@ -118,7 +118,7 @@
     <strong>LIBXML_TEST_VERSION</strong> is a fine place).</li>
 </ol>
 
-<p>Following those 3 steps should work. It worked for some of my own code.</p>
+<p>Following those steps should work. It worked for some of my own code.</p>
 
 <p>Let me put some emphasis on the fact that there is far more changes from
 libxml 1.x to 2.x than the ones you may have to patch for. The overall code
@@ -128,6 +128,6 @@
 
 <p><a href="mailto:Daniel.Veillard@w3.org">Daniel Veillard</a></p>
 
-<p>$Id: upgrade.html,v 1.5 2000/05/06 08:11:18 veillard Exp $</p>
+<p>$Id: upgrade.html,v 1.6 2000/06/29 00:43:26 veillard Exp $</p>
 </body>
 </html>

diff --git a/include/libxml/tree.h b/include/libxml/tree.h
index 9c5b280..6c68dc3 100644
--- a/include/libxml/tree.h
+++ b/include/libxml/tree.h

@@ -414,6 +414,7 @@
 					 const xmlChar *name,
 					 const xmlChar *ExternalID,
 					 const xmlChar *SystemID);
+xmlDtdPtr	xmlGetIntSubset		(xmlDocPtr doc);
 void		xmlFreeDtd		(xmlDtdPtr cur);
 xmlNsPtr	xmlNewGlobalNs		(xmlDocPtr doc,
 					 const xmlChar *href,

diff --git a/tree.h b/tree.h
index 9c5b280..6c68dc3 100644
--- a/tree.h
+++ b/tree.h

@@ -414,6 +414,7 @@
 					 const xmlChar *name,
 					 const xmlChar *ExternalID,
 					 const xmlChar *SystemID);
+xmlDtdPtr	xmlGetIntSubset		(xmlDocPtr doc);
 void		xmlFreeDtd		(xmlDtdPtr cur);
 xmlNsPtr	xmlNewGlobalNs		(xmlDocPtr doc,
 					 const xmlChar *href,

diff --git a/valid.c b/valid.c
index 5dc37ed..5235991 100644
--- a/valid.c
+++ b/valid.c

@@ -2031,6 +2031,9 @@
             ((attr->name[1] == 'D') || (attr->name[1] == 'd')) &&
 	    (attr->name[2] == 0)) return(1);
 	 *******************/
+    } else if (doc->type == XML_HTML_DOCUMENT_NODE) {
+	/* TODO @@@ */
+	return(0);    
     } else {
 	xmlAttributePtr attrDecl;
commit	d83eb8212e0d9f4595b0bd41c200f2ac9b189b1b	[log] [tgz]
author	Daniel Veillard <veillard@src.gnome.org>	Fri Jun 30 18:39:56 2000 +0000
committer	Daniel Veillard <veillard@src.gnome.org>	Fri Jun 30 18:39:56 2000 +0000
tree	07c87d73dd749ccbc2aa34924a6df956ec1c7aac
parent	3f6f7f64ce4c8e4d4a9b7dd4750bc24203a78dce [diff]