Mostly HTML generation and parsing enhancements:
- HTMLparser.[ch] testHTML.c: applied the second set of
patches from Wayne Davison <wayned@blorf.net>, adding
htmlEncodeEntities()
- HTMLparser.c: fixed an ignorable white space detection bug
occurring when parsing with SAX only
- result/HTML/*.sax: updated since the output is now HTML
encoded...
Daniel.
diff --git a/ChangeLog b/ChangeLog
index 8671d02..700a90b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+Mon Aug 28 11:58:12 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
+
+ * HTMLparser.[ch] testHTML.c: applied the second set of
+ patches from Wayne Davison <wayned@blorf.net>, adding
+ htmlEncodeEntities()
+ * HTMLparser.c: fixed an ignorable white space detection bug
+ occurring when parsing with SAX only
+ * result/HTML/*.sax: updated since the output is now HTML
+ encoded...
+
Mon Aug 28 00:38:31 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
* HTMLparser.[ch]: applied some of Wayne Davison <wayned@blorf.net>
diff --git a/HTMLparser.c b/HTMLparser.c
index 05ef674..709be8d 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -1341,7 +1341,7 @@
/* assertion: c is a single UTF-4 value */
if (c < 0x80) {
- if (out >= outend)
+ if (out + 1 >= outend)
break;
*out++ = c;
} else {
@@ -1360,7 +1360,7 @@
return(-2);
}
len = strlen(ent->name);
- if (out + 2 + len > outend)
+ if (out + 2 + len >= outend)
break;
*out++ = '&';
memcpy(out, ent->name, len);
@@ -1374,6 +1374,99 @@
return(0);
}
+/**
+ * htmlEncodeEntities:
+ * @out: a pointer to an array of bytes to store the result
+ * @outlen: the length of @out
+ * @in: a pointer to an array of UTF-8 chars
+ * @inlen: the length of @in
+ * @quoteChar: the quote character to escape (' or ") or zero.
+ *
+ * Take a block of UTF-8 chars in and try to convert it to an ASCII
+ * plus HTML entities block of chars out.
+ * Characters below 0x80 are copied unchanged, except for @quoteChar,
+ * the ampersand and the two angle brackets; those and every character
+ * at or above 0x80 are written as an entity reference, using the name
+ * returned by htmlEntityValueLookup() when there is one and a decimal
+ * numeric reference otherwise.
+ * Conversion stops early, still returning 0, when @out has no room for
+ * the next character or when @in ends in the middle of a multi-byte
+ * sequence; the unconverted character is then not counted in @inlen.
+ *
+ * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
+ * (NOTE(review): no path in this function body returns -1).
+ * The value of @inlen after return is the number of octets consumed,
+ * i.e. the input octets of the characters fully converted; it is set
+ * on both the 0 and the -2 return paths.
+ * The value of @outlen after return is the number of octets written
+ * to @out.
+ */
+int
+htmlEncodeEntities(unsigned char* out, int *outlen,
+ const unsigned char* in, int *inlen, int quoteChar) {
+ const unsigned char* processed = in;
+ const unsigned char* outend = out + (*outlen);
+ const unsigned char* outstart = out;
+ const unsigned char* instart = in;
+ const unsigned char* inend = in + (*inlen);
+ unsigned int c, d;
+ int trailing;
+
+ while (in < inend) {
+ d = *in++;
+ if (d < 0x80) { c= d; trailing= 0; }
+ else if (d < 0xC0) {
+ /* trailing (continuation) byte found in leading position */
+ *outlen = out - outstart;
+ *inlen = processed - instart;
+ return(-2);
+ } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
+ else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
+ else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
+ else {
+ /* lead byte of 0xF8 or above: not accepted, no chance for this in ASCII */
+ *outlen = out - outstart;
+ *inlen = processed - instart;
+ return(-2);
+ }
+
+ if (inend - in < trailing)
+ break;
+
+ while (trailing--) {
+ if (((d= *in++) & 0xC0) != 0x80) {
+ *outlen = out - outstart;
+ *inlen = processed - instart;
+ return(-2);
+ }
+ c <<= 6;
+ c |= d & 0x3F;
+ }
+
+ /* assertion: c is a single UCS-4 value */
+ if (c < 0x80 && c != quoteChar && c != '&' && c != '<' && c != '>') {
+ if (out >= outend)
+ break;
+ *out++ = c;
+ } else {
+ htmlEntityDescPtr ent;
+ const char *cp;
+ char nbuf[16];
+ int len;
+
+ /*
+ * Try to lookup a predefined HTML entity for it; when none is
+ * found fall back to a decimal numeric reference built in nbuf.
+ * The output needs len octets plus the leading ampersand and
+ * the trailing semicolon, hence the "2 + len" room check below.
+ */
+ ent = htmlEntityValueLookup(c);
+ if (ent == NULL) {
+ sprintf(nbuf, "#%u", c);
+ cp = nbuf;
+ }
+ else
+ cp = ent->name;
+ len = strlen(cp);
+ if (out + 2 + len > outend)
+ break;
+ *out++ = '&';
+ memcpy(out, cp, len);
+ out += len;
+ *out++ = ';';
+ }
+ processed = in;
+ }
+ *outlen = out - outstart;
+ *inlen = processed - instart;
+ return(0);
+}
/**
* htmlDecodeEntities:
@@ -1555,6 +1648,12 @@
if (CUR == 0) return(1);
if (CUR != '<') return(0);
+ if (ctxt->name == NULL)
+ return(1);
+ if (!xmlStrcmp(ctxt->name, BAD_CAST"head"))
+ return(1);
+ if (!xmlStrcmp(ctxt->name, BAD_CAST"body"))
+ return(1);
if (ctxt->node == NULL) return(0);
lastChild = xmlGetLastChild(ctxt->node);
if (lastChild == NULL) {
diff --git a/HTMLparser.h b/HTMLparser.h
index 97a8b01..5d42f45 100644
--- a/HTMLparser.h
+++ b/HTMLparser.h
@@ -86,6 +86,10 @@
int *outlen,
const unsigned char* in,
int *inlen);
+int htmlEncodeEntities(unsigned char* out,
+ int *outlen,
+ const unsigned char* in,
+ int *inlen, int quoteChar);
/**
* Interfaces for the Push mode
diff --git a/include/libxml/HTMLparser.h b/include/libxml/HTMLparser.h
index 97a8b01..5d42f45 100644
--- a/include/libxml/HTMLparser.h
+++ b/include/libxml/HTMLparser.h
@@ -86,6 +86,10 @@
int *outlen,
const unsigned char* in,
int *inlen);
+int htmlEncodeEntities(unsigned char* out,
+ int *outlen,
+ const unsigned char* in,
+ int *inlen, int quoteChar);
/**
* Interfaces for the Push mode
diff --git a/result/HTML/Down.html.sax b/result/HTML/Down.html.sax
index 6b23930..defeff1 100644
--- a/result/HTML/Down.html.sax
+++ b/result/HTML/Down.html.sax
@@ -7,46 +7,35 @@
, 1)
SAX.endElement(p)
SAX.startElement(head)
-SAX.endElement(head)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 3)
-SAX.endElement(p)
SAX.startElement(title)
SAX.characters(This service is temporary down, 30)
SAX.endElement(title)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.error: Unexpected end tag : head
-SAX.characters(
+SAX.endElement(head)
+SAX.ignorableWhitespace(
, 2)
-SAX.endElement(p)
SAX.startElement(body, bgcolor='#FFFFFF')
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.startElement(h1, align='center')
-SAX.characters(Sorry, this service is tempora, 37)
+SAX.characters(Sorry, this service is tempora, 30)
SAX.endElement(h1)
SAX.startElement(p)
SAX.characters(
-We are doing our best to get , 48)
+We are doing our best to get , 30)
SAX.endElement(p)
SAX.startElement(p)
SAX.characters(The W3C system administrators, 29)
SAX.endElement(p)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.endElement(body)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.endElement(body)
SAX.endElement(html)
SAX.ignorableWhitespace(
diff --git a/result/HTML/doc2.htm.sax b/result/HTML/doc2.htm.sax
index 5eafa0c..edd2b2a 100644
--- a/result/HTML/doc2.htm.sax
+++ b/result/HTML/doc2.htm.sax
@@ -8,38 +8,36 @@
SAX.startElement(title)
SAX.characters(Welcome to Copernic.com, 23)
SAX.endElement(title)
-SAX.endElement(head)
-SAX.startElement(body)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
SAX.startElement(script, language='javascript')
SAX.characters(
- NS_ActualOpen=windo, 194)
+ NS_ActualOpen=windo, 30)
SAX.endElement(script)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
SAX.comment( END Naviscope Javascript )
SAX.error: Misplaced DOCTYPE declaration
SAX.internalSubset(HTML, -//W3C//DTD HTML 4.0 Transitional//EN, )
SAX.comment( saved from url=(0027)http://www.agents-tech.com/ )
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
SAX.startElement(meta, content='text/html; charset=iso-8859-1', http-equiv='Content-Type')
SAX.endElement(meta)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
SAX.startElement(meta, content='Copernic.com Inc. develops innovative agent technology solutions to efficiently access and manage the overwhelming quantity of information available on the Internet and intranets.', name='DESCRIPTION')
SAX.endElement(meta)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
SAX.startElement(meta, content='agent,technology,intranet,extranet,management,filtering,ranking,solution,service,intelligent,intelligence,client,server,architecture,developer,development,information,telecommunication,announcement,press,product,profile,contact,multi-agent,meta-search,metasearch,multi-thread,mobile,wireless,shopping,robot,PCS,Copernic,engine,toolkit,CDK,EDK', name='KEYWORDS')
SAX.endElement(meta)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
SAX.startElement(meta, content='MSHTML 5.00.3103.1000', name='GENERATOR')
SAX.endElement(meta)
-SAX.error: Unexpected end tag : head
+SAX.endElement(head)
+SAX.startElement(body)
SAX.startElement(frameset, border='false', cols='172,*', frameBorder='0', frameSpacing='0')
SAX.startElement(frame, marginHeight='0', marginWidth='0', name='left', noResize, scrolling='no', src='doc2_files/side.htm', target='rtop')
SAX.endElement(frame)
@@ -54,23 +52,18 @@
, 4)
SAX.startElement(body, bgcolor='#FFFFFF', text='#000000', link='#000080', vlink='#000080', alink='#000080', topmargin='0', leftmargin='0', marginheight='0', marginwidth='0')
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 3)
-SAX.endElement(p)
SAX.startElement(p)
-SAX.characters(This page uses frames, but you, 61)
+SAX.characters(This page uses frames, but you, 30)
SAX.endElement(p)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 3)
-SAX.endElement(p)
SAX.endElement(body)
SAX.characters(
, 3)
SAX.endElement(noframes)
SAX.endElement(frameset)
-SAX.endElement(p)
SAX.endElement(body)
SAX.endElement(html)
SAX.ignorableWhitespace(
diff --git a/result/HTML/doc3.htm.sax b/result/HTML/doc3.htm.sax
index 98ef2ac..c810f35 100644
--- a/result/HTML/doc3.htm.sax
+++ b/result/HTML/doc3.htm.sax
Binary files differ
diff --git a/result/HTML/entities.html.sax b/result/HTML/entities.html.sax
index bc1920b..709b60d 100644
--- a/result/HTML/entities.html.sax
+++ b/result/HTML/entities.html.sax
@@ -4,19 +4,19 @@
SAX.startElement(body)
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: no name
-SAX.startElement(p, tst='a&b', tst2='a&b', tst3='a & b')
+SAX.startElement(p, tst='a&b', tst2='a&b', tst3='a & b')
SAX.characters(
a, 2)
-SAX.characters(&, 1)
+SAX.characters(&, 1)
SAX.characters(b
a, 3)
SAX.error: htmlParseEntityRef: expecting ';'
-SAX.characters(&, 1)
+SAX.characters(&, 1)
SAX.characters(b, 1)
SAX.characters(
a , 3)
SAX.error: htmlParseEntityRef: no name
-SAX.characters(&, 1)
+SAX.characters(&, 1)
SAX.characters( b
, 3)
SAX.endElement(p)
diff --git a/result/HTML/fp40.htm.sax b/result/HTML/fp40.htm.sax
index 94c7055..0314276 100644
--- a/result/HTML/fp40.htm.sax
+++ b/result/HTML/fp40.htm.sax
@@ -9,40 +9,34 @@
, 2)
SAX.endElement(p)
SAX.startElement(head)
-SAX.endElement(head)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
SAX.startElement(meta, name='GENERATOR', content='Microsoft FrontPage 4.0')
SAX.endElement(meta)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.startElement(title)
-SAX.characters(README - Microsoft FrontPage 2, 51)
+SAX.characters(README - Microsoft FrontPage 2, 30)
SAX.endElement(title)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
SAX.startElement(meta, name='Microsoft Theme', content='none')
SAX.endElement(meta)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.error: Unexpected end tag : head
-SAX.characters(
+SAX.endElement(head)
+SAX.ignorableWhitespace(
, 2)
-SAX.endElement(p)
SAX.startElement(body)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
SAX.startElement(font, face='Verdana')
SAX.characters(
, 1)
SAX.startElement(h1)
SAX.startElement(a, name='top')
-SAX.characters(Microsoft FrontPage 2000 Serve, 48)
+SAX.characters(Microsoft FrontPage 2000 Serve, 30)
SAX.endElement(a)
SAX.endElement(h1)
SAX.characters(
@@ -50,8 +44,8 @@
, 2)
SAX.startElement(font, size='2')
SAX.startElement(i)
-SAX.characters(© Copyright Microsoft Corpora, 40)
-SAX.characters( , 2)
+SAX.characters(© Copyright Microsoft Cor, 26)
+SAX.characters( , 2)
SAX.endElement(i)
SAX.endElement(font)
SAX.characters(
@@ -59,7 +53,7 @@
, 3)
SAX.startElement(p)
-SAX.characters(The FrontPage Server Extension, 88)
+SAX.characters(The FrontPage Server Extension, 30)
SAX.endElement(p)
SAX.startElement(ul)
SAX.characters(
@@ -75,7 +69,7 @@
SAX.characters(
, 3)
SAX.startElement(li)
-SAX.characters(Browse-time FrontPage web func, 39)
+SAX.characters(Browse-time FrontPage web func, 30)
SAX.endElement(li)
SAX.characters(
, 1)
@@ -86,7 +80,7 @@
, 3)
SAX.startElement(h2)
SAX.characters(Contents, 8)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.endElement(h2)
SAX.characters(
@@ -104,7 +98,7 @@
SAX.characters(
, 1)
SAX.startElement(p)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.endElement(p)
SAX.characters(
, 1)
@@ -121,7 +115,7 @@
, 2)
SAX.startElement(p)
-SAX.characters(This section provides compleme, 136)
+SAX.characters(This section provides compleme, 30)
SAX.endElement(p)
SAX.characters(
@@ -135,14 +129,14 @@
SAX.characters(
, 1)
SAX.startElement(a, href='#upgrading')
-SAX.characters(Upgrading from previous versio, 62)
+SAX.characters(Upgrading from previous versio, 30)
SAX.endElement(a)
SAX.startElement(br)
SAX.endElement(br)
SAX.characters(
, 1)
SAX.startElement(a, href='#executables')
-SAX.characters(Uploading files into executabl, 39)
+SAX.characters(Uploading files into executabl, 30)
SAX.endElement(a)
SAX.endElement(p)
SAX.characters(
@@ -169,23 +163,22 @@
, 2)
SAX.startElement(p)
-SAX.characters(You need to take some special , 360)
+SAX.characters(You need to take some special , 30)
SAX.endElement(p)
SAX.characters(
, 3)
SAX.endElement(font)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.startElement(blockquote)
SAX.characters(
, 3)
SAX.startElement(font, face='Courier New')
SAX.characters(
ResourceConfig /dev/null, 25)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.startElement(br)
SAX.endElement(br)
SAX.characters(
@@ -194,8 +187,7 @@
SAX.characters(
, 1)
SAX.endElement(blockquote)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
SAX.startElement(font, face='Verdana')
SAX.characters(
@@ -203,13 +195,13 @@
, 3)
SAX.startElement(p)
-SAX.characters(If you have some settings stor, 90)
+SAX.characters(If you have some settings stor, 30)
SAX.endElement(p)
SAX.characters(
, 2)
SAX.startElement(p)
-SAX.characters(You must stop and restart the , 86)
+SAX.characters(You must stop and restart the , 30)
SAX.endElement(p)
SAX.characters(
@@ -230,26 +222,26 @@
, 4)
SAX.startElement(h3)
SAX.startElement(a, name='upgrading')
-SAX.characters(Upgrading from previous versio, 62)
+SAX.characters(Upgrading from previous versio, 30)
SAX.endElement(a)
SAX.endElement(h3)
SAX.characters(
, 2)
SAX.startElement(p)
-SAX.characters(Custom entries in frontpage.cn, 67)
+SAX.characters(Custom entries in frontpage.cn, 30)
SAX.endElement(p)
SAX.characters(
, 2)
SAX.startElement(p)
-SAX.characters(When you install FrontPage 200, 359)
+SAX.characters(When you install FrontPage 200, 30)
SAX.endElement(p)
SAX.characters(
, 2)
SAX.startElement(p)
-SAX.characters(Do not overwrite the FrontPage, 141)
+SAX.characters(Do not overwrite the FrontPage, 30)
SAX.endElement(p)
SAX.characters(
@@ -270,7 +262,7 @@
, 4)
SAX.startElement(h3)
SAX.startElement(a, name='executables')
-SAX.characters(Uploading files into executabl, 39)
+SAX.characters(Uploading files into executabl, 30)
SAX.endElement(a)
SAX.endElement(h3)
SAX.characters(
@@ -278,15 +270,15 @@
, 3)
SAX.startElement(p)
-SAX.characters(After upgrading to FrontPage 2, 385)
+SAX.characters(After upgrading to FrontPage 2, 30)
SAX.endElement(p)
SAX.characters(
, 3)
SAX.startElement(p)
-SAX.characters(To allow FrontPage authors to , 249)
+SAX.characters(To allow FrontPage authors to , 30)
SAX.startElement(a, href='http://officeupdate.microsoft.com/frontpage/wpp/serk/')
-SAX.characters(http://officeupdate.microsoft., 53)
+SAX.characters(http://officeupdate.microsoft., 30)
SAX.endElement(a)
SAX.characters(., 1)
SAX.endElement(p)
@@ -323,7 +315,7 @@
, 2)
SAX.startElement(p)
-SAX.characters(This section lists sources of , 85)
+SAX.characters(This section lists sources of , 30)
SAX.endElement(p)
SAX.characters(
@@ -337,7 +329,7 @@
SAX.characters(
, 1)
SAX.startElement(a, href='#serkupdate')
-SAX.characters(Server Extensions Resource Kit, 37)
+SAX.characters(Server Extensions Resource Kit, 30)
SAX.endElement(a)
SAX.startElement(br)
SAX.endElement(br)
@@ -371,13 +363,13 @@
, 2)
SAX.startElement(p)
-SAX.characters(The FrontPage 2000 Server Exte, 339)
+SAX.characters(The FrontPage 2000 Server Exte, 30)
SAX.endElement(p)
SAX.characters(
, 2)
SAX.startElement(p)
-SAX.characters(The Server Extensions Resource, 312)
+SAX.characters(The Server Extensions Resource, 30)
SAX.endElement(p)
SAX.characters(
@@ -395,16 +387,16 @@
, 3)
SAX.startElement(h3)
SAX.startElement(a, name='serkupdate')
-SAX.characters(Server Extensions Resource Kit, 37)
+SAX.characters(Server Extensions Resource Kit, 30)
SAX.endElement(a)
SAX.endElement(h3)
SAX.characters(
, 2)
SAX.startElement(p)
-SAX.characters(For updated information about , 157)
+SAX.characters(For updated information about , 30)
SAX.startElement(a, href='http://officeupdate.microsoft.com/frontpage/wpp/serk/')
-SAX.characters(http://officeupdate.microsoft., 53)
+SAX.characters(http://officeupdate.microsoft., 30)
SAX.endElement(a)
SAX.characters(., 1)
SAX.endElement(p)
@@ -432,13 +424,13 @@
, 2)
SAX.startElement(p)
-SAX.characters(For further technical informat, 254)
+SAX.characters(For further technical informat, 30)
SAX.error: htmlParseEntityRef: no name
-SAX.characters(&, 1)
+SAX.characters(&, 1)
SAX.characters( troubleshooters to find
-fast, 302)
+fast, 30)
SAX.startElement(a, href='http://support.microsoft.com/support/')
-SAX.characters(http://support.microsoft.com/s, 37)
+SAX.characters(http://support.microsoft.com/s, 30)
SAX.endElement(a)
SAX.characters(., 1)
SAX.endElement(p)
@@ -457,20 +449,17 @@
, 3)
SAX.startElement(p)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.endElement(p)
SAX.characters(
, 4)
SAX.endElement(font)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.endElement(body)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.endElement(body)
SAX.endElement(html)
SAX.ignorableWhitespace(
diff --git a/result/HTML/liclose.html.sax b/result/HTML/liclose.html.sax
index 519688f..5e37aa8 100644
--- a/result/HTML/liclose.html.sax
+++ b/result/HTML/liclose.html.sax
@@ -8,25 +8,18 @@
, 1)
SAX.endElement(p)
SAX.startElement(head)
-SAX.endElement(head)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 3)
-SAX.endElement(p)
SAX.startElement(title)
SAX.endElement(title)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.error: Unexpected end tag : head
-SAX.characters(
+SAX.endElement(head)
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.startElement(body)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.startElement(ul)
SAX.characters(
, 1)
@@ -35,14 +28,12 @@
, 11)
SAX.endElement(li)
SAX.startElement(li)
-SAX.characters(Second item, closes the first , 34)
+SAX.characters(Second item, closes the first , 30)
SAX.endElement(li)
SAX.endElement(ul)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 2)
-SAX.endElement(p)
SAX.endElement(body)
SAX.endElement(body)
SAX.endElement(html)
diff --git a/result/HTML/reg1.html.sax b/result/HTML/reg1.html.sax
index 135cb57..516c022 100644
--- a/result/HTML/reg1.html.sax
+++ b/result/HTML/reg1.html.sax
@@ -7,43 +7,32 @@
, 1)
SAX.endElement(p)
SAX.startElement(head)
-SAX.endElement(head)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.startElement(title)
SAX.characters(Regression test 1, 17)
SAX.endElement(title)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.error: Unexpected end tag : head
-SAX.characters(
+SAX.endElement(head)
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.startElement(body)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.startElement(h1)
SAX.characters(Regression test 1, 17)
SAX.endElement(h1)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.startElement(p)
SAX.characters(
Ok file no problem
, 20)
SAX.endElement(p)
SAX.endElement(body)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.endElement(body)
SAX.endElement(html)
SAX.ignorableWhitespace(
diff --git a/result/HTML/reg2.html.sax b/result/HTML/reg2.html.sax
index 0db1e53..79a0d27 100644
--- a/result/HTML/reg2.html.sax
+++ b/result/HTML/reg2.html.sax
@@ -7,33 +7,24 @@
, 1)
SAX.endElement(p)
SAX.startElement(head)
-SAX.endElement(head)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.startElement(title)
SAX.characters(Regression test 2, 17)
SAX.endElement(title)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.error: Unexpected end tag : head
-SAX.characters(
+SAX.endElement(head)
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.startElement(body)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.startElement(h1)
SAX.characters(Regression test 2, 17)
SAX.endElement(h1)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.startElement(p)
SAX.characters(
Autoclose of tag P
@@ -45,10 +36,8 @@
, 20)
SAX.endElement(p)
SAX.endElement(body)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.endElement(body)
SAX.endElement(html)
SAX.ignorableWhitespace(
diff --git a/result/HTML/reg3.html.sax b/result/HTML/reg3.html.sax
index 75cd2cc..441a9e2 100644
--- a/result/HTML/reg3.html.sax
+++ b/result/HTML/reg3.html.sax
@@ -7,33 +7,24 @@
, 1)
SAX.endElement(p)
SAX.startElement(head)
-SAX.endElement(head)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.startElement(title)
SAX.characters(Regression test 3, 17)
SAX.endElement(title)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.error: Unexpected end tag : head
-SAX.characters(
+SAX.endElement(head)
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.startElement(body)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.startElement(h1)
SAX.characters(Regression test 3, 17)
SAX.endElement(h1)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.startElement(p)
SAX.characters(
Autoclose of tag P
@@ -41,20 +32,16 @@
SAX.endElement(p)
SAX.startElement(hr)
SAX.endElement(hr)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.startElement(p)
SAX.characters(
Ok file no problem
, 20)
SAX.endElement(p)
SAX.endElement(body)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.endElement(body)
SAX.endElement(html)
SAX.ignorableWhitespace(
diff --git a/result/HTML/reg4.html.sax b/result/HTML/reg4.html.sax
index 832fe69..d2d386b 100644
--- a/result/HTML/reg4.html.sax
+++ b/result/HTML/reg4.html.sax
@@ -7,33 +7,24 @@
, 1)
SAX.endElement(p)
SAX.startElement(head)
-SAX.endElement(head)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.startElement(title)
SAX.characters(Regression test 4, 17)
SAX.endElement(title)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.error: Unexpected end tag : head
-SAX.characters(
+SAX.endElement(head)
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.startElement(body)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.startElement(h1)
SAX.characters(Regression test 4, 17)
SAX.endElement(h1)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.startElement(p)
SAX.characters(
Wrong close of tag P
@@ -41,19 +32,14 @@
SAX.endElement(p)
SAX.startElement(hr)
SAX.endElement(hr)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
-SAX.startElement(p)
-SAX.characters(
+SAX.error: Unexpected end tag : p
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.endElement(body)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.endElement(body)
SAX.endElement(html)
SAX.ignorableWhitespace(
diff --git a/result/HTML/test2.html.sax b/result/HTML/test2.html.sax
index d457ed0..351080a 100644
--- a/result/HTML/test2.html.sax
+++ b/result/HTML/test2.html.sax
@@ -3,25 +3,20 @@
SAX.internalSubset(HTML, -//W3C//DTD HTML 4.0 Transitional//EN, http://www.w3.org/TR/REC-html40/loose.dtd)
SAX.startElement(html)
SAX.startElement(head)
-SAX.endElement(head)
-SAX.startElement(body)
-SAX.startElement(p)
-SAX.characters( , 1)
-SAX.endElement(p)
+SAX.ignorableWhitespace( , 1)
SAX.startElement(title)
SAX.characters(Linux Today, 11)
SAX.endElement(title)
-SAX.error: Unexpected end tag : head
+SAX.endElement(head)
+SAX.startElement(body)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.startElement(body, bgcolor='White', link='Blue', text='Black', VLINK='Black', ALINK='Red')
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 2)
-SAX.endElement(p)
SAX.startElement(center)
SAX.characters(
, 1)
@@ -138,19 +133,15 @@
SAX.characters(
, 1)
SAX.endElement(center)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.startElement(p)
SAX.characters(
, 1)
SAX.endElement(p)
SAX.endElement(body)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.endElement(body)
SAX.endElement(html)
SAX.ignorableWhitespace(
diff --git a/result/HTML/test3.html.sax b/result/HTML/test3.html.sax
index 8546efd..f88b7df 100644
--- a/result/HTML/test3.html.sax
+++ b/result/HTML/test3.html.sax
@@ -7,34 +7,29 @@
, 2)
SAX.endElement(p)
SAX.startElement(head)
-SAX.endElement(head)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 2)
SAX.startElement(base, target='contents')
SAX.endElement(base)
-SAX.characters(
+SAX.ignorableWhitespace(
, 2)
-SAX.error: Unexpected end tag : head
-SAX.characters(
+SAX.endElement(head)
+SAX.ignorableWhitespace(
, 2)
SAX.startElement(a, name='ProblemDomain.Package')
SAX.startElement(h2)
-SAX.characters(Component Package diagram Prob, 39)
+SAX.characters(Component Package diagram Prob, 30)
SAX.endElement(h2)
SAX.characters(
, 2)
SAX.endElement(a)
-SAX.endElement(p)
SAX.startElement(p)
SAX.endElement(p)
SAX.startElement(hr)
SAX.endElement(hr)
SAX.error: Unexpected end tag : p
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 2)
-SAX.endElement(p)
SAX.startElement(dl)
SAX.characters(
, 2)
@@ -60,25 +55,21 @@
SAX.endElement(b)
SAX.endElement(dt)
SAX.startElement(dd)
-SAX.characters(The Problem Domain package is , 59)
+SAX.characters(The Problem Domain package is , 30)
SAX.startElement(dd)
-SAX.characters(Interface, thats stores and ma, 58)
+SAX.characters(Interface, thats stores and ma, 30)
SAX.endElement(dd)
SAX.endElement(dd)
SAX.endElement(dl)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 2)
-SAX.endElement(p)
SAX.startElement(p)
SAX.endElement(p)
SAX.startElement(hr)
SAX.endElement(hr)
SAX.error: Unexpected end tag : p
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 2)
-SAX.endElement(p)
SAX.startElement(dl)
SAX.characters(
@@ -174,11 +165,9 @@
SAX.characters(
, 2)
SAX.endElement(dl)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 4)
-SAX.endElement(p)
SAX.startElement(h4)
SAX.startElement(b)
SAX.characters(Links, 5)
@@ -186,10 +175,8 @@
SAX.endElement(b)
SAX.endElement(h4)
SAX.error: Unexpected end tag : b
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 2)
-SAX.endElement(p)
SAX.startElement(ul)
SAX.startElement(li)
SAX.startElement(b)
@@ -200,16 +187,12 @@
SAX.endElement(a)
SAX.endElement(li)
SAX.endElement(ul)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 2)
-SAX.endElement(p)
SAX.startElement(dir)
SAX.endElement(dir)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 2)
-SAX.endElement(p)
SAX.startElement(ul)
SAX.startElement(li)
SAX.startElement(b)
@@ -220,16 +203,12 @@
SAX.endElement(a)
SAX.endElement(li)
SAX.endElement(ul)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 2)
-SAX.endElement(p)
SAX.startElement(dir)
SAX.endElement(dir)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 2)
-SAX.endElement(p)
SAX.startElement(ul)
SAX.startElement(li)
SAX.startElement(b)
@@ -240,16 +219,12 @@
SAX.endElement(a)
SAX.endElement(li)
SAX.endElement(ul)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 2)
-SAX.endElement(p)
SAX.startElement(dir)
SAX.endElement(dir)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 2)
-SAX.endElement(p)
SAX.endElement(body)
SAX.endElement(html)
SAX.ignorableWhitespace(
diff --git a/result/HTML/wired.html.sax b/result/HTML/wired.html.sax
index d67e0bb..3760a64 100644
--- a/result/HTML/wired.html.sax
+++ b/result/HTML/wired.html.sax
@@ -4,15 +4,13 @@
SAX.startElement(html)
SAX.startElement(head)
SAX.startElement(title)
-SAX.characters(Top Stories News from Wired Ne, 32)
+SAX.characters(Top Stories News from Wired Ne, 30)
SAX.endElement(title)
SAX.endElement(head)
SAX.startElement(body, bgcolor='#FFFFFF', text='#000000', link='#333399', vlink='#660066', alink='#666699')
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 2)
-SAX.endElement(p)
SAX.startElement(table, border='0', width='600', cellspacing='0', cellpadding='0')
SAX.characters(
, 3)
@@ -27,7 +25,7 @@
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
-SAX.startElement(form, METHOD='GET', ACTION='http://nsads.hotwired.com/event.ng/Type=click&ProfileID=9688&RunID=14074&AdID=22584&GroupID=1&FamilyID=2684&TagValues=8.25.156.159.166.171.172.174.179.180.181.182.183.196.197.199.208.389.412.436.2041.6750.78456.79630.81880&Redirect=http://www.springstreet.com/aa/citysearch.htm', id='form1', name='form1')
+SAX.startElement(form, METHOD='GET', ACTION='http://nsads.hotwired.com/event.ng/Type=click&ProfileID=9688&RunID=14074&AdID=22584&GroupID=1&FamilyID=2684&TagValues=8.25.156.159.166.171.172.174.179.180.181.182.183.196.197.199.208.389.412.436.2041.6750.78456.79630.81880&Redirect=http://www.springstreet.com/aa/citysearch.htm', id='form1', name='form1')
SAX.characters(
, 2)
SAX.startElement(tr)
@@ -306,7 +304,7 @@
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
-SAX.startElement(a, href='http://nsads.hotwired.com/event.ng/Type=click&ProfileID=5597&RunID=17167&AdID=22588&GroupID=1&FamilyID=3228&TagValues=8.25.159.171.172.174.179.180.181.182.183.196.197.199.208.241.389.412.436.2035.6749.6750.70367.78456.79630.81880&Redirect=http:%2F%2Fwww.hp.com%2Fgo%2Foriginal%20', TARGET='_top')
+SAX.startElement(a, href='http://nsads.hotwired.com/event.ng/Type=click&ProfileID=5597&RunID=17167&AdID=22588&GroupID=1&FamilyID=3228&TagValues=8.25.159.171.172.174.179.180.181.182.183.196.197.199.208.241.389.412.436.2035.6749.6750.70367.78456.79630.81880&Redirect=http:%2F%2Fwww.hp.com%2Fgo%2Foriginal%20', TARGET='_top')
SAX.startElement(img, src='http://static.wired.com/advertising/blipverts/hp_colorinkjet/hp_970c_120x60_6.gif', BORDER='1', height='60', width='120', alt='True to the Original')
SAX.endElement(img)
SAX.endElement(a)
@@ -317,22 +315,20 @@
SAX.characters(
, 1)
SAX.endElement(table)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 2)
SAX.comment( WIRED NEWS header )
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
SAX.comment( CMD_HOST = scoop.hotwired.com )
-SAX.characters(
+SAX.ignorableWhitespace(
, 2)
SAX.startElement(a, name='#')
SAX.endElement(a)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.startElement(table, border='0', width='600', cellspacing='0', cellpadding='0')
SAX.characters(
@@ -379,13 +375,13 @@
SAX.startElement(br)
SAX.endElement(br)
SAX.startElement(font, size='1', face='Verdana, Arial, Geneva, sans-serif', color='#FFFFFF')
-SAX.characters( , 2)
-SAX.characters( , 2)
-SAX.characters( , 2)
+SAX.characters( , 2)
+SAX.characters( , 2)
+SAX.characters( , 2)
SAX.startElement(b)
SAX.characters(updated 10:15 a.m., 18)
-SAX.characters( , 2)
-SAX.characters( , 2)
+SAX.characters( , 2)
+SAX.characters( , 2)
SAX.characters(15.Oct.99.PDT, 13)
SAX.endElement(b)
SAX.endElement(font)
@@ -446,7 +442,7 @@
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
-SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?Action=FilterSearch&Filter=docs_filter.hts&ResultTemplate=vignette.hts&Collection=vignette&QueryMode=Internet&Query=', selected)
+SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?Action=FilterSearch&Filter=docs_filter.hts&ResultTemplate=vignette.hts&Collection=vignette&QueryMode=Internet&Query=', selected)
SAX.characters(Wired News, 10)
SAX.endElement(option)
SAX.characters(
@@ -456,7 +452,7 @@
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
-SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?Action=FilterSearch&Filter=docs_filter.hts&ResultTemplate=webmonkey.hts&Collection=webmonkey&QueryMode=Internet&Query=')
+SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?Action=FilterSearch&Filter=docs_filter.hts&ResultTemplate=webmonkey.hts&Collection=webmonkey&QueryMode=Internet&Query=')
SAX.characters(Webmonkey, 9)
SAX.endElement(option)
SAX.characters(
@@ -466,7 +462,7 @@
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
-SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?collection=webmonkey_guides&Action=FilterSearch&filter=docs_filter.hts&ResultTemplate=webmonkey_guides.hts&QueryMode=Internet&Query=')
+SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?collection=webmonkey_guides&Action=FilterSearch&filter=docs_filter.hts&ResultTemplate=webmonkey_guides.hts&QueryMode=Internet&Query=')
SAX.characters(Webmonkey Guides, 16)
SAX.endElement(option)
SAX.characters(
@@ -476,7 +472,7 @@
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
-SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?collection=hotwired&Action=FilterSearch&filter=docs_filter.hts&ResultTemplate=hotwired_archive.hts&QueryMode=Internet&Query=')
+SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?collection=hotwired&Action=FilterSearch&filter=docs_filter.hts&ResultTemplate=hotwired_archive.hts&QueryMode=Internet&Query=')
SAX.characters(HotWired Archives, 17)
SAX.endElement(option)
SAX.characters(
@@ -486,7 +482,7 @@
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
-SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?Action=FilterSearch&Filter=docs_filter.hts&ResultTemplate=magazine.hts&Collection=magazine&QueryMode=Internet&Query=')
+SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?Action=FilterSearch&Filter=docs_filter.hts&ResultTemplate=magazine.hts&Collection=magazine&QueryMode=Internet&Query=')
SAX.characters(Wired Magazine, 14)
SAX.endElement(option)
SAX.characters(
@@ -496,7 +492,7 @@
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
-SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?Action=FilterSearch&Filter=docs_filter.hts&ResultTemplate=animation.hts&Collection=animation&QueryMode=Internet&Query=')
+SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?Action=FilterSearch&Filter=docs_filter.hts&ResultTemplate=animation.hts&Collection=animation&QueryMode=Internet&Query=')
SAX.characters(Animation Express, 17)
SAX.endElement(option)
SAX.characters(
@@ -506,7 +502,7 @@
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
-SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?collection=suck&Action=FilterSearch&filter=docs_filter.hts&ResultTemplate=suck.hts&QueryMode=Internet&Query=')
+SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?collection=suck&Action=FilterSearch&filter=docs_filter.hts&ResultTemplate=suck.hts&QueryMode=Internet&Query=')
SAX.characters(Suck.com, 8)
SAX.endElement(option)
SAX.characters(
@@ -516,7 +512,7 @@
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
-SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?collection=uber_hotwired&Action=FilterSearch&filter=docs_filter.hts&ResultTemplate=uber_hotwired.hts&QueryMode=Internet&Query=')
+SAX.startElement(option, value='http://search.hotwired.com/search97/s97.vts?collection=uber_hotwired&Action=FilterSearch&filter=docs_filter.hts&ResultTemplate=uber_hotwired.hts&QueryMode=Internet&Query=')
SAX.characters(All of HotWired, 15)
SAX.endElement(option)
SAX.characters(
@@ -529,8 +525,8 @@
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
-SAX.startElement(option, value='http://www.hotbot.com/?SM=MC&DV=0&LG=any&RD=RG&DC=10&DE=2&_v=2&OPs=MDRTP&MT=')
-SAX.characters(The Web -> HotBot, 17)
+SAX.startElement(option, value='http://www.hotbot.com/?SM=MC&DV=0&LG=any&RD=RG&DC=10&DE=2&_v=2&OPs=MDRTP&MT=')
+SAX.characters(The Web -> HotBot, 17)
SAX.endElement(option)
SAX.characters(
, 1)
@@ -578,18 +574,16 @@
SAX.characters(
, 1)
SAX.endElement(table)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
SAX.comment( end WIRED NEWS header )
-SAX.characters(
+SAX.ignorableWhitespace(
, 2)
SAX.comment( begin upper left side Navigation )
-SAX.characters(
+SAX.ignorableWhitespace(
, 2)
-SAX.endElement(p)
SAX.startElement(table, border='0', cellpadding='3', cellspacing='0', align='LEFT', bgcolor='#FFFFFF')
SAX.characters(
, 3)
@@ -781,7 +775,7 @@
, 3)
SAX.startElement(input, type='TEXT', name='from', size='10', value='enter email')
SAX.endElement(input)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.characters(
, 1)
SAX.error: Opening and ending tag mismatch: td and form
@@ -851,7 +845,7 @@
SAX.startElement(form, method='get', action='http://r.wired.com/r/10020/http://stocks.wired.com/stocks_quotes.asp')
SAX.startElement(input, type='TEXT', name='Symbol', size='12')
SAX.endElement(input)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.startElement(input, type='SUBMIT', name='submit', value='GO')
SAX.endElement(input)
SAX.endElement(form)
@@ -906,7 +900,7 @@
SAX.endElement(area)
SAX.characters(
, 9)
-SAX.startElement(area, SHAPE='RECT', ALT='GetSmart's MortgageFinder', COORDS='0,31,69,55', HREF='http://r.wired.com/r/294/http://www.getsmartinc.com/mortgage/HomeBanner?BANNERNAME=www.getsmartinc.com/mwired001m6075x25')
+SAX.startElement(area, SHAPE='RECT', ALT='GetSmart's MortgageFinder', COORDS='0,31,69,55', HREF='http://r.wired.com/r/294/http://www.getsmartinc.com/mortgage/HomeBanner?BANNERNAME=www.getsmartinc.com/mwired001m6075x25')
SAX.endElement(area)
SAX.endElement(map)
SAX.characters(
@@ -1057,7 +1051,7 @@
, 16)
SAX.startElement(option, value='2800')
SAX.characters( Bargain Books
- , 40)
+ , 30)
SAX.startElement(option, value='4')
SAX.characters(Other
@@ -1099,7 +1093,7 @@
, 2)
SAX.error: htmlParseEntityRef: expecting ';'
-SAX.startElement(img, SRC='http://barnesandnoble.bfast.com/booklink/serve?sourceid=383471&is_search=Y', border='0', align='top')
+SAX.startElement(img, SRC='http://barnesandnoble.bfast.com/booklink/serve?sourceid=383471&is_search=Y', border='0', align='top')
SAX.endElement(img)
SAX.characters(
, 1)
@@ -1111,7 +1105,7 @@
SAX.endElement(td)
SAX.characters(
- , 35)
+ , 30)
SAX.endElement(tr)
SAX.characters(
, 17)
@@ -1471,18 +1465,16 @@
, 2)
SAX.endElement(table)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 2)
SAX.comment( end lower left side Navigation )
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
SAX.comment( CONTENT TABLE )
-SAX.characters(
+SAX.ignorableWhitespace(
, 2)
-SAX.endElement(p)
SAX.startElement(table, border='0', width='447', cellspacing='0', cellpadding='0', bordercolor='#66FF00')
SAX.characters(
, 2)
@@ -1540,9 +1532,9 @@
SAX.startElement(font, size='1', face='Verdana, Arial, Geneva, sans-serif', color='#FF0000')
SAX.characters(8:15 a.m., 9)
SAX.endElement(font)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.startElement(font, face='Verdana, Arial, Geneva, sans-serif', size='2')
-SAX.characters(The city council approves a pl, 180)
+SAX.characters(The city council approves a pl, 30)
SAX.endElement(font)
SAX.startElement(br)
SAX.endElement(br)
@@ -1550,7 +1542,7 @@
SAX.startElement(i)
SAX.startElement(a, href='/news/politics/0,1283,,00.html')
SAX.characters(in, 2)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.characters(Politics, 8)
SAX.endElement(a)
SAX.endElement(i)
@@ -1567,7 +1559,7 @@
SAX.characters(
, 11)
SAX.startElement(td, bgcolor='#000000')
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.endElement(td)
SAX.characters(
, 11)
@@ -1575,7 +1567,7 @@
SAX.startElement(font, size='1', face='Verdana, Arial, Helvetica, sans-serif', color='#FFFFFF')
SAX.startElement(b)
SAX.characters(HITS , 5)
-SAX.characters(&, 1)
+SAX.characters(&, 1)
SAX.characters( MISC., 6)
SAX.endElement(b)
SAX.endElement(font)
@@ -1589,7 +1581,7 @@
SAX.characters(
, 11)
SAX.startElement(td)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.endElement(td)
SAX.characters(
, 11)
@@ -1623,7 +1615,7 @@
SAX.startElement(a, href='/news/commentarySection/0,1292,31926,00.html')
SAX.characters(Rants , 6)
SAX.error: htmlParseEntityRef: no name
-SAX.characters(&, 1)
+SAX.characters(&, 1)
SAX.characters( Raves, 6)
SAX.endElement(a)
SAX.endElement(b)
@@ -1632,7 +1624,7 @@
SAX.endElement(br)
SAX.startElement(font, size='2', face='Arial, Helvetica, sans-serif')
SAX.startElement(font, size='1', face='Arial, Geneva, sans-serif', color='#000000')
-SAX.characters(Readers on Apple's G4 ... AOL', 59)
+SAX.characters(Readers on Apple's G4 ... AOL', 30)
SAX.endElement(font)
SAX.startElement(br)
SAX.endElement(br)
@@ -1656,7 +1648,7 @@
SAX.characters(
, 2)
SAX.startElement(td, align='left', bgcolor='#000000')
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.endElement(td)
SAX.characters(
, 2)
@@ -1677,7 +1669,7 @@
SAX.characters(
, 2)
SAX.startElement(td)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.endElement(td)
SAX.characters(
, 1)
@@ -1962,8 +1954,8 @@
SAX.error: htmlParseEntityRef: expecting ';'
SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(a, href='http://r.wired.com/r/wn_is_r_ssec/http://ad.doubleclick.net/clk;653163;3599571;s?http://www.sprintbiz.com/s
-ervlet/appservlet?from=/wired/sprint/&template=/security/security.html&SITE=
-wired.com&BANNER=Sprint', style='text-decoration:none')
+ervlet/appservlet?from=/wired/sprint/&template=/security/security.html&SITE=
+wired.com&BANNER=Sprint', style='text-decoration:none')
SAX.startElement(font, color='#000000')
SAX.characters(Sprint, 6)
SAX.error: Opening and ending tag mismatch: a and font
@@ -2011,7 +2003,7 @@
SAX.endElement(font)
SAX.startElement(br)
SAX.endElement(br)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.startElement(br)
SAX.endElement(br)
SAX.characters(
@@ -2046,7 +2038,7 @@
SAX.characters(
, 20)
SAX.startElement(td, bgcolor='#000000')
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.endElement(td)
SAX.characters(
, 11)
@@ -2067,7 +2059,7 @@
SAX.characters(
, 12)
SAX.startElement(td)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.endElement(td)
SAX.characters(
, 11)
@@ -2087,7 +2079,7 @@
, 1)
SAX.startElement(font, size='2', face='Arial, Helvetica, sans-serif', color='#000000')
SAX.startElement(b)
-SAX.characters(Führer Furor, 13)
+SAX.characters(Führer Furor, 13)
SAX.endElement(b)
SAX.endElement(font)
SAX.startElement(br)
@@ -2095,14 +2087,14 @@
SAX.startElement(font, size='1', face='Arial, Geneva, sans-serif', color='#000000')
SAX.startElement(p)
SAX.characters(
-Contruction workers in Berli, 637)
+Contruction workers in Berli, 30)
SAX.startElement(br)
SAX.endElement(br)
SAX.endElement(p)
SAX.startElement(li)
SAX.characters(More from , 10)
SAX.error: htmlParseEntityRef: expecting ';'
-SAX.startElement(a, href='http://www.lycos.com/news/flash/hitlerbunker.html?v=wn1015&lpv=1')
+SAX.startElement(a, href='http://www.lycos.com/news/flash/hitlerbunker.html?v=wn1015&lpv=1')
SAX.characters(Lycos, 5)
SAX.endElement(a)
SAX.endElement(li)
@@ -2128,7 +2120,7 @@
, 3)
SAX.startElement(font, size='1')
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.startElement(br)
SAX.endElement(br)
SAX.endElement(font)
@@ -2174,9 +2166,9 @@
SAX.startElement(font, color='#ff0000', face='Verdana, Arial, Geneva, sans-serif', size='1')
SAX.characters(10:15 a.m., 10)
SAX.endElement(font)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.startElement(font, face='Verdana, Arial, Geneva, sans-serif', size='2')
-SAX.characters(The Dow and Nasdaq suffer size, 180)
+SAX.characters(The Dow and Nasdaq suffer size, 30)
SAX.endElement(font)
SAX.startElement(br)
SAX.endElement(br)
@@ -2184,7 +2176,7 @@
SAX.startElement(i)
SAX.startElement(a, href='/news/reuters/0,1349,,00.html')
SAX.characters(in, 2)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.characters(Reuters, 7)
SAX.endElement(a)
SAX.endElement(i)
@@ -2206,9 +2198,9 @@
SAX.startElement(font, color='#ff0000', face='Verdana, Arial, Geneva, sans-serif', size='1')
SAX.characters(9:10 a.m., 9)
SAX.endElement(font)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.startElement(font, face='Verdana, Arial, Geneva, sans-serif', size='2')
-SAX.characters(The bulls and the bears are in, 128)
+SAX.characters(The bulls and the bears are in, 30)
SAX.endElement(font)
SAX.startElement(br)
SAX.endElement(br)
@@ -2216,7 +2208,7 @@
SAX.startElement(i)
SAX.startElement(a, href='/news/reuters/0,1349,,00.html')
SAX.characters(in, 2)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.characters(Reuters, 7)
SAX.endElement(a)
SAX.endElement(i)
@@ -2229,7 +2221,7 @@
SAX.startElement(font, face='Arial, Helvetica, sans-serif', size='3')
SAX.startElement(b)
SAX.startElement(a, href='/news/politics/0,1283,31533,00.html')
-SAX.characters('Want a Loan? What's Your Race, 32)
+SAX.characters('Want a Loan? What's Your Race, 30)
SAX.endElement(a)
SAX.endElement(b)
SAX.endElement(font)
@@ -2238,9 +2230,9 @@
SAX.startElement(font, color='#ff0000', face='Verdana, Arial, Geneva, sans-serif', size='1')
SAX.characters(3:00 a.m., 9)
SAX.endElement(font)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.startElement(font, face='Verdana, Arial, Geneva, sans-serif', size='2')
-SAX.characters(The Federal Reserve is in the , 184)
+SAX.characters(The Federal Reserve is in the , 30)
SAX.endElement(font)
SAX.startElement(br)
SAX.endElement(br)
@@ -2248,7 +2240,7 @@
SAX.startElement(i)
SAX.startElement(a, href='/news/politics/0,1283,,00.html')
SAX.characters(in, 2)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.characters(Politics, 8)
SAX.endElement(a)
SAX.endElement(i)
@@ -2270,9 +2262,9 @@
SAX.startElement(font, color='#ff0000', face='Verdana, Arial, Geneva, sans-serif', size='1')
SAX.characters(3:00 a.m., 9)
SAX.endElement(font)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.startElement(font, face='Verdana, Arial, Geneva, sans-serif', size='2')
-SAX.characters(The struggle to come up with a, 171)
+SAX.characters(The struggle to come up with a, 30)
SAX.endElement(font)
SAX.startElement(br)
SAX.endElement(br)
@@ -2280,7 +2272,7 @@
SAX.startElement(i)
SAX.startElement(a, href='/news/business/0,1367,,00.html')
SAX.characters(in, 2)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.characters(Business, 8)
SAX.endElement(a)
SAX.endElement(i)
@@ -2302,9 +2294,9 @@
SAX.startElement(font, color='#ff0000', face='Verdana, Arial, Geneva, sans-serif', size='1')
SAX.characters(3:00 a.m., 9)
SAX.endElement(font)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.startElement(font, face='Verdana, Arial, Geneva, sans-serif', size='2')
-SAX.characters(High-tech companies are notori, 186)
+SAX.characters(High-tech companies are notori, 30)
SAX.endElement(font)
SAX.startElement(br)
SAX.endElement(br)
@@ -2312,7 +2304,7 @@
SAX.startElement(i)
SAX.startElement(a, href='/news/technology/0,1282,,00.html')
SAX.characters(in, 2)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.characters(Technology, 10)
SAX.endElement(a)
SAX.endElement(i)
@@ -2334,9 +2326,9 @@
SAX.startElement(font, color='#ff0000', face='Verdana, Arial, Geneva, sans-serif', size='1')
SAX.characters(3:00 a.m., 9)
SAX.endElement(font)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.startElement(font, face='Verdana, Arial, Geneva, sans-serif', size='2')
-SAX.characters(Windows NT sales remain strong, 165)
+SAX.characters(Windows NT sales remain strong, 30)
SAX.endElement(font)
SAX.startElement(br)
SAX.endElement(br)
@@ -2344,7 +2336,7 @@
SAX.startElement(i)
SAX.startElement(a, href='/news/business/0,1367,,00.html')
SAX.characters(in, 2)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.characters(Business, 8)
SAX.endElement(a)
SAX.endElement(i)
@@ -2371,9 +2363,9 @@
SAX.startElement(font, color='#ff0000', face='Verdana, Arial, Geneva, sans-serif', size='1')
SAX.characters(3:00 a.m., 9)
SAX.endElement(font)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.startElement(font, face='Verdana, Arial, Geneva, sans-serif', size='2')
-SAX.characters(Different cancer patients need, 207)
+SAX.characters(Different cancer patients need, 30)
SAX.endElement(font)
SAX.startElement(br)
SAX.endElement(br)
@@ -2381,7 +2373,7 @@
SAX.startElement(i)
SAX.startElement(a, href='/news/technology/0,1282,,00.html')
SAX.characters(in, 2)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.characters(Technology, 10)
SAX.endElement(a)
SAX.endElement(i)
@@ -2403,9 +2395,9 @@
SAX.startElement(font, color='#ff0000', face='Verdana, Arial, Geneva, sans-serif', size='1')
SAX.characters(3:00 a.m., 9)
SAX.endElement(font)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.startElement(font, face='Verdana, Arial, Geneva, sans-serif', size='2')
-SAX.characters(It's not just another round of, 196)
+SAX.characters(It's not just another round of, 30)
SAX.endElement(font)
SAX.startElement(br)
SAX.endElement(br)
@@ -2413,7 +2405,7 @@
SAX.startElement(i)
SAX.startElement(a, href='/news/business/0,1367,,00.html')
SAX.characters(in, 2)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.characters(Business, 8)
SAX.endElement(a)
SAX.endElement(i)
@@ -2435,9 +2427,9 @@
SAX.startElement(font, color='#ff0000', face='Verdana, Arial, Geneva, sans-serif', size='1')
SAX.characters(3:00 a.m., 9)
SAX.endElement(font)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.startElement(font, face='Verdana, Arial, Geneva, sans-serif', size='2')
-SAX.characters(The far-flung databases on glo, 196)
+SAX.characters(The far-flung databases on glo, 30)
SAX.endElement(font)
SAX.startElement(br)
SAX.endElement(br)
@@ -2445,7 +2437,7 @@
SAX.startElement(i)
SAX.startElement(a, href='/news/technology/0,1282,,00.html')
SAX.characters(in, 2)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.characters(Technology, 10)
SAX.endElement(a)
SAX.endElement(i)
@@ -2509,7 +2501,7 @@
SAX.startElement(font, face='helvetica, arial', size='3')
SAX.startElement(b)
SAX.startElement(a, href='http://news.lycos.com/stories/TopNews/19991014RTNEWS-ARMS-TREATY.asp')
-SAX.characters(White House Lashes Out on Trea, 32)
+SAX.characters(White House Lashes Out on Trea, 30)
SAX.endElement(a)
SAX.endElement(b)
SAX.endElement(font)
@@ -2559,7 +2551,7 @@
SAX.startElement(font, face='helvetica, arial', size='3')
SAX.startElement(b)
SAX.startElement(a, href='http://www.nytimes.com/library/tech/99/10/biztech/articles/14free.html')
-SAX.characters(Much Is Free in the Wired Worl, 31)
+SAX.characters(Much Is Free in the Wired Worl, 30)
SAX.endElement(a)
SAX.endElement(b)
SAX.endElement(font)
@@ -2596,7 +2588,7 @@
SAX.startElement(font, face='helvetica, arial', size='3')
SAX.startElement(b)
SAX.startElement(a, href='http://www.msnbc.com/news/322926.asp')
-SAX.characters(Domain Owners Surrender Privac, 31)
+SAX.characters(Domain Owners Surrender Privac, 30)
SAX.endElement(a)
SAX.endElement(b)
SAX.endElement(font)
@@ -2723,9 +2715,9 @@
SAX.endElement(a)
SAX.characters(
, 1)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.characters(|, 1)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.characters(
, 1)
SAX.startElement(a, href='http://www.hotwired.com/jobs/')
@@ -2733,9 +2725,9 @@
SAX.endElement(a)
SAX.characters(
, 1)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.characters(|, 1)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.characters(
, 1)
SAX.startElement(a, href='http://home.wired.com/advertising/')
@@ -2752,9 +2744,9 @@
SAX.endElement(a)
SAX.characters(
, 1)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.characters(|, 1)
-SAX.characters( , 2)
+SAX.characters( , 2)
SAX.characters(
, 1)
SAX.startElement(a, href='http://www.wired.com/home/digital/privacy/')
@@ -2772,8 +2764,8 @@
SAX.characters(Copyright, 9)
SAX.endElement(a)
SAX.characters( , 1)
-SAX.characters(©, 2)
-SAX.characters( 1994-99 Wired Digital Inc. Al, 48)
+SAX.characters(©, 2)
+SAX.characters( 1994-99 Wired Digital Inc. Al, 30)
SAX.endElement(font)
SAX.characters(
@@ -2840,16 +2832,14 @@
SAX.characters(
, 1)
SAX.endElement(table)
-SAX.startElement(p)
-SAX.characters(
+SAX.ignorableWhitespace(
, 3)
SAX.startElement(br)
SAX.endElement(br)
-SAX.characters(
+SAX.ignorableWhitespace(
, 1)
-SAX.endElement(p)
SAX.endElement(body)
SAX.startElement(body)
SAX.startElement(p)
diff --git a/testHTML.c b/testHTML.c
index f998072..af088b5 100644
--- a/testHTML.c
+++ b/testHTML.c
@@ -368,8 +368,19 @@
if (atts != NULL) {
for (i = 0;(atts[i] != NULL);i++) {
fprintf(stdout, ", %s", atts[i++]);
- if (atts[i] != NULL)
- fprintf(stdout, "='%s'", atts[i]);
+ if (atts[i] != NULL) {
+ unsigned char output[40];
+ const unsigned char *att = atts[i];
+ int outlen, attlen;
+ fprintf(stdout, "='");
+ while ((attlen = strlen((char*)att)) > 0) {
+ outlen = sizeof output - 1;
+ htmlEncodeEntities(output, &outlen, att, &attlen, '\'');
+ fprintf(stdout, "%.*s", outlen, output);
+ att += attlen;
+ }
+ fprintf(stdout, "'");
+ }
}
}
fprintf(stdout, ")\n");
@@ -400,12 +411,11 @@
void
charactersDebug(void *ctx, const xmlChar *ch, int len)
{
- char output[40];
- int i;
+ unsigned char output[40]; /* room for the 30-byte encoded preview plus the terminating 0 */
+ int inlen = len, outlen = 30; /* htmlEncodeEntities() overwrites both; keep the callback's len intact */
- for (i = 0;(i<len) && (i < 30);i++)
- output[i] = ch[i];
- output[i] = 0;
+ htmlEncodeEntities(output, &outlen, ch, &inlen, 0); /* encode a preview; inlen receives the bytes consumed */
+ output[outlen] = 0;
fprintf(stdout, "SAX.characters(%s, %d)\n", output, len);
}