preparing the release of 2.7.2 fix the Solaris portability issue
* configure.in doc/* NEWS: preparing the release of 2.7.2
* dict.c: fix the Solaris portability issue
* parser.c: additional cleanup on #554660 fix
* test/ent13 result/ent13* result/noent/ent13*: added the
example in the regression test suite.
* HTMLparser.c: handle leading BOM in htmlParseElement()
Daniel
svn path=/trunk/; revision=3799
diff --git a/HTMLparser.c b/HTMLparser.c
index 202b24a..24b0fc0 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -4120,6 +4120,8 @@
int
htmlParseDocument(htmlParserCtxtPtr ctxt) {
+ xmlChar start[4];
+ xmlCharEncoding enc;
xmlDtdPtr dtd;
xmlInitParser();
@@ -4139,6 +4141,23 @@
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
+ if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
+ ((ctxt->input->end - ctxt->input->cur) >= 4)) {
+ /*
+ * Get the 4 first bytes and decode the charset
+ * if enc != XML_CHAR_ENCODING_NONE
+ * plug some encoding conversion routines.
+ */
+ start[0] = RAW;
+ start[1] = NXT(1);
+ start[2] = NXT(2);
+ start[3] = NXT(3);
+ enc = xmlDetectCharEncoding(&start[0], 4);
+ if (enc != XML_CHAR_ENCODING_NONE) {
+ xmlSwitchEncoding(ctxt, enc);
+ }
+ }
+
/*
* Wipe out everything which is before the first '<'
*/
@@ -4158,10 +4177,10 @@
while (((CUR == '<') && (NXT(1) == '!') &&
(NXT(2) == '-') && (NXT(3) == '-')) ||
((CUR == '<') && (NXT(1) == '?'))) {
- htmlParseComment(ctxt);
- htmlParsePI(ctxt);
+ htmlParseComment(ctxt);
+ htmlParsePI(ctxt);
SKIP_BLANKS;
- }
+ }
/*