New set of cleanups, released 2.2.3:
- SAX.c debugXML.c parser.c parserInternals.c tree.c valid.c xpath.c:
removed a few warnings in pedantic mode ...
- parserInternals.c parser.c: moved encoding switching function
to parserInternals.c
- configure.in, doc/Makefile.am libxml.spec.in: released 2.2.3
Daniel
diff --git a/ChangeLog b/ChangeLog
index 85a5a9d..455328f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+Sun Sep 17 17:58:37 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
+
+ * SAX.c debugXML.c parser.c parserInternals.c tree.c valid.c xpath.c:
+ removed a few warnings in pedantic mode ...
+ * parserInternals.c parser.c: moved encoding switching function
+ to parserInternals.c
+ * configure.in, doc/Makefile.am libxml.spec.in: released 2.2.3
+
Sat Sep 16 20:12:41 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
* HTMLparser.c parser.c: set ctxt->errNo before calling the
diff --git a/SAX.c b/SAX.c
index 3d1475b..b63ed2d 100644
--- a/SAX.c
+++ b/SAX.c
@@ -312,7 +312,7 @@
{
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
xmlParserInputPtr ret;
- char *URI;
+ xmlChar *URI;
const char *base = NULL;
if (ctxt->input != NULL)
@@ -320,7 +320,7 @@
if (base == NULL)
base = ctxt->directory;
- URI = xmlBuildURI(systemId, base);
+ URI = xmlBuildURI(systemId, (const xmlChar *) base);
#ifdef DEBUG_SAX
fprintf(stderr, "SAX.resolveEntity(%s, %s)\n", publicId, systemId);
@@ -423,7 +423,7 @@
ctxt->sax->warning(ctxt,
"Entity(%s) already defined in the internal subset\n", name);
if ((ent != NULL) && (ent->URI == NULL) && (systemId != NULL)) {
- char *URI;
+ xmlChar *URI;
const char *base = NULL;
if (ctxt->input != NULL)
@@ -431,7 +431,7 @@
if (base == NULL)
base = ctxt->directory;
- URI = xmlBuildURI(systemId, base);
+ URI = xmlBuildURI(systemId, (const xmlChar *) base);
ent->URI = URI;
}
} else if (ctxt->inSubset == 2) {
@@ -442,7 +442,7 @@
ctxt->sax->warning(ctxt,
"Entity(%s) already defined in the external subset\n", name);
if ((ent != NULL) && (ent->URI == NULL) && (systemId != NULL)) {
- char *URI;
+ xmlChar *URI;
const char *base = NULL;
if (ctxt->input != NULL)
@@ -450,7 +450,7 @@
if (base == NULL)
base = ctxt->directory;
- URI = xmlBuildURI(systemId, base);
+ URI = xmlBuildURI(systemId, (const xmlChar *) base);
ent->URI = URI;
}
} else {
diff --git a/config.h.in b/config.h.in
index 7a7587a..01bb475 100644
--- a/config.h.in
+++ b/config.h.in
@@ -27,6 +27,9 @@
/* Define if you have the fpclass function. */
#undef HAVE_FPCLASS
+/* Define if you have the iconv function. */
+#undef HAVE_ICONV
+
/* Define if you have the isnand function. */
#undef HAVE_ISNAND
@@ -96,6 +99,9 @@
/* Define if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H
+/* Define if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+
/* Define if you have the <sys/dir.h> header file. */
#undef HAVE_SYS_DIR_H
@@ -144,3 +150,6 @@
/* Version number of package */
#undef VERSION
+/* Define if compiler has function prototypes */
+#undef PROTOTYPES
+
diff --git a/configure.in b/configure.in
index 5b7c675..61977a6 100644
--- a/configure.in
+++ b/configure.in
@@ -5,7 +5,7 @@
LIBXML_MAJOR_VERSION=2
LIBXML_MINOR_VERSION=2
-LIBXML_MICRO_VERSION=2
+LIBXML_MICRO_VERSION=3
LIBXML_VERSION=$LIBXML_MAJOR_VERSION.$LIBXML_MINOR_VERSION.$LIBXML_MICRO_VERSION
LIBXML_VERSION_INFO=`expr $LIBXML_MAJOR_VERSION + $LIBXML_MINOR_VERSION`:$LIBXML_MICRO_VERSION:$LIBXML_MINOR_VERSION
@@ -243,6 +243,7 @@
echo Disabling ICONV support
WITH_ICONV=0
else
+ AC_CHECK_FUNCS(iconv)
if test "$have_iconv" != "" ; then
echo Iconv support not found
WITH_ICONV=0
diff --git a/debugXML.c b/debugXML.c
index e0c41f9..9ee3298 100644
--- a/debugXML.c
+++ b/debugXML.c
@@ -917,6 +917,9 @@
break;
case XML_DOCUMENT_NODE:
case XML_HTML_DOCUMENT_NODE:
+#ifdef LIBXML_SGML_ENABLED
+ case XML_SGML_DOCUMENT_NODE:
+#endif
list = ((xmlDocPtr) node)->children;
break;
case XML_ATTRIBUTE_NODE:
diff --git a/doc/Makefile.am b/doc/Makefile.am
index e83c1a5..dcb86ac 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -51,6 +51,6 @@
-(cd $(DESTDIR); gtkdoc-fixxref --module=$(DOC_MODULE) --html-dir=$(HTML_DIR))
dist-hook:
- (cd $(srcdir) ; tar cvf - xml.html FAQ.html encoding.html structure.gif DOM.gif html/*.html html/*.sgml) | (cd $(distdir); tar xf -)
+ (cd $(srcdir) ; tar cvf - *.html *.gif html/*.html html/*.sgml) | (cd $(distdir); tar xf -)
.PHONY : html sgml templates scan
diff --git a/libxml.spec.in b/libxml.spec.in
index b28e1e7..82753d8 100644
--- a/libxml.spec.in
+++ b/libxml.spec.in
@@ -92,8 +92,8 @@
%defattr(-, root, root)
%doc AUTHORS ChangeLog NEWS README COPYING COPYING.LIB TODO
-%doc /usr/man/man1/xmllint.1
-%doc /usr/man/man4/libxml.4
+%doc /usr/man/man1/xmllint.1*
+%doc /usr/man/man4/libxml.4*
%{prefix}/lib/lib*.so.*
%{prefix}/bin/xmllint
diff --git a/parser.c b/parser.c
index 4b43510..6f97fb0 100644
--- a/parser.c
+++ b/parser.c
@@ -901,311 +901,6 @@
/************************************************************************
* *
- * Commodity functions to handle encodings *
- * *
- ************************************************************************/
-
-/**
- * xmlSwitchEncoding:
- * @ctxt: the parser context
- * @enc: the encoding value (number)
- *
- * change the input functions when discovering the character encoding
- * of a given entity.
- *
- * Returns 0 in case of success, -1 otherwise
- */
-int
-xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
-{
- xmlCharEncodingHandlerPtr handler;
-
- switch (enc) {
- case XML_CHAR_ENCODING_ERROR:
- ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "encoding unknown\n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- break;
- case XML_CHAR_ENCODING_NONE:
- /* let's assume it's UTF-8 without the XML decl */
- ctxt->charset = XML_CHAR_ENCODING_UTF8;
- return(0);
- case XML_CHAR_ENCODING_UTF8:
- /* default encoding, no conversion should be needed */
- ctxt->charset = XML_CHAR_ENCODING_UTF8;
- return(0);
- default:
- break;
- }
- handler = xmlGetCharEncodingHandler(enc);
- if (handler == NULL) {
- /*
- * Default handlers.
- */
- switch (enc) {
- case XML_CHAR_ENCODING_ERROR:
- ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "encoding unknown\n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- ctxt->charset = XML_CHAR_ENCODING_UTF8;
- break;
- case XML_CHAR_ENCODING_NONE:
- /* let's assume it's UTF-8 without the XML decl */
- ctxt->charset = XML_CHAR_ENCODING_UTF8;
- return(0);
- case XML_CHAR_ENCODING_UTF8:
- case XML_CHAR_ENCODING_ASCII:
- /* default encoding, no conversion should be needed */
- ctxt->charset = XML_CHAR_ENCODING_UTF8;
- return(0);
- case XML_CHAR_ENCODING_UTF16LE:
- break;
- case XML_CHAR_ENCODING_UTF16BE:
- break;
- case XML_CHAR_ENCODING_UCS4LE:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding USC4 little endian not supported\n");
- break;
- case XML_CHAR_ENCODING_UCS4BE:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding USC4 big endian not supported\n");
- break;
- case XML_CHAR_ENCODING_EBCDIC:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding EBCDIC not supported\n");
- break;
- case XML_CHAR_ENCODING_UCS4_2143:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding UCS4 2143 not supported\n");
- break;
- case XML_CHAR_ENCODING_UCS4_3412:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding UCS4 3412 not supported\n");
- break;
- case XML_CHAR_ENCODING_UCS2:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding UCS2 not supported\n");
- break;
- case XML_CHAR_ENCODING_8859_1:
- case XML_CHAR_ENCODING_8859_2:
- case XML_CHAR_ENCODING_8859_3:
- case XML_CHAR_ENCODING_8859_4:
- case XML_CHAR_ENCODING_8859_5:
- case XML_CHAR_ENCODING_8859_6:
- case XML_CHAR_ENCODING_8859_7:
- case XML_CHAR_ENCODING_8859_8:
- case XML_CHAR_ENCODING_8859_9:
- /*
- * We used to keep the internal content in the
- * document encoding however this turns being unmaintainable
- * So xmlGetCharEncodingHandler() will return non-null
- * values for this now.
- */
- if ((ctxt->inputNr == 1) &&
- (ctxt->encoding == NULL) &&
- (ctxt->input->encoding != NULL)) {
- ctxt->encoding = xmlStrdup(ctxt->input->encoding);
- }
- ctxt->charset = enc;
- return(0);
- case XML_CHAR_ENCODING_2022_JP:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding ISO-2022-JPnot supported\n");
- break;
- case XML_CHAR_ENCODING_SHIFT_JIS:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding Shift_JIS not supported\n");
- break;
- case XML_CHAR_ENCODING_EUC_JP:
- ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "char encoding EUC-JPnot supported\n");
- break;
- }
- }
- if (handler == NULL)
- return(-1);
- ctxt->charset = XML_CHAR_ENCODING_UTF8;
- return(xmlSwitchToEncoding(ctxt, handler));
-}
-
-/**
- * xmlSwitchToEncoding:
- * @ctxt: the parser context
- * @handler: the encoding handler
- *
- * change the input functions when discovering the character encoding
- * of a given entity.
- *
- * Returns 0 in case of success, -1 otherwise
- */
-int
-xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
-{
- int nbchars;
-
- if (handler != NULL) {
- if (ctxt->input != NULL) {
- if (ctxt->input->buf != NULL) {
- if (ctxt->input->buf->encoder != NULL) {
- if (ctxt->input->buf->encoder == handler)
- return(0);
- /*
- * Note: this is a bit dangerous, but that's what it
- * takes to use nearly compatible signature for different
- * encodings.
- */
- xmlCharEncCloseFunc(ctxt->input->buf->encoder);
- ctxt->input->buf->encoder = handler;
- return(0);
- }
- ctxt->input->buf->encoder = handler;
-
- /*
- * Is there already some content down the pipe to convert ?
- */
- if ((ctxt->input->buf->buffer != NULL) &&
- (ctxt->input->buf->buffer->use > 0)) {
- int processed;
-
- /*
- * Specific handling of the Byte Order Mark for
- * UTF-16
- */
- if ((handler->name != NULL) &&
- (!strcmp(handler->name, "UTF-16LE")) &&
- (ctxt->input->cur[0] == 0xFF) &&
- (ctxt->input->cur[1] == 0xFE)) {
- ctxt->input->cur += 2;
- }
- if ((handler->name != NULL) &&
- (!strcmp(handler->name, "UTF-16BE")) &&
- (ctxt->input->cur[0] == 0xFE) &&
- (ctxt->input->cur[1] == 0xFF)) {
- ctxt->input->cur += 2;
- }
-
- /*
- * Shring the current input buffer.
- * Move it as the raw buffer and create a new input buffer
- */
- processed = ctxt->input->cur - ctxt->input->base;
- xmlBufferShrink(ctxt->input->buf->buffer, processed);
- ctxt->input->buf->raw = ctxt->input->buf->buffer;
- ctxt->input->buf->buffer = xmlBufferCreate();
-
- if (ctxt->html) {
- /*
- * converst as much as possbile of the buffer
- */
- nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
- ctxt->input->buf->buffer,
- ctxt->input->buf->raw);
- } else {
- /*
- * convert just enough to get
- * '<?xml version="1.0" encoding="xxx"?>'
- * parsed with the autodetected encoding
- * into the parser reading buffer.
- */
- nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
- ctxt->input->buf->buffer,
- ctxt->input->buf->raw);
- }
- if (nbchars < 0) {
- fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
- return(-1);
- }
- ctxt->input->base =
- ctxt->input->cur = ctxt->input->buf->buffer->content;
-
- }
- return(0);
- } else {
- if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
- /*
- * When parsing a static memory array one must know the
- * size to be able to convert the buffer.
- */
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlSwitchEncoding : no input\n");
- return(-1);
- } else {
- int processed;
-
- /*
- * Shring the current input buffer.
- * Move it as the raw buffer and create a new input buffer
- */
- processed = ctxt->input->cur - ctxt->input->base;
-
- ctxt->input->buf->raw = xmlBufferCreate();
- xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
- ctxt->input->length - processed);
- ctxt->input->buf->buffer = xmlBufferCreate();
-
- /*
- * convert as much as possible of the raw input
- * to the parser reading buffer.
- */
- nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
- ctxt->input->buf->buffer,
- ctxt->input->buf->raw);
- if (nbchars < 0) {
- fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
- return(-1);
- }
-
- /*
- * Conversion succeeded, get rid of the old buffer
- */
- if ((ctxt->input->free != NULL) &&
- (ctxt->input->base != NULL))
- ctxt->input->free((xmlChar *) ctxt->input->base);
- ctxt->input->base =
- ctxt->input->cur = ctxt->input->buf->buffer->content;
- }
- }
- } else {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "xmlSwitchEncoding : no input\n");
- return(-1);
- }
- /*
- * The parsing is now done in UTF8 natively
- */
- ctxt->charset = XML_CHAR_ENCODING_UTF8;
- } else
- return(-1);
- return(0);
-
-}
-
-/************************************************************************
- * *
* Commodity functions to handle xmlChars *
* *
************************************************************************/
diff --git a/parserInternals.c b/parserInternals.c
index c20b90a..de757aa 100644
--- a/parserInternals.c
+++ b/parserInternals.c
@@ -1496,6 +1496,311 @@
/************************************************************************
* *
+ * Commodity functions to switch encodings *
+ * *
+ ************************************************************************/
+
+/**
+ * xmlSwitchEncoding:
+ * @ctxt: the parser context
+ * @enc: the encoding value (number)
+ *
+ * change the input functions when discovering the character encoding
+ * of a given entity.
+ *
+ * Returns 0 in case of success, -1 otherwise
+ */
+int
+xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
+{
+ xmlCharEncodingHandlerPtr handler;
+
+ switch (enc) {
+ case XML_CHAR_ENCODING_ERROR:
+ ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData, "encoding unknown\n");
+ ctxt->wellFormed = 0;
+ ctxt->disableSAX = 1;
+ break;
+ case XML_CHAR_ENCODING_NONE:
+ /* let's assume it's UTF-8 without the XML decl */
+ ctxt->charset = XML_CHAR_ENCODING_UTF8;
+ return(0);
+ case XML_CHAR_ENCODING_UTF8:
+ /* default encoding, no conversion should be needed */
+ ctxt->charset = XML_CHAR_ENCODING_UTF8;
+ return(0);
+ default:
+ break;
+ }
+ handler = xmlGetCharEncodingHandler(enc);
+ if (handler == NULL) {
+ /*
+ * Default handlers.
+ */
+ switch (enc) {
+ case XML_CHAR_ENCODING_ERROR:
+ ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData, "encoding unknown\n");
+ ctxt->wellFormed = 0;
+ ctxt->disableSAX = 1;
+ ctxt->charset = XML_CHAR_ENCODING_UTF8;
+ break;
+ case XML_CHAR_ENCODING_NONE:
+ /* let's assume it's UTF-8 without the XML decl */
+ ctxt->charset = XML_CHAR_ENCODING_UTF8;
+ return(0);
+ case XML_CHAR_ENCODING_UTF8:
+ case XML_CHAR_ENCODING_ASCII:
+ /* default encoding, no conversion should be needed */
+ ctxt->charset = XML_CHAR_ENCODING_UTF8;
+ return(0);
+ case XML_CHAR_ENCODING_UTF16LE:
+ break;
+ case XML_CHAR_ENCODING_UTF16BE:
+ break;
+ case XML_CHAR_ENCODING_UCS4LE:
+ ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "char encoding USC4 little endian not supported\n");
+ break;
+ case XML_CHAR_ENCODING_UCS4BE:
+ ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "char encoding USC4 big endian not supported\n");
+ break;
+ case XML_CHAR_ENCODING_EBCDIC:
+ ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "char encoding EBCDIC not supported\n");
+ break;
+ case XML_CHAR_ENCODING_UCS4_2143:
+ ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "char encoding UCS4 2143 not supported\n");
+ break;
+ case XML_CHAR_ENCODING_UCS4_3412:
+ ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "char encoding UCS4 3412 not supported\n");
+ break;
+ case XML_CHAR_ENCODING_UCS2:
+ ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "char encoding UCS2 not supported\n");
+ break;
+ case XML_CHAR_ENCODING_8859_1:
+ case XML_CHAR_ENCODING_8859_2:
+ case XML_CHAR_ENCODING_8859_3:
+ case XML_CHAR_ENCODING_8859_4:
+ case XML_CHAR_ENCODING_8859_5:
+ case XML_CHAR_ENCODING_8859_6:
+ case XML_CHAR_ENCODING_8859_7:
+ case XML_CHAR_ENCODING_8859_8:
+ case XML_CHAR_ENCODING_8859_9:
+ /*
+ * We used to keep the internal content in the
+ * document encoding; however, this turned out to be unmaintainable,
+ * so xmlGetCharEncodingHandler() will return non-null
+ * values for these encodings now.
+ */
+ if ((ctxt->inputNr == 1) &&
+ (ctxt->encoding == NULL) &&
+ (ctxt->input->encoding != NULL)) {
+ ctxt->encoding = xmlStrdup(ctxt->input->encoding);
+ }
+ ctxt->charset = enc;
+ return(0);
+ case XML_CHAR_ENCODING_2022_JP:
+ ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "char encoding ISO-2022-JPnot supported\n");
+ break;
+ case XML_CHAR_ENCODING_SHIFT_JIS:
+ ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "char encoding Shift_JIS not supported\n");
+ break;
+ case XML_CHAR_ENCODING_EUC_JP:
+ ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "char encoding EUC-JPnot supported\n");
+ break;
+ }
+ }
+ if (handler == NULL)
+ return(-1);
+ ctxt->charset = XML_CHAR_ENCODING_UTF8;
+ return(xmlSwitchToEncoding(ctxt, handler));
+}
+
+/**
+ * xmlSwitchToEncoding:
+ * @ctxt: the parser context
+ * @handler: the encoding handler
+ *
+ * change the input functions when discovering the character encoding
+ * of a given entity.
+ *
+ * Returns 0 in case of success, -1 otherwise
+ */
+int
+xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
+{
+ int nbchars;
+
+ if (handler != NULL) {
+ if (ctxt->input != NULL) {
+ if (ctxt->input->buf != NULL) {
+ if (ctxt->input->buf->encoder != NULL) {
+ if (ctxt->input->buf->encoder == handler)
+ return(0);
+ /*
+ * Note: this is a bit dangerous, but that's what it
+ * takes to use nearly compatible signatures for different
+ * encodings.
+ */
+ xmlCharEncCloseFunc(ctxt->input->buf->encoder);
+ ctxt->input->buf->encoder = handler;
+ return(0);
+ }
+ ctxt->input->buf->encoder = handler;
+
+ /*
+ * Is there already some content down the pipe to convert ?
+ */
+ if ((ctxt->input->buf->buffer != NULL) &&
+ (ctxt->input->buf->buffer->use > 0)) {
+ int processed;
+
+ /*
+ * Specific handling of the Byte Order Mark for
+ * UTF-16
+ */
+ if ((handler->name != NULL) &&
+ (!strcmp(handler->name, "UTF-16LE")) &&
+ (ctxt->input->cur[0] == 0xFF) &&
+ (ctxt->input->cur[1] == 0xFE)) {
+ ctxt->input->cur += 2;
+ }
+ if ((handler->name != NULL) &&
+ (!strcmp(handler->name, "UTF-16BE")) &&
+ (ctxt->input->cur[0] == 0xFE) &&
+ (ctxt->input->cur[1] == 0xFF)) {
+ ctxt->input->cur += 2;
+ }
+
+ /*
+ * Shrink the current input buffer.
+ * Move it as the raw buffer and create a new input buffer
+ */
+ processed = ctxt->input->cur - ctxt->input->base;
+ xmlBufferShrink(ctxt->input->buf->buffer, processed);
+ ctxt->input->buf->raw = ctxt->input->buf->buffer;
+ ctxt->input->buf->buffer = xmlBufferCreate();
+
+ if (ctxt->html) {
+ /*
+ * convert as much as possible of the buffer
+ */
+ nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
+ ctxt->input->buf->buffer,
+ ctxt->input->buf->raw);
+ } else {
+ /*
+ * convert just enough to get
+ * '<?xml version="1.0" encoding="xxx"?>'
+ * parsed with the autodetected encoding
+ * into the parser reading buffer.
+ */
+ nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
+ ctxt->input->buf->buffer,
+ ctxt->input->buf->raw);
+ }
+ if (nbchars < 0) {
+ fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
+ return(-1);
+ }
+ ctxt->input->base =
+ ctxt->input->cur = ctxt->input->buf->buffer->content;
+
+ }
+ return(0);
+ } else {
+ if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
+ /*
+ * When parsing a static memory array one must know the
+ * size to be able to convert the buffer.
+ */
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "xmlSwitchEncoding : no input\n");
+ return(-1);
+ } else {
+ int processed;
+
+ /*
+ * Shrink the current input buffer.
+ * Move it as the raw buffer and create a new input buffer
+ */
+ processed = ctxt->input->cur - ctxt->input->base;
+
+ ctxt->input->buf->raw = xmlBufferCreate();
+ xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
+ ctxt->input->length - processed);
+ ctxt->input->buf->buffer = xmlBufferCreate();
+
+ /*
+ * convert as much as possible of the raw input
+ * to the parser reading buffer.
+ */
+ nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
+ ctxt->input->buf->buffer,
+ ctxt->input->buf->raw);
+ if (nbchars < 0) {
+ fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
+ return(-1);
+ }
+
+ /*
+ * Conversion succeeded, get rid of the old buffer
+ */
+ if ((ctxt->input->free != NULL) &&
+ (ctxt->input->base != NULL))
+ ctxt->input->free((xmlChar *) ctxt->input->base);
+ ctxt->input->base =
+ ctxt->input->cur = ctxt->input->buf->buffer->content;
+ }
+ }
+ } else {
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "xmlSwitchEncoding : no input\n");
+ return(-1);
+ }
+ /*
+ * The parsing is now done in UTF8 natively
+ */
+ ctxt->charset = XML_CHAR_ENCODING_UTF8;
+ } else
+ return(-1);
+ return(0);
+
+}
+
+/************************************************************************
+ * *
* Commodity functions to handle entities processing *
* *
************************************************************************/
@@ -1705,7 +2010,7 @@
return(NULL);
URI = xmlStrdup((xmlChar *) filename);
- directory = xmlParserGetDirectory(URI);
+ directory = xmlParserGetDirectory((const char *) URI);
inputStream = xmlNewInputStream(ctxt);
if (inputStream == NULL) {
@@ -1714,7 +2019,7 @@
return(NULL);
}
- inputStream->filename = URI;
+ inputStream->filename = (const char *) URI;
inputStream->directory = directory;
inputStream->buf = buf;
diff --git a/tree.c b/tree.c
index 4366336..5174134 100644
--- a/tree.c
+++ b/tree.c
@@ -2637,6 +2637,9 @@
case XML_PI_NODE:
case XML_ENTITY_REF_NODE:
case XML_ENTITY_NODE:
+#ifdef LIBXML_SGML_ENABLED
+ case XML_SGML_DOCUMENT_NODE:
+#endif
return;
case XML_ELEMENT_NODE:
case XML_ATTRIBUTE_NODE:
@@ -2719,6 +2722,9 @@
case XML_DOCUMENT_FRAG_NODE:
case XML_NOTATION_NODE:
case XML_HTML_DOCUMENT_NODE:
+#ifdef LIBXML_SGML_ENABLED
+ case XML_SGML_DOCUMENT_NODE:
+#endif
return;
case XML_ELEMENT_NODE:
case XML_ATTRIBUTE_NODE:
@@ -2845,6 +2851,9 @@
case XML_DOCUMENT_TYPE_NODE:
case XML_NOTATION_NODE:
case XML_DTD_NODE:
+#ifdef LIBXML_SGML_ENABLED
+ case XML_SGML_DOCUMENT_NODE:
+#endif
return(NULL);
case XML_ELEMENT_DECL:
/* TODO !!! */
@@ -2930,6 +2939,9 @@
case XML_DOCUMENT_NODE:
case XML_HTML_DOCUMENT_NODE:
case XML_DOCUMENT_TYPE_NODE:
+#ifdef LIBXML_SGML_ENABLED
+ case XML_SGML_DOCUMENT_NODE:
+#endif
break;
case XML_NOTATION_NODE:
break;
@@ -3012,6 +3024,9 @@
case XML_DTD_NODE:
case XML_HTML_DOCUMENT_NODE:
case XML_DOCUMENT_TYPE_NODE:
+#ifdef LIBXML_SGML_ENABLED
+ case XML_SGML_DOCUMENT_NODE:
+#endif
break;
case XML_ELEMENT_DECL:
/* TODO !!! */
@@ -3096,6 +3111,9 @@
case XML_DTD_NODE:
case XML_HTML_DOCUMENT_NODE:
case XML_DOCUMENT_TYPE_NODE:
+#ifdef LIBXML_SGML_ENABLED
+ case XML_SGML_DOCUMENT_NODE:
+#endif
break;
case XML_ELEMENT_DECL:
case XML_ATTRIBUTE_DECL:
diff --git a/valid.c b/valid.c
index 3e38e3a..f2fc71d 100644
--- a/valid.c
+++ b/valid.c
@@ -3561,6 +3561,9 @@
break;
case XML_ATTRIBUTE_NODE:
case XML_DOCUMENT_NODE:
+#ifdef LIBXML_SGML_ENABLED
+ case XML_SGML_DOCUMENT_NODE:
+#endif
case XML_HTML_DOCUMENT_NODE:
case XML_DOCUMENT_TYPE_NODE:
case XML_DOCUMENT_FRAG_NODE:
diff --git a/xpath.c b/xpath.c
index 433bb56..03b5392 100644
--- a/xpath.c
+++ b/xpath.c
@@ -1504,6 +1504,9 @@
case XML_DOCUMENT_TYPE_NODE:
case XML_DOCUMENT_FRAG_NODE:
case XML_HTML_DOCUMENT_NODE:
+#ifdef LIBXML_SGML_ENABLED
+ case XML_SGML_DOCUMENT_NODE:
+#endif
return(((xmlDocPtr) ctxt->context->node)->children);
case XML_ELEMENT_DECL:
case XML_ATTRIBUTE_DECL:
@@ -1632,6 +1635,9 @@
case XML_DOCUMENT_TYPE_NODE:
case XML_DOCUMENT_FRAG_NODE:
case XML_HTML_DOCUMENT_NODE:
+#ifdef LIBXML_SGML_ENABLED
+ case XML_SGML_DOCUMENT_NODE:
+#endif
return(NULL);
}
}
@@ -1686,6 +1692,9 @@
case XML_DOCUMENT_TYPE_NODE:
case XML_DOCUMENT_FRAG_NODE:
case XML_HTML_DOCUMENT_NODE:
+#ifdef LIBXML_SGML_ENABLED
+ case XML_SGML_DOCUMENT_NODE:
+#endif
return(NULL);
}
return(NULL);
@@ -1717,6 +1726,9 @@
case XML_DOCUMENT_TYPE_NODE:
case XML_DOCUMENT_FRAG_NODE:
case XML_HTML_DOCUMENT_NODE:
+#ifdef LIBXML_SGML_ENABLED
+ case XML_SGML_DOCUMENT_NODE:
+#endif
return(NULL);
}
return(NULL);