| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 1 | /* | 
 | 2 |  * parser.c : Internal routines (and obsolete ones) needed for the | 
 | 3 |  *            XML and HTML parsers. | 
 | 4 |  * | 
 | 5 |  * See Copyright for the status of this software. | 
 | 6 |  * | 
 | 7 |  * Daniel.Veillard@w3.org | 
 | 8 |  */ | 
 | 9 |  | 
| Bjorn Reese | 70a9da5 | 2001-04-21 16:57:29 +0000 | [diff] [blame] | 10 | #include "libxml.h" | 
 | 11 |  | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 12 | #ifdef WIN32 | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 13 | #define XML_DIR_SEP '\\' | 
 | 14 | #else | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 15 | #define XML_DIR_SEP '/' | 
 | 16 | #endif | 
 | 17 |  | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 18 | #include <string.h> | 
 | 19 | #ifdef HAVE_CTYPE_H | 
 | 20 | #include <ctype.h> | 
 | 21 | #endif | 
 | 22 | #ifdef HAVE_STDLIB_H | 
 | 23 | #include <stdlib.h> | 
 | 24 | #endif | 
 | 25 | #ifdef HAVE_SYS_STAT_H | 
 | 26 | #include <sys/stat.h> | 
 | 27 | #endif | 
 | 28 | #ifdef HAVE_FCNTL_H | 
 | 29 | #include <fcntl.h> | 
 | 30 | #endif | 
 | 31 | #ifdef HAVE_UNISTD_H | 
 | 32 | #include <unistd.h> | 
 | 33 | #endif | 
 | 34 | #ifdef HAVE_ZLIB_H | 
 | 35 | #include <zlib.h> | 
 | 36 | #endif | 
 | 37 |  | 
 | 38 | #include <libxml/xmlmemory.h> | 
 | 39 | #include <libxml/tree.h> | 
 | 40 | #include <libxml/parser.h> | 
 | 41 | #include <libxml/parserInternals.h> | 
 | 42 | #include <libxml/valid.h> | 
 | 43 | #include <libxml/entities.h> | 
 | 44 | #include <libxml/xmlerror.h> | 
 | 45 | #include <libxml/encoding.h> | 
 | 46 | #include <libxml/valid.h> | 
 | 47 | #include <libxml/xmlIO.h> | 
 | 48 | #include <libxml/uri.h> | 
 | 49 |  | 
| Daniel Veillard | 56a4cb8 | 2001-03-24 17:00:36 +0000 | [diff] [blame] | 50 | void xmlUpgradeOldNs(xmlDocPtr doc); | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 51 |  | 
 | 52 | /************************************************************************ | 
 | 53 |  *									* | 
 | 54 |  * 		Version and Features handling				* | 
 | 55 |  *									* | 
 | 56 |  ************************************************************************/ | 
 | 57 | const char *xmlParserVersion = LIBXML_VERSION_STRING; | 
 | 58 |  | 
 | 59 | /* | 
 | 60 |  * xmlCheckVersion: | 
 | 61 |  * @version: the include version number | 
 | 62 |  * | 
 | 63 |  * check the compiled lib version against the include one. | 
 | 64 |  * This can warn or immediately kill the application | 
 | 65 |  */ | 
 | 66 | void | 
 | 67 | xmlCheckVersion(int version) { | 
 | 68 |     int myversion = (int) LIBXML_VERSION; | 
 | 69 |  | 
| Daniel Veillard | 4de4d3b | 2001-05-07 20:50:47 +0000 | [diff] [blame] | 70 |     xmlInitMemory(); | 
 | 71 |  | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 72 |     if ((myversion / 10000) != (version / 10000)) { | 
 | 73 | 	xmlGenericError(xmlGenericErrorContext,  | 
 | 74 | 		"Fatal: program compiled against libxml %d using libxml %d\n", | 
 | 75 | 		(version / 10000), (myversion / 10000)); | 
 | 76 | 	exit(1); | 
 | 77 |     } | 
 | 78 |     if ((myversion / 100) < (version / 100)) { | 
 | 79 | 	xmlGenericError(xmlGenericErrorContext,  | 
 | 80 | 		"Warning: program compiled against libxml %d using older %d\n", | 
 | 81 | 		(version / 100), (myversion / 100)); | 
 | 82 |     } | 
 | 83 | } | 
 | 84 |  | 
 | 85 |  | 
 | 86 | const char *xmlFeaturesList[] = { | 
 | 87 |     "validate", | 
 | 88 |     "load subset", | 
 | 89 |     "keep blanks", | 
 | 90 |     "disable SAX", | 
 | 91 |     "fetch external entities", | 
 | 92 |     "substitute entities", | 
 | 93 |     "gather line info", | 
 | 94 |     "user data", | 
 | 95 |     "is html", | 
 | 96 |     "is standalone", | 
 | 97 |     "stop parser", | 
 | 98 |     "document", | 
 | 99 |     "is well formed", | 
 | 100 |     "is valid", | 
 | 101 |     "SAX block", | 
 | 102 |     "SAX function internalSubset", | 
 | 103 |     "SAX function isStandalone", | 
 | 104 |     "SAX function hasInternalSubset", | 
 | 105 |     "SAX function hasExternalSubset", | 
 | 106 |     "SAX function resolveEntity", | 
 | 107 |     "SAX function getEntity", | 
 | 108 |     "SAX function entityDecl", | 
 | 109 |     "SAX function notationDecl", | 
 | 110 |     "SAX function attributeDecl", | 
 | 111 |     "SAX function elementDecl", | 
 | 112 |     "SAX function unparsedEntityDecl", | 
 | 113 |     "SAX function setDocumentLocator", | 
 | 114 |     "SAX function startDocument", | 
 | 115 |     "SAX function endDocument", | 
 | 116 |     "SAX function startElement", | 
 | 117 |     "SAX function endElement", | 
 | 118 |     "SAX function reference", | 
 | 119 |     "SAX function characters", | 
 | 120 |     "SAX function ignorableWhitespace", | 
 | 121 |     "SAX function processingInstruction", | 
 | 122 |     "SAX function comment", | 
 | 123 |     "SAX function warning", | 
 | 124 |     "SAX function error", | 
 | 125 |     "SAX function fatalError", | 
 | 126 |     "SAX function getParameterEntity", | 
 | 127 |     "SAX function cdataBlock", | 
 | 128 |     "SAX function externalSubset", | 
 | 129 | }; | 
 | 130 |  | 
 | 131 | /* | 
 | 132 |  * xmlGetFeaturesList: | 
 | 133 |  * @len:  the length of the features name array (input/output) | 
 | 134 |  * @result:  an array of string to be filled with the features name. | 
 | 135 |  * | 
 | 136 |  * Copy at most *@len feature names into the @result array | 
 | 137 |  * | 
 | 138 |  * Returns -1 in case or error, or the total number of features, | 
 | 139 |  *            len is updated with the number of strings copied, | 
 | 140 |  *            strings must not be deallocated | 
 | 141 |  */ | 
 | 142 | int | 
 | 143 | xmlGetFeaturesList(int *len, const char **result) { | 
 | 144 |     int ret, i; | 
 | 145 |  | 
 | 146 |     ret = sizeof(xmlFeaturesList)/sizeof(xmlFeaturesList[0]); | 
 | 147 |     if ((len == NULL) || (result == NULL)) | 
 | 148 | 	return(ret); | 
 | 149 |     if ((*len < 0) || (*len >= 1000)) | 
 | 150 | 	return(-1); | 
 | 151 |     if (*len > ret) | 
 | 152 | 	*len = ret; | 
 | 153 |     for (i = 0;i < *len;i++) | 
 | 154 | 	result[i] = xmlFeaturesList[i]; | 
 | 155 |     return(ret); | 
 | 156 | } | 
 | 157 |  | 
 | 158 | /* | 
 | 159 |  * xmlGetFeature: | 
 | 160 |  * @ctxt:  an XML/HTML parser context | 
 | 161 |  * @name:  the feature name | 
 | 162 |  * @result:  location to store the result | 
 | 163 |  * | 
 | 164 |  * Read the current value of one feature of this parser instance | 
 | 165 |  * | 
 | 166 |  * Returns -1 in case or error, 0 otherwise | 
 | 167 |  */ | 
 | 168 | int | 
 | 169 | xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) { | 
 | 170 |     if ((ctxt == NULL) || (name == NULL) || (result == NULL)) | 
 | 171 | 	return(-1); | 
 | 172 |  | 
 | 173 |     if (!strcmp(name, "validate")) { | 
 | 174 | 	*((int *) result) = ctxt->validate; | 
 | 175 |     } else if (!strcmp(name, "keep blanks")) { | 
 | 176 | 	*((int *) result) = ctxt->keepBlanks; | 
 | 177 |     } else if (!strcmp(name, "disable SAX")) { | 
 | 178 | 	*((int *) result) = ctxt->disableSAX; | 
 | 179 |     } else if (!strcmp(name, "fetch external entities")) { | 
 | 180 | 	*((int *) result) = ctxt->loadsubset; | 
 | 181 |     } else if (!strcmp(name, "substitute entities")) { | 
 | 182 | 	*((int *) result) = ctxt->replaceEntities; | 
 | 183 |     } else if (!strcmp(name, "gather line info")) { | 
 | 184 | 	*((int *) result) = ctxt->record_info; | 
 | 185 |     } else if (!strcmp(name, "user data")) { | 
 | 186 | 	*((void **)result) = ctxt->userData; | 
 | 187 |     } else if (!strcmp(name, "is html")) { | 
 | 188 | 	*((int *) result) = ctxt->html; | 
 | 189 |     } else if (!strcmp(name, "is standalone")) { | 
 | 190 | 	*((int *) result) = ctxt->standalone; | 
 | 191 |     } else if (!strcmp(name, "document")) { | 
 | 192 | 	*((xmlDocPtr *) result) = ctxt->myDoc; | 
 | 193 |     } else if (!strcmp(name, "is well formed")) { | 
 | 194 | 	*((int *) result) = ctxt->wellFormed; | 
 | 195 |     } else if (!strcmp(name, "is valid")) { | 
 | 196 | 	*((int *) result) = ctxt->valid; | 
 | 197 |     } else if (!strcmp(name, "SAX block")) { | 
 | 198 | 	*((xmlSAXHandlerPtr *) result) = ctxt->sax; | 
 | 199 |     } else if (!strcmp(name, "SAX function internalSubset")) { | 
 | 200 |         *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset; | 
 | 201 |     } else if (!strcmp(name, "SAX function isStandalone")) { | 
 | 202 |         *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone; | 
 | 203 |     } else if (!strcmp(name, "SAX function hasInternalSubset")) { | 
 | 204 |         *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset; | 
 | 205 |     } else if (!strcmp(name, "SAX function hasExternalSubset")) { | 
 | 206 |         *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset; | 
 | 207 |     } else if (!strcmp(name, "SAX function resolveEntity")) { | 
 | 208 |         *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity; | 
 | 209 |     } else if (!strcmp(name, "SAX function getEntity")) { | 
 | 210 |         *((getEntitySAXFunc *) result) = ctxt->sax->getEntity; | 
 | 211 |     } else if (!strcmp(name, "SAX function entityDecl")) { | 
 | 212 |         *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl; | 
 | 213 |     } else if (!strcmp(name, "SAX function notationDecl")) { | 
 | 214 |         *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl; | 
 | 215 |     } else if (!strcmp(name, "SAX function attributeDecl")) { | 
 | 216 |         *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl; | 
 | 217 |     } else if (!strcmp(name, "SAX function elementDecl")) { | 
 | 218 |         *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl; | 
 | 219 |     } else if (!strcmp(name, "SAX function unparsedEntityDecl")) { | 
 | 220 |         *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl; | 
 | 221 |     } else if (!strcmp(name, "SAX function setDocumentLocator")) { | 
 | 222 |         *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator; | 
 | 223 |     } else if (!strcmp(name, "SAX function startDocument")) { | 
 | 224 |         *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument; | 
 | 225 |     } else if (!strcmp(name, "SAX function endDocument")) { | 
 | 226 |         *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument; | 
 | 227 |     } else if (!strcmp(name, "SAX function startElement")) { | 
 | 228 |         *((startElementSAXFunc *) result) = ctxt->sax->startElement; | 
 | 229 |     } else if (!strcmp(name, "SAX function endElement")) { | 
 | 230 |         *((endElementSAXFunc *) result) = ctxt->sax->endElement; | 
 | 231 |     } else if (!strcmp(name, "SAX function reference")) { | 
 | 232 |         *((referenceSAXFunc *) result) = ctxt->sax->reference; | 
 | 233 |     } else if (!strcmp(name, "SAX function characters")) { | 
 | 234 |         *((charactersSAXFunc *) result) = ctxt->sax->characters; | 
 | 235 |     } else if (!strcmp(name, "SAX function ignorableWhitespace")) { | 
 | 236 |         *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace; | 
 | 237 |     } else if (!strcmp(name, "SAX function processingInstruction")) { | 
 | 238 |         *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction; | 
 | 239 |     } else if (!strcmp(name, "SAX function comment")) { | 
 | 240 |         *((commentSAXFunc *) result) = ctxt->sax->comment; | 
 | 241 |     } else if (!strcmp(name, "SAX function warning")) { | 
 | 242 |         *((warningSAXFunc *) result) = ctxt->sax->warning; | 
 | 243 |     } else if (!strcmp(name, "SAX function error")) { | 
 | 244 |         *((errorSAXFunc *) result) = ctxt->sax->error; | 
 | 245 |     } else if (!strcmp(name, "SAX function fatalError")) { | 
 | 246 |         *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError; | 
 | 247 |     } else if (!strcmp(name, "SAX function getParameterEntity")) { | 
 | 248 |         *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity; | 
 | 249 |     } else if (!strcmp(name, "SAX function cdataBlock")) { | 
 | 250 |         *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock; | 
 | 251 |     } else if (!strcmp(name, "SAX function externalSubset")) { | 
 | 252 |         *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset; | 
 | 253 |     } else { | 
 | 254 | 	return(-1); | 
 | 255 |     } | 
 | 256 |     return(0); | 
 | 257 | } | 
 | 258 |  | 
 | 259 | /* | 
 | 260 |  * xmlSetFeature: | 
 | 261 |  * @ctxt:  an XML/HTML parser context | 
 | 262 |  * @name:  the feature name | 
 | 263 |  * @value:  pointer to the location of the new value | 
 | 264 |  * | 
 | 265 |  * Change the current value of one feature of this parser instance | 
 | 266 |  * | 
 | 267 |  * Returns -1 in case or error, 0 otherwise | 
 | 268 |  */ | 
 | 269 | int	 | 
 | 270 | xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) { | 
 | 271 |     if ((ctxt == NULL) || (name == NULL) || (value == NULL)) | 
 | 272 | 	return(-1); | 
 | 273 |  | 
 | 274 |     if (!strcmp(name, "validate")) { | 
 | 275 | 	int newvalidate = *((int *) value); | 
 | 276 | 	if ((!ctxt->validate) && (newvalidate != 0)) { | 
 | 277 | 	    if (ctxt->vctxt.warning == NULL) | 
 | 278 | 		ctxt->vctxt.warning = xmlParserValidityWarning; | 
 | 279 | 	    if (ctxt->vctxt.error == NULL) | 
 | 280 | 		ctxt->vctxt.error = xmlParserValidityError; | 
| Daniel Veillard | 34b1b3a | 2001-04-21 14:16:10 +0000 | [diff] [blame] | 281 | 	    ctxt->vctxt.nodeMax = 0; | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 282 | 	} | 
 | 283 |         ctxt->validate = newvalidate; | 
 | 284 |     } else if (!strcmp(name, "keep blanks")) { | 
 | 285 |         ctxt->keepBlanks = *((int *) value); | 
 | 286 |     } else if (!strcmp(name, "disable SAX")) { | 
 | 287 |         ctxt->disableSAX = *((int *) value); | 
 | 288 |     } else if (!strcmp(name, "fetch external entities")) { | 
 | 289 | 	ctxt->loadsubset = *((int *) value); | 
 | 290 |     } else if (!strcmp(name, "substitute entities")) { | 
 | 291 |         ctxt->replaceEntities = *((int *) value); | 
 | 292 |     } else if (!strcmp(name, "gather line info")) { | 
 | 293 |         ctxt->record_info = *((int *) value); | 
 | 294 |     } else if (!strcmp(name, "user data")) { | 
 | 295 |         ctxt->userData = *((void **)value); | 
 | 296 |     } else if (!strcmp(name, "is html")) { | 
 | 297 |         ctxt->html = *((int *) value); | 
 | 298 |     } else if (!strcmp(name, "is standalone")) { | 
 | 299 |         ctxt->standalone = *((int *) value); | 
 | 300 |     } else if (!strcmp(name, "document")) { | 
 | 301 |         ctxt->myDoc = *((xmlDocPtr *) value); | 
 | 302 |     } else if (!strcmp(name, "is well formed")) { | 
 | 303 |         ctxt->wellFormed = *((int *) value); | 
 | 304 |     } else if (!strcmp(name, "is valid")) { | 
 | 305 |         ctxt->valid = *((int *) value); | 
 | 306 |     } else if (!strcmp(name, "SAX block")) { | 
 | 307 |         ctxt->sax = *((xmlSAXHandlerPtr *) value); | 
 | 308 |     } else if (!strcmp(name, "SAX function internalSubset")) { | 
 | 309 |         ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value); | 
 | 310 |     } else if (!strcmp(name, "SAX function isStandalone")) { | 
 | 311 |         ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value); | 
 | 312 |     } else if (!strcmp(name, "SAX function hasInternalSubset")) { | 
 | 313 |         ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value); | 
 | 314 |     } else if (!strcmp(name, "SAX function hasExternalSubset")) { | 
 | 315 |         ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value); | 
 | 316 |     } else if (!strcmp(name, "SAX function resolveEntity")) { | 
 | 317 |         ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value); | 
 | 318 |     } else if (!strcmp(name, "SAX function getEntity")) { | 
 | 319 |         ctxt->sax->getEntity = *((getEntitySAXFunc *) value); | 
 | 320 |     } else if (!strcmp(name, "SAX function entityDecl")) { | 
 | 321 |         ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value); | 
 | 322 |     } else if (!strcmp(name, "SAX function notationDecl")) { | 
 | 323 |         ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value); | 
 | 324 |     } else if (!strcmp(name, "SAX function attributeDecl")) { | 
 | 325 |         ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value); | 
 | 326 |     } else if (!strcmp(name, "SAX function elementDecl")) { | 
 | 327 |         ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value); | 
 | 328 |     } else if (!strcmp(name, "SAX function unparsedEntityDecl")) { | 
 | 329 |         ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value); | 
 | 330 |     } else if (!strcmp(name, "SAX function setDocumentLocator")) { | 
 | 331 |         ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value); | 
 | 332 |     } else if (!strcmp(name, "SAX function startDocument")) { | 
 | 333 |         ctxt->sax->startDocument = *((startDocumentSAXFunc *) value); | 
 | 334 |     } else if (!strcmp(name, "SAX function endDocument")) { | 
 | 335 |         ctxt->sax->endDocument = *((endDocumentSAXFunc *) value); | 
 | 336 |     } else if (!strcmp(name, "SAX function startElement")) { | 
 | 337 |         ctxt->sax->startElement = *((startElementSAXFunc *) value); | 
 | 338 |     } else if (!strcmp(name, "SAX function endElement")) { | 
 | 339 |         ctxt->sax->endElement = *((endElementSAXFunc *) value); | 
 | 340 |     } else if (!strcmp(name, "SAX function reference")) { | 
 | 341 |         ctxt->sax->reference = *((referenceSAXFunc *) value); | 
 | 342 |     } else if (!strcmp(name, "SAX function characters")) { | 
 | 343 |         ctxt->sax->characters = *((charactersSAXFunc *) value); | 
 | 344 |     } else if (!strcmp(name, "SAX function ignorableWhitespace")) { | 
 | 345 |         ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value); | 
 | 346 |     } else if (!strcmp(name, "SAX function processingInstruction")) { | 
 | 347 |         ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value); | 
 | 348 |     } else if (!strcmp(name, "SAX function comment")) { | 
 | 349 |         ctxt->sax->comment = *((commentSAXFunc *) value); | 
 | 350 |     } else if (!strcmp(name, "SAX function warning")) { | 
 | 351 |         ctxt->sax->warning = *((warningSAXFunc *) value); | 
 | 352 |     } else if (!strcmp(name, "SAX function error")) { | 
 | 353 |         ctxt->sax->error = *((errorSAXFunc *) value); | 
 | 354 |     } else if (!strcmp(name, "SAX function fatalError")) { | 
 | 355 |         ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value); | 
 | 356 |     } else if (!strcmp(name, "SAX function getParameterEntity")) { | 
 | 357 |         ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value); | 
 | 358 |     } else if (!strcmp(name, "SAX function cdataBlock")) { | 
 | 359 |         ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value); | 
 | 360 |     } else if (!strcmp(name, "SAX function externalSubset")) { | 
 | 361 |         ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value); | 
 | 362 |     } else { | 
 | 363 | 	return(-1); | 
 | 364 |     } | 
 | 365 |     return(0); | 
 | 366 | } | 
 | 367 |  | 
 | 368 | /************************************************************************ | 
 | 369 |  *									* | 
 | 370 |  * 		Some functions to avoid too large macros		* | 
 | 371 |  *									* | 
 | 372 |  ************************************************************************/ | 
 | 373 |  | 
 | 374 | /** | 
 | 375 |  * xmlIsChar: | 
 | 376 |  * @c:  an unicode character (int) | 
 | 377 |  * | 
 | 378 |  * Check whether the character is allowed by the production | 
 | 379 |  * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | 
 | 380 |  *                  | [#x10000-#x10FFFF] | 
 | 381 |  * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. | 
 | 382 |  * Also available as a macro IS_CHAR() | 
 | 383 |  * | 
 | 384 |  * Returns 0 if not, non-zero otherwise | 
 | 385 |  */ | 
 | 386 | int | 
 | 387 | xmlIsChar(int c) { | 
 | 388 |     return( | 
 | 389 |      ((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) || | 
 | 390 |      (((c) >= 0x20) && ((c) <= 0xD7FF)) || | 
 | 391 |      (((c) >= 0xE000) && ((c) <= 0xFFFD)) || | 
 | 392 |      (((c) >= 0x10000) && ((c) <= 0x10FFFF))); | 
 | 393 | } | 
 | 394 |  | 
 | 395 | /** | 
 | 396 |  * xmlIsBlank: | 
 | 397 |  * @c:  an unicode character (int) | 
 | 398 |  * | 
 | 399 |  * Check whether the character is allowed by the production | 
 | 400 |  * [3] S ::= (#x20 | #x9 | #xD | #xA)+ | 
 | 401 |  * Also available as a macro IS_BLANK() | 
 | 402 |  * | 
 | 403 |  * Returns 0 if not, non-zero otherwise | 
 | 404 |  */ | 
 | 405 | int | 
 | 406 | xmlIsBlank(int c) { | 
 | 407 |     return(((c) == 0x20) || ((c) == 0x09) || ((c) == 0xA) || ((c) == 0x0D)); | 
 | 408 | } | 
 | 409 |  | 
 | 410 | /** | 
 | 411 |  * xmlIsBaseChar: | 
 | 412 |  * @c:  an unicode character (int) | 
 | 413 |  * | 
 | 414 |  * Check whether the character is allowed by the production | 
 | 415 |  * [85] BaseChar ::= ... long list see REC ... | 
 | 416 |  * | 
 | 417 |  * VI is your friend ! | 
 | 418 |  * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/     (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/ | 
 | 419 |  * and  | 
 | 420 |  * :1,$ s/#x\([0-9A-Z]*\)/     ((c) == 0x\1) ||/ | 
 | 421 |  * | 
 | 422 |  * Returns 0 if not, non-zero otherwise | 
 | 423 |  */ | 
 | 424 | static int xmlBaseArray[] = { | 
 | 425 |   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */ | 
 | 426 |   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */ | 
 | 427 |   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */ | 
 | 428 |   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */ | 
 | 429 |   0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */ | 
 | 430 |   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */ | 
 | 431 |   0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */ | 
 | 432 |   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */ | 
 | 433 |   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */ | 
 | 434 |   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */ | 
 | 435 |   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */ | 
 | 436 |   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */ | 
 | 437 |   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */ | 
 | 438 |   1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */ | 
 | 439 |   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */ | 
 | 440 |   1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */ | 
 | 441 | }; | 
 | 442 |  | 
 | 443 | int | 
 | 444 | xmlIsBaseChar(int c) { | 
 | 445 |     return( | 
 | 446 |       (((c) < 0x0100) ? xmlBaseArray[c] : | 
 | 447 |       (	/* accelerator */ | 
 | 448 |       (((c) >= 0x0100) && ((c) <= 0x0131)) || | 
 | 449 |       (((c) >= 0x0134) && ((c) <= 0x013E)) || | 
 | 450 |       (((c) >= 0x0141) && ((c) <= 0x0148)) || | 
 | 451 |       (((c) >= 0x014A) && ((c) <= 0x017E)) || | 
 | 452 |       (((c) >= 0x0180) && ((c) <= 0x01C3)) || | 
 | 453 |       (((c) >= 0x01CD) && ((c) <= 0x01F0)) || | 
 | 454 |       (((c) >= 0x01F4) && ((c) <= 0x01F5)) || | 
 | 455 |       (((c) >= 0x01FA) && ((c) <= 0x0217)) || | 
 | 456 |       (((c) >= 0x0250) && ((c) <= 0x02A8)) || | 
 | 457 |       (((c) >= 0x02BB) && ((c) <= 0x02C1)) || | 
 | 458 |       ((c) == 0x0386) || | 
 | 459 |       (((c) >= 0x0388) && ((c) <= 0x038A)) || | 
 | 460 |       ((c) == 0x038C) || | 
 | 461 |       (((c) >= 0x038E) && ((c) <= 0x03A1)) || | 
 | 462 |       (((c) >= 0x03A3) && ((c) <= 0x03CE)) || | 
 | 463 |       (((c) >= 0x03D0) && ((c) <= 0x03D6)) || | 
 | 464 |       ((c) == 0x03DA) || | 
 | 465 |       ((c) == 0x03DC) || | 
 | 466 |       ((c) == 0x03DE) || | 
 | 467 |       ((c) == 0x03E0) || | 
 | 468 |       (((c) >= 0x03E2) && ((c) <= 0x03F3)) || | 
 | 469 |       (((c) >= 0x0401) && ((c) <= 0x040C)) || | 
 | 470 |       (((c) >= 0x040E) && ((c) <= 0x044F)) || | 
 | 471 |       (((c) >= 0x0451) && ((c) <= 0x045C)) || | 
 | 472 |       (((c) >= 0x045E) && ((c) <= 0x0481)) || | 
 | 473 |       (((c) >= 0x0490) && ((c) <= 0x04C4)) || | 
 | 474 |       (((c) >= 0x04C7) && ((c) <= 0x04C8)) || | 
 | 475 |       (((c) >= 0x04CB) && ((c) <= 0x04CC)) || | 
 | 476 |       (((c) >= 0x04D0) && ((c) <= 0x04EB)) || | 
 | 477 |       (((c) >= 0x04EE) && ((c) <= 0x04F5)) || | 
 | 478 |       (((c) >= 0x04F8) && ((c) <= 0x04F9)) || | 
 | 479 |       (((c) >= 0x0531) && ((c) <= 0x0556)) || | 
 | 480 |       ((c) == 0x0559) || | 
 | 481 |       (((c) >= 0x0561) && ((c) <= 0x0586)) || | 
 | 482 |       (((c) >= 0x05D0) && ((c) <= 0x05EA)) || | 
 | 483 |       (((c) >= 0x05F0) && ((c) <= 0x05F2)) || | 
 | 484 |       (((c) >= 0x0621) && ((c) <= 0x063A)) || | 
 | 485 |       (((c) >= 0x0641) && ((c) <= 0x064A)) || | 
 | 486 |       (((c) >= 0x0671) && ((c) <= 0x06B7)) || | 
 | 487 |       (((c) >= 0x06BA) && ((c) <= 0x06BE)) || | 
 | 488 |       (((c) >= 0x06C0) && ((c) <= 0x06CE)) || | 
 | 489 |       (((c) >= 0x06D0) && ((c) <= 0x06D3)) || | 
 | 490 |       ((c) == 0x06D5) || | 
 | 491 |       (((c) >= 0x06E5) && ((c) <= 0x06E6)) || | 
 | 492 |      (((c) >= 0x905) && (	/* accelerator */ | 
 | 493 |       (((c) >= 0x0905) && ((c) <= 0x0939)) || | 
 | 494 |       ((c) == 0x093D) || | 
 | 495 |       (((c) >= 0x0958) && ((c) <= 0x0961)) || | 
 | 496 |       (((c) >= 0x0985) && ((c) <= 0x098C)) || | 
 | 497 |       (((c) >= 0x098F) && ((c) <= 0x0990)) || | 
 | 498 |       (((c) >= 0x0993) && ((c) <= 0x09A8)) || | 
 | 499 |       (((c) >= 0x09AA) && ((c) <= 0x09B0)) || | 
 | 500 |       ((c) == 0x09B2) || | 
 | 501 |       (((c) >= 0x09B6) && ((c) <= 0x09B9)) || | 
 | 502 |       (((c) >= 0x09DC) && ((c) <= 0x09DD)) || | 
 | 503 |       (((c) >= 0x09DF) && ((c) <= 0x09E1)) || | 
 | 504 |       (((c) >= 0x09F0) && ((c) <= 0x09F1)) || | 
 | 505 |       (((c) >= 0x0A05) && ((c) <= 0x0A0A)) || | 
 | 506 |       (((c) >= 0x0A0F) && ((c) <= 0x0A10)) || | 
 | 507 |       (((c) >= 0x0A13) && ((c) <= 0x0A28)) || | 
 | 508 |       (((c) >= 0x0A2A) && ((c) <= 0x0A30)) || | 
 | 509 |       (((c) >= 0x0A32) && ((c) <= 0x0A33)) || | 
 | 510 |       (((c) >= 0x0A35) && ((c) <= 0x0A36)) || | 
 | 511 |       (((c) >= 0x0A38) && ((c) <= 0x0A39)) || | 
 | 512 |       (((c) >= 0x0A59) && ((c) <= 0x0A5C)) || | 
 | 513 |       ((c) == 0x0A5E) || | 
 | 514 |       (((c) >= 0x0A72) && ((c) <= 0x0A74)) || | 
 | 515 |       (((c) >= 0x0A85) && ((c) <= 0x0A8B)) || | 
 | 516 |       ((c) == 0x0A8D) || | 
 | 517 |       (((c) >= 0x0A8F) && ((c) <= 0x0A91)) || | 
 | 518 |       (((c) >= 0x0A93) && ((c) <= 0x0AA8)) || | 
 | 519 |       (((c) >= 0x0AAA) && ((c) <= 0x0AB0)) || | 
 | 520 |       (((c) >= 0x0AB2) && ((c) <= 0x0AB3)) || | 
 | 521 |       (((c) >= 0x0AB5) && ((c) <= 0x0AB9)) || | 
 | 522 |       ((c) == 0x0ABD) || | 
 | 523 |       ((c) == 0x0AE0) || | 
 | 524 |       (((c) >= 0x0B05) && ((c) <= 0x0B0C)) || | 
 | 525 |       (((c) >= 0x0B0F) && ((c) <= 0x0B10)) || | 
 | 526 |       (((c) >= 0x0B13) && ((c) <= 0x0B28)) || | 
 | 527 |       (((c) >= 0x0B2A) && ((c) <= 0x0B30)) || | 
 | 528 |       (((c) >= 0x0B32) && ((c) <= 0x0B33)) || | 
 | 529 |       (((c) >= 0x0B36) && ((c) <= 0x0B39)) || | 
 | 530 |       ((c) == 0x0B3D) || | 
 | 531 |       (((c) >= 0x0B5C) && ((c) <= 0x0B5D)) || | 
 | 532 |       (((c) >= 0x0B5F) && ((c) <= 0x0B61)) || | 
 | 533 |       (((c) >= 0x0B85) && ((c) <= 0x0B8A)) || | 
 | 534 |       (((c) >= 0x0B8E) && ((c) <= 0x0B90)) || | 
 | 535 |       (((c) >= 0x0B92) && ((c) <= 0x0B95)) || | 
 | 536 |       (((c) >= 0x0B99) && ((c) <= 0x0B9A)) || | 
 | 537 |       ((c) == 0x0B9C) || | 
 | 538 |       (((c) >= 0x0B9E) && ((c) <= 0x0B9F)) || | 
 | 539 |       (((c) >= 0x0BA3) && ((c) <= 0x0BA4)) || | 
 | 540 |       (((c) >= 0x0BA8) && ((c) <= 0x0BAA)) || | 
 | 541 |       (((c) >= 0x0BAE) && ((c) <= 0x0BB5)) || | 
 | 542 |       (((c) >= 0x0BB7) && ((c) <= 0x0BB9)) || | 
 | 543 |       (((c) >= 0x0C05) && ((c) <= 0x0C0C)) || | 
 | 544 |       (((c) >= 0x0C0E) && ((c) <= 0x0C10)) || | 
 | 545 |       (((c) >= 0x0C12) && ((c) <= 0x0C28)) || | 
 | 546 |       (((c) >= 0x0C2A) && ((c) <= 0x0C33)) || | 
 | 547 |       (((c) >= 0x0C35) && ((c) <= 0x0C39)) || | 
 | 548 |       (((c) >= 0x0C60) && ((c) <= 0x0C61)) || | 
 | 549 |       (((c) >= 0x0C85) && ((c) <= 0x0C8C)) || | 
 | 550 |       (((c) >= 0x0C8E) && ((c) <= 0x0C90)) || | 
 | 551 |       (((c) >= 0x0C92) && ((c) <= 0x0CA8)) || | 
 | 552 |       (((c) >= 0x0CAA) && ((c) <= 0x0CB3)) || | 
 | 553 |       (((c) >= 0x0CB5) && ((c) <= 0x0CB9)) || | 
 | 554 |       ((c) == 0x0CDE) || | 
 | 555 |       (((c) >= 0x0CE0) && ((c) <= 0x0CE1)) || | 
 | 556 |       (((c) >= 0x0D05) && ((c) <= 0x0D0C)) || | 
 | 557 |       (((c) >= 0x0D0E) && ((c) <= 0x0D10)) || | 
 | 558 |       (((c) >= 0x0D12) && ((c) <= 0x0D28)) || | 
 | 559 |       (((c) >= 0x0D2A) && ((c) <= 0x0D39)) || | 
 | 560 |       (((c) >= 0x0D60) && ((c) <= 0x0D61)) || | 
 | 561 |       (((c) >= 0x0E01) && ((c) <= 0x0E2E)) || | 
 | 562 |       ((c) == 0x0E30) || | 
 | 563 |       (((c) >= 0x0E32) && ((c) <= 0x0E33)) || | 
 | 564 |       (((c) >= 0x0E40) && ((c) <= 0x0E45)) || | 
 | 565 |       (((c) >= 0x0E81) && ((c) <= 0x0E82)) || | 
 | 566 |       ((c) == 0x0E84) || | 
 | 567 |       (((c) >= 0x0E87) && ((c) <= 0x0E88)) || | 
 | 568 |       ((c) == 0x0E8A) || | 
 | 569 |       ((c) == 0x0E8D) || | 
 | 570 |       (((c) >= 0x0E94) && ((c) <= 0x0E97)) || | 
 | 571 |       (((c) >= 0x0E99) && ((c) <= 0x0E9F)) || | 
 | 572 |       (((c) >= 0x0EA1) && ((c) <= 0x0EA3)) || | 
 | 573 |       ((c) == 0x0EA5) || | 
 | 574 |       ((c) == 0x0EA7) || | 
 | 575 |       (((c) >= 0x0EAA) && ((c) <= 0x0EAB)) || | 
 | 576 |       (((c) >= 0x0EAD) && ((c) <= 0x0EAE)) || | 
 | 577 |       ((c) == 0x0EB0) || | 
 | 578 |       (((c) >= 0x0EB2) && ((c) <= 0x0EB3)) || | 
 | 579 |       ((c) == 0x0EBD) || | 
 | 580 |       (((c) >= 0x0EC0) && ((c) <= 0x0EC4)) || | 
 | 581 |       (((c) >= 0x0F40) && ((c) <= 0x0F47)) || | 
 | 582 |       (((c) >= 0x0F49) && ((c) <= 0x0F69)) || | 
 | 583 |      (((c) >= 0x10A0) && (	/* accelerator */ | 
 | 584 |       (((c) >= 0x10A0) && ((c) <= 0x10C5)) || | 
 | 585 |       (((c) >= 0x10D0) && ((c) <= 0x10F6)) || | 
 | 586 |       ((c) == 0x1100) || | 
 | 587 |       (((c) >= 0x1102) && ((c) <= 0x1103)) || | 
 | 588 |       (((c) >= 0x1105) && ((c) <= 0x1107)) || | 
 | 589 |       ((c) == 0x1109) || | 
 | 590 |       (((c) >= 0x110B) && ((c) <= 0x110C)) || | 
 | 591 |       (((c) >= 0x110E) && ((c) <= 0x1112)) || | 
 | 592 |       ((c) == 0x113C) || | 
 | 593 |       ((c) == 0x113E) || | 
 | 594 |       ((c) == 0x1140) || | 
 | 595 |       ((c) == 0x114C) || | 
 | 596 |       ((c) == 0x114E) || | 
 | 597 |       ((c) == 0x1150) || | 
 | 598 |       (((c) >= 0x1154) && ((c) <= 0x1155)) || | 
 | 599 |       ((c) == 0x1159) || | 
 | 600 |       (((c) >= 0x115F) && ((c) <= 0x1161)) || | 
 | 601 |       ((c) == 0x1163) || | 
 | 602 |       ((c) == 0x1165) || | 
 | 603 |       ((c) == 0x1167) || | 
 | 604 |       ((c) == 0x1169) || | 
 | 605 |       (((c) >= 0x116D) && ((c) <= 0x116E)) || | 
 | 606 |       (((c) >= 0x1172) && ((c) <= 0x1173)) || | 
 | 607 |       ((c) == 0x1175) || | 
 | 608 |       ((c) == 0x119E) || | 
 | 609 |       ((c) == 0x11A8) || | 
 | 610 |       ((c) == 0x11AB) || | 
 | 611 |       (((c) >= 0x11AE) && ((c) <= 0x11AF)) || | 
 | 612 |       (((c) >= 0x11B7) && ((c) <= 0x11B8)) || | 
 | 613 |       ((c) == 0x11BA) || | 
 | 614 |       (((c) >= 0x11BC) && ((c) <= 0x11C2)) || | 
 | 615 |       ((c) == 0x11EB) || | 
 | 616 |       ((c) == 0x11F0) || | 
 | 617 |       ((c) == 0x11F9) || | 
 | 618 |       (((c) >= 0x1E00) && ((c) <= 0x1E9B)) || | 
 | 619 |       (((c) >= 0x1EA0) && ((c) <= 0x1EF9)) || | 
 | 620 |       (((c) >= 0x1F00) && ((c) <= 0x1F15)) || | 
 | 621 |       (((c) >= 0x1F18) && ((c) <= 0x1F1D)) || | 
 | 622 |       (((c) >= 0x1F20) && ((c) <= 0x1F45)) || | 
 | 623 |       (((c) >= 0x1F48) && ((c) <= 0x1F4D)) || | 
 | 624 |       (((c) >= 0x1F50) && ((c) <= 0x1F57)) || | 
 | 625 |       ((c) == 0x1F59) || | 
 | 626 |       ((c) == 0x1F5B) || | 
 | 627 |       ((c) == 0x1F5D) || | 
 | 628 |       (((c) >= 0x1F5F) && ((c) <= 0x1F7D)) || | 
 | 629 |       (((c) >= 0x1F80) && ((c) <= 0x1FB4)) || | 
 | 630 |       (((c) >= 0x1FB6) && ((c) <= 0x1FBC)) || | 
 | 631 |       ((c) == 0x1FBE) || | 
 | 632 |       (((c) >= 0x1FC2) && ((c) <= 0x1FC4)) || | 
 | 633 |       (((c) >= 0x1FC6) && ((c) <= 0x1FCC)) || | 
 | 634 |       (((c) >= 0x1FD0) && ((c) <= 0x1FD3)) || | 
 | 635 |       (((c) >= 0x1FD6) && ((c) <= 0x1FDB)) || | 
 | 636 |       (((c) >= 0x1FE0) && ((c) <= 0x1FEC)) || | 
 | 637 |       (((c) >= 0x1FF2) && ((c) <= 0x1FF4)) || | 
 | 638 |       (((c) >= 0x1FF6) && ((c) <= 0x1FFC)) || | 
 | 639 |       ((c) == 0x2126) || | 
 | 640 |       (((c) >= 0x212A) && ((c) <= 0x212B)) || | 
 | 641 |       ((c) == 0x212E) || | 
 | 642 |       (((c) >= 0x2180) && ((c) <= 0x2182)) || | 
 | 643 |       (((c) >= 0x3041) && ((c) <= 0x3094)) || | 
 | 644 |       (((c) >= 0x30A1) && ((c) <= 0x30FA)) || | 
 | 645 |       (((c) >= 0x3105) && ((c) <= 0x312C)) || | 
 | 646 |       (((c) >= 0xAC00) && ((c) <= 0xD7A3))) /* accelerators */ )))))); | 
 | 647 | } | 
 | 648 |  | 
 | 649 | /** | 
 | 650 |  * xmlIsDigit: | 
 | 651 |  * @c:  an unicode character (int) | 
 | 652 |  * | 
 | 653 |  * Check whether the character is allowed by the production | 
 | 654 |  * [88] Digit ::= ... long list see REC ... | 
 | 655 |  * | 
 | 656 |  * Returns 0 if not, non-zero otherwise | 
 | 657 |  */ | 
 | 658 | int | 
 | 659 | xmlIsDigit(int c) { | 
 | 660 |     return( | 
 | 661 |       (((c) >= 0x0030) && ((c) <= 0x0039)) || | 
 | 662 |      (((c) >= 0x660) && (	/* accelerator */ | 
 | 663 |       (((c) >= 0x0660) && ((c) <= 0x0669)) || | 
 | 664 |       (((c) >= 0x06F0) && ((c) <= 0x06F9)) || | 
 | 665 |       (((c) >= 0x0966) && ((c) <= 0x096F)) || | 
 | 666 |       (((c) >= 0x09E6) && ((c) <= 0x09EF)) || | 
 | 667 |       (((c) >= 0x0A66) && ((c) <= 0x0A6F)) || | 
 | 668 |       (((c) >= 0x0AE6) && ((c) <= 0x0AEF)) || | 
 | 669 |       (((c) >= 0x0B66) && ((c) <= 0x0B6F)) || | 
 | 670 |       (((c) >= 0x0BE7) && ((c) <= 0x0BEF)) || | 
 | 671 |       (((c) >= 0x0C66) && ((c) <= 0x0C6F)) || | 
 | 672 |       (((c) >= 0x0CE6) && ((c) <= 0x0CEF)) || | 
 | 673 |       (((c) >= 0x0D66) && ((c) <= 0x0D6F)) || | 
 | 674 |       (((c) >= 0x0E50) && ((c) <= 0x0E59)) || | 
 | 675 |       (((c) >= 0x0ED0) && ((c) <= 0x0ED9)) || | 
 | 676 |       (((c) >= 0x0F20) && ((c) <= 0x0F29))) /* accelerator */ )); | 
 | 677 | } | 
 | 678 |  | 
 | 679 | /** | 
 | 680 |  * xmlIsCombining: | 
 | 681 |  * @c:  an unicode character (int) | 
 | 682 |  * | 
 | 683 |  * Check whether the character is allowed by the production | 
 | 684 |  * [87] CombiningChar ::= ... long list see REC ... | 
 | 685 |  * | 
 | 686 |  * Returns 0 if not, non-zero otherwise | 
 | 687 |  */ | 
 | 688 | int | 
 | 689 | xmlIsCombining(int c) { | 
 | 690 |     return( | 
 | 691 |      (((c) >= 0x300) && (	/* accelerator */ | 
 | 692 |       (((c) >= 0x0300) && ((c) <= 0x0345)) || | 
 | 693 |       (((c) >= 0x0360) && ((c) <= 0x0361)) || | 
 | 694 |       (((c) >= 0x0483) && ((c) <= 0x0486)) || | 
 | 695 |       (((c) >= 0x0591) && ((c) <= 0x05A1)) || | 
 | 696 |       (((c) >= 0x05A3) && ((c) <= 0x05B9)) || | 
 | 697 |       (((c) >= 0x05BB) && ((c) <= 0x05BD)) || | 
 | 698 |       ((c) == 0x05BF) || | 
 | 699 |       (((c) >= 0x05C1) && ((c) <= 0x05C2)) || | 
 | 700 |       ((c) == 0x05C4) || | 
 | 701 |       (((c) >= 0x064B) && ((c) <= 0x0652)) || | 
 | 702 |       ((c) == 0x0670) || | 
 | 703 |       (((c) >= 0x06D6) && ((c) <= 0x06DC)) || | 
 | 704 |       (((c) >= 0x06DD) && ((c) <= 0x06DF)) || | 
 | 705 |       (((c) >= 0x06E0) && ((c) <= 0x06E4)) || | 
 | 706 |       (((c) >= 0x06E7) && ((c) <= 0x06E8)) || | 
 | 707 |       (((c) >= 0x06EA) && ((c) <= 0x06ED)) || | 
 | 708 |      (((c) >= 0x0901) && (	/* accelerator */ | 
 | 709 |       (((c) >= 0x0901) && ((c) <= 0x0903)) || | 
 | 710 |       ((c) == 0x093C) || | 
 | 711 |       (((c) >= 0x093E) && ((c) <= 0x094C)) || | 
 | 712 |       ((c) == 0x094D) || | 
 | 713 |       (((c) >= 0x0951) && ((c) <= 0x0954)) || | 
 | 714 |       (((c) >= 0x0962) && ((c) <= 0x0963)) || | 
 | 715 |       (((c) >= 0x0981) && ((c) <= 0x0983)) || | 
 | 716 |       ((c) == 0x09BC) || | 
 | 717 |       ((c) == 0x09BE) || | 
 | 718 |       ((c) == 0x09BF) || | 
 | 719 |       (((c) >= 0x09C0) && ((c) <= 0x09C4)) || | 
 | 720 |       (((c) >= 0x09C7) && ((c) <= 0x09C8)) || | 
 | 721 |       (((c) >= 0x09CB) && ((c) <= 0x09CD)) || | 
 | 722 |       ((c) == 0x09D7) || | 
 | 723 |       (((c) >= 0x09E2) && ((c) <= 0x09E3)) || | 
 | 724 |      (((c) >= 0x0A02) && (	/* accelerator */ | 
 | 725 |       ((c) == 0x0A02) || | 
 | 726 |       ((c) == 0x0A3C) || | 
 | 727 |       ((c) == 0x0A3E) || | 
 | 728 |       ((c) == 0x0A3F) || | 
 | 729 |       (((c) >= 0x0A40) && ((c) <= 0x0A42)) || | 
 | 730 |       (((c) >= 0x0A47) && ((c) <= 0x0A48)) || | 
 | 731 |       (((c) >= 0x0A4B) && ((c) <= 0x0A4D)) || | 
 | 732 |       (((c) >= 0x0A70) && ((c) <= 0x0A71)) || | 
 | 733 |       (((c) >= 0x0A81) && ((c) <= 0x0A83)) || | 
 | 734 |       ((c) == 0x0ABC) || | 
 | 735 |       (((c) >= 0x0ABE) && ((c) <= 0x0AC5)) || | 
 | 736 |       (((c) >= 0x0AC7) && ((c) <= 0x0AC9)) || | 
 | 737 |       (((c) >= 0x0ACB) && ((c) <= 0x0ACD)) || | 
 | 738 |       (((c) >= 0x0B01) && ((c) <= 0x0B03)) || | 
 | 739 |       ((c) == 0x0B3C) || | 
 | 740 |       (((c) >= 0x0B3E) && ((c) <= 0x0B43)) || | 
 | 741 |       (((c) >= 0x0B47) && ((c) <= 0x0B48)) || | 
 | 742 |       (((c) >= 0x0B4B) && ((c) <= 0x0B4D)) || | 
 | 743 |       (((c) >= 0x0B56) && ((c) <= 0x0B57)) || | 
 | 744 |       (((c) >= 0x0B82) && ((c) <= 0x0B83)) || | 
 | 745 |       (((c) >= 0x0BBE) && ((c) <= 0x0BC2)) || | 
 | 746 |       (((c) >= 0x0BC6) && ((c) <= 0x0BC8)) || | 
 | 747 |       (((c) >= 0x0BCA) && ((c) <= 0x0BCD)) || | 
 | 748 |       ((c) == 0x0BD7) || | 
 | 749 |       (((c) >= 0x0C01) && ((c) <= 0x0C03)) || | 
 | 750 |       (((c) >= 0x0C3E) && ((c) <= 0x0C44)) || | 
 | 751 |       (((c) >= 0x0C46) && ((c) <= 0x0C48)) || | 
 | 752 |       (((c) >= 0x0C4A) && ((c) <= 0x0C4D)) || | 
 | 753 |       (((c) >= 0x0C55) && ((c) <= 0x0C56)) || | 
 | 754 |       (((c) >= 0x0C82) && ((c) <= 0x0C83)) || | 
 | 755 |       (((c) >= 0x0CBE) && ((c) <= 0x0CC4)) || | 
 | 756 |       (((c) >= 0x0CC6) && ((c) <= 0x0CC8)) || | 
 | 757 |       (((c) >= 0x0CCA) && ((c) <= 0x0CCD)) || | 
 | 758 |       (((c) >= 0x0CD5) && ((c) <= 0x0CD6)) || | 
 | 759 |       (((c) >= 0x0D02) && ((c) <= 0x0D03)) || | 
 | 760 |       (((c) >= 0x0D3E) && ((c) <= 0x0D43)) || | 
 | 761 |       (((c) >= 0x0D46) && ((c) <= 0x0D48)) || | 
 | 762 |       (((c) >= 0x0D4A) && ((c) <= 0x0D4D)) || | 
 | 763 |       ((c) == 0x0D57) || | 
 | 764 |      (((c) >= 0x0E31) && (	/* accelerator */ | 
 | 765 |       ((c) == 0x0E31) || | 
 | 766 |       (((c) >= 0x0E34) && ((c) <= 0x0E3A)) || | 
 | 767 |       (((c) >= 0x0E47) && ((c) <= 0x0E4E)) || | 
 | 768 |       ((c) == 0x0EB1) || | 
 | 769 |       (((c) >= 0x0EB4) && ((c) <= 0x0EB9)) || | 
 | 770 |       (((c) >= 0x0EBB) && ((c) <= 0x0EBC)) || | 
 | 771 |       (((c) >= 0x0EC8) && ((c) <= 0x0ECD)) || | 
 | 772 |       (((c) >= 0x0F18) && ((c) <= 0x0F19)) || | 
 | 773 |       ((c) == 0x0F35) || | 
 | 774 |       ((c) == 0x0F37) || | 
 | 775 |       ((c) == 0x0F39) || | 
 | 776 |       ((c) == 0x0F3E) || | 
 | 777 |       ((c) == 0x0F3F) || | 
 | 778 |       (((c) >= 0x0F71) && ((c) <= 0x0F84)) || | 
 | 779 |       (((c) >= 0x0F86) && ((c) <= 0x0F8B)) || | 
 | 780 |       (((c) >= 0x0F90) && ((c) <= 0x0F95)) || | 
 | 781 |       ((c) == 0x0F97) || | 
 | 782 |       (((c) >= 0x0F99) && ((c) <= 0x0FAD)) || | 
 | 783 |       (((c) >= 0x0FB1) && ((c) <= 0x0FB7)) || | 
 | 784 |       ((c) == 0x0FB9) || | 
 | 785 |       (((c) >= 0x20D0) && ((c) <= 0x20DC)) || | 
 | 786 |       ((c) == 0x20E1) || | 
 | 787 |       (((c) >= 0x302A) && ((c) <= 0x302F)) || | 
 | 788 |       ((c) == 0x3099) || | 
 | 789 |       ((c) == 0x309A)))))))))); | 
 | 790 | } | 
 | 791 |  | 
 | 792 | /** | 
 | 793 |  * xmlIsExtender: | 
 | 794 |  * @c:  an unicode character (int) | 
 | 795 |  * | 
 | 796 |  * Check whether the character is allowed by the production | 
 | 797 |  * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | | 
 | 798 |  *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] | | 
 | 799 |  *                   [#x309D-#x309E] | [#x30FC-#x30FE] | 
 | 800 |  * | 
 | 801 |  * Returns 0 if not, non-zero otherwise | 
 | 802 |  */ | 
 | 803 | int | 
 | 804 | xmlIsExtender(int c) { | 
 | 805 |     switch (c) { | 
 | 806 |     case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387: | 
 | 807 |     case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005: | 
 | 808 |     case 0x3031: case 0x3032: case 0x3033: case 0x3034: | 
 | 809 |     case 0x3035: case 0x309D: case 0x309E: case 0x30FC: | 
 | 810 |     case 0x30FE: | 
 | 811 | 	return 1; | 
 | 812 |     default: | 
 | 813 | 	return 0; | 
 | 814 |     } | 
 | 815 | } | 
 | 816 |  | 
 | 817 | /** | 
 | 818 |  * xmlIsIdeographic: | 
 | 819 |  * @c:  an unicode character (int) | 
 | 820 |  * | 
 | 821 |  * Check whether the character is allowed by the production | 
 | 822 |  * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029] | 
 | 823 |  * | 
 | 824 |  * Returns 0 if not, non-zero otherwise | 
 | 825 |  */ | 
 | 826 | int | 
 | 827 | xmlIsIdeographic(int c) { | 
 | 828 |     return(((c) < 0x0100) ? 0 : | 
 | 829 |      (((c) >= 0x4e00) && ((c) <= 0x9fa5)) || | 
 | 830 |      (((c) >= 0xf900) && ((c) <= 0xfa2d)) || | 
 | 831 |      (((c) >= 0x3021) && ((c) <= 0x3029)) || | 
 | 832 |       ((c) == 0x3007)); | 
 | 833 | } | 
 | 834 |  | 
 | 835 | /** | 
 | 836 |  * xmlIsLetter: | 
 | 837 |  * @c:  an unicode character (int) | 
 | 838 |  * | 
 | 839 |  * Check whether the character is allowed by the production | 
 | 840 |  * [84] Letter ::= BaseChar | Ideographic | 
 | 841 |  * | 
 | 842 |  * Returns 0 if not, non-zero otherwise | 
 | 843 |  */ | 
 | 844 | int | 
 | 845 | xmlIsLetter(int c) { | 
 | 846 |     return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)); | 
 | 847 | } | 
 | 848 |  | 
 | 849 | /** | 
 | 850 |  * xmlIsPubidChar: | 
 | 851 |  * @c:  an unicode character (int) | 
 | 852 |  * | 
 | 853 |  * Check whether the character is allowed by the production | 
 | 854 |  * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] | 
 | 855 |  * | 
 | 856 |  * Returns 0 if not, non-zero otherwise | 
 | 857 |  */ | 
 | 858 | int | 
 | 859 | xmlIsPubidChar(int c) { | 
 | 860 |     return( | 
 | 861 |      ((c) == 0x20) || ((c) == 0x0D) || ((c) == 0x0A) || | 
 | 862 |      (((c) >= 'a') && ((c) <= 'z')) || | 
 | 863 |      (((c) >= 'A') && ((c) <= 'Z')) || | 
 | 864 |      (((c) >= '0') && ((c) <= '9')) || | 
 | 865 |      ((c) == '-') || ((c) == '\'') || ((c) == '(') || ((c) == ')') || | 
 | 866 |      ((c) == '+') || ((c) == ',') || ((c) == '.') || ((c) == '/') || | 
 | 867 |      ((c) == ':') || ((c) == '=') || ((c) == '?') || ((c) == ';') || | 
 | 868 |      ((c) == '!') || ((c) == '*') || ((c) == '#') || ((c) == '@') || | 
 | 869 |      ((c) == '$') || ((c) == '_') || ((c) == '%')); | 
 | 870 | } | 
 | 871 |  | 
 | 872 | /************************************************************************ | 
 | 873 |  *									* | 
 | 874 |  * 		Input handling functions for progressive parsing	* | 
 | 875 |  *									* | 
 | 876 |  ************************************************************************/ | 
 | 877 |  | 
 | 878 | /* #define DEBUG_INPUT */ | 
 | 879 | /* #define DEBUG_STACK */ | 
 | 880 | /* #define DEBUG_PUSH */ | 
 | 881 |  | 
 | 882 |  | 
 | 883 | /* we need to keep enough input to show errors in context */ | 
 | 884 | #define LINE_LEN        80 | 
 | 885 |  | 
 | 886 | #ifdef DEBUG_INPUT | 
 | 887 | #define CHECK_BUFFER(in) check_buffer(in) | 
 | 888 |  | 
 | 889 | void check_buffer(xmlParserInputPtr in) { | 
 | 890 |     if (in->base != in->buf->buffer->content) { | 
 | 891 |         xmlGenericError(xmlGenericErrorContext, | 
 | 892 | 		"xmlParserInput: base mismatch problem\n"); | 
 | 893 |     } | 
 | 894 |     if (in->cur < in->base) { | 
 | 895 |         xmlGenericError(xmlGenericErrorContext, | 
 | 896 | 		"xmlParserInput: cur < base problem\n"); | 
 | 897 |     } | 
 | 898 |     if (in->cur > in->base + in->buf->buffer->use) { | 
 | 899 |         xmlGenericError(xmlGenericErrorContext, | 
 | 900 | 		"xmlParserInput: cur > base + use problem\n"); | 
 | 901 |     } | 
 | 902 |     xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n", | 
 | 903 |             (int) in, (int) in->buf->buffer->content, in->cur - in->base, | 
 | 904 | 	    in->buf->buffer->use, in->buf->buffer->size); | 
 | 905 | } | 
 | 906 |  | 
 | 907 | #else | 
 | 908 | #define CHECK_BUFFER(in)  | 
 | 909 | #endif | 
 | 910 |  | 
 | 911 |  | 
 | 912 | /** | 
 | 913 |  * xmlParserInputRead: | 
 | 914 |  * @in:  an XML parser input | 
 | 915 |  * @len:  an indicative size for the lookahead | 
 | 916 |  * | 
 | 917 |  * This function refresh the input for the parser. It doesn't try to | 
 | 918 |  * preserve pointers to the input buffer, and discard already read data | 
 | 919 |  * | 
 | 920 |  * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the | 
 | 921 |  * end of this entity | 
 | 922 |  */ | 
 | 923 | int | 
 | 924 | xmlParserInputRead(xmlParserInputPtr in, int len) { | 
 | 925 |     int ret; | 
 | 926 |     int used; | 
| Daniel Veillard | 56a4cb8 | 2001-03-24 17:00:36 +0000 | [diff] [blame] | 927 |     int indx; | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 928 |  | 
 | 929 | #ifdef DEBUG_INPUT | 
 | 930 |     xmlGenericError(xmlGenericErrorContext, "Read\n"); | 
 | 931 | #endif | 
 | 932 |     if (in->buf == NULL) return(-1); | 
 | 933 |     if (in->base == NULL) return(-1); | 
 | 934 |     if (in->cur == NULL) return(-1); | 
 | 935 |     if (in->buf->buffer == NULL) return(-1); | 
 | 936 |     if (in->buf->readcallback == NULL) return(-1); | 
 | 937 |  | 
 | 938 |     CHECK_BUFFER(in); | 
 | 939 |  | 
 | 940 |     used = in->cur - in->buf->buffer->content; | 
 | 941 |     ret = xmlBufferShrink(in->buf->buffer, used); | 
 | 942 |     if (ret > 0) { | 
 | 943 | 	in->cur -= ret; | 
 | 944 | 	in->consumed += ret; | 
 | 945 |     } | 
 | 946 |     ret = xmlParserInputBufferRead(in->buf, len); | 
 | 947 |     if (in->base != in->buf->buffer->content) { | 
 | 948 |         /* | 
 | 949 | 	 * the buffer has been realloced | 
 | 950 | 	 */ | 
| Daniel Veillard | 56a4cb8 | 2001-03-24 17:00:36 +0000 | [diff] [blame] | 951 | 	indx = in->cur - in->base; | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 952 | 	in->base = in->buf->buffer->content; | 
| Daniel Veillard | 56a4cb8 | 2001-03-24 17:00:36 +0000 | [diff] [blame] | 953 | 	in->cur = &in->buf->buffer->content[indx]; | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 954 |     } | 
| Daniel Veillard | 48b2f89 | 2001-02-25 16:11:03 +0000 | [diff] [blame] | 955 |     in->end = &in->buf->buffer->content[in->buf->buffer->use]; | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 956 |  | 
 | 957 |     CHECK_BUFFER(in); | 
 | 958 |  | 
 | 959 |     return(ret); | 
 | 960 | } | 
 | 961 |  | 
 | 962 | /** | 
 | 963 |  * xmlParserInputGrow: | 
 | 964 |  * @in:  an XML parser input | 
 | 965 |  * @len:  an indicative size for the lookahead | 
 | 966 |  * | 
 | 967 |  * This function increase the input for the parser. It tries to | 
 | 968 |  * preserve pointers to the input buffer, and keep already read data | 
 | 969 |  * | 
 | 970 |  * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the | 
 | 971 |  * end of this entity | 
 | 972 |  */ | 
 | 973 | int | 
 | 974 | xmlParserInputGrow(xmlParserInputPtr in, int len) { | 
 | 975 |     int ret; | 
| Daniel Veillard | 56a4cb8 | 2001-03-24 17:00:36 +0000 | [diff] [blame] | 976 |     int indx; | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 977 |  | 
 | 978 | #ifdef DEBUG_INPUT | 
 | 979 |     xmlGenericError(xmlGenericErrorContext, "Grow\n"); | 
 | 980 | #endif | 
 | 981 |     if (in->buf == NULL) return(-1); | 
 | 982 |     if (in->base == NULL) return(-1); | 
 | 983 |     if (in->cur == NULL) return(-1); | 
 | 984 |     if (in->buf->buffer == NULL) return(-1); | 
 | 985 |  | 
 | 986 |     CHECK_BUFFER(in); | 
 | 987 |  | 
| Daniel Veillard | 56a4cb8 | 2001-03-24 17:00:36 +0000 | [diff] [blame] | 988 |     indx = in->cur - in->base; | 
 | 989 |     if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) { | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 990 |  | 
 | 991 | 	CHECK_BUFFER(in); | 
 | 992 |  | 
 | 993 |         return(0); | 
 | 994 |     } | 
 | 995 |     if (in->buf->readcallback != NULL) | 
 | 996 | 	ret = xmlParserInputBufferGrow(in->buf, len); | 
 | 997 |     else	 | 
 | 998 |         return(0); | 
 | 999 |  | 
 | 1000 |     /* | 
| Daniel Veillard | 48b2f89 | 2001-02-25 16:11:03 +0000 | [diff] [blame] | 1001 |      * NOTE : in->base may be a "dangling" i.e. freed pointer in this | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 1002 |      *        block, but we use it really as an integer to do some | 
 | 1003 |      *        pointer arithmetic. Insure will raise it as a bug but in | 
 | 1004 |      *        that specific case, that's not ! | 
 | 1005 |      */ | 
 | 1006 |     if (in->base != in->buf->buffer->content) { | 
 | 1007 |         /* | 
 | 1008 | 	 * the buffer has been realloced | 
 | 1009 | 	 */ | 
| Daniel Veillard | 56a4cb8 | 2001-03-24 17:00:36 +0000 | [diff] [blame] | 1010 | 	indx = in->cur - in->base; | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 1011 | 	in->base = in->buf->buffer->content; | 
| Daniel Veillard | 56a4cb8 | 2001-03-24 17:00:36 +0000 | [diff] [blame] | 1012 | 	in->cur = &in->buf->buffer->content[indx]; | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 1013 |     } | 
| Daniel Veillard | 48b2f89 | 2001-02-25 16:11:03 +0000 | [diff] [blame] | 1014 |     in->end = &in->buf->buffer->content[in->buf->buffer->use]; | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 1015 |  | 
 | 1016 |     CHECK_BUFFER(in); | 
 | 1017 |  | 
 | 1018 |     return(ret); | 
 | 1019 | } | 
 | 1020 |  | 
 | 1021 | /** | 
 | 1022 |  * xmlParserInputShrink: | 
 | 1023 |  * @in:  an XML parser input | 
 | 1024 |  * | 
 | 1025 |  * This function removes used input for the parser. | 
 | 1026 |  */ | 
 | 1027 | void | 
 | 1028 | xmlParserInputShrink(xmlParserInputPtr in) { | 
 | 1029 |     int used; | 
 | 1030 |     int ret; | 
| Daniel Veillard | 56a4cb8 | 2001-03-24 17:00:36 +0000 | [diff] [blame] | 1031 |     int indx; | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 1032 |  | 
 | 1033 | #ifdef DEBUG_INPUT | 
 | 1034 |     xmlGenericError(xmlGenericErrorContext, "Shrink\n"); | 
 | 1035 | #endif | 
 | 1036 |     if (in->buf == NULL) return; | 
 | 1037 |     if (in->base == NULL) return; | 
 | 1038 |     if (in->cur == NULL) return; | 
 | 1039 |     if (in->buf->buffer == NULL) return; | 
 | 1040 |  | 
 | 1041 |     CHECK_BUFFER(in); | 
 | 1042 |  | 
 | 1043 |     used = in->cur - in->buf->buffer->content; | 
 | 1044 |     /* | 
 | 1045 |      * Do not shrink on large buffers whose only a tiny fraction | 
 | 1046 |      * was consumned | 
 | 1047 |      */ | 
| Daniel Veillard | 56a4cb8 | 2001-03-24 17:00:36 +0000 | [diff] [blame] | 1048 |     if ((int) in->buf->buffer->use > used + 2 * INPUT_CHUNK) | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 1049 | 	return; | 
 | 1050 |     if (used > INPUT_CHUNK) { | 
 | 1051 | 	ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN); | 
 | 1052 | 	if (ret > 0) { | 
 | 1053 | 	    in->cur -= ret; | 
 | 1054 | 	    in->consumed += ret; | 
 | 1055 | 	} | 
| Daniel Veillard | 48b2f89 | 2001-02-25 16:11:03 +0000 | [diff] [blame] | 1056 | 	in->end = &in->buf->buffer->content[in->buf->buffer->use]; | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 1057 |     } | 
 | 1058 |  | 
 | 1059 |     CHECK_BUFFER(in); | 
 | 1060 |  | 
 | 1061 |     if (in->buf->buffer->use > INPUT_CHUNK) { | 
 | 1062 |         return; | 
 | 1063 |     } | 
 | 1064 |     xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); | 
 | 1065 |     if (in->base != in->buf->buffer->content) { | 
 | 1066 |         /* | 
 | 1067 | 	 * the buffer has been realloced | 
 | 1068 | 	 */ | 
| Daniel Veillard | 56a4cb8 | 2001-03-24 17:00:36 +0000 | [diff] [blame] | 1069 | 	indx = in->cur - in->base; | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 1070 | 	in->base = in->buf->buffer->content; | 
| Daniel Veillard | 56a4cb8 | 2001-03-24 17:00:36 +0000 | [diff] [blame] | 1071 | 	in->cur = &in->buf->buffer->content[indx]; | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 1072 |     } | 
| Daniel Veillard | 48b2f89 | 2001-02-25 16:11:03 +0000 | [diff] [blame] | 1073 |     in->end = &in->buf->buffer->content[in->buf->buffer->use]; | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 1074 |  | 
 | 1075 |     CHECK_BUFFER(in); | 
 | 1076 | } | 
 | 1077 |  | 
 | 1078 | /************************************************************************ | 
 | 1079 |  *									* | 
 | 1080 |  * 		UTF8 character input and related functions		* | 
 | 1081 |  *									* | 
 | 1082 |  ************************************************************************/ | 
 | 1083 |  | 
 | 1084 | /** | 
 | 1085 |  * xmlNextChar: | 
 | 1086 |  * @ctxt:  the XML parser context | 
 | 1087 |  * | 
 | 1088 |  * Skip to the next char input char. | 
 | 1089 |  */ | 
 | 1090 |  | 
 | 1091 | void | 
 | 1092 | xmlNextChar(xmlParserCtxtPtr ctxt) { | 
 | 1093 |     if (ctxt->instate == XML_PARSER_EOF) | 
 | 1094 | 	return; | 
 | 1095 |  | 
 | 1096 |     /* | 
 | 1097 |      *   2.11 End-of-Line Handling | 
 | 1098 |      *   the literal two-character sequence "#xD#xA" or a standalone | 
 | 1099 |      *   literal #xD, an XML processor must pass to the application | 
 | 1100 |      *   the single character #xA.  | 
 | 1101 |      */ | 
 | 1102 |     if (ctxt->token != 0) ctxt->token = 0; | 
 | 1103 |     else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { | 
 | 1104 | 	if ((*ctxt->input->cur == 0) && | 
 | 1105 | 	    (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) && | 
 | 1106 | 	    (ctxt->instate != XML_PARSER_COMMENT)) { | 
 | 1107 | 	        /* | 
 | 1108 | 		 * If we are at the end of the current entity and | 
 | 1109 | 		 * the context allows it, we pop consumed entities | 
 | 1110 | 		 * automatically. | 
 | 1111 | 		 * the auto closing should be blocked in other cases | 
 | 1112 | 		 */ | 
 | 1113 | 		xmlPopInput(ctxt); | 
 | 1114 | 	} else { | 
 | 1115 | 	    if (*(ctxt->input->cur) == '\n') { | 
 | 1116 | 		ctxt->input->line++; ctxt->input->col = 1; | 
 | 1117 | 	    } else ctxt->input->col++; | 
 | 1118 | 	    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { | 
 | 1119 | 		/* | 
 | 1120 | 		 * We are supposed to handle UTF8, check it's valid | 
 | 1121 | 		 * From rfc2044: encoding of the Unicode values on UTF-8: | 
 | 1122 | 		 * | 
 | 1123 | 		 * UCS-4 range (hex.)           UTF-8 octet sequence (binary) | 
 | 1124 | 		 * 0000 0000-0000 007F   0xxxxxxx | 
 | 1125 | 		 * 0000 0080-0000 07FF   110xxxxx 10xxxxxx | 
 | 1126 | 		 * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx  | 
 | 1127 | 		 * | 
 | 1128 | 		 * Check for the 0x110000 limit too | 
 | 1129 | 		 */ | 
 | 1130 | 		const unsigned char *cur = ctxt->input->cur; | 
 | 1131 | 		unsigned char c; | 
 | 1132 |  | 
 | 1133 | 		c = *cur; | 
 | 1134 | 		if (c & 0x80) { | 
 | 1135 | 		    if (cur[1] == 0) | 
 | 1136 | 			xmlParserInputGrow(ctxt->input, INPUT_CHUNK); | 
 | 1137 | 		    if ((cur[1] & 0xc0) != 0x80) | 
 | 1138 | 			goto encoding_error; | 
 | 1139 | 		    if ((c & 0xe0) == 0xe0) { | 
 | 1140 | 			unsigned int val; | 
 | 1141 |  | 
 | 1142 | 			if (cur[2] == 0) | 
 | 1143 | 			    xmlParserInputGrow(ctxt->input, INPUT_CHUNK); | 
 | 1144 | 			if ((cur[2] & 0xc0) != 0x80) | 
 | 1145 | 			    goto encoding_error; | 
 | 1146 | 			if ((c & 0xf0) == 0xf0) { | 
 | 1147 | 			    if (cur[3] == 0) | 
 | 1148 | 				xmlParserInputGrow(ctxt->input, INPUT_CHUNK); | 
 | 1149 | 			    if (((c & 0xf8) != 0xf0) || | 
 | 1150 | 				((cur[3] & 0xc0) != 0x80)) | 
 | 1151 | 				goto encoding_error; | 
 | 1152 | 			    /* 4-byte code */ | 
 | 1153 | 			    ctxt->input->cur += 4; | 
 | 1154 | 			    val = (cur[0] & 0x7) << 18; | 
 | 1155 | 			    val |= (cur[1] & 0x3f) << 12; | 
 | 1156 | 			    val |= (cur[2] & 0x3f) << 6; | 
 | 1157 | 			    val |= cur[3] & 0x3f; | 
 | 1158 | 			} else { | 
 | 1159 | 			  /* 3-byte code */ | 
 | 1160 | 			    ctxt->input->cur += 3; | 
 | 1161 | 			    val = (cur[0] & 0xf) << 12; | 
 | 1162 | 			    val |= (cur[1] & 0x3f) << 6; | 
 | 1163 | 			    val |= cur[2] & 0x3f; | 
 | 1164 | 			} | 
 | 1165 | 			if (((val > 0xd7ff) && (val < 0xe000)) || | 
 | 1166 | 			    ((val > 0xfffd) && (val < 0x10000)) || | 
 | 1167 | 			    (val >= 0x110000)) { | 
 | 1168 | 			    if ((ctxt->sax != NULL) && | 
 | 1169 | 				(ctxt->sax->error != NULL)) | 
 | 1170 | 				ctxt->sax->error(ctxt->userData,  | 
 | 1171 | 				 "Char 0x%X out of allowed range\n", val); | 
 | 1172 | 			    ctxt->errNo = XML_ERR_INVALID_ENCODING; | 
 | 1173 | 			    ctxt->wellFormed = 0; | 
 | 1174 | 			    ctxt->disableSAX = 1; | 
 | 1175 | 			}     | 
 | 1176 | 		    } else | 
 | 1177 | 		      /* 2-byte code */ | 
 | 1178 | 		        ctxt->input->cur += 2; | 
 | 1179 | 		} else | 
 | 1180 | 		    /* 1-byte code */ | 
 | 1181 | 		    ctxt->input->cur++; | 
 | 1182 | 	    } else { | 
 | 1183 | 		/* | 
 | 1184 | 		 * Assume it's a fixed lenght encoding (1) with | 
 | 1185 | 		 * a compatibke encoding for the ASCII set, since | 
 | 1186 | 		 * XML constructs only use < 128 chars | 
 | 1187 | 		 */ | 
 | 1188 | 	        ctxt->input->cur++; | 
 | 1189 | 	    } | 
 | 1190 | 	    ctxt->nbChars++; | 
 | 1191 | 	    if (*ctxt->input->cur == 0) | 
 | 1192 | 		xmlParserInputGrow(ctxt->input, INPUT_CHUNK); | 
 | 1193 | 	} | 
 | 1194 |     } else { | 
 | 1195 | 	ctxt->input->cur++; | 
 | 1196 | 	ctxt->nbChars++; | 
 | 1197 | 	if (*ctxt->input->cur == 0) | 
 | 1198 | 	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK); | 
 | 1199 |     } | 
 | 1200 |     if ((*ctxt->input->cur == '%') && (!ctxt->html)) | 
 | 1201 | 	xmlParserHandlePEReference(ctxt); | 
 | 1202 |     if ((*ctxt->input->cur == 0) && | 
 | 1203 |         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) | 
 | 1204 | 	    xmlPopInput(ctxt); | 
 | 1205 |     return; | 
 | 1206 | encoding_error: | 
 | 1207 |     /* | 
 | 1208 |      * If we detect an UTF8 error that probably mean that the | 
 | 1209 |      * input encoding didn't get properly advertized in the | 
 | 1210 |      * declaration header. Report the error and switch the encoding | 
 | 1211 |      * to ISO-Latin-1 (if you don't like this policy, just declare the | 
 | 1212 |      * encoding !) | 
 | 1213 |      */ | 
 | 1214 |     if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { | 
 | 1215 | 	ctxt->sax->error(ctxt->userData,  | 
 | 1216 | 			 "Input is not proper UTF-8, indicate encoding !\n"); | 
 | 1217 | 	ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", | 
 | 1218 | 			ctxt->input->cur[0], ctxt->input->cur[1], | 
 | 1219 | 			ctxt->input->cur[2], ctxt->input->cur[3]); | 
 | 1220 |     } | 
 | 1221 |     ctxt->errNo = XML_ERR_INVALID_ENCODING; | 
 | 1222 |  | 
 | 1223 |     ctxt->charset = XML_CHAR_ENCODING_8859_1;  | 
 | 1224 |     ctxt->input->cur++; | 
 | 1225 |     return; | 
 | 1226 | } | 
 | 1227 |  | 
 | 1228 | /** | 
 | 1229 |  * xmlCurrentChar: | 
 | 1230 |  * @ctxt:  the XML parser context | 
 | 1231 |  * @len:  pointer to the length of the char read | 
 | 1232 |  * | 
 | 1233 |  * The current char value, if using UTF-8 this may actaully span multiple | 
 | 1234 |  * bytes in the input buffer. Implement the end of line normalization: | 
 | 1235 |  * 2.11 End-of-Line Handling | 
 | 1236 |  * Wherever an external parsed entity or the literal entity value | 
 | 1237 |  * of an internal parsed entity contains either the literal two-character | 
 | 1238 |  * sequence "#xD#xA" or a standalone literal #xD, an XML processor | 
 | 1239 |  * must pass to the application the single character #xA. | 
 | 1240 |  * This behavior can conveniently be produced by normalizing all | 
 | 1241 |  * line breaks to #xA on input, before parsing.) | 
 | 1242 |  * | 
 | 1243 |  * Returns the current char value and its lenght | 
 | 1244 |  */ | 
 | 1245 |  | 
 | 1246 | int | 
 | 1247 | xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { | 
 | 1248 |     if (ctxt->instate == XML_PARSER_EOF) | 
 | 1249 | 	return(0); | 
 | 1250 |  | 
 | 1251 |     if (ctxt->token != 0) { | 
 | 1252 | 	*len = 0; | 
 | 1253 | 	return(ctxt->token); | 
 | 1254 |     }	 | 
 | 1255 |     if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) { | 
 | 1256 | 	    *len = 1; | 
 | 1257 | 	    return((int) *ctxt->input->cur); | 
 | 1258 |     } | 
 | 1259 |     if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { | 
 | 1260 | 	/* | 
 | 1261 | 	 * We are supposed to handle UTF8, check it's valid | 
 | 1262 | 	 * From rfc2044: encoding of the Unicode values on UTF-8: | 
 | 1263 | 	 * | 
 | 1264 | 	 * UCS-4 range (hex.)           UTF-8 octet sequence (binary) | 
 | 1265 | 	 * 0000 0000-0000 007F   0xxxxxxx | 
 | 1266 | 	 * 0000 0080-0000 07FF   110xxxxx 10xxxxxx | 
 | 1267 | 	 * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx  | 
 | 1268 | 	 * | 
 | 1269 | 	 * Check for the 0x110000 limit too | 
 | 1270 | 	 */ | 
 | 1271 | 	const unsigned char *cur = ctxt->input->cur; | 
 | 1272 | 	unsigned char c; | 
 | 1273 | 	unsigned int val; | 
 | 1274 |  | 
 | 1275 | 	c = *cur; | 
 | 1276 | 	if (c & 0x80) { | 
 | 1277 | 	    if (cur[1] == 0) | 
 | 1278 | 		xmlParserInputGrow(ctxt->input, INPUT_CHUNK); | 
 | 1279 | 	    if ((cur[1] & 0xc0) != 0x80) | 
 | 1280 | 		goto encoding_error; | 
 | 1281 | 	    if ((c & 0xe0) == 0xe0) { | 
 | 1282 |  | 
 | 1283 | 		if (cur[2] == 0) | 
 | 1284 | 		    xmlParserInputGrow(ctxt->input, INPUT_CHUNK); | 
 | 1285 | 		if ((cur[2] & 0xc0) != 0x80) | 
 | 1286 | 		    goto encoding_error; | 
 | 1287 | 		if ((c & 0xf0) == 0xf0) { | 
 | 1288 | 		    if (cur[3] == 0) | 
 | 1289 | 			xmlParserInputGrow(ctxt->input, INPUT_CHUNK); | 
 | 1290 | 		    if (((c & 0xf8) != 0xf0) || | 
 | 1291 | 			((cur[3] & 0xc0) != 0x80)) | 
 | 1292 | 			goto encoding_error; | 
 | 1293 | 		    /* 4-byte code */ | 
 | 1294 | 		    *len = 4; | 
 | 1295 | 		    val = (cur[0] & 0x7) << 18; | 
 | 1296 | 		    val |= (cur[1] & 0x3f) << 12; | 
 | 1297 | 		    val |= (cur[2] & 0x3f) << 6; | 
 | 1298 | 		    val |= cur[3] & 0x3f; | 
 | 1299 | 		} else { | 
 | 1300 | 		  /* 3-byte code */ | 
 | 1301 | 		    *len = 3; | 
 | 1302 | 		    val = (cur[0] & 0xf) << 12; | 
 | 1303 | 		    val |= (cur[1] & 0x3f) << 6; | 
 | 1304 | 		    val |= cur[2] & 0x3f; | 
 | 1305 | 		} | 
 | 1306 | 	    } else { | 
 | 1307 | 	      /* 2-byte code */ | 
 | 1308 | 		*len = 2; | 
 | 1309 | 		val = (cur[0] & 0x1f) << 6; | 
 | 1310 | 		val |= cur[1] & 0x3f; | 
 | 1311 | 	    } | 
 | 1312 | 	    if (!IS_CHAR(val)) { | 
 | 1313 | 		if ((ctxt->sax != NULL) && | 
 | 1314 | 		    (ctxt->sax->error != NULL)) | 
 | 1315 | 		    ctxt->sax->error(ctxt->userData,  | 
 | 1316 | 				     "Char 0x%X out of allowed range\n", val); | 
 | 1317 | 		ctxt->errNo = XML_ERR_INVALID_ENCODING; | 
 | 1318 | 		ctxt->wellFormed = 0; | 
 | 1319 | 		ctxt->disableSAX = 1; | 
 | 1320 | 	    }     | 
 | 1321 | 	    return(val); | 
 | 1322 | 	} else { | 
 | 1323 | 	    /* 1-byte code */ | 
 | 1324 | 	    *len = 1; | 
 | 1325 | 	    if (*ctxt->input->cur == 0xD) { | 
 | 1326 | 		if (ctxt->input->cur[1] == 0xA) { | 
 | 1327 | 		    ctxt->nbChars++; | 
 | 1328 | 		    ctxt->input->cur++; | 
 | 1329 | 		} | 
 | 1330 | 		return(0xA); | 
 | 1331 | 	    } | 
 | 1332 | 	    return((int) *ctxt->input->cur); | 
 | 1333 | 	} | 
 | 1334 |     } | 
 | 1335 |     /* | 
 | 1336 |      * Assume it's a fixed lenght encoding (1) with | 
 | 1337 |      * a compatibke encoding for the ASCII set, since | 
 | 1338 |      * XML constructs only use < 128 chars | 
 | 1339 |      */ | 
 | 1340 |     *len = 1; | 
 | 1341 |     if (*ctxt->input->cur == 0xD) { | 
 | 1342 | 	if (ctxt->input->cur[1] == 0xA) { | 
 | 1343 | 	    ctxt->nbChars++; | 
 | 1344 | 	    ctxt->input->cur++; | 
 | 1345 | 	} | 
 | 1346 | 	return(0xA); | 
 | 1347 |     } | 
 | 1348 |     return((int) *ctxt->input->cur); | 
 | 1349 | encoding_error: | 
 | 1350 |     /* | 
 | 1351 |      * If we detect an UTF8 error that probably mean that the | 
 | 1352 |      * input encoding didn't get properly advertized in the | 
 | 1353 |      * declaration header. Report the error and switch the encoding | 
 | 1354 |      * to ISO-Latin-1 (if you don't like this policy, just declare the | 
 | 1355 |      * encoding !) | 
 | 1356 |      */ | 
 | 1357 |     if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { | 
 | 1358 | 	ctxt->sax->error(ctxt->userData,  | 
 | 1359 | 			 "Input is not proper UTF-8, indicate encoding !\n"); | 
 | 1360 | 	ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", | 
 | 1361 | 			ctxt->input->cur[0], ctxt->input->cur[1], | 
 | 1362 | 			ctxt->input->cur[2], ctxt->input->cur[3]); | 
 | 1363 |     } | 
 | 1364 |     ctxt->errNo = XML_ERR_INVALID_ENCODING; | 
 | 1365 |  | 
 | 1366 |     ctxt->charset = XML_CHAR_ENCODING_8859_1;  | 
 | 1367 |     *len = 1; | 
 | 1368 |     return((int) *ctxt->input->cur); | 
 | 1369 | } | 
 | 1370 |  | 
 | 1371 | /** | 
 | 1372 |  * xmlStringCurrentChar: | 
 | 1373 |  * @ctxt:  the XML parser context | 
 | 1374 |  * @cur:  pointer to the beginning of the char | 
 | 1375 |  * @len:  pointer to the length of the char read | 
 | 1376 |  * | 
 | 1377 |  * The current char value, if using UTF-8 this may actaully span multiple | 
 | 1378 |  * bytes in the input buffer. | 
 | 1379 |  * | 
 | 1380 |  * Returns the current char value and its lenght | 
 | 1381 |  */ | 
 | 1382 |  | 
 | 1383 | int | 
 | 1384 | xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) { | 
| Daniel Veillard | 61d80a2 | 2001-04-27 17:13:01 +0000 | [diff] [blame] | 1385 |     if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) { | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 1386 | 	/* | 
 | 1387 | 	 * We are supposed to handle UTF8, check it's valid | 
 | 1388 | 	 * From rfc2044: encoding of the Unicode values on UTF-8: | 
 | 1389 | 	 * | 
 | 1390 | 	 * UCS-4 range (hex.)           UTF-8 octet sequence (binary) | 
 | 1391 | 	 * 0000 0000-0000 007F   0xxxxxxx | 
 | 1392 | 	 * 0000 0080-0000 07FF   110xxxxx 10xxxxxx | 
 | 1393 | 	 * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx  | 
 | 1394 | 	 * | 
 | 1395 | 	 * Check for the 0x110000 limit too | 
 | 1396 | 	 */ | 
 | 1397 | 	unsigned char c; | 
 | 1398 | 	unsigned int val; | 
 | 1399 |  | 
 | 1400 | 	c = *cur; | 
 | 1401 | 	if (c & 0x80) { | 
 | 1402 | 	    if ((cur[1] & 0xc0) != 0x80) | 
 | 1403 | 		goto encoding_error; | 
 | 1404 | 	    if ((c & 0xe0) == 0xe0) { | 
 | 1405 |  | 
 | 1406 | 		if ((cur[2] & 0xc0) != 0x80) | 
 | 1407 | 		    goto encoding_error; | 
 | 1408 | 		if ((c & 0xf0) == 0xf0) { | 
 | 1409 | 		    if (((c & 0xf8) != 0xf0) || | 
 | 1410 | 			((cur[3] & 0xc0) != 0x80)) | 
 | 1411 | 			goto encoding_error; | 
 | 1412 | 		    /* 4-byte code */ | 
 | 1413 | 		    *len = 4; | 
 | 1414 | 		    val = (cur[0] & 0x7) << 18; | 
 | 1415 | 		    val |= (cur[1] & 0x3f) << 12; | 
 | 1416 | 		    val |= (cur[2] & 0x3f) << 6; | 
 | 1417 | 		    val |= cur[3] & 0x3f; | 
 | 1418 | 		} else { | 
 | 1419 | 		  /* 3-byte code */ | 
 | 1420 | 		    *len = 3; | 
 | 1421 | 		    val = (cur[0] & 0xf) << 12; | 
 | 1422 | 		    val |= (cur[1] & 0x3f) << 6; | 
 | 1423 | 		    val |= cur[2] & 0x3f; | 
 | 1424 | 		} | 
 | 1425 | 	    } else { | 
 | 1426 | 	      /* 2-byte code */ | 
 | 1427 | 		*len = 2; | 
 | 1428 | 		val = (cur[0] & 0x1f) << 6; | 
| Daniel Veillard | e043ee1 | 2001-04-16 14:08:07 +0000 | [diff] [blame] | 1429 | 		val |= cur[1] & 0x3f; | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 1430 | 	    } | 
 | 1431 | 	    if (!IS_CHAR(val)) { | 
 | 1432 | 		if ((ctxt->sax != NULL) && | 
 | 1433 | 		    (ctxt->sax->error != NULL)) | 
 | 1434 | 		    ctxt->sax->error(ctxt->userData,  | 
 | 1435 | 				     "Char 0x%X out of allowed range\n", val); | 
 | 1436 | 		ctxt->errNo = XML_ERR_INVALID_ENCODING; | 
 | 1437 | 		ctxt->wellFormed = 0; | 
 | 1438 | 		ctxt->disableSAX = 1; | 
 | 1439 | 	    }     | 
 | 1440 | 	    return(val); | 
 | 1441 | 	} else { | 
 | 1442 | 	    /* 1-byte code */ | 
 | 1443 | 	    *len = 1; | 
 | 1444 | 	    return((int) *cur); | 
 | 1445 | 	} | 
 | 1446 |     } | 
 | 1447 |     /* | 
 | 1448 |      * Assume it's a fixed lenght encoding (1) with | 
 | 1449 |      * a compatibke encoding for the ASCII set, since | 
 | 1450 |      * XML constructs only use < 128 chars | 
 | 1451 |      */ | 
 | 1452 |     *len = 1; | 
 | 1453 |     return((int) *cur); | 
 | 1454 | encoding_error: | 
 | 1455 |     /* | 
 | 1456 |      * If we detect an UTF8 error that probably mean that the | 
 | 1457 |      * input encoding didn't get properly advertized in the | 
 | 1458 |      * declaration header. Report the error and switch the encoding | 
 | 1459 |      * to ISO-Latin-1 (if you don't like this policy, just declare the | 
 | 1460 |      * encoding !) | 
 | 1461 |      */ | 
 | 1462 |     if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) { | 
 | 1463 | 	ctxt->sax->error(ctxt->userData,  | 
 | 1464 | 			 "Input is not proper UTF-8, indicate encoding !\n"); | 
 | 1465 | 	ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", | 
 | 1466 | 			ctxt->input->cur[0], ctxt->input->cur[1], | 
 | 1467 | 			ctxt->input->cur[2], ctxt->input->cur[3]); | 
 | 1468 |     } | 
 | 1469 |     ctxt->errNo = XML_ERR_INVALID_ENCODING; | 
 | 1470 |  | 
 | 1471 |     *len = 1; | 
 | 1472 |     return((int) *cur); | 
 | 1473 | } | 
 | 1474 |  | 
 | 1475 | /** | 
| Daniel Veillard | 56a4cb8 | 2001-03-24 17:00:36 +0000 | [diff] [blame] | 1476 |  * xmlCopyCharMultiByte: | 
 | 1477 |  * @out:  pointer to an arry of xmlChar | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 1478 |  * @val:  the char value | 
 | 1479 |  * | 
 | 1480 |  * append the char value in the array  | 
 | 1481 |  * | 
 | 1482 |  * Returns the number of xmlChar written | 
 | 1483 |  */ | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 1484 | int | 
| Daniel Veillard | 56a4cb8 | 2001-03-24 17:00:36 +0000 | [diff] [blame] | 1485 | xmlCopyCharMultiByte(xmlChar *out, int val) { | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 1486 |     /* | 
 | 1487 |      * We are supposed to handle UTF8, check it's valid | 
 | 1488 |      * From rfc2044: encoding of the Unicode values on UTF-8: | 
 | 1489 |      * | 
 | 1490 |      * UCS-4 range (hex.)           UTF-8 octet sequence (binary) | 
 | 1491 |      * 0000 0000-0000 007F   0xxxxxxx | 
 | 1492 |      * 0000 0080-0000 07FF   110xxxxx 10xxxxxx | 
 | 1493 |      * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx  | 
 | 1494 |      */ | 
| Daniel Veillard | 56a4cb8 | 2001-03-24 17:00:36 +0000 | [diff] [blame] | 1495 |     if  (val >= 0x80) { | 
 | 1496 | 	xmlChar *savedout = out; | 
 | 1497 | 	int bits; | 
 | 1498 | 	if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; } | 
 | 1499 | 	else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;} | 
 | 1500 | 	else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; } | 
 | 1501 | 	else { | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 1502 | 	    xmlGenericError(xmlGenericErrorContext, | 
 | 1503 | 		    "Internal error, xmlCopyChar 0x%X out of bound\n", | 
 | 1504 | 		    val); | 
 | 1505 | 	    return(0); | 
 | 1506 | 	} | 
| Daniel Veillard | 56a4cb8 | 2001-03-24 17:00:36 +0000 | [diff] [blame] | 1507 | 	for ( ; bits >= 0; bits-= 6) | 
 | 1508 | 	    *out++= ((val >> bits) & 0x3F) | 0x80 ; | 
 | 1509 | 	return (out - savedout); | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 1510 |     } | 
 | 1511 |     *out = (xmlChar) val; | 
| Daniel Veillard | 56a4cb8 | 2001-03-24 17:00:36 +0000 | [diff] [blame] | 1512 |     return 1; | 
 | 1513 | } | 
 | 1514 |  | 
 | 1515 | /** | 
 | 1516 |  * xmlCopyChar: | 
 | 1517 |  * @len:  Ignored, compatibility | 
 | 1518 |  * @out:  pointer to an arry of xmlChar | 
 | 1519 |  * @val:  the char value | 
 | 1520 |  * | 
 | 1521 |  * append the char value in the array  | 
 | 1522 |  * | 
 | 1523 |  * Returns the number of xmlChar written | 
 | 1524 |  */ | 
 | 1525 |  | 
 | 1526 | int | 
| Daniel Veillard | c86a4fa | 2001-03-26 16:28:29 +0000 | [diff] [blame] | 1527 | xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) { | 
| Daniel Veillard | 56a4cb8 | 2001-03-24 17:00:36 +0000 | [diff] [blame] | 1528 |     /* the len parameter is ignored */ | 
 | 1529 |     if  (val >= 0x80) { | 
 | 1530 | 	return(xmlCopyCharMultiByte (out, val)); | 
 | 1531 |     } | 
 | 1532 |     *out = (xmlChar) val; | 
 | 1533 |     return 1; | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 1534 | } | 
 | 1535 |  | 
 | 1536 | /************************************************************************ | 
 | 1537 |  *									* | 
 | 1538 |  *		Commodity functions to switch encodings			* | 
 | 1539 |  *									* | 
 | 1540 |  ************************************************************************/ | 
 | 1541 |  | 
 | 1542 | /** | 
 | 1543 |  * xmlSwitchEncoding: | 
 | 1544 |  * @ctxt:  the parser context | 
 | 1545 |  * @enc:  the encoding value (number) | 
 | 1546 |  * | 
 | 1547 |  * change the input functions when discovering the character encoding | 
 | 1548 |  * of a given entity. | 
 | 1549 |  * | 
 | 1550 |  * Returns 0 in case of success, -1 otherwise | 
 | 1551 |  */ | 
 | 1552 | int | 
 | 1553 | xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) | 
 | 1554 | { | 
 | 1555 |     xmlCharEncodingHandlerPtr handler; | 
 | 1556 |  | 
 | 1557 |     switch (enc) { | 
 | 1558 | 	case XML_CHAR_ENCODING_ERROR: | 
 | 1559 | 	    ctxt->errNo = XML_ERR_UNKNOWN_ENCODING; | 
 | 1560 | 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 1561 | 		ctxt->sax->error(ctxt->userData, "encoding unknown\n"); | 
 | 1562 | 	    ctxt->wellFormed = 0; | 
 | 1563 | 	    ctxt->disableSAX = 1; | 
 | 1564 | 	    break; | 
 | 1565 | 	case XML_CHAR_ENCODING_NONE: | 
 | 1566 | 	    /* let's assume it's UTF-8 without the XML decl */ | 
 | 1567 | 	    ctxt->charset = XML_CHAR_ENCODING_UTF8; | 
 | 1568 | 	    return(0); | 
 | 1569 | 	case XML_CHAR_ENCODING_UTF8: | 
 | 1570 | 	    /* default encoding, no conversion should be needed */ | 
 | 1571 | 	    ctxt->charset = XML_CHAR_ENCODING_UTF8; | 
| Daniel Veillard | 87a764e | 2001-06-20 17:41:10 +0000 | [diff] [blame^] | 1572 |  | 
 | 1573 | 	    /* | 
 | 1574 | 	     * Errata on XML-1.0 June 20 2001 | 
 | 1575 | 	     * Specific handling of the Byte Order Mark for | 
 | 1576 | 	     * UTF-8 | 
 | 1577 | 	     */ | 
 | 1578 | 	    if ((ctxt->input->cur[0] == 0xEF) && | 
 | 1579 | 		(ctxt->input->cur[1] == 0xBB) && | 
 | 1580 | 		(ctxt->input->cur[2] == 0xBF)) { | 
 | 1581 | 		ctxt->input->cur += 3; | 
 | 1582 | 	    } | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 1583 | 	    return(0); | 
 | 1584 | 	default: | 
 | 1585 | 	    break; | 
 | 1586 |     } | 
 | 1587 |     handler = xmlGetCharEncodingHandler(enc); | 
 | 1588 |     if (handler == NULL) { | 
 | 1589 | 	/* | 
 | 1590 | 	 * Default handlers. | 
 | 1591 | 	 */ | 
 | 1592 | 	switch (enc) { | 
 | 1593 | 	    case XML_CHAR_ENCODING_ERROR: | 
 | 1594 | 		ctxt->errNo = XML_ERR_UNKNOWN_ENCODING; | 
 | 1595 | 		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 1596 | 		    ctxt->sax->error(ctxt->userData, "encoding unknown\n"); | 
 | 1597 | 		ctxt->wellFormed = 0; | 
 | 1598 | 		ctxt->disableSAX = 1; | 
 | 1599 | 		ctxt->charset = XML_CHAR_ENCODING_UTF8; | 
 | 1600 | 		break; | 
 | 1601 | 	    case XML_CHAR_ENCODING_NONE: | 
 | 1602 | 		/* let's assume it's UTF-8 without the XML decl */ | 
 | 1603 | 		ctxt->charset = XML_CHAR_ENCODING_UTF8; | 
 | 1604 | 		return(0); | 
 | 1605 | 	    case XML_CHAR_ENCODING_UTF8: | 
 | 1606 | 	    case XML_CHAR_ENCODING_ASCII: | 
 | 1607 | 		/* default encoding, no conversion should be needed */ | 
 | 1608 | 		ctxt->charset = XML_CHAR_ENCODING_UTF8; | 
 | 1609 | 		return(0); | 
 | 1610 | 	    case XML_CHAR_ENCODING_UTF16LE: | 
 | 1611 | 		break; | 
 | 1612 | 	    case XML_CHAR_ENCODING_UTF16BE: | 
 | 1613 | 		break; | 
 | 1614 | 	    case XML_CHAR_ENCODING_UCS4LE: | 
 | 1615 | 		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; | 
 | 1616 | 		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 1617 | 		    ctxt->sax->error(ctxt->userData, | 
 | 1618 | 		      "char encoding USC4 little endian not supported\n"); | 
 | 1619 | 		break; | 
 | 1620 | 	    case XML_CHAR_ENCODING_UCS4BE: | 
 | 1621 | 		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; | 
 | 1622 | 		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 1623 | 		    ctxt->sax->error(ctxt->userData, | 
 | 1624 | 		      "char encoding USC4 big endian not supported\n"); | 
 | 1625 | 		break; | 
 | 1626 | 	    case XML_CHAR_ENCODING_EBCDIC: | 
 | 1627 | 		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; | 
 | 1628 | 		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 1629 | 		    ctxt->sax->error(ctxt->userData, | 
 | 1630 | 		      "char encoding EBCDIC not supported\n"); | 
 | 1631 | 		break; | 
 | 1632 | 	    case XML_CHAR_ENCODING_UCS4_2143: | 
 | 1633 | 		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; | 
 | 1634 | 		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 1635 | 		    ctxt->sax->error(ctxt->userData, | 
 | 1636 | 		      "char encoding UCS4 2143 not supported\n"); | 
 | 1637 | 		break; | 
 | 1638 | 	    case XML_CHAR_ENCODING_UCS4_3412: | 
 | 1639 | 		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; | 
 | 1640 | 		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 1641 | 		    ctxt->sax->error(ctxt->userData, | 
 | 1642 | 		      "char encoding UCS4 3412 not supported\n"); | 
 | 1643 | 		break; | 
 | 1644 | 	    case XML_CHAR_ENCODING_UCS2: | 
 | 1645 | 		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; | 
 | 1646 | 		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 1647 | 		    ctxt->sax->error(ctxt->userData, | 
 | 1648 | 		      "char encoding UCS2 not supported\n"); | 
 | 1649 | 		break; | 
 | 1650 | 	    case XML_CHAR_ENCODING_8859_1: | 
 | 1651 | 	    case XML_CHAR_ENCODING_8859_2: | 
 | 1652 | 	    case XML_CHAR_ENCODING_8859_3: | 
 | 1653 | 	    case XML_CHAR_ENCODING_8859_4: | 
 | 1654 | 	    case XML_CHAR_ENCODING_8859_5: | 
 | 1655 | 	    case XML_CHAR_ENCODING_8859_6: | 
 | 1656 | 	    case XML_CHAR_ENCODING_8859_7: | 
 | 1657 | 	    case XML_CHAR_ENCODING_8859_8: | 
 | 1658 | 	    case XML_CHAR_ENCODING_8859_9: | 
 | 1659 | 		/* | 
 | 1660 | 		 * We used to keep the internal content in the | 
 | 1661 | 		 * document encoding however this turns being unmaintainable | 
 | 1662 | 		 * So xmlGetCharEncodingHandler() will return non-null | 
 | 1663 | 		 * values for this now. | 
 | 1664 | 		 */ | 
 | 1665 | 		if ((ctxt->inputNr == 1) && | 
 | 1666 | 		    (ctxt->encoding == NULL) && | 
 | 1667 | 		    (ctxt->input->encoding != NULL)) { | 
 | 1668 | 		    ctxt->encoding = xmlStrdup(ctxt->input->encoding); | 
 | 1669 | 		} | 
 | 1670 | 		ctxt->charset = enc; | 
 | 1671 | 		return(0); | 
 | 1672 | 	    case XML_CHAR_ENCODING_2022_JP: | 
 | 1673 | 		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; | 
 | 1674 | 		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 1675 | 		    ctxt->sax->error(ctxt->userData, | 
 | 1676 | 		      "char encoding ISO-2022-JPnot supported\n"); | 
 | 1677 | 		break; | 
 | 1678 | 	    case XML_CHAR_ENCODING_SHIFT_JIS: | 
 | 1679 | 		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; | 
 | 1680 | 		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 1681 | 		    ctxt->sax->error(ctxt->userData, | 
 | 1682 | 		      "char encoding Shift_JIS not supported\n"); | 
 | 1683 | 		break; | 
 | 1684 | 	    case XML_CHAR_ENCODING_EUC_JP: | 
 | 1685 | 		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; | 
 | 1686 | 		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 1687 | 		    ctxt->sax->error(ctxt->userData, | 
 | 1688 | 		      "char encoding EUC-JPnot supported\n"); | 
 | 1689 | 		break; | 
 | 1690 | 	} | 
 | 1691 |     } | 
 | 1692 |     if (handler == NULL) | 
 | 1693 | 	return(-1); | 
 | 1694 |     ctxt->charset = XML_CHAR_ENCODING_UTF8; | 
 | 1695 |     return(xmlSwitchToEncoding(ctxt, handler)); | 
 | 1696 | } | 
 | 1697 |  | 
 | 1698 | /** | 
 | 1699 |  * xmlSwitchToEncoding: | 
 | 1700 |  * @ctxt:  the parser context | 
 | 1701 |  * @handler:  the encoding handler | 
 | 1702 |  * | 
 | 1703 |  * change the input functions when discovering the character encoding | 
 | 1704 |  * of a given entity. | 
 | 1705 |  * | 
 | 1706 |  * Returns 0 in case of success, -1 otherwise | 
 | 1707 |  */ | 
 | 1708 | int | 
 | 1709 | xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)  | 
 | 1710 | { | 
 | 1711 |     int nbchars; | 
 | 1712 |  | 
 | 1713 |     if (handler != NULL) { | 
 | 1714 |         if (ctxt->input != NULL) { | 
 | 1715 | 	    if (ctxt->input->buf != NULL) { | 
 | 1716 | 	        if (ctxt->input->buf->encoder != NULL) { | 
 | 1717 | 		    if (ctxt->input->buf->encoder == handler) | 
 | 1718 | 			return(0); | 
 | 1719 | 		    /* | 
 | 1720 | 		     * Note: this is a bit dangerous, but that's what it | 
 | 1721 | 		     * takes to use nearly compatible signature for different | 
 | 1722 | 		     * encodings. | 
 | 1723 | 		     */ | 
 | 1724 | 		    xmlCharEncCloseFunc(ctxt->input->buf->encoder); | 
 | 1725 | 		    ctxt->input->buf->encoder = handler; | 
 | 1726 | 		    return(0); | 
 | 1727 | 		} | 
 | 1728 | 		ctxt->input->buf->encoder = handler; | 
 | 1729 |  | 
 | 1730 | 	        /* | 
 | 1731 | 		 * Is there already some content down the pipe to convert ? | 
 | 1732 | 		 */ | 
 | 1733 | 	        if ((ctxt->input->buf->buffer != NULL) && | 
 | 1734 | 		    (ctxt->input->buf->buffer->use > 0)) { | 
 | 1735 | 		    int processed; | 
 | 1736 |  | 
 | 1737 | 		    /* | 
 | 1738 | 		     * Specific handling of the Byte Order Mark for  | 
 | 1739 | 		     * UTF-16 | 
 | 1740 | 		     */ | 
 | 1741 | 		    if ((handler->name != NULL) && | 
 | 1742 | 			(!strcmp(handler->name, "UTF-16LE")) &&  | 
 | 1743 | 		        (ctxt->input->cur[0] == 0xFF) && | 
 | 1744 | 		        (ctxt->input->cur[1] == 0xFE)) { | 
 | 1745 | 			ctxt->input->cur += 2; | 
 | 1746 | 		    } | 
 | 1747 | 		    if ((handler->name != NULL) && | 
 | 1748 | 			(!strcmp(handler->name, "UTF-16BE")) &&  | 
 | 1749 | 		        (ctxt->input->cur[0] == 0xFE) && | 
 | 1750 | 		        (ctxt->input->cur[1] == 0xFF)) { | 
 | 1751 | 			ctxt->input->cur += 2; | 
 | 1752 | 		    } | 
| Daniel Veillard | 87a764e | 2001-06-20 17:41:10 +0000 | [diff] [blame^] | 1753 | 		    /* | 
 | 1754 | 		     * Errata on XML-1.0 June 20 2001 | 
 | 1755 | 		     * Specific handling of the Byte Order Mark for | 
 | 1756 | 		     * UTF-8 | 
 | 1757 | 		     */ | 
 | 1758 | 		    if ((handler->name != NULL) && | 
 | 1759 | 			(!strcmp(handler->name, "UTF-8")) && | 
 | 1760 | 			(ctxt->input->cur[0] == 0xEF) && | 
 | 1761 | 			(ctxt->input->cur[1] == 0xBB) && | 
 | 1762 | 			(ctxt->input->cur[1] == 0xBF)) { | 
 | 1763 | 			ctxt->input->cur += 3; | 
 | 1764 | 		    } | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 1765 |  | 
 | 1766 | 		    /* | 
 | 1767 | 		     * Shring the current input buffer. | 
 | 1768 | 		     * Move it as the raw buffer and create a new input buffer | 
 | 1769 | 		     */ | 
 | 1770 | 		    processed = ctxt->input->cur - ctxt->input->base; | 
 | 1771 | 		    xmlBufferShrink(ctxt->input->buf->buffer, processed); | 
 | 1772 | 		    ctxt->input->buf->raw = ctxt->input->buf->buffer; | 
 | 1773 | 		    ctxt->input->buf->buffer = xmlBufferCreate(); | 
 | 1774 |  | 
 | 1775 | 		    if (ctxt->html) { | 
 | 1776 | 			/* | 
 | 1777 | 			 * converst as much as possbile of the buffer | 
 | 1778 | 			 */ | 
 | 1779 | 			nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder, | 
 | 1780 | 				                   ctxt->input->buf->buffer, | 
 | 1781 | 						   ctxt->input->buf->raw); | 
 | 1782 | 		    } else { | 
 | 1783 | 			/* | 
 | 1784 | 			 * convert just enough to get | 
 | 1785 | 			 * '<?xml version="1.0" encoding="xxx"?>' | 
 | 1786 | 			 * parsed with the autodetected encoding | 
 | 1787 | 			 * into the parser reading buffer. | 
 | 1788 | 			 */ | 
 | 1789 | 			nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder, | 
 | 1790 | 						      ctxt->input->buf->buffer, | 
 | 1791 | 						      ctxt->input->buf->raw); | 
 | 1792 | 		    } | 
 | 1793 | 		    if (nbchars < 0) { | 
 | 1794 | 			xmlGenericError(xmlGenericErrorContext, | 
 | 1795 | 				"xmlSwitchToEncoding: encoder error\n"); | 
 | 1796 | 			return(-1); | 
 | 1797 | 		    } | 
 | 1798 | 		    ctxt->input->base = | 
 | 1799 | 		    ctxt->input->cur = ctxt->input->buf->buffer->content; | 
| Daniel Veillard | 48b2f89 | 2001-02-25 16:11:03 +0000 | [diff] [blame] | 1800 | 		    ctxt->input->end = | 
 | 1801 | 			&ctxt->input->base[ctxt->input->buf->buffer->use]; | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 1802 |  | 
 | 1803 | 		} | 
 | 1804 | 		return(0); | 
 | 1805 | 	    } else { | 
 | 1806 | 	        if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) { | 
 | 1807 | 		    /* | 
 | 1808 | 		     * When parsing a static memory array one must know the | 
 | 1809 | 		     * size to be able to convert the buffer. | 
 | 1810 | 		     */ | 
 | 1811 | 		    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 1812 | 			ctxt->sax->error(ctxt->userData, | 
 | 1813 | 					 "xmlSwitchEncoding : no input\n"); | 
 | 1814 | 		    return(-1); | 
 | 1815 | 		} else { | 
 | 1816 | 		    int processed; | 
 | 1817 |  | 
 | 1818 | 		    /* | 
 | 1819 | 		     * Shring the current input buffer. | 
 | 1820 | 		     * Move it as the raw buffer and create a new input buffer | 
 | 1821 | 		     */ | 
 | 1822 | 		    processed = ctxt->input->cur - ctxt->input->base; | 
 | 1823 |  | 
 | 1824 | 		    ctxt->input->buf->raw = xmlBufferCreate(); | 
 | 1825 | 		    xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur, | 
 | 1826 | 				 ctxt->input->length - processed); | 
 | 1827 | 		    ctxt->input->buf->buffer = xmlBufferCreate(); | 
 | 1828 |  | 
 | 1829 | 		    /* | 
 | 1830 | 		     * convert as much as possible of the raw input | 
 | 1831 | 		     * to the parser reading buffer. | 
 | 1832 | 		     */ | 
 | 1833 | 		    nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder, | 
 | 1834 | 		                               ctxt->input->buf->buffer, | 
 | 1835 | 					       ctxt->input->buf->raw); | 
 | 1836 | 		    if (nbchars < 0) { | 
 | 1837 | 			xmlGenericError(xmlGenericErrorContext, | 
 | 1838 | 				"xmlSwitchToEncoding: encoder error\n"); | 
 | 1839 | 			return(-1); | 
 | 1840 | 		    } | 
 | 1841 |  | 
 | 1842 | 		    /* | 
 | 1843 | 		     * Conversion succeeded, get rid of the old buffer | 
 | 1844 | 		     */ | 
 | 1845 | 		    if ((ctxt->input->free != NULL) && | 
 | 1846 | 		        (ctxt->input->base != NULL)) | 
 | 1847 | 			ctxt->input->free((xmlChar *) ctxt->input->base); | 
 | 1848 | 		    ctxt->input->base = | 
 | 1849 | 		    ctxt->input->cur = ctxt->input->buf->buffer->content; | 
| Daniel Veillard | 48b2f89 | 2001-02-25 16:11:03 +0000 | [diff] [blame] | 1850 | 		    ctxt->input->end = | 
 | 1851 | 			&ctxt->input->base[ctxt->input->buf->buffer->use]; | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 1852 | 		} | 
 | 1853 | 	    } | 
 | 1854 | 	} else { | 
 | 1855 | 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 1856 | 	        ctxt->sax->error(ctxt->userData, | 
 | 1857 | 		                 "xmlSwitchEncoding : no input\n"); | 
 | 1858 | 	    return(-1); | 
 | 1859 | 	} | 
 | 1860 | 	/* | 
 | 1861 | 	 * The parsing is now done in UTF8 natively | 
 | 1862 | 	 */ | 
 | 1863 | 	ctxt->charset = XML_CHAR_ENCODING_UTF8; | 
 | 1864 |     } else  | 
 | 1865 | 	return(-1); | 
 | 1866 |     return(0); | 
 | 1867 |  | 
 | 1868 | } | 
 | 1869 |  | 
 | 1870 | /************************************************************************ | 
 | 1871 |  *									* | 
 | 1872 |  *	Commodity functions to handle entities processing		* | 
 | 1873 |  *									* | 
 | 1874 |  ************************************************************************/ | 
 | 1875 |  | 
 | 1876 | /** | 
 | 1877 |  * xmlFreeInputStream: | 
 | 1878 |  * @input:  an xmlParserInputPtr | 
 | 1879 |  * | 
 | 1880 |  * Free up an input stream. | 
 | 1881 |  */ | 
 | 1882 | void | 
 | 1883 | xmlFreeInputStream(xmlParserInputPtr input) { | 
 | 1884 |     if (input == NULL) return; | 
 | 1885 |  | 
 | 1886 |     if (input->filename != NULL) xmlFree((char *) input->filename); | 
 | 1887 |     if (input->directory != NULL) xmlFree((char *) input->directory); | 
 | 1888 |     if (input->encoding != NULL) xmlFree((char *) input->encoding); | 
 | 1889 |     if (input->version != NULL) xmlFree((char *) input->version); | 
 | 1890 |     if ((input->free != NULL) && (input->base != NULL)) | 
 | 1891 |         input->free((xmlChar *) input->base); | 
 | 1892 |     if (input->buf != NULL)  | 
 | 1893 |         xmlFreeParserInputBuffer(input->buf); | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 1894 |     xmlFree(input); | 
 | 1895 | } | 
 | 1896 |  | 
 | 1897 | /** | 
 | 1898 |  * xmlNewInputStream: | 
 | 1899 |  * @ctxt:  an XML parser context | 
 | 1900 |  * | 
 | 1901 |  * Create a new input stream structure | 
 | 1902 |  * Returns the new input stream or NULL | 
 | 1903 |  */ | 
 | 1904 | xmlParserInputPtr | 
 | 1905 | xmlNewInputStream(xmlParserCtxtPtr ctxt) { | 
 | 1906 |     xmlParserInputPtr input; | 
 | 1907 |  | 
 | 1908 |     input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput)); | 
 | 1909 |     if (input == NULL) { | 
 | 1910 | 	if (ctxt != NULL) { | 
 | 1911 | 	    ctxt->errNo = XML_ERR_NO_MEMORY; | 
 | 1912 | 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 1913 | 		ctxt->sax->error(ctxt->userData,  | 
 | 1914 | 			 "malloc: couldn't allocate a new input stream\n"); | 
 | 1915 | 	    ctxt->errNo = XML_ERR_NO_MEMORY; | 
 | 1916 | 	} | 
 | 1917 | 	return(NULL); | 
 | 1918 |     } | 
 | 1919 |     memset(input, 0, sizeof(xmlParserInput)); | 
 | 1920 |     input->line = 1; | 
 | 1921 |     input->col = 1; | 
 | 1922 |     input->standalone = -1; | 
 | 1923 |     return(input); | 
 | 1924 | } | 
 | 1925 |  | 
 | 1926 | /** | 
 | 1927 |  * xmlNewIOInputStream: | 
 | 1928 |  * @ctxt:  an XML parser context | 
 | 1929 |  * @input:  an I/O Input | 
 | 1930 |  * @enc:  the charset encoding if known | 
 | 1931 |  * | 
 | 1932 |  * Create a new input stream structure encapsulating the @input into | 
 | 1933 |  * a stream suitable for the parser. | 
 | 1934 |  * | 
 | 1935 |  * Returns the new input stream or NULL | 
 | 1936 |  */ | 
 | 1937 | xmlParserInputPtr | 
 | 1938 | xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, | 
 | 1939 | 	            xmlCharEncoding enc) { | 
 | 1940 |     xmlParserInputPtr inputStream; | 
 | 1941 |  | 
 | 1942 |     if (xmlParserDebugEntities) | 
 | 1943 | 	xmlGenericError(xmlGenericErrorContext, "new input from I/O\n"); | 
 | 1944 |     inputStream = xmlNewInputStream(ctxt); | 
 | 1945 |     if (inputStream == NULL) { | 
 | 1946 | 	return(NULL); | 
 | 1947 |     } | 
 | 1948 |     inputStream->filename = NULL; | 
 | 1949 |     inputStream->buf = input; | 
 | 1950 |     inputStream->base = inputStream->buf->buffer->content; | 
 | 1951 |     inputStream->cur = inputStream->buf->buffer->content; | 
| Daniel Veillard | 48b2f89 | 2001-02-25 16:11:03 +0000 | [diff] [blame] | 1952 |     inputStream->end = &inputStream->base[inputStream->buf->buffer->use]; | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 1953 |     if (enc != XML_CHAR_ENCODING_NONE) { | 
 | 1954 |         xmlSwitchEncoding(ctxt, enc); | 
 | 1955 |     } | 
 | 1956 |  | 
 | 1957 |     return(inputStream); | 
 | 1958 | } | 
 | 1959 |  | 
 | 1960 | /** | 
 | 1961 |  * xmlNewEntityInputStream: | 
 | 1962 |  * @ctxt:  an XML parser context | 
 | 1963 |  * @entity:  an Entity pointer | 
 | 1964 |  * | 
 | 1965 |  * Create a new input stream based on an xmlEntityPtr | 
 | 1966 |  * | 
 | 1967 |  * Returns the new input stream or NULL | 
 | 1968 |  */ | 
 | 1969 | xmlParserInputPtr | 
 | 1970 | xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { | 
 | 1971 |     xmlParserInputPtr input; | 
 | 1972 |  | 
 | 1973 |     if (entity == NULL) { | 
 | 1974 |         ctxt->errNo = XML_ERR_INTERNAL_ERROR; | 
 | 1975 |         if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 1976 | 	    ctxt->sax->error(ctxt->userData, | 
 | 1977 | 	      "internal: xmlNewEntityInputStream entity = NULL\n"); | 
 | 1978 | 	ctxt->errNo = XML_ERR_INTERNAL_ERROR; | 
 | 1979 | 	return(NULL); | 
 | 1980 |     } | 
 | 1981 |     if (xmlParserDebugEntities) | 
 | 1982 | 	xmlGenericError(xmlGenericErrorContext, | 
 | 1983 | 		"new input from entity: %s\n", entity->name); | 
 | 1984 |     if (entity->content == NULL) { | 
 | 1985 | 	switch (entity->etype) { | 
 | 1986 |             case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: | 
 | 1987 | 	        ctxt->errNo = XML_ERR_UNPARSED_ENTITY; | 
 | 1988 | 		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 1989 | 		    ctxt->sax->error(ctxt->userData, | 
 | 1990 | 		      "xmlNewEntityInputStream unparsed entity !\n"); | 
 | 1991 |                 break; | 
 | 1992 |             case XML_EXTERNAL_GENERAL_PARSED_ENTITY: | 
 | 1993 |             case XML_EXTERNAL_PARAMETER_ENTITY: | 
 | 1994 | 		return(xmlLoadExternalEntity((char *) entity->URI, | 
 | 1995 | 		       (char *) entity->ExternalID, ctxt)); | 
 | 1996 |             case XML_INTERNAL_GENERAL_ENTITY: | 
 | 1997 | 		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 1998 | 		    ctxt->sax->error(ctxt->userData, | 
 | 1999 | 	  "Internal entity %s without content !\n", entity->name); | 
 | 2000 |                 break; | 
 | 2001 |             case XML_INTERNAL_PARAMETER_ENTITY: | 
 | 2002 | 		ctxt->errNo = XML_ERR_INTERNAL_ERROR; | 
 | 2003 | 		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 2004 | 		    ctxt->sax->error(ctxt->userData, | 
 | 2005 | 	  "Internal parameter entity %s without content !\n", entity->name); | 
 | 2006 |                 break; | 
 | 2007 |             case XML_INTERNAL_PREDEFINED_ENTITY: | 
 | 2008 | 		ctxt->errNo = XML_ERR_INTERNAL_ERROR; | 
 | 2009 | 		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 2010 | 		    ctxt->sax->error(ctxt->userData, | 
 | 2011 | 	      "Predefined entity %s without content !\n", entity->name); | 
 | 2012 |                 break; | 
 | 2013 | 	} | 
 | 2014 | 	return(NULL); | 
 | 2015 |     } | 
 | 2016 |     input = xmlNewInputStream(ctxt); | 
 | 2017 |     if (input == NULL) { | 
 | 2018 | 	return(NULL); | 
 | 2019 |     } | 
 | 2020 |     input->filename = (char *) entity->URI; | 
 | 2021 |     input->base = entity->content; | 
 | 2022 |     input->cur = entity->content; | 
 | 2023 |     input->length = entity->length; | 
| Daniel Veillard | 48b2f89 | 2001-02-25 16:11:03 +0000 | [diff] [blame] | 2024 |     input->end = &entity->content[input->length]; | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 2025 |     return(input); | 
 | 2026 | } | 
 | 2027 |  | 
 | 2028 | /** | 
 | 2029 |  * xmlNewStringInputStream: | 
 | 2030 |  * @ctxt:  an XML parser context | 
 | 2031 |  * @buffer:  an memory buffer | 
 | 2032 |  * | 
 | 2033 |  * Create a new input stream based on a memory buffer. | 
 | 2034 |  * Returns the new input stream | 
 | 2035 |  */ | 
 | 2036 | xmlParserInputPtr | 
 | 2037 | xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) { | 
 | 2038 |     xmlParserInputPtr input; | 
 | 2039 |  | 
 | 2040 |     if (buffer == NULL) { | 
 | 2041 | 	ctxt->errNo = XML_ERR_INTERNAL_ERROR; | 
 | 2042 |         if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 2043 | 	    ctxt->sax->error(ctxt->userData, | 
 | 2044 | 	      "internal: xmlNewStringInputStream string = NULL\n"); | 
 | 2045 | 	return(NULL); | 
 | 2046 |     } | 
 | 2047 |     if (xmlParserDebugEntities) | 
 | 2048 | 	xmlGenericError(xmlGenericErrorContext, | 
 | 2049 | 		"new fixed input: %.30s\n", buffer); | 
 | 2050 |     input = xmlNewInputStream(ctxt); | 
 | 2051 |     if (input == NULL) { | 
 | 2052 | 	return(NULL); | 
 | 2053 |     } | 
 | 2054 |     input->base = buffer; | 
 | 2055 |     input->cur = buffer; | 
 | 2056 |     input->length = xmlStrlen(buffer); | 
| Daniel Veillard | 48b2f89 | 2001-02-25 16:11:03 +0000 | [diff] [blame] | 2057 |     input->end = &buffer[input->length]; | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 2058 |     return(input); | 
 | 2059 | } | 
 | 2060 |  | 
 | 2061 | /** | 
 | 2062 |  * xmlNewInputFromFile: | 
 | 2063 |  * @ctxt:  an XML parser context | 
 | 2064 |  * @filename:  the filename to use as entity | 
 | 2065 |  * | 
 | 2066 |  * Create a new input stream based on a file. | 
 | 2067 |  * | 
 | 2068 |  * Returns the new input stream or NULL in case of error | 
 | 2069 |  */ | 
 | 2070 | xmlParserInputPtr | 
 | 2071 | xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { | 
 | 2072 |     xmlParserInputBufferPtr buf; | 
 | 2073 |     xmlParserInputPtr inputStream; | 
 | 2074 |     char *directory = NULL; | 
 | 2075 |     xmlChar *URI = NULL; | 
 | 2076 |  | 
 | 2077 |     if (xmlParserDebugEntities) | 
 | 2078 | 	xmlGenericError(xmlGenericErrorContext, | 
 | 2079 | 		"new input from file: %s\n", filename); | 
 | 2080 |     if (ctxt == NULL) return(NULL); | 
 | 2081 |     buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); | 
 | 2082 |     if (buf == NULL) | 
 | 2083 | 	return(NULL); | 
 | 2084 |  | 
 | 2085 |     URI = xmlStrdup((xmlChar *) filename); | 
 | 2086 |     directory = xmlParserGetDirectory((const char *) URI); | 
 | 2087 |  | 
 | 2088 |     inputStream = xmlNewInputStream(ctxt); | 
 | 2089 |     if (inputStream == NULL) { | 
 | 2090 | 	if (directory != NULL) xmlFree((char *) directory); | 
 | 2091 | 	if (URI != NULL) xmlFree((char *) URI); | 
 | 2092 | 	return(NULL); | 
 | 2093 |     } | 
 | 2094 |  | 
 | 2095 |     inputStream->filename = (const char *) URI; | 
 | 2096 |     inputStream->directory = directory; | 
 | 2097 |     inputStream->buf = buf; | 
 | 2098 |  | 
 | 2099 |     inputStream->base = inputStream->buf->buffer->content; | 
 | 2100 |     inputStream->cur = inputStream->buf->buffer->content; | 
| Daniel Veillard | 48b2f89 | 2001-02-25 16:11:03 +0000 | [diff] [blame] | 2101 |     inputStream->end = &inputStream->base[inputStream->buf->buffer->use]; | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 2102 |     if ((ctxt->directory == NULL) && (directory != NULL)) | 
 | 2103 |         ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory); | 
 | 2104 |     return(inputStream); | 
 | 2105 | } | 
 | 2106 |  | 
 | 2107 | /************************************************************************ | 
 | 2108 |  *									* | 
 | 2109 |  *		Commodity functions to handle parser contexts		* | 
 | 2110 |  *									* | 
 | 2111 |  ************************************************************************/ | 
 | 2112 |  | 
 | 2113 | /** | 
 | 2114 |  * xmlInitParserCtxt: | 
 | 2115 |  * @ctxt:  an XML parser context | 
 | 2116 |  * | 
 | 2117 |  * Initialize a parser context | 
 | 2118 |  */ | 
 | 2119 |  | 
 | 2120 | void | 
 | 2121 | xmlInitParserCtxt(xmlParserCtxtPtr ctxt) | 
 | 2122 | { | 
 | 2123 |     xmlSAXHandler *sax; | 
 | 2124 |  | 
 | 2125 |     xmlDefaultSAXHandlerInit(); | 
 | 2126 |  | 
 | 2127 |     sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); | 
 | 2128 |     if (sax == NULL) { | 
 | 2129 |         xmlGenericError(xmlGenericErrorContext, | 
 | 2130 | 		"xmlInitParserCtxt: out of memory\n"); | 
 | 2131 |     } | 
 | 2132 |     else | 
 | 2133 |         memset(sax, 0, sizeof(xmlSAXHandler)); | 
 | 2134 |  | 
 | 2135 |     /* Allocate the Input stack */ | 
 | 2136 |     ctxt->inputTab = (xmlParserInputPtr *) | 
 | 2137 | 	        xmlMalloc(5 * sizeof(xmlParserInputPtr)); | 
 | 2138 |     if (ctxt->inputTab == NULL) { | 
 | 2139 |         xmlGenericError(xmlGenericErrorContext, | 
 | 2140 | 		"xmlInitParserCtxt: out of memory\n"); | 
 | 2141 | 	ctxt->inputNr = 0; | 
 | 2142 | 	ctxt->inputMax = 0; | 
 | 2143 | 	ctxt->input = NULL; | 
 | 2144 | 	return; | 
 | 2145 |     } | 
 | 2146 |     ctxt->inputNr = 0; | 
 | 2147 |     ctxt->inputMax = 5; | 
 | 2148 |     ctxt->input = NULL; | 
 | 2149 |  | 
 | 2150 |     ctxt->version = NULL; | 
 | 2151 |     ctxt->encoding = NULL; | 
 | 2152 |     ctxt->standalone = -1; | 
 | 2153 |     ctxt->hasExternalSubset = 0; | 
 | 2154 |     ctxt->hasPErefs = 0; | 
 | 2155 |     ctxt->html = 0; | 
 | 2156 |     ctxt->external = 0; | 
 | 2157 |     ctxt->instate = XML_PARSER_START; | 
 | 2158 |     ctxt->token = 0; | 
 | 2159 |     ctxt->directory = NULL; | 
 | 2160 |  | 
 | 2161 |     /* Allocate the Node stack */ | 
 | 2162 |     ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr)); | 
 | 2163 |     if (ctxt->nodeTab == NULL) { | 
 | 2164 |         xmlGenericError(xmlGenericErrorContext, | 
 | 2165 | 		"xmlInitParserCtxt: out of memory\n"); | 
 | 2166 | 	ctxt->nodeNr = 0; | 
 | 2167 | 	ctxt->nodeMax = 0; | 
 | 2168 | 	ctxt->node = NULL; | 
 | 2169 | 	ctxt->inputNr = 0; | 
 | 2170 | 	ctxt->inputMax = 0; | 
 | 2171 | 	ctxt->input = NULL; | 
 | 2172 | 	return; | 
 | 2173 |     } | 
 | 2174 |     ctxt->nodeNr = 0; | 
 | 2175 |     ctxt->nodeMax = 10; | 
 | 2176 |     ctxt->node = NULL; | 
 | 2177 |  | 
 | 2178 |     /* Allocate the Name stack */ | 
 | 2179 |     ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); | 
 | 2180 |     if (ctxt->nameTab == NULL) { | 
 | 2181 |         xmlGenericError(xmlGenericErrorContext, | 
 | 2182 | 		"xmlInitParserCtxt: out of memory\n"); | 
 | 2183 | 	ctxt->nodeNr = 0; | 
 | 2184 | 	ctxt->nodeMax = 0; | 
 | 2185 | 	ctxt->node = NULL; | 
 | 2186 | 	ctxt->inputNr = 0; | 
 | 2187 | 	ctxt->inputMax = 0; | 
 | 2188 | 	ctxt->input = NULL; | 
 | 2189 | 	ctxt->nameNr = 0; | 
 | 2190 | 	ctxt->nameMax = 0; | 
 | 2191 | 	ctxt->name = NULL; | 
 | 2192 | 	return; | 
 | 2193 |     } | 
 | 2194 |     ctxt->nameNr = 0; | 
 | 2195 |     ctxt->nameMax = 10; | 
 | 2196 |     ctxt->name = NULL; | 
 | 2197 |  | 
 | 2198 |     /* Allocate the space stack */ | 
 | 2199 |     ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int)); | 
 | 2200 |     if (ctxt->spaceTab == NULL) { | 
 | 2201 |         xmlGenericError(xmlGenericErrorContext, | 
 | 2202 | 		"xmlInitParserCtxt: out of memory\n"); | 
 | 2203 | 	ctxt->nodeNr = 0; | 
 | 2204 | 	ctxt->nodeMax = 0; | 
 | 2205 | 	ctxt->node = NULL; | 
 | 2206 | 	ctxt->inputNr = 0; | 
 | 2207 | 	ctxt->inputMax = 0; | 
 | 2208 | 	ctxt->input = NULL; | 
 | 2209 | 	ctxt->nameNr = 0; | 
 | 2210 | 	ctxt->nameMax = 0; | 
 | 2211 | 	ctxt->name = NULL; | 
 | 2212 | 	ctxt->spaceNr = 0; | 
 | 2213 | 	ctxt->spaceMax = 0; | 
 | 2214 | 	ctxt->space = NULL; | 
 | 2215 | 	return; | 
 | 2216 |     } | 
 | 2217 |     ctxt->spaceNr = 1; | 
 | 2218 |     ctxt->spaceMax = 10; | 
 | 2219 |     ctxt->spaceTab[0] = -1; | 
 | 2220 |     ctxt->space = &ctxt->spaceTab[0]; | 
 | 2221 |  | 
| Daniel Veillard | 14be0a1 | 2001-03-03 18:50:55 +0000 | [diff] [blame] | 2222 |     ctxt->sax = sax; | 
 | 2223 |     memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler)); | 
 | 2224 |  | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 2225 |     ctxt->userData = ctxt; | 
 | 2226 |     ctxt->myDoc = NULL; | 
 | 2227 |     ctxt->wellFormed = 1; | 
 | 2228 |     ctxt->valid = 1; | 
 | 2229 |     ctxt->loadsubset = xmlLoadExtDtdDefaultValue; | 
 | 2230 |     ctxt->validate = xmlDoValidityCheckingDefaultValue; | 
 | 2231 |     ctxt->pedantic = xmlPedanticParserDefaultValue; | 
 | 2232 |     ctxt->keepBlanks = xmlKeepBlanksDefaultValue; | 
 | 2233 |     ctxt->vctxt.userData = ctxt; | 
 | 2234 |     if (ctxt->validate) { | 
 | 2235 | 	ctxt->vctxt.error = xmlParserValidityError; | 
 | 2236 | 	if (xmlGetWarningsDefaultValue == 0) | 
 | 2237 | 	    ctxt->vctxt.warning = NULL; | 
 | 2238 | 	else | 
 | 2239 | 	    ctxt->vctxt.warning = xmlParserValidityWarning; | 
| Daniel Veillard | 34b1b3a | 2001-04-21 14:16:10 +0000 | [diff] [blame] | 2240 | 	ctxt->vctxt.nodeMax = 0; | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 2241 |     } else { | 
 | 2242 | 	ctxt->vctxt.error = NULL; | 
 | 2243 | 	ctxt->vctxt.warning = NULL; | 
 | 2244 |     } | 
 | 2245 |     ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; | 
 | 2246 |     ctxt->record_info = 0; | 
 | 2247 |     ctxt->nbChars = 0; | 
 | 2248 |     ctxt->checkIndex = 0; | 
 | 2249 |     ctxt->inSubset = 0; | 
 | 2250 |     ctxt->errNo = XML_ERR_OK; | 
 | 2251 |     ctxt->depth = 0; | 
 | 2252 |     ctxt->charset = XML_CHAR_ENCODING_UTF8; | 
 | 2253 |     xmlInitNodeInfoSeq(&ctxt->node_seq); | 
 | 2254 | } | 
 | 2255 |  | 
 | 2256 | /** | 
 | 2257 |  * xmlFreeParserCtxt: | 
 | 2258 |  * @ctxt:  an XML parser context | 
 | 2259 |  * | 
 | 2260 |  * Free all the memory used by a parser context. However the parsed | 
 | 2261 |  * document in ctxt->myDoc is not freed. | 
 | 2262 |  */ | 
 | 2263 |  | 
 | 2264 | void | 
 | 2265 | xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) | 
 | 2266 | { | 
 | 2267 |     xmlParserInputPtr input; | 
 | 2268 |     xmlChar *oldname; | 
 | 2269 |  | 
 | 2270 |     if (ctxt == NULL) return; | 
 | 2271 |  | 
 | 2272 |     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ | 
 | 2273 |         xmlFreeInputStream(input); | 
 | 2274 |     } | 
 | 2275 |     while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */ | 
 | 2276 | 	xmlFree(oldname); | 
 | 2277 |     } | 
 | 2278 |     if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab); | 
 | 2279 |     if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab); | 
 | 2280 |     if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab); | 
 | 2281 |     if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab); | 
 | 2282 |     if (ctxt->version != NULL) xmlFree((char *) ctxt->version); | 
 | 2283 |     if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding); | 
 | 2284 |     if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName); | 
 | 2285 |     if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI); | 
 | 2286 |     if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem); | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 2287 |     if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler)) | 
 | 2288 |         xmlFree(ctxt->sax); | 
 | 2289 |     if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory); | 
| Daniel Veillard | a9142e7 | 2001-06-19 11:07:54 +0000 | [diff] [blame] | 2290 |     if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab); | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 2291 |     xmlFree(ctxt); | 
 | 2292 | } | 
 | 2293 |  | 
 | 2294 | /** | 
 | 2295 |  * xmlNewParserCtxt: | 
 | 2296 |  * | 
 | 2297 |  * Allocate and initialize a new parser context. | 
 | 2298 |  * | 
 | 2299 |  * Returns the xmlParserCtxtPtr or NULL | 
 | 2300 |  */ | 
 | 2301 |  | 
 | 2302 | xmlParserCtxtPtr | 
 | 2303 | xmlNewParserCtxt() | 
 | 2304 | { | 
 | 2305 |     xmlParserCtxtPtr ctxt; | 
 | 2306 |  | 
 | 2307 |     ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt)); | 
 | 2308 |     if (ctxt == NULL) { | 
 | 2309 |         xmlGenericError(xmlGenericErrorContext, | 
 | 2310 | 		"xmlNewParserCtxt : cannot allocate context\n"); | 
 | 2311 |         perror("malloc"); | 
 | 2312 | 	return(NULL); | 
 | 2313 |     } | 
 | 2314 |     memset(ctxt, 0, sizeof(xmlParserCtxt)); | 
 | 2315 |     xmlInitParserCtxt(ctxt); | 
 | 2316 |     return(ctxt); | 
 | 2317 | } | 
 | 2318 |  | 
 | 2319 | /************************************************************************ | 
 | 2320 |  *									* | 
 | 2321 |  *		Handling of node informations				* | 
 | 2322 |  *									* | 
 | 2323 |  ************************************************************************/ | 
 | 2324 |  | 
 | 2325 | /** | 
 | 2326 |  * xmlClearParserCtxt: | 
 | 2327 |  * @ctxt:  an XML parser context | 
 | 2328 |  * | 
 | 2329 |  * Clear (release owned resources) and reinitialize a parser context | 
 | 2330 |  */ | 
 | 2331 |  | 
 | 2332 | void | 
 | 2333 | xmlClearParserCtxt(xmlParserCtxtPtr ctxt) | 
 | 2334 | { | 
 | 2335 |   xmlClearNodeInfoSeq(&ctxt->node_seq); | 
 | 2336 |   xmlInitParserCtxt(ctxt); | 
 | 2337 | } | 
 | 2338 |  | 
 | 2339 | /** | 
 | 2340 |  * xmlParserFindNodeInfo: | 
 | 2341 |  * @ctxt:  an XML parser context | 
 | 2342 |  * @node:  an XML node within the tree | 
 | 2343 |  * | 
 | 2344 |  * Find the parser node info struct for a given node | 
 | 2345 |  *  | 
 | 2346 |  * Returns an xmlParserNodeInfo block pointer or NULL | 
 | 2347 |  */ | 
 | 2348 | const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx, | 
 | 2349 |                                                const xmlNode* node) | 
 | 2350 | { | 
 | 2351 |   unsigned long pos; | 
 | 2352 |  | 
 | 2353 |   /* Find position where node should be at */ | 
 | 2354 |   pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node); | 
 | 2355 |   if ( ctx->node_seq.buffer[pos].node == node ) | 
 | 2356 |     return &ctx->node_seq.buffer[pos]; | 
 | 2357 |   else | 
 | 2358 |     return NULL; | 
 | 2359 | } | 
 | 2360 |  | 
 | 2361 |  | 
 | 2362 | /** | 
 | 2363 |  * xmlInitNodeInfoSeq: | 
 | 2364 |  * @seq:  a node info sequence pointer | 
 | 2365 |  * | 
 | 2366 |  * -- Initialize (set to initial state) node info sequence | 
 | 2367 |  */ | 
 | 2368 | void | 
 | 2369 | xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) | 
 | 2370 | { | 
 | 2371 |   seq->length = 0; | 
 | 2372 |   seq->maximum = 0; | 
 | 2373 |   seq->buffer = NULL; | 
 | 2374 | } | 
 | 2375 |  | 
 | 2376 | /** | 
 | 2377 |  * xmlClearNodeInfoSeq: | 
 | 2378 |  * @seq:  a node info sequence pointer | 
 | 2379 |  * | 
 | 2380 |  * -- Clear (release memory and reinitialize) node | 
 | 2381 |  *   info sequence | 
 | 2382 |  */ | 
 | 2383 | void | 
 | 2384 | xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) | 
 | 2385 | { | 
 | 2386 |   if ( seq->buffer != NULL ) | 
 | 2387 |     xmlFree(seq->buffer); | 
 | 2388 |   xmlInitNodeInfoSeq(seq); | 
 | 2389 | } | 
 | 2390 |  | 
 | 2391 |  | 
 | 2392 | /** | 
 | 2393 |  * xmlParserFindNodeInfoIndex: | 
 | 2394 |  * @seq:  a node info sequence pointer | 
 | 2395 |  * @node:  an XML node pointer | 
 | 2396 |  * | 
 | 2397 |  *  | 
 | 2398 |  * xmlParserFindNodeInfoIndex : Find the index that the info record for | 
 | 2399 |  *   the given node is or should be at in a sorted sequence | 
 | 2400 |  * | 
 | 2401 |  * Returns a long indicating the position of the record | 
 | 2402 |  */ | 
 | 2403 | unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq, | 
 | 2404 |                                          const xmlNode* node) | 
 | 2405 | { | 
 | 2406 |   unsigned long upper, lower, middle; | 
 | 2407 |   int found = 0; | 
 | 2408 |  | 
 | 2409 |   /* Do a binary search for the key */ | 
 | 2410 |   lower = 1; | 
 | 2411 |   upper = seq->length; | 
 | 2412 |   middle = 0; | 
 | 2413 |   while ( lower <= upper && !found) { | 
 | 2414 |     middle = lower + (upper - lower) / 2; | 
 | 2415 |     if ( node == seq->buffer[middle - 1].node ) | 
 | 2416 |       found = 1; | 
 | 2417 |     else if ( node < seq->buffer[middle - 1].node ) | 
 | 2418 |       upper = middle - 1; | 
 | 2419 |     else | 
 | 2420 |       lower = middle + 1; | 
 | 2421 |   } | 
 | 2422 |  | 
 | 2423 |   /* Return position */ | 
 | 2424 |   if ( middle == 0 || seq->buffer[middle - 1].node < node ) | 
 | 2425 |     return middle; | 
 | 2426 |   else  | 
 | 2427 |     return middle - 1; | 
 | 2428 | } | 
 | 2429 |  | 
 | 2430 |  | 
 | 2431 | /** | 
 | 2432 |  * xmlParserAddNodeInfo: | 
 | 2433 |  * @ctxt:  an XML parser context | 
 | 2434 |  * @info:  a node info sequence pointer | 
 | 2435 |  * | 
 | 2436 |  * Insert node info record into the sorted sequence | 
 | 2437 |  */ | 
 | 2438 | void | 
 | 2439 | xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,  | 
 | 2440 |                      const xmlParserNodeInfo* info) | 
 | 2441 | { | 
 | 2442 |   unsigned long pos; | 
 | 2443 |   static unsigned int block_size = 5; | 
 | 2444 |  | 
 | 2445 |   /* Find pos and check to see if node is already in the sequence */ | 
 | 2446 |   pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node); | 
 | 2447 |   if ( pos < ctxt->node_seq.length | 
 | 2448 |        && ctxt->node_seq.buffer[pos].node == info->node ) { | 
 | 2449 |     ctxt->node_seq.buffer[pos] = *info; | 
 | 2450 |   } | 
 | 2451 |  | 
 | 2452 |   /* Otherwise, we need to add new node to buffer */ | 
 | 2453 |   else { | 
 | 2454 |     /* Expand buffer by 5 if needed */ | 
 | 2455 |     if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) { | 
 | 2456 |       xmlParserNodeInfo* tmp_buffer; | 
 | 2457 |       unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer) | 
 | 2458 |                                 *(ctxt->node_seq.maximum + block_size)); | 
 | 2459 |  | 
 | 2460 |       if ( ctxt->node_seq.buffer == NULL ) | 
 | 2461 |         tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size); | 
 | 2462 |       else  | 
 | 2463 |         tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size); | 
 | 2464 |  | 
 | 2465 |       if ( tmp_buffer == NULL ) { | 
 | 2466 |         if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 2467 | 	    ctxt->sax->error(ctxt->userData, "Out of memory\n"); | 
 | 2468 | 	ctxt->errNo = XML_ERR_NO_MEMORY; | 
 | 2469 |         return; | 
 | 2470 |       } | 
 | 2471 |       ctxt->node_seq.buffer = tmp_buffer; | 
 | 2472 |       ctxt->node_seq.maximum += block_size; | 
 | 2473 |     } | 
 | 2474 |  | 
 | 2475 |     /* If position is not at end, move elements out of the way */ | 
 | 2476 |     if ( pos != ctxt->node_seq.length ) { | 
 | 2477 |       unsigned long i; | 
 | 2478 |  | 
 | 2479 |       for ( i = ctxt->node_seq.length; i > pos; i-- ) | 
 | 2480 |         ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1]; | 
 | 2481 |     } | 
 | 2482 |    | 
 | 2483 |     /* Copy element and increase length */ | 
 | 2484 |     ctxt->node_seq.buffer[pos] = *info; | 
 | 2485 |     ctxt->node_seq.length++; | 
 | 2486 |   }    | 
 | 2487 | } | 
 | 2488 |  | 
 | 2489 | /************************************************************************ | 
 | 2490 |  *									* | 
 | 2491 |  *		Deprecated functions kept for compatibility		* | 
 | 2492 |  *									* | 
 | 2493 |  ************************************************************************/ | 
 | 2494 |  | 
 | 2495 | /* | 
 | 2496 |  * xmlCheckLanguageID | 
 | 2497 |  * @lang:  pointer to the string value | 
 | 2498 |  * | 
 | 2499 |  * Checks that the value conforms to the LanguageID production: | 
 | 2500 |  * | 
 | 2501 |  * NOTE: this is somewhat deprecated, those productions were removed from | 
 | 2502 |  *       the XML Second edition. | 
 | 2503 |  * | 
 | 2504 |  * [33] LanguageID ::= Langcode ('-' Subcode)* | 
 | 2505 |  * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode | 
 | 2506 |  * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) | 
 | 2507 |  * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ | 
 | 2508 |  * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ | 
 | 2509 |  * [38] Subcode ::= ([a-z] | [A-Z])+ | 
 | 2510 |  * | 
 | 2511 |  * Returns 1 if correct 0 otherwise | 
 | 2512 |  **/ | 
 | 2513 | int | 
 | 2514 | xmlCheckLanguageID(const xmlChar *lang) { | 
 | 2515 |     const xmlChar *cur = lang; | 
 | 2516 |  | 
 | 2517 |     if (cur == NULL) | 
 | 2518 | 	return(0); | 
 | 2519 |     if (((cur[0] == 'i') && (cur[1] == '-')) || | 
 | 2520 | 	((cur[0] == 'I') && (cur[1] == '-'))) { | 
 | 2521 | 	/* | 
 | 2522 | 	 * IANA code | 
 | 2523 | 	 */ | 
 | 2524 | 	cur += 2; | 
 | 2525 |         while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ | 
 | 2526 | 	       ((cur[0] >= 'a') && (cur[0] <= 'z'))) | 
 | 2527 | 	    cur++; | 
 | 2528 |     } else if (((cur[0] == 'x') && (cur[1] == '-')) || | 
 | 2529 | 	       ((cur[0] == 'X') && (cur[1] == '-'))) { | 
 | 2530 | 	/* | 
 | 2531 | 	 * User code | 
 | 2532 | 	 */ | 
 | 2533 | 	cur += 2; | 
 | 2534 |         while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ | 
 | 2535 | 	       ((cur[0] >= 'a') && (cur[0] <= 'z'))) | 
 | 2536 | 	    cur++; | 
 | 2537 |     } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || | 
 | 2538 | 	       ((cur[0] >= 'a') && (cur[0] <= 'z'))) { | 
 | 2539 | 	/* | 
 | 2540 | 	 * ISO639 | 
 | 2541 | 	 */ | 
 | 2542 | 	cur++; | 
 | 2543 |         if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || | 
 | 2544 | 	    ((cur[0] >= 'a') && (cur[0] <= 'z'))) | 
 | 2545 | 	    cur++; | 
 | 2546 | 	else | 
 | 2547 | 	    return(0); | 
 | 2548 |     } else | 
 | 2549 | 	return(0); | 
 | 2550 |     while (cur[0] != 0) { /* non input consuming */ | 
 | 2551 | 	if (cur[0] != '-') | 
 | 2552 | 	    return(0); | 
 | 2553 | 	cur++; | 
 | 2554 |         if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || | 
 | 2555 | 	    ((cur[0] >= 'a') && (cur[0] <= 'z'))) | 
 | 2556 | 	    cur++; | 
 | 2557 | 	else | 
 | 2558 | 	    return(0); | 
 | 2559 |         while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ | 
 | 2560 | 	       ((cur[0] >= 'a') && (cur[0] <= 'z'))) | 
 | 2561 | 	    cur++; | 
 | 2562 |     } | 
 | 2563 |     return(1); | 
 | 2564 | } | 
 | 2565 |  | 
 | 2566 | /** | 
 | 2567 |  * xmlDecodeEntities: | 
 | 2568 |  * @ctxt:  the parser context | 
 | 2569 |  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF | 
 | 2570 |  * @len:  the len to decode (in bytes !), -1 for no size limit | 
 | 2571 |  * @end:  an end marker xmlChar, 0 if none | 
 | 2572 |  * @end2:  an end marker xmlChar, 0 if none | 
 | 2573 |  * @end3:  an end marker xmlChar, 0 if none | 
 | 2574 |  *  | 
 | 2575 |  * This function is deprecated, we now always process entities content | 
 | 2576 |  * through xmlStringDecodeEntities | 
 | 2577 |  * | 
 | 2578 |  * TODO: remove it in next major release. | 
 | 2579 |  * | 
 | 2580 |  * [67] Reference ::= EntityRef | CharRef | 
 | 2581 |  * | 
 | 2582 |  * [69] PEReference ::= '%' Name ';' | 
 | 2583 |  * | 
 | 2584 |  * Returns A newly allocated string with the substitution done. The caller | 
 | 2585 |  *      must deallocate it ! | 
 | 2586 |  */ | 
 | 2587 | xmlChar * | 
| Daniel Veillard | c86a4fa | 2001-03-26 16:28:29 +0000 | [diff] [blame] | 2588 | xmlDecodeEntities(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED, int what ATTRIBUTE_UNUSED, | 
 | 2589 | 	      xmlChar end ATTRIBUTE_UNUSED, xmlChar  end2 ATTRIBUTE_UNUSED, xmlChar end3 ATTRIBUTE_UNUSED) { | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 2590 | #if 0 | 
 | 2591 |     xmlChar *buffer = NULL; | 
 | 2592 |     unsigned int buffer_size = 0; | 
 | 2593 |     unsigned int nbchars = 0; | 
 | 2594 |  | 
 | 2595 |     xmlChar *current = NULL; | 
 | 2596 |     xmlEntityPtr ent; | 
 | 2597 |     unsigned int max = (unsigned int) len; | 
 | 2598 |     int c,l; | 
 | 2599 | #endif | 
 | 2600 |  | 
 | 2601 |     static int deprecated = 0; | 
 | 2602 |     if (!deprecated) { | 
 | 2603 | 	xmlGenericError(xmlGenericErrorContext, | 
 | 2604 | 		"xmlDecodeEntities() deprecated function reached\n"); | 
 | 2605 | 	deprecated = 1; | 
 | 2606 |     } | 
 | 2607 |  | 
 | 2608 | #if 0 | 
 | 2609 |     if (ctxt->depth > 40) { | 
 | 2610 | 	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 2611 | 	    ctxt->sax->error(ctxt->userData, | 
 | 2612 | 		"Detected entity reference loop\n"); | 
 | 2613 | 	ctxt->wellFormed = 0; | 
 | 2614 | 	ctxt->disableSAX = 1; | 
 | 2615 | 	ctxt->errNo = XML_ERR_ENTITY_LOOP; | 
 | 2616 | 	return(NULL); | 
 | 2617 |     } | 
 | 2618 |  | 
 | 2619 |     /* | 
 | 2620 |      * allocate a translation buffer. | 
 | 2621 |      */ | 
 | 2622 |     buffer_size = XML_PARSER_BIG_BUFFER_SIZE; | 
 | 2623 |     buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); | 
 | 2624 |     if (buffer == NULL) { | 
 | 2625 | 	perror("xmlDecodeEntities: malloc failed"); | 
 | 2626 | 	return(NULL); | 
 | 2627 |     } | 
 | 2628 |  | 
 | 2629 |     /* | 
 | 2630 |      * Ok loop until we reach one of the ending char or a size limit. | 
 | 2631 |      */ | 
 | 2632 |     GROW; | 
 | 2633 |     c = CUR_CHAR(l); | 
 | 2634 |     while ((nbchars < max) && (c != end) && /* NOTUSED */ | 
 | 2635 |            (c != end2) && (c != end3)) { | 
 | 2636 | 	GROW; | 
 | 2637 | 	if (c == 0) break; | 
 | 2638 |         if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) { | 
 | 2639 | 	    int val = xmlParseCharRef(ctxt); | 
 | 2640 | 	    COPY_BUF(0,buffer,nbchars,val); | 
 | 2641 | 	    NEXTL(l); | 
 | 2642 | 	} else if ((c == '&') && (ctxt->token != '&') && | 
 | 2643 | 		   (what & XML_SUBSTITUTE_REF)) { | 
 | 2644 | 	    if (xmlParserDebugEntities) | 
 | 2645 | 		xmlGenericError(xmlGenericErrorContext, | 
 | 2646 | 			"decoding Entity Reference\n"); | 
 | 2647 | 	    ent = xmlParseEntityRef(ctxt); | 
 | 2648 | 	    if ((ent != NULL) &&  | 
 | 2649 | 		(ctxt->replaceEntities != 0)) { | 
 | 2650 | 		current = ent->content; | 
 | 2651 | 		while (*current != 0) { /* non input consuming loop */ | 
 | 2652 | 		    buffer[nbchars++] = *current++; | 
 | 2653 | 		    if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { | 
 | 2654 | 			growBuffer(buffer); | 
 | 2655 | 		    } | 
 | 2656 | 		} | 
 | 2657 | 	    } else if (ent != NULL) { | 
 | 2658 | 		const xmlChar *cur = ent->name; | 
 | 2659 |  | 
 | 2660 | 		buffer[nbchars++] = '&'; | 
 | 2661 | 		if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { | 
 | 2662 | 		    growBuffer(buffer); | 
 | 2663 | 		} | 
 | 2664 | 		while (*cur != 0) { /* non input consuming loop */ | 
 | 2665 | 		    buffer[nbchars++] = *cur++; | 
 | 2666 | 		} | 
 | 2667 | 		buffer[nbchars++] = ';'; | 
 | 2668 | 	    } | 
 | 2669 | 	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { | 
 | 2670 | 	    /* | 
 | 2671 | 	     * a PEReference induce to switch the entity flow, | 
 | 2672 | 	     * we break here to flush the current set of chars | 
 | 2673 | 	     * parsed if any. We will be called back later. | 
 | 2674 | 	     */ | 
 | 2675 | 	    if (xmlParserDebugEntities) | 
 | 2676 | 		xmlGenericError(xmlGenericErrorContext, | 
 | 2677 | 			"decoding PE Reference\n"); | 
 | 2678 | 	    if (nbchars != 0) break; | 
 | 2679 |  | 
 | 2680 | 	    xmlParsePEReference(ctxt); | 
 | 2681 |  | 
 | 2682 | 	    /* | 
 | 2683 | 	     * Pop-up of finished entities. | 
 | 2684 | 	     */ | 
 | 2685 | 	    while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ | 
 | 2686 | 		xmlPopInput(ctxt); | 
 | 2687 |  | 
 | 2688 | 	    break; | 
 | 2689 | 	} else { | 
 | 2690 | 	    COPY_BUF(l,buffer,nbchars,c); | 
 | 2691 | 	    NEXTL(l); | 
 | 2692 | 	    if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { | 
 | 2693 | 	      growBuffer(buffer); | 
 | 2694 | 	    } | 
 | 2695 | 	} | 
 | 2696 | 	c = CUR_CHAR(l); | 
 | 2697 |     } | 
 | 2698 |     buffer[nbchars++] = 0; | 
 | 2699 |     return(buffer); | 
 | 2700 | #endif | 
 | 2701 |     return(NULL); | 
 | 2702 | } | 
 | 2703 |  | 
 | 2704 | /** | 
 | 2705 |  * xmlNamespaceParseNCName: | 
 | 2706 |  * @ctxt:  an XML parser context | 
 | 2707 |  * | 
 | 2708 |  * parse an XML namespace name. | 
 | 2709 |  * | 
 | 2710 |  * TODO: this seems not in use anymore, the namespace handling is done on | 
 | 2711 |  *       top of the SAX interfaces, i.e. not on raw input. | 
 | 2712 |  * | 
 | 2713 |  * [NS 3] NCName ::= (Letter | '_') (NCNameChar)* | 
 | 2714 |  * | 
 | 2715 |  * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | | 
 | 2716 |  *                       CombiningChar | Extender | 
 | 2717 |  * | 
 | 2718 |  * Returns the namespace name or NULL | 
 | 2719 |  */ | 
 | 2720 |  | 
 | 2721 | xmlChar * | 
| Daniel Veillard | c86a4fa | 2001-03-26 16:28:29 +0000 | [diff] [blame] | 2722 | xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) { | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 2723 | #if 0 | 
 | 2724 |     xmlChar buf[XML_MAX_NAMELEN + 5]; | 
 | 2725 |     int len = 0, l; | 
 | 2726 |     int cur = CUR_CHAR(l); | 
 | 2727 | #endif | 
 | 2728 |  | 
 | 2729 |     static int deprecated = 0; | 
 | 2730 |     if (!deprecated) { | 
 | 2731 | 	xmlGenericError(xmlGenericErrorContext, | 
 | 2732 | 		"xmlNamespaceParseNCName() deprecated function reached\n"); | 
 | 2733 | 	deprecated = 1; | 
 | 2734 |     } | 
 | 2735 |  | 
 | 2736 | #if 0 | 
 | 2737 |     /* load first the value of the char !!! */ | 
 | 2738 |     GROW; | 
 | 2739 |     if (!IS_LETTER(cur) && (cur != '_')) return(NULL); | 
 | 2740 |  | 
 | 2741 | xmlGenericError(xmlGenericErrorContext, | 
 | 2742 | 	"xmlNamespaceParseNCName: reached loop 3\n"); | 
 | 2743 |     while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */ | 
 | 2744 |            (cur == '.') || (cur == '-') || | 
 | 2745 | 	   (cur == '_') || | 
 | 2746 | 	   (IS_COMBINING(cur)) || | 
 | 2747 | 	   (IS_EXTENDER(cur))) { | 
 | 2748 | 	COPY_BUF(l,buf,len,cur); | 
 | 2749 | 	NEXTL(l); | 
 | 2750 | 	cur = CUR_CHAR(l); | 
 | 2751 | 	if (len >= XML_MAX_NAMELEN) { | 
 | 2752 | 	    xmlGenericError(xmlGenericErrorContext,  | 
 | 2753 | 	       "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n"); | 
 | 2754 | 	    while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */ | 
 | 2755 | 		   (cur == '.') || (cur == '-') || | 
 | 2756 | 		   (cur == '_') || | 
 | 2757 | 		   (IS_COMBINING(cur)) || | 
 | 2758 | 		   (IS_EXTENDER(cur))) { | 
 | 2759 | 		NEXTL(l); | 
 | 2760 | 		cur = CUR_CHAR(l); | 
 | 2761 | 	    } | 
 | 2762 | 	    break; | 
 | 2763 | 	} | 
 | 2764 |     } | 
 | 2765 |     return(xmlStrndup(buf, len)); | 
 | 2766 | #endif | 
 | 2767 |     return(NULL); | 
 | 2768 | } | 
 | 2769 |  | 
 | 2770 | /** | 
 | 2771 |  * xmlNamespaceParseQName: | 
 | 2772 |  * @ctxt:  an XML parser context | 
 | 2773 |  * @prefix:  a xmlChar **  | 
 | 2774 |  * | 
 | 2775 |  * TODO: this seems not in use anymore, the namespace handling is done on | 
 | 2776 |  *       top of the SAX interfaces, i.e. not on raw input. | 
 | 2777 |  * | 
 | 2778 |  * parse an XML qualified name | 
 | 2779 |  * | 
 | 2780 |  * [NS 5] QName ::= (Prefix ':')? LocalPart | 
 | 2781 |  * | 
 | 2782 |  * [NS 6] Prefix ::= NCName | 
 | 2783 |  * | 
 | 2784 |  * [NS 7] LocalPart ::= NCName | 
 | 2785 |  * | 
 | 2786 |  * Returns the local part, and prefix is updated | 
 | 2787 |  *   to get the Prefix if any. | 
 | 2788 |  */ | 
 | 2789 |  | 
 | 2790 | xmlChar * | 
| Daniel Veillard | c86a4fa | 2001-03-26 16:28:29 +0000 | [diff] [blame] | 2791 | xmlNamespaceParseQName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlChar **prefix ATTRIBUTE_UNUSED) { | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 2792 |  | 
 | 2793 |     static int deprecated = 0; | 
 | 2794 |     if (!deprecated) { | 
 | 2795 | 	xmlGenericError(xmlGenericErrorContext, | 
 | 2796 | 		"xmlNamespaceParseQName() deprecated function reached\n"); | 
 | 2797 | 	deprecated = 1; | 
 | 2798 |     } | 
 | 2799 |  | 
 | 2800 | #if 0 | 
 | 2801 |     xmlChar *ret = NULL; | 
 | 2802 |  | 
 | 2803 |     *prefix = NULL; | 
 | 2804 |     ret = xmlNamespaceParseNCName(ctxt); | 
 | 2805 |     if (RAW == ':') { | 
 | 2806 |         *prefix = ret; | 
 | 2807 | 	NEXT; | 
 | 2808 | 	ret = xmlNamespaceParseNCName(ctxt); | 
 | 2809 |     } | 
 | 2810 |  | 
 | 2811 |     return(ret); | 
 | 2812 | #endif | 
 | 2813 |     return(NULL); | 
 | 2814 | } | 
 | 2815 |  | 
 | 2816 | /** | 
 | 2817 |  * xmlNamespaceParseNSDef: | 
 | 2818 |  * @ctxt:  an XML parser context | 
 | 2819 |  * | 
 | 2820 |  * parse a namespace prefix declaration | 
 | 2821 |  * | 
 | 2822 |  * TODO: this seems not in use anymore, the namespace handling is done on | 
 | 2823 |  *       top of the SAX interfaces, i.e. not on raw input. | 
 | 2824 |  * | 
 | 2825 |  * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral | 
 | 2826 |  * | 
 | 2827 |  * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)? | 
 | 2828 |  * | 
 | 2829 |  * Returns the namespace name | 
 | 2830 |  */ | 
 | 2831 |  | 
 | 2832 | xmlChar * | 
| Daniel Veillard | c86a4fa | 2001-03-26 16:28:29 +0000 | [diff] [blame] | 2833 | xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) { | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 2834 |     static int deprecated = 0; | 
 | 2835 |     if (!deprecated) { | 
 | 2836 | 	xmlGenericError(xmlGenericErrorContext, | 
 | 2837 | 		"xmlNamespaceParseNSDef() deprecated function reached\n"); | 
 | 2838 | 	deprecated = 1; | 
 | 2839 |     } | 
 | 2840 |     return(NULL); | 
 | 2841 | #if 0 | 
 | 2842 |     xmlChar *name = NULL; | 
 | 2843 |  | 
 | 2844 |     if ((RAW == 'x') && (NXT(1) == 'm') && | 
 | 2845 |         (NXT(2) == 'l') && (NXT(3) == 'n') && | 
 | 2846 | 	(NXT(4) == 's')) { | 
 | 2847 | 	SKIP(5); | 
 | 2848 | 	if (RAW == ':') { | 
 | 2849 | 	    NEXT; | 
 | 2850 | 	    name = xmlNamespaceParseNCName(ctxt); | 
 | 2851 | 	} | 
 | 2852 |     } | 
 | 2853 |     return(name); | 
 | 2854 | #endif | 
 | 2855 | } | 
 | 2856 |  | 
 | 2857 | /** | 
 | 2858 |  * xmlParseQuotedString: | 
 | 2859 |  * @ctxt:  an XML parser context | 
 | 2860 |  * | 
 | 2861 |  * Parse and return a string between quotes or doublequotes | 
 | 2862 |  * | 
 | 2863 |  * TODO: Deprecated, to  be removed at next drop of binary compatibility | 
 | 2864 |  * | 
 | 2865 |  * Returns the string parser or NULL. | 
 | 2866 |  */ | 
 | 2867 | xmlChar * | 
| Daniel Veillard | c86a4fa | 2001-03-26 16:28:29 +0000 | [diff] [blame] | 2868 | xmlParseQuotedString(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) { | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 2869 |     static int deprecated = 0; | 
 | 2870 |     if (!deprecated) { | 
 | 2871 | 	xmlGenericError(xmlGenericErrorContext, | 
 | 2872 | 		"xmlParseQuotedString() deprecated function reached\n"); | 
 | 2873 | 	deprecated = 1; | 
 | 2874 |     } | 
 | 2875 |     return(NULL); | 
 | 2876 |  | 
 | 2877 | #if 0 | 
 | 2878 |     xmlChar *buf = NULL; | 
 | 2879 |     int len = 0,l; | 
 | 2880 |     int size = XML_PARSER_BUFFER_SIZE; | 
 | 2881 |     int c; | 
 | 2882 |  | 
 | 2883 |     buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); | 
 | 2884 |     if (buf == NULL) { | 
 | 2885 | 	xmlGenericError(xmlGenericErrorContext, | 
 | 2886 | 		"malloc of %d byte failed\n", size); | 
 | 2887 | 	return(NULL); | 
 | 2888 |     } | 
 | 2889 | xmlGenericError(xmlGenericErrorContext, | 
 | 2890 | 	"xmlParseQuotedString: reached loop 4\n"); | 
 | 2891 |     if (RAW == '"') { | 
 | 2892 |         NEXT; | 
 | 2893 | 	c = CUR_CHAR(l); | 
 | 2894 | 	while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */ | 
 | 2895 | 	    if (len + 5 >= size) { | 
 | 2896 | 		size *= 2; | 
 | 2897 | 		buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); | 
 | 2898 | 		if (buf == NULL) { | 
 | 2899 | 		    xmlGenericError(xmlGenericErrorContext, | 
 | 2900 | 			    "realloc of %d byte failed\n", size); | 
 | 2901 | 		    return(NULL); | 
 | 2902 | 		} | 
 | 2903 | 	    } | 
 | 2904 | 	    COPY_BUF(l,buf,len,c); | 
 | 2905 | 	    NEXTL(l); | 
 | 2906 | 	    c = CUR_CHAR(l); | 
 | 2907 | 	} | 
 | 2908 | 	if (c != '"') { | 
 | 2909 | 	    ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; | 
 | 2910 | 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 2911 | 	        ctxt->sax->error(ctxt->userData,  | 
 | 2912 | 			         "String not closed \"%.50s\"\n", buf); | 
 | 2913 | 	    ctxt->wellFormed = 0; | 
 | 2914 | 	    ctxt->disableSAX = 1; | 
 | 2915 |         } else { | 
 | 2916 | 	    NEXT; | 
 | 2917 | 	} | 
 | 2918 |     } else if (RAW == '\''){ | 
 | 2919 |         NEXT; | 
 | 2920 | 	c = CUR; | 
 | 2921 | 	while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */ | 
 | 2922 | 	    if (len + 1 >= size) { | 
 | 2923 | 		size *= 2; | 
 | 2924 | 		buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); | 
 | 2925 | 		if (buf == NULL) { | 
 | 2926 | 		    xmlGenericError(xmlGenericErrorContext, | 
 | 2927 | 			    "realloc of %d byte failed\n", size); | 
 | 2928 | 		    return(NULL); | 
 | 2929 | 		} | 
 | 2930 | 	    } | 
 | 2931 | 	    buf[len++] = c; | 
 | 2932 | 	    NEXT; | 
 | 2933 | 	    c = CUR; | 
 | 2934 | 	} | 
 | 2935 | 	if (RAW != '\'') { | 
 | 2936 | 	    ctxt->errNo = XML_ERR_STRING_NOT_CLOSED; | 
 | 2937 | 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 2938 | 	        ctxt->sax->error(ctxt->userData, | 
 | 2939 | 			         "String not closed \"%.50s\"\n", buf); | 
 | 2940 | 	    ctxt->wellFormed = 0; | 
 | 2941 | 	    ctxt->disableSAX = 1; | 
 | 2942 |         } else { | 
 | 2943 | 	    NEXT; | 
 | 2944 | 	} | 
 | 2945 |     } | 
 | 2946 |     return(buf); | 
 | 2947 | #endif | 
 | 2948 | } | 
 | 2949 |  | 
 | 2950 | /** | 
 | 2951 |  * xmlParseNamespace: | 
 | 2952 |  * @ctxt:  an XML parser context | 
 | 2953 |  * | 
 | 2954 |  * xmlParseNamespace: parse specific PI '<?namespace ...' constructs. | 
 | 2955 |  * | 
 | 2956 |  * This is what the older xml-name Working Draft specified, a bunch of | 
 | 2957 |  * other stuff may still rely on it, so support is still here as | 
 | 2958 |  * if it was declared on the root of the Tree:-( | 
 | 2959 |  * | 
 | 2960 |  * TODO: remove from library | 
 | 2961 |  * | 
 | 2962 |  * To be removed at next drop of binary compatibility | 
 | 2963 |  */ | 
 | 2964 |  | 
 | 2965 | void | 
| Daniel Veillard | c86a4fa | 2001-03-26 16:28:29 +0000 | [diff] [blame] | 2966 | xmlParseNamespace(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) { | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 2967 |     static int deprecated = 0; | 
 | 2968 |     if (!deprecated) { | 
 | 2969 | 	xmlGenericError(xmlGenericErrorContext, | 
 | 2970 | 		"xmlParseNamespace() deprecated function reached\n"); | 
 | 2971 | 	deprecated = 1; | 
 | 2972 |     } | 
 | 2973 |  | 
 | 2974 | #if 0 | 
 | 2975 |     xmlChar *href = NULL; | 
 | 2976 |     xmlChar *prefix = NULL; | 
 | 2977 |     int garbage = 0; | 
 | 2978 |  | 
 | 2979 |     /* | 
 | 2980 |      * We just skipped "namespace" or "xml:namespace" | 
 | 2981 |      */ | 
 | 2982 |     SKIP_BLANKS; | 
 | 2983 |  | 
 | 2984 | xmlGenericError(xmlGenericErrorContext, | 
 | 2985 | 	"xmlParseNamespace: reached loop 5\n"); | 
 | 2986 |     while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */ | 
 | 2987 | 	/* | 
 | 2988 | 	 * We can have "ns" or "prefix" attributes | 
 | 2989 | 	 * Old encoding as 'href' or 'AS' attributes is still supported | 
 | 2990 | 	 */ | 
 | 2991 | 	if ((RAW == 'n') && (NXT(1) == 's')) { | 
 | 2992 | 	    garbage = 0; | 
 | 2993 | 	    SKIP(2); | 
 | 2994 | 	    SKIP_BLANKS; | 
 | 2995 |  | 
 | 2996 | 	    if (RAW != '=') continue; | 
 | 2997 | 	    NEXT; | 
 | 2998 | 	    SKIP_BLANKS; | 
 | 2999 |  | 
 | 3000 | 	    href = xmlParseQuotedString(ctxt); | 
 | 3001 | 	    SKIP_BLANKS; | 
 | 3002 | 	} else if ((RAW == 'h') && (NXT(1) == 'r') && | 
 | 3003 | 	    (NXT(2) == 'e') && (NXT(3) == 'f')) { | 
 | 3004 | 	    garbage = 0; | 
 | 3005 | 	    SKIP(4); | 
 | 3006 | 	    SKIP_BLANKS; | 
 | 3007 |  | 
 | 3008 | 	    if (RAW != '=') continue; | 
 | 3009 | 	    NEXT; | 
 | 3010 | 	    SKIP_BLANKS; | 
 | 3011 |  | 
 | 3012 | 	    href = xmlParseQuotedString(ctxt); | 
 | 3013 | 	    SKIP_BLANKS; | 
 | 3014 | 	} else if ((RAW == 'p') && (NXT(1) == 'r') && | 
 | 3015 | 	           (NXT(2) == 'e') && (NXT(3) == 'f') && | 
 | 3016 | 	           (NXT(4) == 'i') && (NXT(5) == 'x')) { | 
 | 3017 | 	    garbage = 0; | 
 | 3018 | 	    SKIP(6); | 
 | 3019 | 	    SKIP_BLANKS; | 
 | 3020 |  | 
 | 3021 | 	    if (RAW != '=') continue; | 
 | 3022 | 	    NEXT; | 
 | 3023 | 	    SKIP_BLANKS; | 
 | 3024 |  | 
 | 3025 | 	    prefix = xmlParseQuotedString(ctxt); | 
 | 3026 | 	    SKIP_BLANKS; | 
 | 3027 | 	} else if ((RAW == 'A') && (NXT(1) == 'S')) { | 
 | 3028 | 	    garbage = 0; | 
 | 3029 | 	    SKIP(2); | 
 | 3030 | 	    SKIP_BLANKS; | 
 | 3031 |  | 
 | 3032 | 	    if (RAW != '=') continue; | 
 | 3033 | 	    NEXT; | 
 | 3034 | 	    SKIP_BLANKS; | 
 | 3035 |  | 
 | 3036 | 	    prefix = xmlParseQuotedString(ctxt); | 
 | 3037 | 	    SKIP_BLANKS; | 
 | 3038 | 	} else if ((RAW == '?') && (NXT(1) == '>')) { | 
 | 3039 | 	    garbage = 0; | 
 | 3040 | 	    NEXT; | 
 | 3041 | 	} else { | 
 | 3042 |             /* | 
 | 3043 | 	     * Found garbage when parsing the namespace | 
 | 3044 | 	     */ | 
 | 3045 | 	    if (!garbage) { | 
 | 3046 | 		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 3047 | 		    ctxt->sax->error(ctxt->userData, | 
 | 3048 | 		                     "xmlParseNamespace found garbage\n"); | 
 | 3049 | 	    } | 
 | 3050 | 	    ctxt->errNo = XML_ERR_NS_DECL_ERROR; | 
 | 3051 | 	    ctxt->wellFormed = 0; | 
 | 3052 | 	    ctxt->disableSAX = 1; | 
 | 3053 |             NEXT; | 
 | 3054 |         } | 
 | 3055 |     } | 
 | 3056 |  | 
 | 3057 |     MOVETO_ENDTAG(CUR_PTR); | 
 | 3058 |     NEXT; | 
 | 3059 |  | 
 | 3060 |     /* | 
 | 3061 |      * Register the DTD. | 
 | 3062 |     if (href != NULL) | 
 | 3063 | 	if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL)) | 
 | 3064 | 	    ctxt->sax->globalNamespace(ctxt->userData, href, prefix); | 
 | 3065 |      */ | 
 | 3066 |  | 
 | 3067 |     if (prefix != NULL) xmlFree(prefix); | 
 | 3068 |     if (href != NULL) xmlFree(href); | 
 | 3069 | #endif | 
 | 3070 | } | 
 | 3071 |  | 
 | 3072 | /** | 
 | 3073 |  * xmlScanName: | 
 | 3074 |  * @ctxt:  an XML parser context | 
 | 3075 |  * | 
 | 3076 |  * Trickery: parse an XML name but without consuming the input flow | 
 | 3077 |  * Needed for rollback cases. Used only when parsing entities references. | 
 | 3078 |  * | 
 | 3079 |  * TODO: seems deprecated now, only used in the default part of | 
 | 3080 |  *       xmlParserHandleReference | 
 | 3081 |  * | 
 | 3082 |  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | | 
 | 3083 |  *                  CombiningChar | Extender | 
 | 3084 |  * | 
 | 3085 |  * [5] Name ::= (Letter | '_' | ':') (NameChar)* | 
 | 3086 |  * | 
 | 3087 |  * [6] Names ::= Name (S Name)* | 
 | 3088 |  * | 
 | 3089 |  * Returns the Name parsed or NULL | 
 | 3090 |  */ | 
 | 3091 |  | 
 | 3092 | xmlChar * | 
| Daniel Veillard | c86a4fa | 2001-03-26 16:28:29 +0000 | [diff] [blame] | 3093 | xmlScanName(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) { | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 3094 |     static int deprecated = 0; | 
 | 3095 |     if (!deprecated) { | 
 | 3096 | 	xmlGenericError(xmlGenericErrorContext, | 
 | 3097 | 		"xmlScanName() deprecated function reached\n"); | 
 | 3098 | 	deprecated = 1; | 
 | 3099 |     } | 
 | 3100 |     return(NULL); | 
 | 3101 |  | 
 | 3102 | #if 0 | 
 | 3103 |     xmlChar buf[XML_MAX_NAMELEN]; | 
 | 3104 |     int len = 0; | 
 | 3105 |  | 
 | 3106 |     GROW; | 
 | 3107 |     if (!IS_LETTER(RAW) && (RAW != '_') && | 
 | 3108 |         (RAW != ':')) { | 
 | 3109 | 	return(NULL); | 
 | 3110 |     } | 
 | 3111 |  | 
 | 3112 |  | 
 | 3113 |     while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */ | 
 | 3114 |            (NXT(len) == '.') || (NXT(len) == '-') || | 
 | 3115 | 	   (NXT(len) == '_') || (NXT(len) == ':') ||  | 
 | 3116 | 	   (IS_COMBINING(NXT(len))) || | 
 | 3117 | 	   (IS_EXTENDER(NXT(len)))) { | 
 | 3118 | 	GROW; | 
 | 3119 | 	buf[len] = NXT(len); | 
 | 3120 | 	len++; | 
 | 3121 | 	if (len >= XML_MAX_NAMELEN) { | 
 | 3122 | 	    xmlGenericError(xmlGenericErrorContext,  | 
 | 3123 | 	       "xmlScanName: reached XML_MAX_NAMELEN limit\n"); | 
 | 3124 | 	    while ((IS_LETTER(NXT(len))) || /* NOT REACHED */ | 
 | 3125 | 		   (IS_DIGIT(NXT(len))) || | 
 | 3126 | 		   (NXT(len) == '.') || (NXT(len) == '-') || | 
 | 3127 | 		   (NXT(len) == '_') || (NXT(len) == ':') ||  | 
 | 3128 | 		   (IS_COMBINING(NXT(len))) || | 
 | 3129 | 		   (IS_EXTENDER(NXT(len)))) | 
 | 3130 | 		 len++; | 
 | 3131 | 	    break; | 
 | 3132 | 	} | 
 | 3133 |     } | 
 | 3134 |     return(xmlStrndup(buf, len)); | 
 | 3135 | #endif | 
 | 3136 | } | 
 | 3137 |  | 
 | 3138 | /** | 
 | 3139 |  * xmlParserHandleReference: | 
 | 3140 |  * @ctxt:  the parser context | 
 | 3141 |  *  | 
 | 3142 |  * TODO: Remove, now deprecated ... the test is done directly in the | 
 | 3143 |  *       content parsing | 
 | 3144 |  * routines. | 
 | 3145 |  * | 
 | 3146 |  * [67] Reference ::= EntityRef | CharRef | 
 | 3147 |  * | 
 | 3148 |  * [68] EntityRef ::= '&' Name ';' | 
 | 3149 |  * | 
 | 3150 |  * [ WFC: Entity Declared ] | 
 | 3151 |  * the Name given in the entity reference must match that in an entity | 
 | 3152 |  * declaration, except that well-formed documents need not declare any | 
 | 3153 |  * of the following entities: amp, lt, gt, apos, quot.  | 
 | 3154 |  * | 
 | 3155 |  * [ WFC: Parsed Entity ] | 
 | 3156 |  * An entity reference must not contain the name of an unparsed entity | 
 | 3157 |  * | 
 | 3158 |  * [66] CharRef ::= '&#' [0-9]+ ';' | | 
 | 3159 |  *                  '&#x' [0-9a-fA-F]+ ';' | 
 | 3160 |  * | 
 | 3161 |  * A PEReference may have been detectect in the current input stream | 
 | 3162 |  * the handling is done accordingly to  | 
 | 3163 |  *      http://www.w3.org/TR/REC-xml#entproc | 
 | 3164 |  */ | 
 | 3165 | void | 
| Daniel Veillard | c86a4fa | 2001-03-26 16:28:29 +0000 | [diff] [blame] | 3166 | xmlParserHandleReference(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED) { | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 3167 |     static int deprecated = 0; | 
 | 3168 |     if (!deprecated) { | 
 | 3169 | 	xmlGenericError(xmlGenericErrorContext, | 
 | 3170 | 		"xmlParserHandleReference() deprecated function reached\n"); | 
 | 3171 | 	deprecated = 1; | 
 | 3172 |     } | 
 | 3173 |  | 
 | 3174 | #if 0 | 
 | 3175 |     xmlParserInputPtr input; | 
 | 3176 |     xmlChar *name; | 
 | 3177 |     xmlEntityPtr ent = NULL; | 
 | 3178 |  | 
 | 3179 |     if (ctxt->token != 0) { | 
 | 3180 |         return; | 
 | 3181 |     }	 | 
 | 3182 |     if (RAW != '&') return; | 
 | 3183 |     GROW; | 
 | 3184 |     if ((RAW == '&') && (NXT(1) == '#')) { | 
 | 3185 | 	switch(ctxt->instate) { | 
 | 3186 | 	    case XML_PARSER_ENTITY_DECL: | 
 | 3187 | 	    case XML_PARSER_PI: | 
 | 3188 | 	    case XML_PARSER_CDATA_SECTION: | 
 | 3189 | 	    case XML_PARSER_COMMENT: | 
 | 3190 | 	    case XML_PARSER_SYSTEM_LITERAL: | 
 | 3191 | 		/* we just ignore it there */ | 
 | 3192 | 		return; | 
 | 3193 | 	    case XML_PARSER_START_TAG: | 
 | 3194 | 		return; | 
 | 3195 | 	    case XML_PARSER_END_TAG: | 
 | 3196 | 		return; | 
 | 3197 | 	    case XML_PARSER_EOF: | 
 | 3198 | 		ctxt->errNo = XML_ERR_CHARREF_AT_EOF; | 
 | 3199 | 		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 3200 | 		    ctxt->sax->error(ctxt->userData, "CharRef at EOF\n"); | 
 | 3201 | 		ctxt->wellFormed = 0; | 
 | 3202 | 		ctxt->disableSAX = 1; | 
 | 3203 | 		return; | 
 | 3204 | 	    case XML_PARSER_PROLOG: | 
 | 3205 | 	    case XML_PARSER_START: | 
 | 3206 | 	    case XML_PARSER_MISC: | 
 | 3207 | 		ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG; | 
 | 3208 | 		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 3209 | 		    ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n"); | 
 | 3210 | 		ctxt->wellFormed = 0; | 
 | 3211 | 		ctxt->disableSAX = 1; | 
 | 3212 | 		return; | 
 | 3213 | 	    case XML_PARSER_EPILOG: | 
 | 3214 | 		ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG; | 
 | 3215 | 		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 3216 | 		    ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n"); | 
 | 3217 | 		ctxt->wellFormed = 0; | 
 | 3218 | 		ctxt->disableSAX = 1; | 
 | 3219 | 		return; | 
 | 3220 | 	    case XML_PARSER_DTD: | 
 | 3221 | 		ctxt->errNo = XML_ERR_CHARREF_IN_DTD; | 
 | 3222 | 		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 3223 | 		    ctxt->sax->error(ctxt->userData,  | 
 | 3224 | 		           "CharRef are forbiden in DTDs!\n"); | 
 | 3225 | 		ctxt->wellFormed = 0; | 
 | 3226 | 		ctxt->disableSAX = 1; | 
 | 3227 | 		return; | 
 | 3228 | 	    case XML_PARSER_ENTITY_VALUE: | 
 | 3229 | 	        /* | 
 | 3230 | 		 * NOTE: in the case of entity values, we don't do the | 
 | 3231 | 		 *       substitution here since we need the literal | 
 | 3232 | 		 *       entity value to be able to save the internal | 
 | 3233 | 		 *       subset of the document. | 
 | 3234 | 		 *       This will be handled by xmlStringDecodeEntities | 
 | 3235 | 		 */ | 
 | 3236 | 		return; | 
 | 3237 | 	    case XML_PARSER_CONTENT: | 
 | 3238 | 		return; | 
 | 3239 | 	    case XML_PARSER_ATTRIBUTE_VALUE: | 
 | 3240 | 		/* ctxt->token = xmlParseCharRef(ctxt); */ | 
 | 3241 | 		return; | 
 | 3242 |             case XML_PARSER_IGNORE: | 
 | 3243 | 	        return; | 
 | 3244 | 	} | 
 | 3245 | 	return; | 
 | 3246 |     } | 
 | 3247 |  | 
 | 3248 |     switch(ctxt->instate) { | 
 | 3249 | 	case XML_PARSER_CDATA_SECTION: | 
 | 3250 | 	    return; | 
 | 3251 | 	case XML_PARSER_PI: | 
 | 3252 |         case XML_PARSER_COMMENT: | 
 | 3253 | 	case XML_PARSER_SYSTEM_LITERAL: | 
 | 3254 |         case XML_PARSER_CONTENT: | 
 | 3255 | 	    return; | 
 | 3256 | 	case XML_PARSER_START_TAG: | 
 | 3257 | 	    return; | 
 | 3258 | 	case XML_PARSER_END_TAG: | 
 | 3259 | 	    return; | 
 | 3260 |         case XML_PARSER_EOF: | 
 | 3261 | 	    ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF; | 
 | 3262 | 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 3263 | 	        ctxt->sax->error(ctxt->userData, "Reference at EOF\n"); | 
 | 3264 | 	    ctxt->wellFormed = 0; | 
 | 3265 | 	    ctxt->disableSAX = 1; | 
 | 3266 | 	    return; | 
 | 3267 |         case XML_PARSER_PROLOG: | 
 | 3268 | 	case XML_PARSER_START: | 
 | 3269 | 	case XML_PARSER_MISC: | 
 | 3270 | 	    ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG; | 
 | 3271 | 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 3272 | 	        ctxt->sax->error(ctxt->userData, "Reference in prolog!\n"); | 
 | 3273 | 	    ctxt->wellFormed = 0; | 
 | 3274 | 	    ctxt->disableSAX = 1; | 
 | 3275 | 	    return; | 
 | 3276 |         case XML_PARSER_EPILOG: | 
 | 3277 | 	    ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG; | 
 | 3278 | 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 3279 | 	        ctxt->sax->error(ctxt->userData, "Reference in epilog!\n"); | 
 | 3280 | 	    ctxt->wellFormed = 0; | 
 | 3281 | 	    ctxt->disableSAX = 1; | 
 | 3282 | 	    return; | 
 | 3283 | 	case XML_PARSER_ENTITY_VALUE: | 
 | 3284 | 	    /* | 
 | 3285 | 	     * NOTE: in the case of entity values, we don't do the | 
 | 3286 | 	     *       substitution here since we need the literal | 
 | 3287 | 	     *       entity value to be able to save the internal | 
 | 3288 | 	     *       subset of the document. | 
 | 3289 | 	     *       This will be handled by xmlStringDecodeEntities | 
 | 3290 | 	     */ | 
 | 3291 | 	    return; | 
 | 3292 |         case XML_PARSER_ATTRIBUTE_VALUE: | 
 | 3293 | 	    /* | 
 | 3294 | 	     * NOTE: in the case of attributes values, we don't do the | 
 | 3295 | 	     *       substitution here unless we are in a mode where | 
 | 3296 | 	     *       the parser is explicitely asked to substitute | 
 | 3297 | 	     *       entities. The SAX callback is called with values | 
 | 3298 | 	     *       without entity substitution. | 
 | 3299 | 	     *       This will then be handled by xmlStringDecodeEntities | 
 | 3300 | 	     */ | 
 | 3301 | 	    return; | 
 | 3302 | 	case XML_PARSER_ENTITY_DECL: | 
 | 3303 | 	    /* | 
 | 3304 | 	     * we just ignore it there | 
 | 3305 | 	     * the substitution will be done once the entity is referenced | 
 | 3306 | 	     */ | 
 | 3307 | 	    return; | 
 | 3308 |         case XML_PARSER_DTD: | 
 | 3309 | 	    ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD; | 
 | 3310 | 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 3311 | 		ctxt->sax->error(ctxt->userData,  | 
 | 3312 | 		       "Entity references are forbiden in DTDs!\n"); | 
 | 3313 | 	    ctxt->wellFormed = 0; | 
 | 3314 | 	    ctxt->disableSAX = 1; | 
 | 3315 | 	    return; | 
 | 3316 |         case XML_PARSER_IGNORE: | 
 | 3317 | 	    return; | 
 | 3318 |     } | 
 | 3319 |  | 
 | 3320 | /* TODO: this seems not reached anymore .... Verify ... */ | 
 | 3321 | xmlGenericError(xmlGenericErrorContext, | 
 | 3322 | 	"Reached deprecated section in xmlParserHandleReference()\n"); | 
 | 3323 | xmlGenericError(xmlGenericErrorContext, | 
 | 3324 | 	"Please forward the document to Daniel.Veillard@w3.org\n"); | 
 | 3325 | xmlGenericError(xmlGenericErrorContext, | 
 | 3326 | 	"indicating the version: %s, thanks !\n", xmlParserVersion); | 
 | 3327 |     NEXT; | 
 | 3328 |     name = xmlScanName(ctxt); | 
 | 3329 |     if (name == NULL) { | 
 | 3330 | 	ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME; | 
 | 3331 | 	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 3332 | 	    ctxt->sax->error(ctxt->userData, "Entity reference: no name\n"); | 
 | 3333 | 	ctxt->wellFormed = 0; | 
 | 3334 | 	ctxt->disableSAX = 1; | 
 | 3335 | 	ctxt->token = '&'; | 
 | 3336 | 	return; | 
 | 3337 |     } | 
 | 3338 |     if (NXT(xmlStrlen(name)) != ';') { | 
 | 3339 | 	ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; | 
 | 3340 | 	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 3341 | 	    ctxt->sax->error(ctxt->userData,  | 
 | 3342 | 	                     "Entity reference: ';' expected\n"); | 
 | 3343 | 	ctxt->wellFormed = 0; | 
 | 3344 | 	ctxt->disableSAX = 1; | 
 | 3345 | 	ctxt->token = '&'; | 
 | 3346 | 	xmlFree(name); | 
 | 3347 | 	return; | 
 | 3348 |     } | 
 | 3349 |     SKIP(xmlStrlen(name) + 1); | 
 | 3350 |     if (ctxt->sax != NULL) { | 
 | 3351 | 	if (ctxt->sax->getEntity != NULL) | 
 | 3352 | 	    ent = ctxt->sax->getEntity(ctxt->userData, name); | 
 | 3353 |     } | 
 | 3354 |  | 
 | 3355 |     /* | 
 | 3356 |      * [ WFC: Entity Declared ] | 
 | 3357 |      * the Name given in the entity reference must match that in an entity | 
 | 3358 |      * declaration, except that well-formed documents need not declare any | 
 | 3359 |      * of the following entities: amp, lt, gt, apos, quot.  | 
 | 3360 |      */ | 
 | 3361 |     if (ent == NULL) | 
 | 3362 | 	ent = xmlGetPredefinedEntity(name); | 
 | 3363 |     if (ent == NULL) { | 
 | 3364 |         ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; | 
 | 3365 | 	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 3366 | 	    ctxt->sax->error(ctxt->userData,  | 
 | 3367 | 			     "Entity reference: entity %s not declared\n", | 
 | 3368 | 			     name); | 
 | 3369 | 	ctxt->wellFormed = 0; | 
 | 3370 | 	ctxt->disableSAX = 1; | 
 | 3371 | 	xmlFree(name); | 
 | 3372 | 	return; | 
 | 3373 |     } | 
 | 3374 |  | 
 | 3375 |     /* | 
 | 3376 |      * [ WFC: Parsed Entity ] | 
 | 3377 |      * An entity reference must not contain the name of an unparsed entity | 
 | 3378 |      */ | 
 | 3379 |     if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { | 
 | 3380 |         ctxt->errNo = XML_ERR_UNPARSED_ENTITY; | 
 | 3381 | 	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 3382 | 	    ctxt->sax->error(ctxt->userData,  | 
 | 3383 | 			 "Entity reference to unparsed entity %s\n", name); | 
 | 3384 | 	ctxt->wellFormed = 0; | 
 | 3385 | 	ctxt->disableSAX = 1; | 
 | 3386 |     } | 
 | 3387 |  | 
 | 3388 |     if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) { | 
 | 3389 |         ctxt->token = ent->content[0]; | 
 | 3390 | 	xmlFree(name); | 
 | 3391 | 	return; | 
 | 3392 |     } | 
 | 3393 |     input = xmlNewEntityInputStream(ctxt, ent); | 
 | 3394 |     xmlPushInput(ctxt, input); | 
 | 3395 |     xmlFree(name); | 
 | 3396 | #endif | 
 | 3397 |     return; | 
 | 3398 | } | 
 | 3399 |  | 
 | 3400 | /** | 
 | 3401 |  * xmlHandleEntity: | 
 | 3402 |  * @ctxt:  an XML parser context | 
 | 3403 |  * @entity:  an XML entity pointer. | 
 | 3404 |  * | 
 | 3405 |  * Default handling of defined entities, when should we define a new input | 
 | 3406 |  * stream ? When do we just handle that as a set of chars ? | 
 | 3407 |  * | 
 | 3408 |  * OBSOLETE: to be removed at some point. | 
 | 3409 |  */ | 
 | 3410 |  | 
 | 3411 | void | 
| Daniel Veillard | c86a4fa | 2001-03-26 16:28:29 +0000 | [diff] [blame] | 3412 | xmlHandleEntity(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlEntityPtr entity ATTRIBUTE_UNUSED) { | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 3413 |     static int deprecated = 0; | 
 | 3414 |     if (!deprecated) { | 
 | 3415 | 	xmlGenericError(xmlGenericErrorContext, | 
 | 3416 | 		"xmlHandleEntity() deprecated function reached\n"); | 
 | 3417 | 	deprecated = 1; | 
 | 3418 |     } | 
 | 3419 |  | 
 | 3420 | #if 0 | 
 | 3421 |     int len; | 
 | 3422 |     xmlParserInputPtr input; | 
 | 3423 |  | 
 | 3424 |     if (entity->content == NULL) { | 
 | 3425 | 	ctxt->errNo = XML_ERR_INTERNAL_ERROR; | 
 | 3426 |         if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) | 
 | 3427 | 	    ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n", | 
 | 3428 | 	               entity->name); | 
 | 3429 | 	ctxt->wellFormed = 0; | 
 | 3430 | 	ctxt->disableSAX = 1; | 
 | 3431 |         return; | 
 | 3432 |     } | 
 | 3433 |     len = xmlStrlen(entity->content); | 
 | 3434 |     if (len <= 2) goto handle_as_char; | 
 | 3435 |  | 
 | 3436 |     /* | 
 | 3437 |      * Redefine its content as an input stream. | 
 | 3438 |      */ | 
 | 3439 |     input = xmlNewEntityInputStream(ctxt, entity); | 
 | 3440 |     xmlPushInput(ctxt, input); | 
 | 3441 |     return; | 
 | 3442 |  | 
 | 3443 | handle_as_char: | 
 | 3444 |     /* | 
 | 3445 |      * Just handle the content as a set of chars. | 
 | 3446 |      */ | 
 | 3447 |     if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && | 
 | 3448 | 	(ctxt->sax->characters != NULL)) | 
 | 3449 | 	ctxt->sax->characters(ctxt->userData, entity->content, len); | 
 | 3450 | #endif | 
 | 3451 | } | 
 | 3452 |  | 
 | 3453 | /** | 
 | 3454 |  * xmlNewGlobalNs: | 
 | 3455 |  * @doc:  the document carrying the namespace | 
 | 3456 |  * @href:  the URI associated | 
 | 3457 |  * @prefix:  the prefix for the namespace | 
 | 3458 |  * | 
 | 3459 |  * Creation of a Namespace, the old way using PI and without scoping | 
 | 3460 |  *   DEPRECATED !!! | 
 | 3461 |  * It now create a namespace on the root element of the document if found. | 
 | 3462 |  * Returns NULL this functionnality had been removed | 
 | 3463 |  */ | 
 | 3464 | xmlNsPtr | 
| Daniel Veillard | c86a4fa | 2001-03-26 16:28:29 +0000 | [diff] [blame] | 3465 | xmlNewGlobalNs(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *href ATTRIBUTE_UNUSED, | 
 | 3466 | 	       const xmlChar *prefix ATTRIBUTE_UNUSED) { | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 3467 |     static int deprecated = 0; | 
 | 3468 |     if (!deprecated) { | 
 | 3469 | 	xmlGenericError(xmlGenericErrorContext, | 
 | 3470 | 		"xmlNewGlobalNs() deprecated function reached\n"); | 
 | 3471 | 	deprecated = 1; | 
 | 3472 |     } | 
 | 3473 |     return(NULL); | 
 | 3474 | #if 0 | 
 | 3475 |     xmlNodePtr root; | 
 | 3476 |  | 
 | 3477 |     xmlNsPtr cur; | 
 | 3478 |   | 
 | 3479 |     root = xmlDocGetRootElement(doc); | 
 | 3480 |     if (root != NULL) | 
 | 3481 | 	return(xmlNewNs(root, href, prefix)); | 
 | 3482 | 	 | 
 | 3483 |     /* | 
 | 3484 |      * if there is no root element yet, create an old Namespace type | 
 | 3485 |      * and it will be moved to the root at save time. | 
 | 3486 |      */ | 
 | 3487 |     cur = (xmlNsPtr) xmlMalloc(sizeof(xmlNs)); | 
 | 3488 |     if (cur == NULL) { | 
 | 3489 |         xmlGenericError(xmlGenericErrorContext, | 
 | 3490 | 		"xmlNewGlobalNs : malloc failed\n"); | 
 | 3491 | 	return(NULL); | 
 | 3492 |     } | 
 | 3493 |     memset(cur, 0, sizeof(xmlNs)); | 
 | 3494 |     cur->type = XML_GLOBAL_NAMESPACE; | 
 | 3495 |  | 
 | 3496 |     if (href != NULL) | 
 | 3497 | 	cur->href = xmlStrdup(href);  | 
 | 3498 |     if (prefix != NULL) | 
 | 3499 | 	cur->prefix = xmlStrdup(prefix);  | 
 | 3500 |  | 
 | 3501 |     /* | 
 | 3502 |      * Add it at the end to preserve parsing order ... | 
 | 3503 |      */ | 
 | 3504 |     if (doc != NULL) { | 
 | 3505 | 	if (doc->oldNs == NULL) { | 
 | 3506 | 	    doc->oldNs = cur; | 
 | 3507 | 	} else { | 
 | 3508 | 	    xmlNsPtr prev = doc->oldNs; | 
 | 3509 |  | 
 | 3510 | 	    while (prev->next != NULL) prev = prev->next; | 
 | 3511 | 	    prev->next = cur; | 
 | 3512 | 	} | 
 | 3513 |     } | 
 | 3514 |  | 
 | 3515 |   return(NULL); | 
 | 3516 | #endif | 
 | 3517 | } | 
 | 3518 |  | 
 | 3519 | /** | 
 | 3520 |  * xmlUpgradeOldNs: | 
 | 3521 |  * @doc:  a document pointer | 
 | 3522 |  *  | 
 | 3523 |  * Upgrade old style Namespaces (PI) and move them to the root of the document. | 
 | 3524 |  * DEPRECATED | 
 | 3525 |  */ | 
 | 3526 | void | 
| Daniel Veillard | c86a4fa | 2001-03-26 16:28:29 +0000 | [diff] [blame] | 3527 | xmlUpgradeOldNs(xmlDocPtr doc ATTRIBUTE_UNUSED) { | 
| Owen Taylor | 3473f88 | 2001-02-23 17:55:21 +0000 | [diff] [blame] | 3528 |     static int deprecated = 0; | 
 | 3529 |     if (!deprecated) { | 
 | 3530 | 	xmlGenericError(xmlGenericErrorContext, | 
 | 3531 | 		"xmlNewGlobalNs() deprecated function reached\n"); | 
 | 3532 | 	deprecated = 1; | 
 | 3533 |     } | 
 | 3534 | #if 0 | 
 | 3535 |     xmlNsPtr cur; | 
 | 3536 |  | 
 | 3537 |     if ((doc == NULL) || (doc->oldNs == NULL)) return; | 
 | 3538 |     if (doc->children == NULL) { | 
 | 3539 | #ifdef DEBUG_TREE | 
 | 3540 |         xmlGenericError(xmlGenericErrorContext, | 
 | 3541 | 		"xmlUpgradeOldNs: failed no root !\n"); | 
 | 3542 | #endif | 
 | 3543 | 	return; | 
 | 3544 |     } | 
 | 3545 |  | 
 | 3546 |     cur = doc->oldNs; | 
 | 3547 |     while (cur->next != NULL) { | 
 | 3548 | 	cur->type = XML_LOCAL_NAMESPACE; | 
 | 3549 |         cur = cur->next; | 
 | 3550 |     } | 
 | 3551 |     cur->type = XML_LOCAL_NAMESPACE; | 
 | 3552 |     cur->next = doc->children->nsDef; | 
 | 3553 |     doc->children->nsDef = doc->oldNs; | 
 | 3554 |     doc->oldNs = NULL; | 
 | 3555 | #endif | 
 | 3556 | } | 
 | 3557 |  |