third pass at the escaping refactoring. Daniel

* xmlIO.c xmlsave.c: third pass at the escaping refactoring.
Daniel
diff --git a/xmlsave.c b/xmlsave.c
index bfaf07b..89df7fb 100644
--- a/xmlsave.c
+++ b/xmlsave.c
@@ -139,6 +139,178 @@
 
 /************************************************************************
  *									*
+ *			Special escaping routines			*
+ *									*
+ ************************************************************************/
+/**
+ * xmlEscapeEntities:
+ * @out:  a pointer to an array of bytes to store the result
+ * @outlen:  in: the capacity of @out, out: the number of octets written
+ * @in:  a pointer to an array of unescaped UTF-8 bytes
+ * @inlen:  in: the length of @in, out: the number of octets consumed
+ *
+ * Escape a block of UTF-8 chars: '<', '>' and '&' become entity references,
+ * other chars not copied verbatim become &#x...; references. Used when there
+ * is no encoding specified.
+ *
+ * Returns 0 if success (including an early stop when @out is too small or
+ *     @in ends inside a multi-byte sequence), or -1 on invalid input.
+ * Both @inlen and @outlen are updated on the success and error paths.
+ */
+static int
+xmlEscapeEntities(unsigned char* out, int *outlen,
+                 const xmlChar* in, int *inlen) {
+    unsigned char* outstart = out;
+    const unsigned char* base = in;
+    unsigned char* outend = out + *outlen;
+    const unsigned char* inend;
+    int val;
+
+    inend = in + (*inlen);
+    
+    while ((in < inend) && (out < outend)) {
+    	if (*in == '<') {
+	    if (outend - out < 4) break; /* need room for "&lt;" */
+	    *out++ = '&';
+	    *out++ = 'l';
+	    *out++ = 't';
+	    *out++ = ';';
+	    in++;
+	    continue;
+	} else if (*in == '>') {
+	    if (outend - out < 4) break; /* need room for "&gt;" */
+	    *out++ = '&';
+	    *out++ = 'g';
+	    *out++ = 't';
+	    *out++ = ';';
+	    in++;
+	    continue;
+	} else if (*in == '&') {
+	    if (outend - out < 5) break; /* need room for "&amp;" */
+	    *out++ = '&';
+	    *out++ = 'a';
+	    *out++ = 'm';
+	    *out++ = 'p';
+	    *out++ = ';';
+	    in++;
+	    continue;
+	} else if (((*in >= 0x20) && (*in < 0x80)) ||
+	           (*in == '\n') || (*in == '\t')) {
+	    /*
+	     * bytes 0x20-0x7F plus newline and tab are copied verbatim
+	     */
+	    *out++ = *in++;
+	    continue;
+	} else if (*in >= 0x80) {
+	    /*
+	     * Decode one UTF-8 sequence; trailing bytes are masked, not validated
+	     */
+	    unsigned char* ptr;
+
+	    if (outend - out < 10) break; /* worst case is "&#x10FFFF;" */
+
+	    if (*in < 0xC0) {
+		xmlGenericError(xmlGenericErrorContext,
+			"xmlEscapeEntities : input not UTF-8\n");
+		in++;
+		goto error;
+	    } else if (*in < 0xE0) {
+		if (inend - in < 2) break; /* truncated sequence: stop, return 0 */
+		val = (in[0]) & 0x1F;
+		val <<= 6;
+		val |= (in[1]) & 0x3F;
+		in += 2;
+	    } else if (*in < 0xF0) {
+		if (inend - in < 3) break; /* truncated sequence: stop, return 0 */
+		val = (in[0]) & 0x0F;
+		val <<= 6;
+		val |= (in[1]) & 0x3F;
+		val <<= 6;
+		val |= (in[2]) & 0x3F;
+		in += 3;
+	    } else if (*in < 0xF8) {
+		if (inend - in < 4) break; /* truncated sequence: stop, return 0 */
+		val = (in[0]) & 0x07;
+		val <<= 6;
+		val |= (in[1]) & 0x3F;
+		val <<= 6;
+		val |= (in[2]) & 0x3F;
+		val <<= 6;
+		val |= (in[3]) & 0x3F;
+		in += 4;
+	    } else {
+		xmlGenericError(xmlGenericErrorContext,
+		    "xmlEscapeEntities : char out of range\n");
+		in++;
+		goto error;
+	    }
+	    if (!IS_CHAR(val)) {
+		xmlGenericError(xmlGenericErrorContext,
+		    "xmlEscapeEntities : char out of range\n");
+		in++; /* NOTE(review): in is already past the sequence here */
+		goto error;
+	    }
+
+	    /*
+	     * Save val as a hex char ref; ptr starts on the last digit slot
+	     */
+serialize_hex_charref:
+	    *out++ = '&';
+	    *out++ = '#';
+	    *out++ = 'x';
+	    if (val < 0x10) ptr = out;
+	    else if (val < 0x100) ptr = out + 1;
+	    else if (val < 0x1000) ptr = out + 2;
+	    else if (val < 0x10000) ptr = out + 3;
+	    else if (val < 0x100000) ptr = out + 4;
+	    else ptr = out + 5;
+	    out = ptr + 1;
+	    while (val > 0) {
+	        switch (val & 0xF) {
+		    case 0: *ptr-- = '0'; break;
+		    case 1: *ptr-- = '1'; break;
+		    case 2: *ptr-- = '2'; break;
+		    case 3: *ptr-- = '3'; break;
+		    case 4: *ptr-- = '4'; break;
+		    case 5: *ptr-- = '5'; break;
+		    case 6: *ptr-- = '6'; break;
+		    case 7: *ptr-- = '7'; break;
+		    case 8: *ptr-- = '8'; break;
+		    case 9: *ptr-- = '9'; break;
+		    case 0xA: *ptr-- = 'A'; break;
+		    case 0xB: *ptr-- = 'B'; break;
+		    case 0xC: *ptr-- = 'C'; break;
+		    case 0xD: *ptr-- = 'D'; break;
+		    case 0xE: *ptr-- = 'E'; break;
+		    case 0xF: *ptr-- = 'F'; break;
+		    default: *ptr-- = '0'; break;
+		}
+		val >>= 4;
+	    }
+	    *out++ = ';';
+	    continue;
+	} else if (IS_BYTE_CHAR(*in)) {
+	    if (outend - out < 6) break; /* byte < 0x80: at most "&#xHH;" */
+	    val = *in++;
+	    goto serialize_hex_charref;
+	} else {
+	    xmlGenericError(xmlGenericErrorContext,
+		"xmlEscapeEntities : char out of range\n");
+	    in++;
+	    goto error;
+	}
+    }
+    *outlen = out - outstart;
+    *inlen = in - base;
+    return(0);
+error:
+    *outlen = out - outstart;
+    *inlen = in - base;
+    return(-1);
+}
+
+/************************************************************************
+ *									*
  *			Allocation and deallocation			*
  *									*
  ************************************************************************/
@@ -472,14 +644,8 @@
 		(cur->name != xmlStringTextNoenc)) {
 
 		if (ctxt->encoding == NULL) {
-		    xmlChar *buffer;
-
-		    buffer = xmlEncodeEntitiesReentrant(ctxt->doc,
-		                                        cur->content);
-		    if (buffer != NULL) {
-			xmlOutputBufferWriteString(buf, (const char *)buffer);
-			xmlFree(buffer);
-		    }
+		    xmlOutputBufferWriteEscape(buf, cur->content,
+		                               xmlEscapeEntities);
 		} else {
 		    xmlOutputBufferWriteEscape(buf, cur->content, NULL);
 		}
@@ -585,14 +751,7 @@
     xmlOutputBufferWriteString(buf, ">");
     if ((cur->type != XML_ELEMENT_NODE) && (cur->content != NULL)) {
 	if (ctxt->encoding == NULL) {
-	    xmlChar *buffer;
-
-	    buffer = xmlEncodeEntitiesReentrant(ctxt->doc,
-						cur->content);
-	    if (buffer != NULL) {
-		xmlOutputBufferWriteString(buf, (const char *)buffer);
-		xmlFree(buffer);
-	    }
+	    xmlOutputBufferWriteEscape(buf, cur->content, xmlEscapeEntities);
 	} else {
 	    xmlOutputBufferWriteEscape(buf, cur->content, NULL);
 	}
@@ -932,14 +1091,8 @@
 		(cur->name != xmlStringTextNoenc)) {
 
 		if (ctxt->encoding == NULL) {
-		    xmlChar *buffer;
-
-		    buffer = xmlEncodeEntitiesReentrant(ctxt->doc,
-		                                        cur->content);
-		    if (buffer != NULL) {
-			xmlOutputBufferWriteString(buf, (const char *)buffer);
-			xmlFree(buffer);
-		    }
+		    xmlOutputBufferWriteEscape(buf, cur->content,
+		                               xmlEscapeEntities);
 		} else {
 		    xmlOutputBufferWriteEscape(buf, cur->content, NULL);
 		}
@@ -1059,14 +1212,7 @@
     xmlOutputBufferWriteString(buf, ">");
     if ((cur->type != XML_ELEMENT_NODE) && (cur->content != NULL)) {
 	if (ctxt->encoding == NULL) {
-	    xmlChar *buffer;
-
-	    buffer = xmlEncodeEntitiesReentrant(ctxt->doc,
-						cur->content);
-	    if (buffer != NULL) {
-		xmlOutputBufferWriteString(buf, (const char *)buffer);
-		xmlFree(buffer);
-	    }
+	    xmlOutputBufferWriteEscape(buf, cur->content, xmlEscapeEntities);
 	} else {
 	    xmlOutputBufferWriteEscape(buf, cur->content, NULL);
 	}