Better entities and char ref encoding, error msg formatting, Daniel.

commit: 8cc0d1f88966694e6562ee5bb2ca4406da11d64b [log] [tgz]
author: Daniel Veillard <veillard@src.gnome.org> Mon Nov 16 01:04:26 1998 +0000
committer: Daniel Veillard <veillard@src.gnome.org> Mon Nov 16 01:04:26 1998 +0000
tree: 3ab496745eb57075c9331155aafdf9a090e289f7
parent: 242590ee5d7558dbc937c8abbd471425f1059f43 [diff] [blame]
diff --git a/entities.c b/entities.c
index 0b41800..28a3582 100644
--- a/entities.c
+++ b/entities.c

@@ -249,12 +249,22 @@
 }
 
 /*
+ * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
+ *                  | [#x10000-#x10FFFF]
+ * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. NOTE(review): IS_CHAR only rejects values below #x20 (other than #x9/#xA/#xD), FFFE and FFFF; it does not test for surrogates or for values above #x10FFFF.
+ */
+#define IS_CHAR(c)							\
+    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) ||			\
+     (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF)))
+
+/*
  * xmlEncodeEntities : do a global encoding of a string, replacing the
  *                     predefined entities and non ASCII values with their
  *                     entities and CharRef counterparts.
  * TODO !!!! Once moved to UTF-8 internal encoding, the encoding of non-ascii
  *           get erroneous.
  */
+
 CHAR *xmlEncodeEntities(xmlDocPtr doc, const CHAR *input) {
     const CHAR *cur = input;
     CHAR *out = buffer;
@@ -310,6 +320,12 @@
 	    *out++ = 'o';
 	    *out++ = 's';
 	    *out++ = ';';
+	} else if (((*cur >= 0x20) && (*cur < 0x80)) ||
+	    (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
+	    /*
+	     * default case, just copy !
+	     */
+	    *out++ = *cur;
 #ifndef USE_UTF_8
 	} else if ((sizeof(CHAR) == 1) && (*cur >= 0x80)) {
 	    char buf[10], *ptr;
@@ -321,12 +337,26 @@
             ptr = buf;
 	    while (*ptr != 0) *out++ = *ptr++;
 #endif
-	} else {
-	    /*
-	     * default case, just copy !
-	     */
-	    *out++ = *cur;
+	} else if (IS_CHAR(*cur)) {
+	    char buf[10], *ptr;
+
+#ifdef HAVE_SNPRINTF
+	    snprintf(buf, 9, "&#%d;", *cur);
+#else
+	    sprintf(buf, "&#%d;", *cur);
+#endif
+            ptr = buf;
+	    while (*ptr != 0) *out++ = *ptr++;
 	}
+#if 0
+	else {
+	    /*
+	     * default case, this is not a valid char !
+	     * Skip it...
+	     */
+	    fprintf(stderr, "xmlEncodeEntities: invalid char %d\n", (int) *cur);
+	}
+#endif
 	cur++;
     }
     *out++ = 0;
commit	8cc0d1f88966694e6562ee5bb2ca4406da11d64b	[log] [tgz]
author	Daniel Veillard <veillard@src.gnome.org>	Mon Nov 16 01:04:26 1998 +0000
committer	Daniel Veillard <veillard@src.gnome.org>	Mon Nov 16 01:04:26 1998 +0000
tree	3ab496745eb57075c9331155aafdf9a090e289f7
parent	242590ee5d7558dbc937c8abbd471425f1059f43 [diff] [blame]