fixed validation of attribute content of type NAME, NAMES, NMTOKEN and NMTOKENS
* valid.c: fixed validation of attributes content of type
NAME NAMES NMTOKEN and NMTOKENS to accept internationalized
values, very old bug. Fixes #67671
Daniel
diff --git a/ChangeLog b/ChangeLog
index db91021..7a50abb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+Sun Jan 13 16:37:15 CET 2002 Daniel Veillard <daniel@veillard.com>
+
+ * valid.c: fixed validation of attributes content of type
+ NAME NAMES NMTOKEN and NMTOKENS to accept internationalized
+ values, very old bug. Fixes #67671
+
Sun Jan 13 15:07:49 CET 2002 Daniel Veillard <daniel@veillard.com>
* parser.c include/libxml/parserInternals.h tree.c: integrated
diff --git a/parserInternals.c b/parserInternals.c
index 9a570c9..4af5d71 100644
--- a/parserInternals.c
+++ b/parserInternals.c
@@ -1391,68 +1391,69 @@
*/
int
-xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
+xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
+{
if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
- /*
- * We are supposed to handle UTF8, check it's valid
- * From rfc2044: encoding of the Unicode values on UTF-8:
- *
- * UCS-4 range (hex.) UTF-8 octet sequence (binary)
- * 0000 0000-0000 007F 0xxxxxxx
- * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
- *
- * Check for the 0x110000 limit too
- */
- unsigned char c;
- unsigned int val;
+ /*
+ * We are supposed to handle UTF8, check it's valid
+ * From rfc2044: encoding of the Unicode values on UTF-8:
+ *
+ * UCS-4 range (hex.) UTF-8 octet sequence (binary)
+ * 0000 0000-0000 007F 0xxxxxxx
+ * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
+ * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
+ *
+ * Check for the 0x110000 limit too
+ */
+ unsigned char c;
+ unsigned int val;
- c = *cur;
- if (c & 0x80) {
- if ((cur[1] & 0xc0) != 0x80)
- goto encoding_error;
- if ((c & 0xe0) == 0xe0) {
+ c = *cur;
+ if (c & 0x80) {
+ if ((cur[1] & 0xc0) != 0x80)
+ goto encoding_error;
+ if ((c & 0xe0) == 0xe0) {
- if ((cur[2] & 0xc0) != 0x80)
- goto encoding_error;
- if ((c & 0xf0) == 0xf0) {
- if (((c & 0xf8) != 0xf0) ||
- ((cur[3] & 0xc0) != 0x80))
- goto encoding_error;
- /* 4-byte code */
- *len = 4;
- val = (cur[0] & 0x7) << 18;
- val |= (cur[1] & 0x3f) << 12;
- val |= (cur[2] & 0x3f) << 6;
- val |= cur[3] & 0x3f;
- } else {
- /* 3-byte code */
- *len = 3;
- val = (cur[0] & 0xf) << 12;
- val |= (cur[1] & 0x3f) << 6;
- val |= cur[2] & 0x3f;
- }
- } else {
- /* 2-byte code */
- *len = 2;
- val = (cur[0] & 0x1f) << 6;
- val |= cur[1] & 0x3f;
- }
- if (!IS_CHAR(val)) {
- if ((ctxt->sax != NULL) &&
- (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Char 0x%X out of allowed range\n", val);
- ctxt->errNo = XML_ERR_INVALID_ENCODING;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- }
- return(val);
- } else {
- /* 1-byte code */
- *len = 1;
- return((int) *cur);
- }
+ if ((cur[2] & 0xc0) != 0x80)
+ goto encoding_error;
+ if ((c & 0xf0) == 0xf0) {
+ if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
+ goto encoding_error;
+ /* 4-byte code */
+ *len = 4;
+ val = (cur[0] & 0x7) << 18;
+ val |= (cur[1] & 0x3f) << 12;
+ val |= (cur[2] & 0x3f) << 6;
+ val |= cur[3] & 0x3f;
+ } else {
+ /* 3-byte code */
+ *len = 3;
+ val = (cur[0] & 0xf) << 12;
+ val |= (cur[1] & 0x3f) << 6;
+ val |= cur[2] & 0x3f;
+ }
+ } else {
+ /* 2-byte code */
+ *len = 2;
+ val = (cur[0] & 0x1f) << 6;
+ val |= cur[1] & 0x3f;
+ }
+ if (!IS_CHAR(val)) {
+ if ((ctxt != NULL) && (ctxt->sax != NULL) &&
+ (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "Char 0x%X out of allowed range\n",
+ val);
+ ctxt->errNo = XML_ERR_INVALID_ENCODING;
+ ctxt->wellFormed = 0;
+ ctxt->disableSAX = 1;
+ }
+ return (val);
+ } else {
+ /* 1-byte code */
+ *len = 1;
+ return ((int) *cur);
+ }
}
/*
* Assume it's a fixed length encoding (1) with
@@ -1460,8 +1461,9 @@
* XML constructs only use < 128 chars
*/
*len = 1;
- return((int) *cur);
+ return ((int) *cur);
encoding_error:
+
/*
* If we detect an UTF8 error that probably mean that the
* input encoding didn't get properly advertised in the
@@ -1469,17 +1471,20 @@
* to ISO-Latin-1 (if you don't like this policy, just declare the
* encoding !)
*/
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
- ctxt->sax->error(ctxt->userData,
- "Input is not proper UTF-8, indicate encoding !\n");
- ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
- ctxt->input->cur[0], ctxt->input->cur[1],
- ctxt->input->cur[2], ctxt->input->cur[3]);
+ if (ctxt != NULL) {
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
+ ctxt->sax->error(ctxt->userData,
+ "Input is not proper UTF-8, indicate encoding !\n");
+ ctxt->sax->error(ctxt->userData,
+ "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
+ ctxt->input->cur[0], ctxt->input->cur[1],
+ ctxt->input->cur[2], ctxt->input->cur[3]);
+ }
+ ctxt->errNo = XML_ERR_INVALID_ENCODING;
}
- ctxt->errNo = XML_ERR_INVALID_ENCODING;
*len = 1;
- return((int) *cur);
+ return ((int) *cur);
}
/**
diff --git a/valid.c b/valid.c
index 24ce16a..55c7b9e 100644
--- a/valid.c
+++ b/valid.c
@@ -2565,23 +2565,29 @@
static int
xmlValidateNameValue(const xmlChar *value) {
const xmlChar *cur;
+ int val, len;
if (value == NULL) return(0);
cur = value;
-
- if (!IS_LETTER(*cur) && (*cur != '_') &&
- (*cur != ':')) {
+ val = xmlStringCurrentChar(NULL, cur, &len);
+ cur += len;
+ if (!IS_LETTER(val) && (val != '_') &&
+ (val != ':')) {
return(0);
}
- while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
- (*cur == '.') || (*cur == '-') ||
- (*cur == '_') || (*cur == ':') ||
- (IS_COMBINING(*cur)) ||
- (IS_EXTENDER(*cur)))
- cur++;
+ val = xmlStringCurrentChar(NULL, cur, &len);
+ cur += len;
+ while ((IS_LETTER(val)) || (IS_DIGIT(val)) ||
+ (val == '.') || (val == '-') ||
+ (val == '_') || (val == ':') ||
+ (IS_COMBINING(val)) ||
+ (IS_EXTENDER(val))) {
+ val = xmlStringCurrentChar(NULL, cur, &len);
+ cur += len;
+ }
- if (*cur != 0) return(0);
+ if (val != 0) return(0);
return(1);
}
@@ -2598,39 +2604,53 @@
static int
xmlValidateNamesValue(const xmlChar *value) {
const xmlChar *cur;
+ int val, len;
if (value == NULL) return(0);
cur = value;
+ val = xmlStringCurrentChar(NULL, cur, &len);
+ cur += len;
- if (!IS_LETTER(*cur) && (*cur != '_') &&
- (*cur != ':')) {
+ if (!IS_LETTER(val) && (val != '_') &&
+ (val != ':')) {
return(0);
}
- while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
- (*cur == '.') || (*cur == '-') ||
- (*cur == '_') || (*cur == ':') ||
- (IS_COMBINING(*cur)) ||
- (IS_EXTENDER(*cur)))
- cur++;
-
- while (IS_BLANK(*cur)) {
- while (IS_BLANK(*cur)) cur++;
-
- if (!IS_LETTER(*cur) && (*cur != '_') &&
- (*cur != ':')) {
- return(0);
- }
-
- while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
- (*cur == '.') || (*cur == '-') ||
- (*cur == '_') || (*cur == ':') ||
- (IS_COMBINING(*cur)) ||
- (IS_EXTENDER(*cur)))
- cur++;
+ val = xmlStringCurrentChar(NULL, cur, &len);
+ cur += len;
+ while ((IS_LETTER(val)) || (IS_DIGIT(val)) ||
+ (val == '.') || (val == '-') ||
+ (val == '_') || (val == ':') ||
+ (IS_COMBINING(val)) ||
+ (IS_EXTENDER(val))) {
+ val = xmlStringCurrentChar(NULL, cur, &len);
+ cur += len;
}
- if (*cur != 0) return(0);
+ while (IS_BLANK(val)) {
+ while (IS_BLANK(val)) {
+ val = xmlStringCurrentChar(NULL, cur, &len);
+ cur += len;
+ }
+
+ if (!IS_LETTER(val) && (val != '_') &&
+ (val != ':')) {
+ return(0);
+ }
+ val = xmlStringCurrentChar(NULL, cur, &len);
+ cur += len;
+
+ while ((IS_LETTER(val)) || (IS_DIGIT(val)) ||
+ (val == '.') || (val == '-') ||
+ (val == '_') || (val == ':') ||
+ (IS_COMBINING(val)) ||
+ (IS_EXTENDER(val))) {
+ val = xmlStringCurrentChar(NULL, cur, &len);
+ cur += len;
+ }
+ }
+
+ if (val != 0) return(0);
return(1);
}
@@ -2649,25 +2669,30 @@
static int
xmlValidateNmtokenValue(const xmlChar *value) {
const xmlChar *cur;
+ int val, len;
if (value == NULL) return(0);
cur = value;
+ val = xmlStringCurrentChar(NULL, cur, &len);
+ cur += len;
- if (!IS_LETTER(*cur) && !IS_DIGIT(*cur) &&
- (*cur != '.') && (*cur != '-') &&
- (*cur != '_') && (*cur != ':') &&
- (!IS_COMBINING(*cur)) &&
- (!IS_EXTENDER(*cur)))
+ if (!IS_LETTER(val) && !IS_DIGIT(val) &&
+ (val != '.') && (val != '-') &&
+ (val != '_') && (val != ':') &&
+ (!IS_COMBINING(val)) &&
+ (!IS_EXTENDER(val)))
return(0);
- while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
- (*cur == '.') || (*cur == '-') ||
- (*cur == '_') || (*cur == ':') ||
- (IS_COMBINING(*cur)) ||
- (IS_EXTENDER(*cur)))
- cur++;
+ while ((IS_LETTER(val)) || (IS_DIGIT(val)) ||
+ (val == '.') || (val == '-') ||
+ (val == '_') || (val == ':') ||
+ (IS_COMBINING(val)) ||
+ (IS_EXTENDER(val))) {
+ val = xmlStringCurrentChar(NULL, cur, &len);
+ cur += len;
+ }
- if (*cur != 0) return(0);
+ if (val != 0) return(0);
return(1);
}
@@ -2686,45 +2711,59 @@
static int
xmlValidateNmtokensValue(const xmlChar *value) {
const xmlChar *cur;
+ int val, len;
if (value == NULL) return(0);
cur = value;
+ val = xmlStringCurrentChar(NULL, cur, &len);
+ cur += len;
- while (IS_BLANK(*cur)) cur++;
- if (!IS_LETTER(*cur) && !IS_DIGIT(*cur) &&
- (*cur != '.') && (*cur != '-') &&
- (*cur != '_') && (*cur != ':') &&
- (!IS_COMBINING(*cur)) &&
- (!IS_EXTENDER(*cur)))
- return(0);
-
- while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
- (*cur == '.') || (*cur == '-') ||
- (*cur == '_') || (*cur == ':') ||
- (IS_COMBINING(*cur)) ||
- (IS_EXTENDER(*cur)))
- cur++;
-
- while (IS_BLANK(*cur)) {
- while (IS_BLANK(*cur)) cur++;
- if (*cur == 0) return(1);
-
- if (!IS_LETTER(*cur) && !IS_DIGIT(*cur) &&
- (*cur != '.') && (*cur != '-') &&
- (*cur != '_') && (*cur != ':') &&
- (!IS_COMBINING(*cur)) &&
- (!IS_EXTENDER(*cur)))
- return(0);
-
- while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
- (*cur == '.') || (*cur == '-') ||
- (*cur == '_') || (*cur == ':') ||
- (IS_COMBINING(*cur)) ||
- (IS_EXTENDER(*cur)))
- cur++;
+ while (IS_BLANK(val)) {
+ val = xmlStringCurrentChar(NULL, cur, &len);
+ cur += len;
}
- if (*cur != 0) return(0);
+ if (!IS_LETTER(val) && !IS_DIGIT(val) &&
+ (val != '.') && (val != '-') &&
+ (val != '_') && (val != ':') &&
+ (!IS_COMBINING(val)) &&
+ (!IS_EXTENDER(val)))
+ return(0);
+
+ while ((IS_LETTER(val)) || (IS_DIGIT(val)) ||
+ (val == '.') || (val == '-') ||
+ (val == '_') || (val == ':') ||
+ (IS_COMBINING(val)) ||
+ (IS_EXTENDER(val))) {
+ val = xmlStringCurrentChar(NULL, cur, &len);
+ cur += len;
+ }
+
+ while (IS_BLANK(val)) {
+ while (IS_BLANK(val)) {
+ val = xmlStringCurrentChar(NULL, cur, &len);
+ cur += len;
+ }
+ if (val == 0) return(1);
+
+ if (!IS_LETTER(val) && !IS_DIGIT(val) &&
+ (val != '.') && (val != '-') &&
+ (val != '_') && (val != ':') &&
+ (!IS_COMBINING(val)) &&
+ (!IS_EXTENDER(val)))
+ return(0);
+
+ while ((IS_LETTER(val)) || (IS_DIGIT(val)) ||
+ (val == '.') || (val == '-') ||
+ (val == '_') || (val == ':') ||
+ (IS_COMBINING(val)) ||
+ (IS_EXTENDER(val))) {
+ val = xmlStringCurrentChar(NULL, cur, &len);
+ cur += len;
+ }
+ }
+
+ if (val != 0) return(0);
return(1);
}