added two new macros IS_ASCII_LETTER and IS_ASCII_DIGIT used with (html)
* include/libxml/parserInternals.h: added two new macros
IS_ASCII_LETTER and IS_ASCII_DIGIT used with (html)
parsing and xpath for testing data not necessarily
unicode.
* HTMLparser.c, xpath.c: changed use of IS_LETTER_CH and
IS_DIGIT_CH macros to ascii versions (bug 153936).
diff --git a/ChangeLog b/ChangeLog
index 53be3be..6889b65 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+Sat Oct 2 15:03:14 PDT 2004 William Brack <wbrack@mmm.com.hk>
+
+ * include/libxml/parserInternals.h: added two new macros
+ IS_ASCII_LETTER and IS_ASCII_DIGIT used with (html)
+ parsing and xpath for testing data not necessarily
+ unicode.
+ * HTMLparser.c, xpath.c: changed use of IS_LETTER_CH and
+ IS_DIGIT_CH macros to ascii versions (bug 153936).
+
Fri Oct 1 20:37:25 PDT 2004 William Brack <wbrack@mmm.com.hk>
* error.c: added some coding to attempt to display which file
diff --git a/HTMLparser.c b/HTMLparser.c
index 10f8516..72a0870 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -2168,11 +2168,11 @@
int i = 0;
xmlChar loc[HTML_PARSER_BUFFER_SIZE];
- if (!IS_LETTER_CH(CUR) && (CUR != '_') &&
+ if (!IS_ASCII_LETTER(CUR) && (CUR != '_') &&
(CUR != ':')) return(NULL);
while ((i < HTML_PARSER_BUFFER_SIZE) &&
- ((IS_LETTER_CH(CUR)) || (IS_DIGIT_CH(CUR)) ||
+ ((IS_ASCII_LETTER(CUR)) || (IS_ASCII_DIGIT(CUR)) ||
(CUR == ':') || (CUR == '-') || (CUR == '_'))) {
if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20;
else loc[i] = CUR;
diff --git a/include/libxml/parserInternals.h b/include/libxml/parserInternals.h
index 57c4b17..14d4e4d 100644
--- a/include/libxml/parserInternals.h
+++ b/include/libxml/parserInternals.h
@@ -197,6 +197,26 @@
*
*/
#define IS_LETTER_CH(c) xmlIsBaseChar_ch(c)
+
+/**
+ * IS_ASCII_LETTER:
+ * @c: an xmlChar value
+ *
+ * Macro to check [a-zA-Z]
+ *
+ */
+#define IS_ASCII_LETTER(c) (((0x41 <= (c)) && ((c) <= 0x5a)) || \
+ ((0x61 <= (c)) && ((c) <= 0x7a)))
+
+/**
+ * IS_ASCII_DIGIT:
+ * @c: an xmlChar value
+ *
+ * Macro to check [0-9]
+ *
+ */
+#define IS_ASCII_DIGIT(c) ((0x30 <= (c)) && ((c) <= 0x39))
+
/**
* IS_PUBIDCHAR:
* @c: an UNICODE value (int)
diff --git a/xpath.c b/xpath.c
index 9866736..c55958d 100644
--- a/xpath.c
+++ b/xpath.c
@@ -57,6 +57,13 @@
"Unimplemented block at %s:%d\n", \
__FILE__, __LINE__);
+/*
+ * TODO:
+ * There are a few spots where some tests are done which depend upon ascii
+ * data. These should be enhanced for full UTF8 support (see particularly
+ * any use of the macros IS_ASCII_LETTER and IS_ASCII_DIGIT)
+ */
+
#if defined(LIBXML_SCHEMAS_ENABLED) || defined(LIBXML_XPATH_ENABLED)
/************************************************************************
* *
@@ -7947,7 +7954,7 @@
}
NEXT;
SKIP_BLANKS;
- } else if (IS_DIGIT_CH(CUR) || (CUR == '.' && IS_DIGIT_CH(NXT(1)))) {
+ } else if (IS_ASCII_DIGIT(CUR) || (CUR == '.' && IS_ASCII_DIGIT(NXT(1)))) {
xmlXPathCompNumber(ctxt);
} else if ((CUR == '\'') || (CUR == '"')) {
xmlXPathCompLiteral(ctxt);
@@ -8009,12 +8016,12 @@
int len = 0;
SKIP_BLANKS;
- if (!IS_LETTER_CH(CUR) && (CUR != '_') &&
+ if (!IS_ASCII_LETTER(CUR) && (CUR != '_') &&
(CUR != ':')) {
return(NULL);
}
- while ((IS_LETTER_CH(NXT(len))) || (IS_DIGIT_CH(NXT(len))) ||
+ while ((IS_ASCII_LETTER(NXT(len))) || (IS_ASCII_DIGIT(NXT(len))) ||
(NXT(len) == '.') || (NXT(len) == '-') ||
(NXT(len) == '_') || (NXT(len) == ':') ||
(IS_COMBINING_CH(NXT(len))) ||
@@ -8024,7 +8031,7 @@
if (len >= XML_MAX_NAMELEN) {
xmlGenericError(xmlGenericErrorContext,
"xmlScanName: reached XML_MAX_NAMELEN limit\n");
- while ((IS_LETTER_CH(NXT(len))) || (IS_DIGIT_CH(NXT(len))) ||
+ while ((IS_ASCII_LETTER(NXT(len))) || (IS_ASCII_DIGIT(NXT(len))) ||
(NXT(len) == '.') || (NXT(len) == '-') ||
(NXT(len) == '_') || (NXT(len) == ':') ||
(IS_COMBINING_CH(NXT(len))) ||
@@ -8060,8 +8067,10 @@
xmlChar *name = NULL; /* we may have to preparse a name to find out */
SKIP_BLANKS;
- if ((CUR == '$') || (CUR == '(') || (IS_DIGIT_CH(CUR)) ||
- (CUR == '\'') || (CUR == '"') || (CUR == '.' && IS_DIGIT_CH(NXT(1)))) {
+ if ((CUR == '$') || (CUR == '(') ||
+ (IS_ASCII_DIGIT(CUR)) ||
+ (CUR == '\'') || (CUR == '"') ||
+ (CUR == '.' && IS_ASCII_DIGIT(NXT(1)))) {
lc = 0;
} else if (CUR == '*') {
/* relative or absolute location path */
@@ -8957,7 +8966,7 @@
NEXT;
SKIP_BLANKS;
if ((CUR != 0 ) &&
- ((IS_LETTER_CH(CUR)) || (CUR == '_') || (CUR == '.') ||
+ ((IS_ASCII_LETTER(CUR)) || (CUR == '_') || (CUR == '.') ||
(CUR == '@') || (CUR == '*')))
xmlXPathCompRelativeLocationPath(ctxt);
}