Ensure that the locale does not affect the tokenization of identifiers: replace the locale-dependent isalpha()/isalnum() calls in the tokenizer with ASCII-only helpers.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index d60b256..fbbd0bc 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -93,6 +93,21 @@
};
+/* Ensure that the locale does not interfere with tokenization. */
+
+static int
+ascii_isalpha(int c)
+{ /* Locale-independent stand-in for isalpha(): yields 1 only when c is in 'a'..'z' or 'A'..'Z', otherwise 0. */
+ return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
+}
+
+static int
+ascii_isalnum(int c)
+{ /* Locale-independent stand-in for isalnum(): nonzero when c is an ASCII letter (per ascii_isalpha) or a digit '0'..'9'. */
+ return ascii_isalpha(c) || ('0' <= c && c <= '9');
+}
+
+
/* Create and initialize a new tok_state structure */
static struct tok_state *
@@ -230,7 +245,7 @@
} while (t[0] == '\x20' || t[0] == '\t');
begin = t;
- while (isalnum(Py_CHARMASK(t[0])) ||
+ while (ascii_isalnum(Py_CHARMASK(t[0])) ||
t[0] == '-' || t[0] == '_' || t[0] == '.')
t++;
@@ -1185,7 +1200,6 @@
return 0;
}
-
/* Get next token, after space stripping etc. */
static int
@@ -1341,7 +1355,7 @@
}
/* Identifier (most frequent token!) */
- if (isalpha(c) || c == '_') {
+ if (ascii_isalpha(c) || c == '_') {
/* Process r"", u"" and ur"" */
switch (c) {
case 'b':
@@ -1367,7 +1381,7 @@
goto letter_quote;
break;
}
- while (isalnum(c) || c == '_') {
+ while (ascii_isalnum(c) || c == '_') {
c = tok_nextc(tok);
}
tok_backup(tok, c);