added support for unicode identifiers --HG-- branch : trunk

commit: b5365480c7ac04821e16a549c16e581a73f121b6 [log] [tgz]
author: Armin Ronacher <armin.ronacher@active-4.com> Sun May 11 00:18:35 2008 +0200
committer: Armin Ronacher <armin.ronacher@active-4.com> Sun May 11 00:18:35 2008 +0200
tree: 72f5c01da05d34bab334de164193f8b445df9682
parent: b9e7875e43b79898a8e24fdba8015864ba8b5166 [diff] [blame]
diff --git a/jinja2/lexer.py b/jinja2/lexer.py
index d950025..f472d4b 100644
--- a/jinja2/lexer.py
+++ b/jinja2/lexer.py

@@ -31,7 +31,7 @@
 string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                        r'|"([^"\\]*(?:\\.[^"\\]*)*)")(?ms)')
 integer_re = re.compile(r'\d+')
-name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
+name_re = re.compile(r'\b[^\W\d]\w*\b(?u)')
 float_re = re.compile(r'\d+\.\d+')
 
 # bind operators to token types
@@ -88,6 +88,13 @@
 }
 
 
+def _trystr(s):
+    try:
+        return str(s)
+    except UnicodeError:
+        return s
+
+
 def unescape_string(lineno, filename, s):
     r"""Unescape a string. Supported escapes:
         \a, \n, \r\, \f, \v, \\, \", \', \0
@@ -95,7 +102,8 @@
         \x00, \u0000, \U00000000, \N{...}
     """
     try:
-        return s.encode('ascii', 'backslashreplace').decode('unicode-escape')
+        return _trystr(s.encode('ascii', 'backslashreplace')
+                        .decode('unicode-escape'))
     except UnicodeError, e:
         msg = str(e).split(':')[-1].strip()
         raise TemplateSyntaxError(msg, lineno, filename)
@@ -409,20 +417,13 @@
                 elif token in ('raw_begin', 'raw_end'):
                     continue
                 elif token == 'data':
-                    try:
-                        value = str(value)
-                    except UnicodeError:
-                        pass
+                    value = _trystr(value)
                 elif token == 'keyword':
                     token = value
                 elif token == 'name':
-                    value = str(value)
+                    value = _trystr(value)
                 elif token == 'string':
                     value = unescape_string(lineno, filename, value[1:-1])
-                    try:
-                        value = str(value)
-                    except UnicodeError:
-                        pass
                 elif token == 'integer':
                     value = int(value)
                 elif token == 'float':
commit	b5365480c7ac04821e16a549c16e581a73f121b6	[log] [tgz]
author	Armin Ronacher <armin.ronacher@active-4.com>	Sun May 11 00:18:35 2008 +0200
committer	Armin Ronacher <armin.ronacher@active-4.com>	Sun May 11 00:18:35 2008 +0200
tree	72f5c01da05d34bab334de164193f8b445df9682
parent	b9e7875e43b79898a8e24fdba8015864ba8b5166 [diff] [blame]