added support for unicode identifiers
--HG--
branch : trunk
diff --git a/jinja2/compiler.py b/jinja2/compiler.py
index 6518427..b45aeb5 100644
--- a/jinja2/compiler.py
+++ b/jinja2/compiler.py
@@ -8,12 +8,13 @@
:copyright: Copyright 2008 by Armin Ronacher.
:license: GNU GPL.
"""
+import string
from time import time
from copy import copy
from random import randrange
from keyword import iskeyword
from cStringIO import StringIO
-from itertools import chain
+from itertools import chain, takewhile
from jinja2 import nodes
from jinja2.visitor import NodeVisitor, NodeTransformer
from jinja2.exceptions import TemplateAssertionError
@@ -38,6 +39,8 @@
else:
have_condexpr = True
+_safe_ident_chars = set(string.ascii_letters + string.digits)  # ascii only
+
def generate(node, environment, name, filename, stream=None):
"""Generate the python source for a node tree."""
@@ -49,6 +52,30 @@
return generator.stream.getvalue()
+def mask_identifier(ident):
+    """Return `ident` prefixed with ``l_`` and hex-escaped where unsafe."""
+    parts = ['l_']
+    for ch in ident:
+        # everything outside `_safe_ident_chars` becomes ``_<hex ordinal>_``
+        if ch not in _safe_ident_chars:
+            ch = '_%x_' % ord(ch)
+        parts.append(ch)
+    return str(''.join(parts))
+
+
+def unmask_identifier(ident):
+    """Reverse `mask_identifier`; names without ``l_`` are returned as is."""
+    if not ident.startswith('l_'):
+        return ident
+    chars, plain = iter(ident[2:]), []
+    for ch in chars:
+        if ch == '_':  # ``_<hex>_``: takewhile also eats the closing ``_``
+            hexdigits = takewhile(lambda d: d != '_', chars)
+            ch = unichr(int(concat(hexdigits), 16))
+        plain.append(ch)
+    return ''.join(plain)
+
+
def has_safe_repr(value):
"""Does the node have a safe representation?"""
if value is None or value is NotImplemented or value is Ellipsis:
@@ -470,7 +497,8 @@
def pull_locals(self, frame):
"""Pull all the references identifiers into the local scope."""
for name in frame.identifiers.undeclared:
- self.writeline('l_%s = context.resolve(%r)' % (name, name))
+ self.writeline('%s = context.resolve(%r)' % (mask_identifier(name),
+ name))
def pull_dependencies(self, nodes):
"""Pull all the dependencies."""
@@ -496,7 +524,7 @@
aliases = {}
for name in frame.identifiers.find_shadowed():
aliases[name] = ident = self.temporary_identifier()
- self.writeline('%s = l_%s' % (ident, name))
+ self.writeline('%s = %s' % (ident, mask_identifier(name)))
return aliases
def function_scoping(self, node, frame, children=None):
@@ -545,7 +573,8 @@
func_frame.accesses_kwargs = False
func_frame.accesses_varargs = False
func_frame.accesses_caller = False
- func_frame.arguments = args = ['l_' + x.name for x in node.args]
+ func_frame.arguments = args = [mask_identifier(x.name)
+ for x in node.args]
undeclared = find_undeclared(children, ('caller', 'kwargs', 'varargs'))
@@ -746,7 +775,7 @@
def visit_Import(self, node, frame):
"""Visit regular imports."""
- self.writeline('l_%s = ' % node.target, node)
+ self.writeline(mask_identifier(node.target) + ' = ', node)
if frame.toplevel:
self.write('context.vars[%r] = ' % node.target)
self.write('environment.get_template(')
@@ -774,18 +803,19 @@
name, alias = name
else:
alias = name
- self.writeline('l_%s = getattr(included_template, '
- '%r, missing)' % (alias, name))
- self.writeline('if l_%s is missing:' % alias)
+ self.writeline('%s = getattr(included_template, '
+ '%r, missing)' % (mask_identifier(alias), name))
+ self.writeline('if %s is missing:' % mask_identifier(alias))
self.indent()
- self.writeline('l_%s = environment.undefined(%r %% '
+ self.writeline('%s = environment.undefined(%r %% '
'included_template.name, '
'name=included_template.name)' %
- (alias, 'the template %r does not export '
- 'the requested name ' + repr(name)))
+ (mask_identifier(alias), 'the template %r does '
+ 'not export the requested name ' + repr(name)))
self.outdent()
if frame.toplevel:
- self.writeline('context.vars[%r] = l_%s' % (alias, alias))
+ self.writeline('context.vars[%r] = %s' %
+ (alias, mask_identifier(alias)))
if not alias.startswith('__'):
self.writeline('context.exported_vars.discard(%r)' % alias)
@@ -859,7 +889,7 @@
# reset the aliases if there are any.
for name, alias in aliases.iteritems():
- self.writeline('l_%s = %s' % (name, alias))
+ self.writeline('%s = %s' % (mask_identifier(name), alias))
def visit_If(self, node, frame):
if_frame = frame.soft()
@@ -897,8 +927,8 @@
arg_tuple = ', '.join(repr(x.name) for x in node.args)
if len(node.args) == 1:
arg_tuple += ','
- self.write('l_%s = Macro(environment, macro, %r, (%s), (' %
- (node.name, node.name, arg_tuple))
+ self.write('%s = Macro(environment, macro, %r, (%s), (' %
+ (mask_identifier(node.name), node.name, arg_tuple))
for arg in node.defaults:
self.visit(arg, macro_frame)
self.write(', ')
@@ -1082,14 +1112,15 @@
# make sure toplevel assignments are added to the context.
if frame.toplevel:
for name in assignment_frame.assigned_names:
- self.writeline('context.vars[%r] = l_%s' % (name, name))
+ self.writeline('context.vars[%r] = %s' %
+ (name, mask_identifier(name)))
if not name.startswith('__'):
self.writeline('context.exported_vars.add(%r)' % name)
def visit_Name(self, node, frame):
if node.ctx == 'store' and frame.toplevel:
frame.assigned_names.add(node.name)
- self.write('l_' + node.name)
+ self.write(mask_identifier(node.name))
def visit_MarkSafe(self, node, frame):
self.write('Markup(')
diff --git a/jinja2/debug.py b/jinja2/debug.py
index 9209054..cfc2bc8 100644
--- a/jinja2/debug.py
+++ b/jinja2/debug.py
@@ -12,6 +12,7 @@
"""
import sys
from types import CodeType
+from jinja2.compiler import unmask_identifier
def translate_exception(exc_info):
@@ -66,7 +67,13 @@
locals = {}
for name, value in real_locals.iteritems():
if name.startswith('l_'):
- locals[name[2:]] = value
+ try:
+ locals[str(unmask_identifier(name))] = value
+ except UnicodeError:
+# bummer. someone actually used a unicode identifier.
+ # there is no way this can be added back into the python
+ # layer with python < 3. we have to ignore it...
+ pass
# if there is a local called __jinja_exception__, we get
# rid of it to not break the debug functionality.
diff --git a/jinja2/lexer.py b/jinja2/lexer.py
index d950025..f472d4b 100644
--- a/jinja2/lexer.py
+++ b/jinja2/lexer.py
@@ -31,7 +31,7 @@
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
r'|"([^"\\]*(?:\\.[^"\\]*)*)")(?ms)')
integer_re = re.compile(r'\d+')
-name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
+name_re = re.compile(r'\b[^\W\d]\w*\b(?u)')
float_re = re.compile(r'\d+\.\d+')
# bind operators to token types
@@ -88,6 +88,13 @@
}
+def _trystr(s):
+try:
+return str(s)  # plain `str` when the conversion works
+except UnicodeError:
+return s  # conversion failed: hand `s` back unchanged
+
+
def unescape_string(lineno, filename, s):
r"""Unescape a string. Supported escapes:
\a, \n, \r\, \f, \v, \\, \", \', \0
@@ -95,7 +102,8 @@
\x00, \u0000, \U00000000, \N{...}
"""
try:
- return s.encode('ascii', 'backslashreplace').decode('unicode-escape')
+ return _trystr(s.encode('ascii', 'backslashreplace')
+ .decode('unicode-escape'))
except UnicodeError, e:
msg = str(e).split(':')[-1].strip()
raise TemplateSyntaxError(msg, lineno, filename)
@@ -409,20 +417,13 @@
elif token in ('raw_begin', 'raw_end'):
continue
elif token == 'data':
- try:
- value = str(value)
- except UnicodeError:
- pass
+ value = _trystr(value)
elif token == 'keyword':
token = value
elif token == 'name':
- value = str(value)
+ value = _trystr(value)
elif token == 'string':
value = unescape_string(lineno, filename, value[1:-1])
- try:
- value = str(value)
- except UnicodeError:
- pass
elif token == 'integer':
value = int(value)
elif token == 'float':