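some performance improvements: bind u''.join once as `concat` in jinja2.runtime and use it in the compiler, environment, optimizer and runtime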

--HG--
branch : trunk
diff --git a/jinja2/compiler.py b/jinja2/compiler.py
index 958b2c3..bc5163b 100644
--- a/jinja2/compiler.py
+++ b/jinja2/compiler.py
@@ -14,7 +14,7 @@
 from jinja2 import nodes
 from jinja2.visitor import NodeVisitor, NodeTransformer
 from jinja2.exceptions import TemplateAssertionError
-from jinja2.runtime import StaticLoopContext
+from jinja2.runtime import StaticLoopContext, concat
 from jinja2.utils import Markup
 
 
@@ -728,7 +728,7 @@
         self.pull_locals(macro_frame, indent=False)
         self.writeline('%s = []' % buf)
         self.blockvisit(node.body, macro_frame, indent=False)
-        self.writeline("return Markup(u''.join(%s))" % buf)
+        self.writeline("return Markup(concat(%s))" % buf)
         self.outdent()
         self.newline()
         if frame.toplevel:
@@ -756,7 +756,7 @@
         self.pull_locals(call_frame, indent=False)
         self.writeline('%s = []' % buf)
         self.blockvisit(node.body, call_frame, indent=False)
-        self.writeline("return Markup(u''.join(%s))" % buf)
+        self.writeline("return Markup(concat(%s))" % buf)
         self.outdent()
         arg_tuple = ', '.join(repr(x.name) for x in node.args)
         if len(node.args) == 1:
@@ -794,7 +794,7 @@
             self.writeline('yield ', node)
         else:
             self.writeline('%s.append(' % frame.buffer, node)
-        self.visit_Filter(node.filter, filter_frame, "u''.join(%s)" % buf)
+        self.visit_Filter(node.filter, filter_frame, 'concat(%s)' % buf)
         if frame.buffer is not None:
             self.write(')')
 
@@ -842,7 +842,7 @@
         if len(body) < 3:
             for item in body:
                 if isinstance(item, list):
-                    val = repr(u''.join(item))
+                    val = repr(concat(item))
                     if frame.buffer is None:
                         self.writeline('yield ' + val)
                     else:
@@ -863,7 +863,7 @@
             arguments = []
             for item in body:
                 if isinstance(item, list):
-                    format.append(u''.join(item).replace('%', '%%'))
+                    format.append(concat(item).replace('%', '%%'))
                 else:
                     format.append('%s')
                     arguments.append(item)
@@ -871,7 +871,7 @@
                 self.writeline('yield ')
             else:
                 self.writeline('%s.append(' % frame.buffer)
-            self.write(repr(u''.join(format)) + ' % (')
+            self.write(repr(concat(format)) + ' % (')
             idx = -1
             self.indent()
             for argument in arguments:
diff --git a/jinja2/environment.py b/jinja2/environment.py
index 239193e..5325c2b 100644
--- a/jinja2/environment.py
+++ b/jinja2/environment.py
@@ -13,7 +13,7 @@
 from jinja2.parser import Parser
 from jinja2.optimizer import optimize
 from jinja2.compiler import generate
-from jinja2.runtime import Undefined, TemplateContext
+from jinja2.runtime import Undefined, TemplateContext, concat
 from jinja2.debug import translate_exception
 from jinja2.utils import import_string, LRUCache, Markup
 from jinja2.defaults import DEFAULT_FILTERS, DEFAULT_TESTS, DEFAULT_NAMESPACE
@@ -301,7 +301,7 @@
     def render(self, *args, **kwargs):
         """Render the template into a string."""
         try:
-            return u''.join(self.generate(*args, **kwargs))
+            return concat(self.generate(*args, **kwargs))
         except:
             # hide the `generate` frame
             exc_type, exc_value, tb = sys.exc_info()
@@ -395,7 +395,7 @@
     """Represents an included template."""
 
     def __init__(self, template, context):
-        body = Markup(u''.join(template.root_render_func(context)))
+        body = Markup(concat(template.root_render_func(context)))
         self.__dict__.update(context.get_exported())
         self._name = template.name
         self._rendered_body = body
@@ -448,7 +448,7 @@
                 except StopIteration:
                     if not c_size:
                         raise
-                yield u''.join(buf)
+                yield concat(buf)
                 del buf[:]
                 c_size = 0
 
diff --git a/jinja2/optimizer.py b/jinja2/optimizer.py
index c432b3b..f52b77f 100644
--- a/jinja2/optimizer.py
+++ b/jinja2/optimizer.py
@@ -18,7 +18,7 @@
 """
 from jinja2 import nodes
 from jinja2.visitor import NodeVisitor, NodeTransformer
-from jinja2.runtime import LoopContext
+from jinja2.runtime import LoopContext, concat
 
 
 def optimize(node, environment, context_hint=None):
@@ -111,7 +111,7 @@
 
         # now check if we can evaluate the filter at compile time.
         try:
-            data = node.filter.as_const(u''.join(buffer))
+            data = node.filter.as_const(concat(buffer))
         except nodes.Impossible:
             return node
 
diff --git a/jinja2/runtime.py b/jinja2/runtime.py
index 7860dcc..8f0e1cc 100644
--- a/jinja2/runtime.py
+++ b/jinja2/runtime.py
@@ -14,13 +14,17 @@
 
 
 __all__ = ['LoopContext', 'StaticLoopContext', 'TemplateContext',
-           'Macro', 'Markup', 'missing']
+           'Macro', 'Markup', 'missing', 'concat']
 
 
 # special singleton representing missing values for the runtime
 missing = object()
 
 
+# concatenate an iterable of strings into a single unicode string.
+concat = u''.join
+
+
 class TemplateContext(object):
     """Holds the variables of the local template or of the global one.  It's
     not save to use this class outside of the compiled code.  For example
@@ -116,7 +120,7 @@
         self._render_func = render_func
 
     def __call__(self):
-        return Markup(u''.join(self._render_func(self._context)))
+        return Markup(concat(self._render_func(self._context)))
 
     def __repr__(self):
         return '<%s %r>' % (