diff --git a/jinja/datastructure.py b/jinja/datastructure.py
index 73d789d..ecbbf3d 100644
--- a/jinja/datastructure.py
+++ b/jinja/datastructure.py
@@ -9,20 +9,14 @@
     :license: BSD, see LICENSE for more details.
 """
 
-# python2.3 compatibility. do not use this method for anything else
-# then context reversing.
-try:
-    _reversed = reversed
-except NameError:
-    def _reversed(c):
-        return c[::-1]
-
 # sets
 try:
     set
 except NameError:
     from sets import Set as set
 
+from jinja.exceptions import TemplateRuntimeError
+
 
 class UndefinedType(object):
     """
@@ -110,9 +104,6 @@
         return 'Markup(%s)' % unicode.__repr__(self)
 
 
-safe_types = set([Markup, int, long, float])
-
-
 class Context(object):
     """
     Dict like object.
@@ -120,7 +111,7 @@
 
     def __init__(self, _environment_, *args, **kwargs):
         self.environment = _environment_
-        self._stack = [self.environment.globals, dict(*args, **kwargs), {}]
+        self._stack = [_environment_.globals, dict(*args, **kwargs), {}]
         self.globals, self.initial, self.current = self._stack
 
         # cache object used for filters and tests
@@ -131,20 +122,20 @@
         return FakeTranslator()
 
     def pop(self):
-        if len(self._stack) <= 2:
-            raise ValueError('cannot pop initial layer')
+        """Pop the last layer from the stack and return it."""
         rv = self._stack.pop()
         self.current = self._stack[-1]
         return rv
 
     def push(self, data=None):
-        self._stack.append(data or {})
+        """Push a new dict or empty layer to the stack and return that layer"""
+        data = data or {}
+        self._stack.append(data)
         self.current = self._stack[-1]
+        return data
 
     def to_dict(self):
-        """
-        Convert the context into a dict. This skips the globals.
-        """
+        """Convert the context into a dict. This skips the globals."""
         result = {}
         for layer in self._stack[1:]:
             for key, value in layer.iteritems():
@@ -155,12 +146,14 @@
         # don't give access to jinja internal variables
         if name.startswith('::'):
             return Undefined
-        for d in _reversed(self._stack):
+        # because the stack is usually quite small we better use [::-1]
+        # which is faster than reversed() somehow.
+        for d in self._stack[::-1]:
             if name in d:
                 rv = d[name]
                 if isinstance(rv, Deferred):
                     rv = rv(self, name)
-                    # never tough the globals!
+                    # never touch the globals!
                     if d is self.globals:
                         self.initial[name] = rv
                     else:
@@ -201,6 +194,10 @@
             self.push(seq)
 
     def push(self, seq):
+        """
+        Push a sequence to the loop stack. This is used by the
+        recursive for loop.
+        """
         if seq in (Undefined, None):
             seq = ()
         self._stack.append({
@@ -208,8 +205,10 @@
             'seq':              seq,
             'length':           len(seq)
         })
+        return self
 
     def pop(self):
+        """Remove the last layer from the loop stack."""
         return self._stack.pop()
 
     iterated = property(lambda s: s._stack[-1]['index'] > -1)
@@ -235,7 +234,8 @@
     def __call__(self, seq):
         if self.loop_function is not None:
             return self.loop_function(seq)
-        return Undefined
+        raise TemplateRuntimeError('Loops are just callable if defined with '
+                                   'the "recursive" modifier.')
 
 
 class CycleContext(object):
@@ -245,6 +245,7 @@
 
     def __init__(self, seq=None):
         self.lineno = -1
+        # bind the correct helper function based on the constructor signature
         if seq is not None:
             self.seq = seq
             self.length = len(seq)
@@ -253,10 +254,12 @@
             self.cycle = self.cycle_dynamic
 
     def cycle_static(self):
+        """Helper function for static cycling."""
         self.lineno = (self.lineno + 1) % self.length
         return self.seq[self.lineno]
 
     def cycle_dynamic(self, seq):
+        """Helper function for dynamic cycling."""
         self.lineno = (self.lineno + 1) % len(seq)
         return seq[self.lineno]
 
diff --git a/jinja/environment.py b/jinja/environment.py
index 5c478f1..9943afc 100644
--- a/jinja/environment.py
+++ b/jinja/environment.py
@@ -12,7 +12,7 @@
 from jinja.lexer import Lexer
 from jinja.parser import Parser
 from jinja.loaders import LoaderWrapper
-from jinja.datastructure import Undefined, Context
+from jinja.datastructure import Undefined, Context, Markup
 from jinja.utils import escape
 from jinja.exceptions import FilterNotFound, TestNotFound, SecurityException
 from jinja.defaults import DEFAULT_FILTERS, DEFAULT_TESTS, DEFAULT_NAMESPACE
@@ -92,7 +92,9 @@
         """
         Convert a value to unicode with the rules defined on the environment.
         """
-        if isinstance(value, unicode):
+        if value in (None, Undefined):
+            return u''
+        elif isinstance(value, unicode):
             return value
         else:
             try:
@@ -131,19 +133,22 @@
             return not rv
         return bool(rv)
 
-    def get_attribute(self, obj, name):
+    def get_attribute(self, obj, attributes):
         """
-        Get the attribute name from obj.
+        Resolve a chain of item/attribute names, starting at obj.
         """
-        try:
-            return obj[name]
-        except (TypeError, KeyError, IndexError):
-            if hasattr(obj, name):
+        node = obj
+        for name in attributes:
+            try:
+                node = node[name]
+            except (TypeError, KeyError, IndexError):
+                if not hasattr(node, name):
+                    return Undefined
-                r = getattr(obj, 'jinja_allowed_attributes', None)
+                r = getattr(node, 'jinja_allowed_attributes', None)
                 if r is not None and name not in r:
                     raise SecurityException('unsafe attributed %r accessed' % name)
-                return getattr(obj, name)
-        return Undefined
+                node = getattr(node, name)
+        return node
 
     def call_function(self, f, args, kwargs, dyn_args, dyn_kwargs):
         """
@@ -163,7 +168,8 @@
         """
         Function call without arguments.
         """
-        if getattr(f, 'jinja_unsafe_call', False):
+        if getattr(f, 'jinja_unsafe_call', False) or \
+           getattr(f, 'alters_data', False):
             raise SecurityException('unsafe function %r called' % f.__name__)
         return f()
 
@@ -173,8 +179,12 @@
         evaluator the source generated by the python translator will
         call this function for all variables.
         """
-        if value is Undefined:
+        if value is Undefined or value is None:
             return u''
-        elif self.auto_escape:
+        elif isinstance(value, (int, float, Markup, bool)):
+            return unicode(value)
+        elif not isinstance(value, unicode):
+            value = self.to_unicode(value)
+        if self.auto_escape:
             return escape(value, True)
-        return unicode(value)
+        return value
diff --git a/jinja/exceptions.py b/jinja/exceptions.py
index ef8f5cf..8b3d667 100644
--- a/jinja/exceptions.py
+++ b/jinja/exceptions.py
@@ -62,7 +62,3 @@
     Raised by the template engine if a tag encountered an error when
     rendering.
     """
-
-    def __init__(self, message, lineno):
-        RuntimeError.__init__(self, message)
-        self.lineno = lineno
diff --git a/jinja/loaders.py b/jinja/loaders.py
index 734eedb..0411bc3 100644
--- a/jinja/loaders.py
+++ b/jinja/loaders.py
@@ -10,10 +10,14 @@
 """
 
 import codecs
+import sha
+import time
 from os import path
+from threading import Lock
 from jinja.parser import Parser
-from jinja.translators.python import PythonTranslator
+from jinja.translators.python import PythonTranslator, Template
 from jinja.exceptions import TemplateNotFound
+from jinja.utils import CacheDict
 
 
 __all__ = ['FileSystemLoader']
@@ -27,6 +31,14 @@
                      if p and p[0] != '.']))
 
 
+def get_template_cachename(cachepath, name):
+    """
+    Return the filename for a cached file.
+    """
+    return path.join(cachepath, 'jinja_%s.cache' %
+                     sha.new('jinja(%s)tmpl' % name).hexdigest())
+
+
 class LoaderWrapper(object):
     """
     Wraps a loader so that it's bound to an environment.
@@ -87,24 +99,42 @@
     =================== =================================================
     ``searchpath``      String with the path to the templates on the
                         filesystem.
-    ``use_cache``       Set this to ``True`` to enable memory caching.
+    ``use_memcache``    Set this to ``True`` to enable memory caching.
                         This is usually a good idea in production mode,
                         but disable it during development since it won't
                         reload template changes automatically.
                         This only works in persistent environments like
                         FastCGI.
-    ``cache_size``      Number of template instance you want to cache.
+    ``memcache_size``   Number of template instance you want to cache.
                         Defaults to ``40``.
+    ``cache_folder``    Set this to an existing directory to enable
+                        caching of templates on the file system. Note
+                        that this only affects templates transformed
+                        into python code. Default is ``None`` which means
+                        that caching is disabled.
+    ``auto_reload``     Set this to `False` for a slightly better
+                        performance. In that case Jinja won't check for
+                        template changes on the filesystem.
     =================== =================================================
     """
 
-    def __init__(self, searchpath, use_cache=False, cache_size=40):
+    def __init__(self, searchpath, use_memcache=False, memcache_size=40,
+                 cache_folder=None, auto_reload=True):
         self.searchpath = searchpath
-        self.use_cache = use_cache
-        self.cache_size = cache_size
-        self.cache = {}
+        self.use_memcache = use_memcache
+        if use_memcache:
+            self.memcache = CacheDict(memcache_size)
+        else:
+            self.memcache = None
+        self.cache_folder = cache_folder
+        self.auto_reload = auto_reload
+        self._times = {}
+        self._lock = Lock()
 
     def get_source(self, environment, name, parent):
+        """
+        Get the source code of a template.
+        """
         filename = get_template_filename(self.searchpath, name)
         if path.exists(filename):
             f = codecs.open(filename, 'r', environment.template_charset)
@@ -116,17 +146,73 @@
             raise TemplateNotFound(name)
 
     def parse(self, environment, name, parent):
+        """
+        Load and parse a template
+        """
         source = self.get_source(environment, name, parent)
         return Parser(environment, source, name).parse()
 
     def load(self, environment, name, translator):
-        if self.use_cache:
-            key = (name, translator)
-            if key in self.cache:
-                return self.cache[key]
-            if len(self.cache) >= self.cache_size:
-                self.cache.clear()
-        rv = translator.process(environment, self.parse(environment, name, None))
-        if self.use_cache:
-            self.cache[key] = rv
-        return rv
+        """
+        Load, parse and translate a template, using the enabled caches.
+        """
+        self._lock.acquire()
+        try:
+            # caching is only possible for the python translator. skip
+            # all other translators
+            if translator is not PythonTranslator:
+                return translator.process(environment, self.parse(environment, name, None))
+            tmpl = None
+
+            # auto reload enabled? check for the last change of the
+            # template and report a missing file like get_source does.
+            last_change = None
+            if self.auto_reload:
+                try:
+                    last_change = path.getmtime(get_template_filename(self.searchpath, name))
+                except OSError:
+                    raise TemplateNotFound(name)
+
+            # check if we have something in the memory cache and the
+            # memory cache is enabled. outdated entries are ignored.
+            if self.use_memcache and name in self.memcache:
+                tmpl = self.memcache[name]
+                if last_change is not None and last_change > self._times[name]:
+                    tmpl = None
+            from_memory = tmpl is not None
+
+            # if diskcache is enabled look for an already compiled template
+            # that is not older than the template source.
+            cache_filename = None
+            if self.cache_folder is not None:
+                cache_filename = get_template_cachename(self.cache_folder, name)
+            if tmpl is None and cache_filename is not None:
+                try:
+                    if last_change is None or \
+                       last_change <= path.getmtime(cache_filename):
+                        f = file(cache_filename, 'rb')
+                        try:
+                            tmpl = Template.load(environment, f)
+                        finally:
+                            f.close()
+                except (OSError, IOError):
+                    tmpl = None
+
+            # no template so far, parse, translate and compile it and
+            # save the compiled template if the diskcache is enabled.
+            if tmpl is None:
+                tmpl = translator.process(environment, self.parse(environment, name, None))
+                if cache_filename is not None:
+                    f = file(cache_filename, 'wb')
+                    try:
+                        tmpl.dump(f)
+                    finally:
+                        f.close()
+
+            # if memcaching is enabled push the template
+            if self.use_memcache and not from_memory:
+                self._times[name] = time.time()
+                self.memcache[name] = tmpl
+            return tmpl
+        finally:
+            self._lock.release()
diff --git a/jinja/nodes.py b/jinja/nodes.py
index 72423db..fbcd9ed 100644
--- a/jinja/nodes.py
+++ b/jinja/nodes.py
@@ -190,8 +190,9 @@
 
     def get_items(self):
         result = [self.name]
-        for item in self.arguments:
-            result.extend(item)
+        if self.arguments:
+            for item in self.arguments:
+                result.extend(item)
         result.append(self.body)
         return result
 
diff --git a/jinja/parser.py b/jinja/parser.py
index 064419a..2b0f581 100644
--- a/jinja/parser.py
+++ b/jinja/parser.py
@@ -202,8 +202,12 @@
         if ast.varargs or ast.kwargs:
             raise TemplateSyntaxError('variable length macro signature '
                                       'not allowed.', lineno)
-        defaults = [None] * (len(ast.argnames) - len(ast.defaults)) + ast.defaults
-        return nodes.Macro(lineno, ast.name, zip(ast.argnames, defaults), body)
+        if ast.argnames:
+            defaults = [None] * (len(ast.argnames) - len(ast.defaults)) + ast.defaults
+            args = zip(ast.argnames, defaults)
+        else:
+            args = None
+        return nodes.Macro(lineno, ast.name, args, body)
 
     def handle_block_directive(self, lineno, gen):
         """
diff --git a/jinja/translators/python.py b/jinja/translators/python.py
index 3128822..7553209 100644
--- a/jinja/translators/python.py
+++ b/jinja/translators/python.py
@@ -35,15 +35,23 @@
         self.code = code
         self.generate_func = None
 
-    def dump(self, filename):
+    def dump(self, stream=None):
         """Dump the template into python bytecode."""
-        from marshal import dumps
-        return dumps(self.code)
+        if stream is not None:
+            from marshal import dump
+            dump(self.code, stream)
+        else:
+            from marshal import dumps
+            return dumps(self.code)
 
     def load(environment, data):
         """Load the template from python bytecode."""
-        from marshal import loads
-        code = loads(data)
+        if isinstance(data, basestring):
+            from marshal import loads
+            code = loads(data)
+        else:
+            from marshal import load
+            code = load(data)
         return Template(environment, code)
     load = staticmethod(load)
 
@@ -53,10 +61,8 @@
             ns = {}
             exec self.code in ns
             self.generate_func = ns['generate']
-        result = []
         ctx = self.environment.context_class(self.environment, *args, **kwargs)
-        self.generate_func(ctx, result.append)
-        return u''.join(result)
+        return u''.join(self.generate_func(ctx))
 
 
 class PythonTranslator(Translator):
@@ -201,29 +207,41 @@
 
     def handle_template(self, node):
         """
-        Handle the overall template node. This node is the first node and ensures
-        that we get the bootstrapping code. It also knows about inheritance
-        information. It only occours as outer node, never in the tree itself.
+        Handle the overall template node. This node is the first node and
+        ensures that we get the bootstrapping code. It also knows about
+        inheritance information. It only occurs as outer node, never in
+        the tree itself.
         """
         # if there is a parent template we parse the parent template and
         # update the blocks there. Once this is done we drop the current
         # template in favor of the new one. Do that until we found the
         # root template.
         requirements_todo = []
+        blocks = node.blocks.copy()
+        parent = None
+
         while node.extends is not None:
+            # handle all requirements but not those from the
+            # root template. The root template renders everything so
+            # there is no need for additional requirements
             if node not in requirements_todo:
                 requirements_todo.append(node)
 
-            tmpl = self.environment.loader.parse(node.extends.template,
-                                                 node.filename)
-            # handle block inheritance
-            for block in tmpl.blocks.itervalues():
-                if block.name in node.blocks:
-                    block.replace(node.blocks[block.name])
-            node = tmpl
+            # load the template we inherit from and add not known blocks
+            # to the block registry, make this template the new root.
+            parent = self.environment.loader.parse(node.extends.template,
+                                                   node.filename)
+            for name, block in parent.blocks.iteritems():
+                if name not in blocks:
+                    blocks[name] = block
 
-            if tmpl not in requirements_todo:
-                requirements_todo.append(node)
+            node = parent
+
+        # if there is a parent template, do the inheritance handling now
+        if parent is not None:
+            for name, block in blocks.iteritems():
+                if name in node.blocks:
+                    node.blocks[name].replace(block)
 
         # look up requirements
         requirements = []
@@ -235,8 +253,9 @@
         # bootstrapping code
         lines = [
             'from __future__ import division\n'
-            'from jinja.datastructure import Undefined, LoopContext, CycleContext\n\n'
-            'def generate(context, write):\n'
+            'from jinja.datastructure import Undefined, LoopContext, CycleContext\n'
+            'from jinja.utils import buffereater\n\n'
+            'def generate(context):\n'
             '    # BOOTSTRAPPING CODE\n'
             '    environment = context.environment\n'
             '    get_attribute = environment.get_attribute\n'
@@ -244,7 +263,9 @@
             '    apply_filters = environment.apply_filters\n'
             '    call_function = environment.call_function\n'
             '    call_function_simple = environment.call_function_simple\n'
-            '    finish_var = environment.finish_var'
+            '    finish_var = environment.finish_var\n'
+            '    ctx_push = context.push\n'
+            '    ctx_pop = context.pop\n'
         ]
         self.indention = 1
 
@@ -268,6 +289,7 @@
                 '        return translator.ngettext(s, p, r[n]) % (r or {})'
             )
         lines.append(rv)
+        lines.append('    if False:\n        yield None')
 
         return '\n'.join(lines)
 
@@ -275,7 +297,7 @@
         """
         Handle data around nodes.
         """
-        return self.indent('write(%r)' % node.text)
+        return self.indent('yield %r' % node.text)
 
     def handle_node_list(self, node):
         """
@@ -294,24 +316,21 @@
         """
         buf = []
         write = lambda x: buf.append(self.indent(x))
-        write('context.push()')
+        write('ctx_push()')
 
         # recursive loops
         if node.recursive:
             write('def forloop(seq):')
             self.indention += 1
-            write('loopbuffer = []')
-            write('write = loopbuffer.append')
-            write('context[\'loop\'].push(seq)')
-            write('for %s in context[\'loop\']:' %
+            write('for %s in context[\'loop\'].push(seq):' %
                 self.handle_node(node.item),
             )
 
         # simple loops
         else:
-            write('context[\'loop\'] = LoopContext(%s, context[\'loop\'], None)' %
+            write('context[\'loop\'] = loop = LoopContext(%s, context[\'loop\'], None)' %
                   self.handle_node(node.seq))
-            write('for %s in context[\'loop\']:' %
+            write('for %s in loop:' %
                 self.handle_node(node.item)
             )
 
@@ -321,7 +340,7 @@
         self.indention -= 1
 
         # else part of loop
-        if node.else_ is not None:
+        if node.else_:
             write('if not context[\'loop\'].iterated:')
             self.indention += 1
             buf.append(self.handle_node(node.else_))
@@ -330,12 +349,17 @@
         # call recursive for loop!
         if node.recursive:
             write('context[\'loop\'].pop()')
-            write('return u\'\'.join(loopbuffer)')
+            write('if False:')
+            self.indention += 1
+            write('yield None')
+            self.indention -= 2
+            write('context[\'loop\'] = LoopContext(None, context[\'loop\'], buffereater(forloop))')
+            write('for item in forloop(%s):' % self.handle_node(node.seq))
+            self.indention += 1
+            write('yield item')
             self.indention -= 1
-            write('context[\'loop\'] = LoopContext(None, context[\'loop\'], forloop)')
-            write('write(forloop(%s))' % self.handle_node(node.seq))
 
-        write('context.pop()')
+        write('ctx_pop()')
         return '\n'.join(buf)
 
     def handle_if_condition(self, node):
@@ -382,9 +406,9 @@
         self.indention -= 1
 
         if hardcoded:
-            write('write(finish_var(context.current[%r].cycle()))' % name)
+            write('yield finish_var(context.current[%r].cycle())' % name)
         else:
-            write('write(finish_var(context.current[%r].cycle(%s)))' % (
+            write('yield finish_var(context.current[%r].cycle(%s))' % (
                 name,
                 self.handle_node(node.seq)
             ))
@@ -395,7 +419,7 @@
         """
         Handle a print statement.
         """
-        return self.indent('write(finish_var(%s))' % self.handle_node(node.variable))
+        return self.indent('yield finish_var(%s)' % self.handle_node(node.variable))
 
     def handle_macro(self, node):
         """
@@ -404,26 +428,30 @@
         buf = []
         write = lambda x: buf.append(self.indent(x))
 
-        args = []
-        defaults = []
-        for name, n in node.arguments:
-            args.append('context[\'%s\']' % name)
-            if n is None:
-                defaults.append('Undefined')
-            else:
-                defaults.append(self.handle_node(n))
-
         write('def macro(*args):')
         self.indention += 1
-        write('context.push()')
-        write('%s = (args + %s[len(args):])' % (_to_tuple(args), _to_tuple(defaults)))
-        write('macrobuffer = []')
-        write('write = macrobuffer.append')
+
+        if node.arguments:
+            write('argcount = len(args)')
+            tmp = []
+            for idx, (name, n) in enumerate(node.arguments):
+                tmp.append('\'%s\': (argcount > %d and (args[%d],) or (%s,))[0]' % (
+                    name,
+                    idx,
+                    idx,
+                    n is None and 'Undefined' or self.handle_node(n)
+                ))
+            write('ctx_push({%s})' % ', '.join(tmp))
+        else:
+            write('ctx_push()')
+
         buf.append(self.handle_node(node.body))
-        write('context.pop()')
-        write('return u\'\'.join(macrobuffer)')
-        self.indention -= 1
-        buf.append(self.indent('context[%r] = macro' % node.name))
+        write('ctx_pop()')
+        write('if False:')
+        self.indention += 1
+        write('yield False')
+        self.indention -= 2
+        buf.append(self.indent('context[%r] = buffereater(macro)' % node.name))
 
         return '\n'.join(buf)
 
@@ -444,14 +472,14 @@
         write = lambda x: buf.append(self.indent(x))
         write('def filtered():')
         self.indention += 1
-        write('context.push()')
-        write('buffer = []')
-        write('write = buffer.append')
+        write('ctx_push()')
         buf.append(self.handle_node(node.body))
-        write('context.pop()')
-        write('return u\'\'.join(buffer)')
-        self.indention -= 1
-        write('write(%s)' % self.filter('filtered()', node.filters))
+        write('ctx_pop()')
+        write('if False:')
+        self.indention += 1
+        write('yield None')
+        self.indention -= 2
+        write('yield %s' % self.filter('u\'\'.join(filtered())', node.filters))
         return '\n'.join(buf)
 
     def handle_block(self, node):
@@ -474,9 +502,9 @@
             node.filename or '?',
             node.lineno
         ))
-        write('context.push()')
+        write('ctx_push()')
         buf.append(self.handle_node(node.body))
-        write('context.pop()')
+        write('ctx_pop()')
         buf.append(self.indent('# END OF BLOCK'))
         return '\n'.join(buf)
 
@@ -506,7 +534,7 @@
             replacements = '{%s}' % ', '.join(replacements)
         else:
             replacements = 'None'
-        return self.indent('write(translate(%r, %r, %r, %s))' % (
+        return self.indent('yield translate(%r, %r, %r, %s)' % (
             node.singular,
             node.plural,
             node.indicator,
@@ -607,9 +635,17 @@
         """
         Handle hardcoded attribute access. foo.bar
         """
-        return 'get_attribute(%s, %r)' % (
+        # chain getattrs for speed reasons. the ast nests the outermost
+        # attribute access first, so collect the names inside out and
+        # reverse them afterwards to get the lookup order.
+        path = [repr(node.attrname)]
+        while node.expr.__class__ is ast.Getattr:
+            node = node.expr
+            path.append(repr(node.attrname))
+        path.reverse()
+        return 'get_attribute(%s, %s)' % (
             self.handle_node(node.expr),
-            node.attrname
+            _to_tuple(path)
         )
 
     def handle_ass_tuple(self, node):
diff --git a/jinja/utils.py b/jinja/utils.py
index 6441921..1a732a2 100644
--- a/jinja/utils.py
+++ b/jinja/utils.py
@@ -9,8 +9,14 @@
     :license: BSD, see LICENSE for more details.
 """
 import re
+from types import MethodType, FunctionType
 from jinja.nodes import Trans
-from jinja.datastructure import safe_types, Markup
+from jinja.datastructure import Markup
+
+try:
+    from collections import deque
+except ImportError:
+    deque = None
 
 
 _escape_pairs = {
@@ -28,12 +34,10 @@
 
 def escape(x, attribute=False):
     """
-    Escape an object x which is converted to unicode first.
+    Escape an object x.
     """
-    if type(x) in safe_types:
-        return x
     return Markup(_escape_res[not attribute].sub(lambda m:
-                  _escape_pairs[m.group()], unicode(x)))
+                  _escape_pairs[m.group()], x))
 
 
 def find_translations(environment, source):
@@ -48,3 +52,137 @@
         if node.__class__ is Trans:
             yield node.lineno, node.singular, node.plural
         queue.extend(node.getChildNodes())
+
+
+def buffereater(f):
+    """
+    Used by the python translator to capture output of substreams.
+    (macros, filter sections etc)
+    """
+    def wrapped(*args, **kwargs):
+        return u''.join(f(*args, **kwargs))
+    return wrapped
+
+
+class CacheDict(object):
+    """
+    A dict like object that stores a limited number of items and forgets
+    about the least recently used items::
+
+        >>> cache = CacheDict(3)
+        >>> cache['A'] = 0
+        >>> cache['B'] = 1
+        >>> cache['C'] = 2
+        >>> len(cache)
+        3
+
+    If we now access 'A' again it has a higher priority than B::
+
+        >>> cache['A']
+        0
+
+    If we add a new item 'D' now 'B' will disappear::
+
+        >>> cache['D'] = 3
+        >>> len(cache)
+        3
+        >>> 'B' in cache
+        False
+
+    If you iterate over the object the most recently used item will be
+    yielded first::
+
+        >>> for item in cache:
+        ...     print item
+        D
+        A
+        C
+
+    If you want to iterate the other way round use ``reversed(cache)``.
+
+    Implementation note: This is not a nice way to solve that problem but
+    for smaller capacities it's faster than a linked list.
+    Perfect for template environments where you don't expect too many
+    different keys.
+    """
+
+    def __init__(self, capacity):
+        self.capacity = capacity
+        self._mapping = {}
+
+        # use a deque here if possible
+        if deque is not None:
+            self._queue = deque()
+            self._popleft = self._queue.popleft
+        # python2.3, just use a list
+        else:
+            self._queue = []
+            pop = self._queue.pop
+            self._popleft = lambda: pop(0)
+        # alias all queue methods for faster lookup
+        self._pop = self._queue.pop
+        self._remove = self._queue.remove
+        self._append = self._queue.append
+
+    def copy(self):
+        rv = CacheDict(self.capacity)
+        rv._mapping.update(self._mapping)
+        rv._queue.extend(self._queue)  # fill rv's own queue, aliases stay valid
+        return rv
+
+    def get(self, key, default=None):
+        if key in self:
+            return self[key]
+        return default
+
+    def setdefault(self, key, default=None):
+        if key in self:
+            return self[key]
+        self[key] = default
+        return default
+
+    def clear(self):
+        self._mapping.clear()
+        try:
+            self._queue.clear()
+        except AttributeError:
+            del self._queue[:]  # plain list fallback (python2.3)
+
+    def __contains__(self, key):
+        return key in self._mapping
+
+    def __len__(self):
+        return len(self._mapping)
+
+    def __repr__(self):
+        return '<%s %r>' % (self.__class__.__name__, self._mapping)
+
+    def __getitem__(self, key):
+        rv = self._mapping[key]
+        if self._queue[-1] != key:
+            self._remove(key)
+            self._append(key)
+        return rv
+
+    def __setitem__(self, key, value):
+        if key in self._mapping:
+            self._remove(key)
+        elif len(self._mapping) == self.capacity:
+            del self._mapping[self._popleft()]
+        self._append(key)
+        self._mapping[key] = value
+
+    def __delitem__(self, key):
+        del self._mapping[key]
+        self._remove(key)
+
+    def __iter__(self):
+        try:
+            return reversed(self._queue)
+        except NameError:
+            return iter(self._queue[::-1])
+
+    def __reversed__(self):
+        return iter(self._queue)
+
+    __copy__ = copy
diff --git a/tests/inheritance.py b/tests/inheritance.py
index 8b1ebec..a2275aa 100644
--- a/tests/inheritance.py
+++ b/tests/inheritance.py
@@ -4,10 +4,5 @@
 from jinja.parser import Parser
 from jinja.translators.python import PythonTranslator
 
-print PythonTranslator(e, e.loader.parse('index.html')).translate()
-
-tmpl = e.loader.load('index.html')
-print tmpl.render(navigation_items=[{
-    'url':          '/',
-    'caption':      'Index'
-}])
+tmpl = e.loader.load('c.html')
+print tmpl.render()
diff --git a/tests/layout.py b/tests/layout.py
new file mode 100644
index 0000000..8b1ebec
--- /dev/null
+++ b/tests/layout.py
@@ -0,0 +1,13 @@
+from jinja import Environment, FileSystemLoader
+e = Environment(loader=FileSystemLoader('templates'))
+
+from jinja.parser import Parser
+from jinja.translators.python import PythonTranslator
+
+print PythonTranslator(e, e.loader.parse('index.html')).translate()
+
+tmpl = e.loader.load('index.html')
+print tmpl.render(navigation_items=[{
+    'url':          '/',
+    'caption':      'Index'
+}])
diff --git a/tests/templates/a.html b/tests/templates/a.html
new file mode 100644
index 0000000..bf9c270
--- /dev/null
+++ b/tests/templates/a.html
@@ -0,0 +1,9 @@
+{% block block1 %}from template a.html{% endblock %}
+{% block block2 %}from template a.html{% endblock %}
+{% block block3 %}from template a.html{% endblock %}
+{% block block4 %}
+  nested block from template a.html
+  {% block block5 %}
+    contents of the nested block from a.html
+  {% endblock %}
+{% endblock %}
diff --git a/tests/templates/b.html b/tests/templates/b.html
new file mode 100644
index 0000000..181fb24
--- /dev/null
+++ b/tests/templates/b.html
@@ -0,0 +1,3 @@
+{% extends 'a.html' %}
+{% block block1 %}from template b.html{% endblock %}
+{% block block5 %}contents of nested block from b.html{% endblock %}
diff --git a/tests/templates/c.html b/tests/templates/c.html
new file mode 100644
index 0000000..ffb7236
--- /dev/null
+++ b/tests/templates/c.html
@@ -0,0 +1,3 @@
+{% extends 'b.html' %}
+{% block block2 %}from template c.html{% endblock %}
+{% block block3 %}from template c.html{% endblock %}
diff --git a/www/generate.py b/www/generate.py
index f8111f3..b68818b 100755
--- a/www/generate.py
+++ b/www/generate.py
@@ -14,7 +14,8 @@
 
 formatter = HtmlFormatter(cssclass='syntax', encoding=None, style='pastie')
 
-env = Environment('<%', '%>', '<%=', '%>', loader=FileSystemLoader('.'), trim_blocks=True)
+env = Environment('<%', '%>', '<%=', '%>', loader=FileSystemLoader('.',
+    cache_folder='/tmp'), trim_blocks=True)
 env.filters['pygmentize'] = stringfilter(lambda v, l:
     highlight(v.strip(), get_lexer_by_name(l), formatter))
 
