# blob: 9437023c93e26f319904b25e4a7e3609a281ddfc
# -*- coding: utf-8 -*-
"""
    jinja2.utils
    ~~~~~~~~~~~~

    Utility functions.

    :copyright: 2008 by Armin Ronacher.
    :license: BSD, see LICENSE for more details.
"""
Christoph Hack80909862008-04-14 01:35:10 +020011import re
Benjamin Wiegand96828552008-05-03 22:27:29 +020012import sys
Christoph Hack80909862008-04-14 01:35:10 +020013import string
Armin Ronacher000b4912008-05-01 18:40:15 +020014try:
15 from thread import allocate_lock
16except ImportError:
17 from dummy_thread import allocate_lock
Armin Ronacher76c280b2008-05-04 12:31:48 +020018from htmlentitydefs import name2codepoint
Armin Ronacher814f6c22008-04-17 15:52:23 +020019from collections import deque
20from copy import deepcopy
Armin Ronacher18c6ca02008-04-17 10:03:29 +020021from itertools import imap
Armin Ronacher8edbe492008-04-10 20:43:43 +020022
23
Armin Ronacherbe4ae242008-04-18 09:49:08 +020024_word_split_re = re.compile(r'(\s+)')
25_punctuation_re = re.compile(
26 '^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % (
27 '|'.join(imap(re.escape, ('(', '<', '&lt;'))),
28 '|'.join(imap(re.escape, ('.', ',', ')', '>', '\n', '&gt;')))
29 )
30)
31_simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')
Armin Ronacher76c280b2008-05-04 12:31:48 +020032_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
33_entity_re = re.compile(r'&([^;]+);')
34_entities = name2codepoint.copy()
35_entities['apos'] = 39
Armin Ronacherbe4ae242008-04-18 09:49:08 +020036
# special singleton representing missing values for the runtime.  It is
# the only instance of an anonymous class whose repr is ``missing``.
missing = type('MissingType', (), {'__repr__': lambda x: 'missing'})()
39
40
Armin Ronacher7ceced52008-05-03 10:15:31 +020041# concatenate a list of strings and convert them to unicode.
42# unfortunately there is a bug in python 2.4 and lower that causes
43# unicode.join trash the traceback.
Armin Ronachercda43df2008-05-03 17:10:05 +020044_concat = u''.join
Armin Ronacher7ceced52008-05-03 10:15:31 +020045try:
46 def _test_gen_bug():
47 raise TypeError(_test_gen_bug)
48 yield None
Armin Ronachercda43df2008-05-03 17:10:05 +020049 _concat(_test_gen_bug())
Armin Ronacher7ceced52008-05-03 10:15:31 +020050except TypeError, _error:
Armin Ronachercda43df2008-05-03 17:10:05 +020051 if not _error.args or _error.args[0] is not _test_gen_bug:
Armin Ronacher7ceced52008-05-03 10:15:31 +020052 def concat(gen):
53 try:
Armin Ronachercda43df2008-05-03 17:10:05 +020054 return _concat(list(gen))
Armin Ronacher7ceced52008-05-03 10:15:31 +020055 except:
56 # this hack is needed so that the current frame
57 # does not show up in the traceback.
58 exc_type, exc_value, tb = sys.exc_info()
59 raise exc_type, exc_value, tb.tb_next
Armin Ronachercda43df2008-05-03 17:10:05 +020060 else:
61 concat = _concat
Armin Ronacher7ceced52008-05-03 10:15:31 +020062 del _test_gen_bug, _error
63
64
def contextfunction(f):
    """This decorator can be used to mark a callable as context callable.  A
    context callable is passed the active context as first argument if it
    was directly stored in the context.

    The callable is marked by setting its ``contextfunction`` attribute to
    `True`; the same object is returned.
    """
    f.contextfunction = True
    return f
72
73
def environmentfunction(f):
    """This decorator can be used to mark a callable as environment callable.
    An environment callable is passed the current environment as first
    argument if it was directly stored in the context.

    The callable is marked by setting its ``environmentfunction`` attribute
    to `True`; the same object is returned.
    """
    f.environmentfunction = True
    return f
81
82
def clear_caches():
    """Jinja2 keeps internal caches for environments and lexers.  These are
    used so that Jinja2 doesn't have to recreate environments and lexers all
    the time.  Normally you don't have to care about that but if you are
    measuring memory consumption you may want to clean the caches.
    """
    # imported here rather than at module level
    # NOTE(review): presumably to avoid a circular import -- confirm.
    from jinja2.environment import _spontaneous_environments
    from jinja2.lexer import _lexer_cache
    _spontaneous_environments.clear()
    _lexer_cache.clear()
93
94
def import_string(import_name, silent=False):
    """Imports an object based on a string.  This is useful if you want to
    use import paths as endpoints or something similar.  An import path can
    be specified either in dotted notation (``xml.sax.saxutils.escape``)
    or with a colon as object delimiter (``xml.sax.saxutils:escape``).
    A name without dot or colon is imported as a module.

    If `silent` is True the return value will be `None` if the import
    fails; otherwise the `ImportError` or `AttributeError` is re-raised.

    :return: imported object
    """
    try:
        if ':' in import_name:
            module, obj = import_name.split(':', 1)
        elif '.' in import_name:
            # everything up to the last dot is the module path
            module, obj = import_name.rsplit('.', 1)
        else:
            return __import__(import_name)
        # the non-empty fromlist makes __import__ return the innermost
        # module instead of the top-level package.
        return getattr(__import__(module, None, None, [obj]), obj)
    except (ImportError, AttributeError):
        if not silent:
            raise
    return None
Armin Ronacher9a027f42008-04-17 11:13:40 +0200119
120
def pformat(obj, verbose=False):
    """Prettyprint an object.  Either use the `pretty` library or the
    builtin `pprint`.

    `verbose` is only forwarded to `pretty`; the `pprint` fallback
    ignores it.
    """
    # only the import is guarded: an ImportError raised while `pretty`
    # formats the object must not silently switch to the fallback.
    try:
        from pretty import pretty
    except ImportError:
        from pprint import pformat as _pformat
        return _pformat(obj)
    return pretty(obj, verbose=verbose)
Christoph Hack80909862008-04-14 01:35:10 +0200131
132
def urlize(text, trim_url_limit=None, nofollow=False):
    """Converts any URLs in text into clickable links.  Works on http://,
    https:// and www. links as well as bare domains ending in ``.org``,
    ``.net`` or ``.com``.  Links can have trailing punctuation (periods,
    commas, close-parens) and leading punctuation (opening parens) and
    it'll still do the right thing.  Simple e-mail addresses become
    ``mailto:`` links.

    If trim_url_limit is not None, the URLs in link text will be limited
    to trim_url_limit characters.

    If nofollow is True, the URLs in link text will get a rel="nofollow"
    attribute.

    The text is not escaped here.
    """
    def trim_url(url):
        # shorten the visible link text; an ellipsis is appended as soon
        # as the URL is at least `trim_url_limit` characters long.
        if trim_url_limit is None:
            return url
        if len(url) >= trim_url_limit:
            return url[:trim_url_limit] + '...'
        return url[:trim_url_limit]

    if nofollow:
        nofollow_attr = ' rel="nofollow"'
    else:
        nofollow_attr = ''

    words = _word_split_re.split(text)
    for i, word in enumerate(words):
        match = _punctuation_re.match(word)
        if not match:
            continue
        lead, middle, trail = match.groups()
        has_scheme = middle.startswith('http://') or \
                     middle.startswith('https://')
        # schemeless links: "www.…" or a bare "….org/.net/.com" domain.
        # Tokens that already carry a scheme are excluded so that
        # "https://example.com" is not prefixed with "http://".
        if middle.startswith('www.') or (
            '@' not in middle and
            not has_scheme and
            len(middle) > 0 and
            middle[0] in string.ascii_letters + string.digits and (
                middle.endswith('.org') or
                middle.endswith('.net') or
                middle.endswith('.com')
            )):
            middle = '<a href="http://%s"%s>%s</a>' % (middle,
                nofollow_attr, trim_url(middle))
        # re-evaluated on purpose: after the branch above `middle` starts
        # with "<a " and is skipped here.
        if middle.startswith('http://') or \
           middle.startswith('https://'):
            middle = '<a href="%s"%s>%s</a>' % (middle,
                nofollow_attr, trim_url(middle))
        if '@' in middle and not middle.startswith('www.') and \
           not ':' in middle and _simple_email_re.match(middle):
            middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
        if lead + middle + trail != word:
            words[i] = lead + middle + trail
    return u''.join(words)
Armin Ronacher18c6ca02008-04-17 10:03:29 +0200175
176
def generate_lorem_ipsum(n=5, html=True, min=20, max=100):
    """Generate some lorem ipsum for the template.

    :param n: number of paragraphs to generate.
    :param html: if true each paragraph is escaped and wrapped in ``<p>``
                 and the result is returned as `Markup`; otherwise the
                 paragraphs are joined with blank lines.
    :param min: lower bound for the words per paragraph, passed to
                `random.randrange` as start.
    :param max: upper bound for the words per paragraph, passed to
                `random.randrange` as (exclusive) stop.
    """
    from jinja2.constants import LOREM_IPSUM_WORDS
    from random import choice, randrange
    words = LOREM_IPSUM_WORDS.split()
    result = []

    for _ in xrange(n):
        next_capitalized = True
        last_comma = last_fullstop = 0
        last = None
        p = []

        # each paragraph consists of randrange(min, max) words.
        for idx in xrange(randrange(min, max)):
            # never use the same word twice in a row
            while True:
                word = choice(words)
                if word != last:
                    last = word
                    break
            if next_capitalized:
                word = word.capitalize()
                next_capitalized = False
            # add commas
            if idx - randrange(3, 8) > last_comma:
                last_comma = idx
                # a comma pushes the next full stop a little further away
                last_fullstop += 2
                word += ','
            # add end of sentences
            if idx - randrange(10, 20) > last_fullstop:
                last_comma = last_fullstop = idx
                word += '.'
                next_capitalized = True
            p.append(word)

        # ensure that the paragraph ends with a dot.
        p = u' '.join(p)
        if p.endswith(','):
            p = p[:-1] + '.'
        elif not p.endswith('.'):
            p += '.'
        result.append(p)

    if not html:
        return u'\n\n'.join(result)
    return Markup(u'\n'.join(u'<p>%s</p>' % escape(x) for x in result))
224
225
class Markup(unicode):
    """Marks a string as being safe for inclusion in HTML/XML output without
    needing to be escaped.  This implements the `__html__` interface a couple
    of frameworks and web applications use.

    The `escape` function returns markup objects so that double escaping can't
    happen.  If you want to use autoescaping in Jinja just set the finalizer
    of the environment to `escape`.

    Operations that combine a markup object with other values (``+``, ``%``,
    `join` and the wrapped string methods) pass string-like operands through
    `escape` first and return instances of the same class.
    """
    __slots__ = ()

    def __html__(self):
        return self

    def __add__(self, other):
        if hasattr(other, '__html__') or isinstance(other, basestring):
            return self.__class__(unicode(self) + unicode(escape(other)))
        return NotImplemented

    def __radd__(self, other):
        if hasattr(other, '__html__') or isinstance(other, basestring):
            return self.__class__(unicode(escape(other)) + unicode(self))
        return NotImplemented

    def __mul__(self, num):
        if not isinstance(num, (int, long)):
            return NotImplemented
        return self.__class__(unicode.__mul__(self, num))
    __rmul__ = __mul__

    def __mod__(self, arg):
        # wrap the arguments so that they are escaped when the format
        # operation converts them to strings.
        if isinstance(arg, tuple):
            arg = tuple(imap(_MarkupEscapeHelper, arg))
        else:
            arg = _MarkupEscapeHelper(arg)
        return self.__class__(unicode.__mod__(self, arg))

    def __repr__(self):
        return '%s(%s)' % (
            self.__class__.__name__,
            unicode.__repr__(self)
        )

    def join(self, seq):
        return self.__class__(unicode.join(self, imap(escape, seq)))
    join.__doc__ = unicode.join.__doc__

    def split(self, *args, **kwargs):
        return map(self.__class__, unicode.split(self, *args, **kwargs))
    split.__doc__ = unicode.split.__doc__

    def rsplit(self, *args, **kwargs):
        return map(self.__class__, unicode.rsplit(self, *args, **kwargs))
    rsplit.__doc__ = unicode.rsplit.__doc__

    def splitlines(self, *args, **kwargs):
        return map(self.__class__, unicode.splitlines(self, *args, **kwargs))
    splitlines.__doc__ = unicode.splitlines.__doc__

    def unescape(self):
        """Unescape markup.  Named and numeric entities are replaced by
        the character they stand for, unknown or invalid entities are
        removed.  The return value is a plain unicode string.
        """
        def handle_match(m):
            name = m.group(1)
            if name in _entities:
                return unichr(_entities[name])
            try:
                if name[:2] in ('#x', '#X'):
                    return unichr(int(name[2:], 16))
                elif name.startswith('#'):
                    return unichr(int(name[1:]))
            except (ValueError, OverflowError):
                # not a number, or a number that is no valid code point
                # (absurdly large references overflow instead of
                # raising ValueError).
                pass
            return u''
        return _entity_re.sub(handle_match, unicode(self))

    def striptags(self):
        """Strip tags and resolve entities.  Runs of whitespace are
        collapsed into single spaces.
        """
        stripped = u' '.join(_striptags_re.sub('', self).split())
        return Markup(stripped).unescape()

    def make_wrapper(name):
        # build a method that calls the `unicode` method of the same name
        # with escaped string arguments and rewraps the result.
        orig = getattr(unicode, name)
        def func(self, *args, **kwargs):
            args = list(args)
            for idx, arg in enumerate(args):
                if hasattr(arg, '__html__') or isinstance(arg, basestring):
                    args[idx] = escape(arg)
            # only existing keys are reassigned, so changing the dict
            # while iterating over it is safe here.
            for key, arg in kwargs.iteritems():
                if hasattr(arg, '__html__') or isinstance(arg, basestring):
                    kwargs[key] = escape(arg)
            return self.__class__(orig(self, *args, **kwargs))
        func.__name__ = orig.__name__
        func.__doc__ = orig.__doc__
        return func
    # the class body namespace is a real dict, so the wrappers can be
    # stored in it by name.
    for method in '__getitem__', '__getslice__', 'capitalize', \
                  'title', 'lower', 'upper', 'replace', 'ljust', \
                  'rjust', 'lstrip', 'rstrip', 'center', 'strip', \
                  'translate', 'expandtabs', 'swapcase', 'zfill':
        locals()[method] = make_wrapper(method)

    # new in python 2.5.  These methods return a 3-tuple, so they can't
    # go through make_wrapper, which would convert the whole tuple into
    # one markup string; every part is wrapped separately instead.
    if hasattr(unicode, 'partition'):
        def partition(self, sep):
            if hasattr(sep, '__html__') or isinstance(sep, basestring):
                sep = escape(sep)
            return tuple(map(self.__class__, unicode.partition(self, sep)))
        partition.__doc__ = unicode.partition.__doc__

        def rpartition(self, sep):
            if hasattr(sep, '__html__') or isinstance(sep, basestring):
                sep = escape(sep)
            return tuple(map(self.__class__, unicode.rpartition(self, sep)))
        rpartition.__doc__ = unicode.rpartition.__doc__
    del method, make_wrapper
333
334
335class _MarkupEscapeHelper(object):
336 """Helper for Markup.__mod__"""
337
338 def __init__(self, obj):
339 self.obj = obj
340
341 __getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x])
342 __unicode__ = lambda s: unicode(escape(s.obj))
343 __str__ = lambda s: str(escape(s.obj))
344 __repr__ = lambda s: str(repr(escape(s.obj)))
345 __int__ = lambda s: int(s.obj)
346 __float__ = lambda s: float(s.obj)
Armin Ronacher814f6c22008-04-17 15:52:23 +0200347
348
class LRUCache(object):
    """A simple LRU Cache implementation.

    The values live in a dict, the usage order of the keys in a deque
    whose right end holds the most recently used key.  Once `capacity`
    keys are stored, adding a new key drops the least recently used one.
    Modifications made through `__setitem__`, `__delitem__` and `clear`
    are guarded by a lock.
    """
    # this is fast for small capacities (something around 200) but doesn't
    # scale, because a key that is used again has to be searched for in
    # the queue before it can be moved to the end.

    def __init__(self, capacity):
        self.capacity = capacity
        self._mapping = {}
        self._queue = deque()

        # alias all queue methods for faster lookup
        self._popleft = self._queue.popleft
        self._pop = self._queue.pop
        # if the deque has no `remove` the `_remove` method below is used
        if hasattr(self._queue, 'remove'):
            self._remove = self._queue.remove
        self._wlock = allocate_lock()
        self._append = self._queue.append

    def _remove(self, obj):
        """Python 2.4 compatibility."""
        for idx, item in enumerate(self._queue):
            if item == obj:
                del self._queue[idx]
                break

    def copy(self):
        """Return a shallow copy of the instance."""
        rv = self.__class__(self.capacity)
        rv._mapping.update(self._mapping)
        # fill the deque created by the constructor instead of replacing
        # it: the aliases set up in __init__ are bound to that object.
        rv._queue.extend(self._queue)
        return rv

    def get(self, key, default=None):
        """Return an item from the cache dict or `default`"""
        try:
            return self[key]
        except KeyError:
            return default

    def setdefault(self, key, default=None):
        """Set `default` if the key is not in the cache otherwise
        leave unchanged. Return the value of this key.
        """
        try:
            return self[key]
        except KeyError:
            self[key] = default
            return default

    def clear(self):
        """Clear the cache."""
        self._wlock.acquire()
        try:
            self._mapping.clear()
            self._queue.clear()
        finally:
            self._wlock.release()

    def __contains__(self, key):
        """Check if a key exists in this cache."""
        return key in self._mapping

    def __len__(self):
        """Return the current size of the cache."""
        return len(self._mapping)

    def __repr__(self):
        return '<%s %r>' % (
            self.__class__.__name__,
            self._mapping
        )

    def __getitem__(self, key):
        """Get an item from the cache. Moves the item up so that it has the
        highest priority then.

        Raise a `KeyError` if it does not exist.
        """
        rv = self._mapping[key]
        # nothing to move if the key is already the most recent one
        if self._queue[-1] != key:
            self._remove(key)
            self._append(key)
        return rv

    def __setitem__(self, key, value):
        """Sets the value for an item. Moves the item up so that it
        has the highest priority then.
        """
        self._wlock.acquire()
        try:
            if key in self._mapping:
                self._remove(key)
            elif len(self._mapping) == self.capacity:
                # cache is full: drop the least recently used key
                del self._mapping[self._popleft()]
            self._append(key)
            self._mapping[key] = value
        finally:
            self._wlock.release()

    def __delitem__(self, key):
        """Remove an item from the cache dict.
        Raise a `KeyError` if it does not exist.
        """
        self._wlock.acquire()
        try:
            del self._mapping[key]
            self._remove(key)
        finally:
            self._wlock.release()

    def __iter__(self):
        """Iterate over all keys in the cache dict, ordered by
        the most recent usage.
        """
        return reversed(self._queue)

    def __reversed__(self):
        """Iterate over the keys in the cache dict, oldest items
        coming first.
        """
        return iter(self._queue)

    __copy__ = copy
473
Armin Ronacherbd33f112008-04-18 09:17:32 +0200474
# we have to import it down here as the speedups module imports the
# markup type which is defined above.
try:
    from jinja2._speedups import escape, soft_unicode
except ImportError:
    def escape(s):
        """Convert the characters &, <, >, and " in string s to HTML-safe
        sequences.  Use this if you need to display text that might contain
        such characters in HTML.

        Objects with an `__html__` method are not escaped; the return
        value of that method is used instead.  Everything else is
        converted to unicode first.  Single quotes are left untouched.
        """
        if hasattr(s, '__html__'):
            return s.__html__()
        # the ampersand has to be replaced first, otherwise the
        # ampersands of the other replacements would be escaped again.
        return Markup(unicode(s)
            .replace('&', '&amp;')
            .replace('>', '&gt;')
            .replace('<', '&lt;')
            .replace('"', '&quot;')
        )

    def soft_unicode(s):
        """Make a string unicode if it isn't already.  That way a markup
        string is not converted back to unicode.
        """
        if not isinstance(s, unicode):
            s = unicode(s)
        return s
Armin Ronacher4f7d2d52008-04-22 10:40:26 +0200501
502
# partials
try:
    from functools import partial
except ImportError:
    class partial(object):
        """Fallback for `functools.partial` on interpreters without the
        `functools` module.  Stores a callable together with positional
        and keyword arguments that are added to every call.
        """

        def __init__(self, _func, *args, **kwargs):
            self._func = _func
            self._args = args
            self._kwargs = kwargs

        def __call__(self, *args, **kwargs):
            # like functools.partial: keyword arguments given at call
            # time override the stored ones.  The stored dict is copied
            # so that neither it nor the caller's dict is modified.
            merged = self._kwargs.copy()
            merged.update(kwargs)
            return self._func(*(self._args + args), **merged)