blob: 5e6b4036bd5f4f5f5e2dc0089e606134f7011749 [file] [log] [blame]
Armin Ronacher07bc6842008-03-31 14:18:49 +02001# -*- coding: utf-8 -*-
2"""
3 jinja2.utils
4 ~~~~~~~~~~~~
5
6 Utility functions.
7
8 :copyright: 2008 by Armin Ronacher.
9 :license: BSD, see LICENSE for more details.
10"""
Christoph Hack80909862008-04-14 01:35:10 +020011import re
12import string
Armin Ronacher814f6c22008-04-17 15:52:23 +020013from collections import deque
14from copy import deepcopy
Armin Ronacher18c6ca02008-04-17 10:03:29 +020015from functools import update_wrapper
16from itertools import imap
Armin Ronacher8edbe492008-04-10 20:43:43 +020017
18
def soft_unicode(s):
    """Coerce `s` to unicode unless it already is a unicode object.

    Instances of unicode subclasses pass the `isinstance` check and are
    returned as they are, so a markup string is not flattened back into
    a plain unicode string.
    """
    if isinstance(s, unicode):
        return s
    return unicode(s)
26
27
def pformat(obj, verbose=False):
    """
    Return a pretty printed representation of `obj`.

    The optional `pretty` library is preferred and receives the `verbose`
    flag; if it cannot be imported the builtin `pprint` module is used
    instead (which ignores `verbose`).
    """
    try:
        from pretty import pretty as render
        return render(obj, verbose=verbose)
    except ImportError:
        from pprint import pformat as render
        return render(obj)
Christoph Hack80909862008-04-14 01:35:10 +020039
40
41_word_split_re = re.compile(r'(\s+)')
42
43_punctuation_re = re.compile(
Armin Ronacher18c6ca02008-04-17 10:03:29 +020044 '^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % (
45 '|'.join(imap(re.escape, ('(', '<', '&lt;'))),
46 '|'.join(imap(re.escape, ('.', ',', ')', '>', '\n', '&gt;')))
Christoph Hack80909862008-04-14 01:35:10 +020047 )
48)
49
50_simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')
51
52
53def urlize(text, trim_url_limit=None, nofollow=False):
54 """
55 Converts any URLs in text into clickable links. Works on http://,
56 https:// and www. links. Links can have trailing punctuation (periods,
57 commas, close-parens) and leading punctuation (opening parens) and
58 it'll still do the right thing.
59
60 If trim_url_limit is not None, the URLs in link text will be limited
61 to trim_url_limit characters.
62
63 If nofollow is True, the URLs in link text will get a rel="nofollow"
64 attribute.
65 """
66 trim_url = lambda x, limit=trim_url_limit: limit is not None \
67 and (x[:limit] + (len(x) >=limit and '...'
68 or '')) or x
69 words = _word_split_re.split(text)
70 nofollow_attr = nofollow and ' rel="nofollow"' or ''
71 for i, word in enumerate(words):
72 match = _punctuation_re.match(word)
73 if match:
74 lead, middle, trail = match.groups()
75 if middle.startswith('www.') or (
76 '@' not in middle and
77 not middle.startswith('http://') and
78 len(middle) > 0 and
79 middle[0] in string.letters + string.digits and (
80 middle.endswith('.org') or
81 middle.endswith('.net') or
82 middle.endswith('.com')
83 )):
84 middle = '<a href="http://%s"%s>%s</a>' % (middle,
85 nofollow_attr, trim_url(middle))
86 if middle.startswith('http://') or \
87 middle.startswith('https://'):
88 middle = '<a href="%s"%s>%s</a>' % (middle,
89 nofollow_attr, trim_url(middle))
90 if '@' in middle and not middle.startswith('www.') and \
91 not ':' in middle and _simple_email_re.match(middle):
92 middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
93 if lead + middle + trail != word:
94 words[i] = lead + middle + trail
95 return u''.join(words)
Armin Ronacher18c6ca02008-04-17 10:03:29 +020096
97
class Markup(unicode):
    """Marks a string as being safe for inclusion in HTML/XML output without
    needing to be escaped.  This implements the `__html__` interface a couple
    of frameworks and web applications use.

    The `escape` function returns markup objects so that double escaping can't
    happen.  If you want to use autoescaping in Jinja just set the finalizer
    of the environment to `escape`.

    Operations that combine a markup object with other strings (`+`, `%`,
    `join`, and string arguments of the wrapped unicode methods) pass the
    other operand through `escape` first and return markup objects again.
    """

    __slots__ = ()

    def __html__(self):
        return self

    def __add__(self, other):
        if hasattr(other, '__html__') or isinstance(other, basestring):
            return self.__class__(unicode(self) + unicode(escape(other)))
        return NotImplemented

    def __radd__(self, other):
        if hasattr(other, '__html__') or isinstance(other, basestring):
            return self.__class__(unicode(escape(other)) + unicode(self))
        return NotImplemented

    def __mul__(self, num):
        if not isinstance(num, (int, long)):
            return NotImplemented
        return self.__class__(unicode.__mul__(self, num))
    __rmul__ = __mul__

    def __mod__(self, arg):
        # wrap the arguments so that they are escaped at the moment the
        # format operation converts them to strings.
        if isinstance(arg, tuple):
            arg = tuple(imap(_MarkupEscapeHelper, arg))
        else:
            arg = _MarkupEscapeHelper(arg)
        return self.__class__(unicode.__mod__(self, arg))

    def __repr__(self):
        return '%s(%s)' % (
            self.__class__.__name__,
            unicode.__repr__(self)
        )

    def join(self, seq):
        return self.__class__(unicode.join(self, imap(escape, seq)))

    def split(self, *args, **kwargs):
        return map(self.__class__, unicode.split(self, *args, **kwargs))

    def rsplit(self, *args, **kwargs):
        return map(self.__class__, unicode.rsplit(self, *args, **kwargs))

    def splitlines(self, *args, **kwargs):
        return map(self.__class__, unicode.splitlines(self, *args, **kwargs))

    def make_wrapper(name, returns_tuple=False):
        """Create a method that calls the unicode method `name` with all
        string-like arguments escaped and converts the result back into
        markup.  With `returns_tuple` every item of the returned tuple is
        converted on its own instead of the tuple as a whole.
        """
        orig = getattr(unicode, name)
        def func(self, *args, **kwargs):
            args = list(args)
            for idx, arg in enumerate(args):
                if hasattr(arg, '__html__') or isinstance(arg, basestring):
                    args[idx] = escape(arg)
            # items() returns a list here, so replacing values is safe
            for key, arg in kwargs.items():
                if hasattr(arg, '__html__') or isinstance(arg, basestring):
                    kwargs[key] = escape(arg)
            rv = orig(self, *args, **kwargs)
            if returns_tuple:
                return tuple(map(self.__class__, rv))
            return self.__class__(rv)
        return update_wrapper(func, orig, ('__name__', '__doc__'))
    for method in '__getitem__', '__getslice__', 'capitalize', \
                  'title', 'lower', 'upper', 'replace', 'ljust', \
                  'rjust', 'lstrip', 'rstrip', 'center', \
                  'strip', 'translate', 'expandtabs', \
                  'swapcase', 'zfill':
        locals()[method] = make_wrapper(method)
    # these return a 3-tuple of strings; passing that tuple to the markup
    # constructor would produce the repr of the tuple as a single string.
    for method in 'partition', 'rpartition':
        locals()[method] = make_wrapper(method, True)
    del method, make_wrapper
173
174
175class _MarkupEscapeHelper(object):
176 """Helper for Markup.__mod__"""
177
178 def __init__(self, obj):
179 self.obj = obj
180
181 __getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x])
182 __unicode__ = lambda s: unicode(escape(s.obj))
183 __str__ = lambda s: str(escape(s.obj))
184 __repr__ = lambda s: str(repr(escape(s.obj)))
185 __int__ = lambda s: int(s.obj)
186 __float__ = lambda s: float(s.obj)
Armin Ronacher814f6c22008-04-17 15:52:23 +0200187
188
class LRUCache(object):
    """A simple LRU Cache implementation.

    Values are stored in a dict; a deque of keys records the usage order
    with the most recently used key at the right end.  Once `capacity`
    keys are stored, adding a new key drops the least recently used one.
    """
    # this is fast for small capacities (something around 200) but doesn't
    # scale.  But as long as it's only used for the database connections in
    # a non request fallback it's fine.

    def __init__(self, capacity):
        self.capacity = capacity
        self._mapping = {}
        self._queue = deque()

        # alias all queue methods for faster lookup.  These are bound to
        # this particular deque, so `self._queue` must never be rebound
        # afterwards -- modify it in place instead.
        self._popleft = self._queue.popleft
        self._pop = self._queue.pop
        if hasattr(self._queue, 'remove'):
            self._remove = self._queue.remove
        self._append = self._queue.append

    def _remove(self, obj):
        """Python 2.4 compatibility."""
        for idx, item in enumerate(self._queue):
            if item == obj:
                del self._queue[idx]
                break

    def copy(self):
        """Return a shallow copy of the instance."""
        rv = LRUCache(self.capacity)
        rv._mapping.update(self._mapping)
        # deques cannot be sliced, and rebinding `rv._queue` would leave
        # the method aliases created in `__init__` pointing at the old,
        # empty deque.  Filling the existing deque avoids both problems.
        rv._queue.extend(self._queue)
        return rv

    def get(self, key, default=None):
        """Return an item from the cache dict or `default`"""
        if key in self:
            return self[key]
        return default

    def setdefault(self, key, default=None):
        """
        Set `default` if the key is not in the cache otherwise
        leave unchanged. Return the value of this key.
        """
        if key in self:
            return self[key]
        self[key] = default
        return default

    def clear(self):
        """Clear the cache."""
        self._mapping.clear()
        self._queue.clear()

    def __contains__(self, key):
        """Check if a key exists in this cache."""
        return key in self._mapping

    def __len__(self):
        """Return the current size of the cache."""
        return len(self._mapping)

    def __repr__(self):
        return '<%s %r>' % (
            self.__class__.__name__,
            self._mapping
        )

    def __getitem__(self, key):
        """Get an item from the cache. Moves the item up so that it has the
        highest priority then.

        Raise an `KeyError` if it does not exist.
        """
        rv = self._mapping[key]
        # skip the linear removal if the key is the most recent one already
        if self._queue[-1] != key:
            self._remove(key)
            self._append(key)
        return rv

    def __setitem__(self, key, value):
        """Sets the value for an item. Moves the item up so that it
        has the highest priority then.  If the cache is full and the key
        is new, the least recently used item is dropped.
        """
        if key in self._mapping:
            self._remove(key)
        elif len(self._mapping) == self.capacity:
            del self._mapping[self._popleft()]
        self._append(key)
        self._mapping[key] = value

    def __delitem__(self, key):
        """Remove an item from the cache dict.
        Raise an `KeyError` if it does not exist.
        """
        del self._mapping[key]
        self._remove(key)

    def __iter__(self):
        """Iterate over all keys in the cache dict, ordered by
        the most recent usage.
        """
        return reversed(self._queue)

    def __reversed__(self):
        """Iterate over the keys in the cache dict, oldest items
        coming first.
        """
        return iter(self._queue)

    __copy__ = copy

    def __deepcopy__(self, memo=None):
        """Return a deep copy of the LRU Cache.

        `memo` is the memo dictionary `copy.deepcopy` passes to this
        hook; it is optional so the method can also be called directly.
        """
        if memo is None:
            memo = {}
        rv = LRUCache(self.capacity)
        # register the copy first so references back to this cache from
        # inside the stored values resolve to the new object
        memo[id(self)] = rv
        rv._mapping.update(deepcopy(self._mapping, memo))
        # extend in place to keep the method aliases of `rv` valid
        rv._queue.extend(deepcopy(self._queue, memo))
        return rv
Armin Ronacherbd33f112008-04-18 09:17:32 +0200306
307
# we have to import it down here as the speedups module imports the
# markup type which is defined above.
try:
    from jinja2._speedups import escape
except ImportError:
    def escape(obj):
        """Convert the characters &, <, >, and " in string s to HTML-safe
        sequences.  Use this if you need to display text that might contain
        such characters in HTML.

        Objects that provide an `__html__` method are not escaped; the
        result of that method is returned instead.  Everything else is
        converted to unicode, escaped and returned as `Markup`.
        """
        if hasattr(obj, '__html__'):
            return obj.__html__()
        text = unicode(obj)
        # the ampersand has to be replaced first, otherwise the entities
        # inserted by the later replacements would be escaped again.
        for char, entity in (('&', '&amp;'), ('>', '&gt;'),
                             ('<', '&lt;'), ('"', '&quot;')):
            text = text.replace(char, entity)
        return Markup(text)
325 )