# blob: 2e64fe2f762374dc5efbe75e96bc1a37da7ede62 [file] [log] [blame]
# -*- coding: utf-8 -*-
"""
    jinja2.utils
    ~~~~~~~~~~~~

    Utility functions.

    :copyright: 2008 by Armin Ronacher.
    :license: BSD, see LICENSE for more details.
"""
import re
import string
from functools import update_wrapper
from itertools import imap


def escape(obj, attribute=False):
    """HTML escape an object.

    Objects exposing an ``__html__`` method are asked to render themselves
    and the result of that call is returned unchanged.  Anything else is
    converted to unicode, has ``&``, ``>``, ``<`` and ``"`` replaced by the
    matching HTML entities and is wrapped in :class:`Markup`.

    :param obj: the object to escape.
    :param attribute: not used by this function.
    :return: a :class:`Markup` string, or whatever ``obj.__html__()``
             returns.
    """
    if hasattr(obj, '__html__'):
        return obj.__html__()
    # ``&`` has to be replaced first, otherwise the ampersands of the
    # entities inserted by the later replacements would be escaped again.
    return Markup(unicode(obj)
        .replace('&', '&amp;')
        .replace('>', '&gt;')
        .replace('<', '&lt;')
        .replace('"', '&quot;')
    )


def soft_unicode(s):
    """Make a string unicode if it isn't already.

    Instances of ``unicode`` -- subclasses included -- are returned
    untouched.  That way a markup string is not converted back to unicode.

    :param s: any object.
    :return: `s` itself if it is a ``unicode`` instance, otherwise
             ``unicode(s)``.
    """
    if not isinstance(s, unicode):
        s = unicode(s)
    return s


def pformat(obj, verbose=False):
    """
    Prettyprint an object.  Either use the `pretty` library or the
    builtin `pprint`.

    :param obj: the object to format.
    :param verbose: forwarded to ``pretty``; the ``pprint`` fallback does
                    not receive it.
    :return: the formatted representation of `obj`.
    """
    try:
        from pretty import pretty
    except ImportError:
        # imported under another name so this function is not shadowed
        from pprint import pformat as _pformat
        return _pformat(obj)
    # Called outside the ``try`` so that an ImportError raised while
    # formatting is not mistaken for a missing `pretty` library.
    return pretty(obj, verbose=verbose)


# Splits text into words while keeping the whitespace runs as list items.
_word_split_re = re.compile(r'(\s+)')

# Punctuation that may surround a link: `lead` in front of it, `trail`
# behind it.  The entity forms are listed as well -- presumably because the
# text may already be HTML escaped when it gets here (TODO confirm).
_punctuation_re = re.compile(
    '^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % (
        '|'.join(re.escape(x) for x in ('(', '<', '&lt;')),
        '|'.join(re.escape(x) for x in ('.', ',', ')', '>', '\n', '&gt;'))
    )
)

_simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')


def urlize(text, trim_url_limit=None, nofollow=False):
    """
    Converts any URLs in text into clickable links. Works on http://,
    https:// and www. links as well as on bare names ending in .org, .net
    or .com, and turns simple email addresses into mailto: links.  Links
    can have trailing punctuation (periods, commas, close-parens) and
    leading punctuation (opening parens) and it'll still do the right
    thing.

    If trim_url_limit is not None, the URLs in link text will be limited
    to trim_url_limit characters; a shortened URL gets '...' appended.

    If nofollow is True, the URL links will get a rel="nofollow"
    attribute (mailto: links do not).

    Nothing is escaped here; the words are inserted into the markup as
    they are.
    """
    def trim_url(url):
        # only add the ellipsis when something was actually cut off
        if trim_url_limit is not None and len(url) > trim_url_limit:
            return url[:trim_url_limit] + '...'
        return url

    nofollow_attr = ''
    if nofollow:
        nofollow_attr = ' rel="nofollow"'
    # ascii_letters instead of the locale dependent string.letters
    name_start_chars = string.ascii_letters + string.digits

    words = _word_split_re.split(text)
    for i, word in enumerate(words):
        match = _punctuation_re.match(word)
        if not match:
            continue
        lead, middle, trail = match.groups()
        has_scheme = middle.startswith('http://') or \
            middle.startswith('https://')
        if middle.startswith('www.') or (
            '@' not in middle and
            # a word that already carries a scheme must not get a second
            # "http://" put in front of it
            not has_scheme and
            len(middle) > 0 and
            middle[0] in name_start_chars and (
                middle.endswith('.org') or
                middle.endswith('.net') or
                middle.endswith('.com')
            )):
            middle = '<a href="http://%s"%s>%s</a>' % (middle,
                nofollow_attr, trim_url(middle))
        elif has_scheme:
            middle = '<a href="%s"%s>%s</a>' % (middle,
                nofollow_attr, trim_url(middle))
        elif '@' in middle and ':' not in middle and \
             _simple_email_re.match(middle):
            # words starting with "www." never get here, the first branch
            # has already taken them
            middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
        if lead + middle + trail != word:
            words[i] = lead + middle + trail
    return u''.join(words)


class Markup(unicode):
    """Marks a string as being safe for inclusion in HTML/XML output without
    needing to be escaped.  This implements the `__html__` interface a couple
    of frameworks and web applications use.

    The `escape` function returns markup objects so that double escaping can't
    happen.  If you want to use autoescaping in Jinja just set the finalizer
    of the environment to `escape`.

    Operations that combine a markup string with other strings (``+``,
    ``%``, `join` and the wrapped string methods below) pass those other
    strings through `escape` first and return `Markup` again.
    """

    __slots__ = ()

    def __html__(self):
        return self

    def __add__(self, other):
        if hasattr(other, '__html__') or isinstance(other, basestring):
            return self.__class__(unicode(self) + unicode(escape(other)))
        return NotImplemented

    def __radd__(self, other):
        if hasattr(other, '__html__') or isinstance(other, basestring):
            return self.__class__(unicode(escape(other)) + unicode(self))
        return NotImplemented

    def __mul__(self, num):
        if not isinstance(num, (int, long)):
            return NotImplemented
        return self.__class__(unicode.__mul__(self, num))
    __rmul__ = __mul__

    def __mod__(self, arg):
        # wrap the arguments so that they are escaped at the moment the
        # format operation converts them to strings
        if isinstance(arg, tuple):
            arg = tuple([_MarkupEscapeHelper(item) for item in arg])
        else:
            arg = _MarkupEscapeHelper(arg)
        return self.__class__(unicode.__mod__(self, arg))

    def __repr__(self):
        return '%s(%s)' % (
            self.__class__.__name__,
            unicode.__repr__(self)
        )

    def join(self, seq):
        return self.__class__(unicode.join(self,
            [escape(item) for item in seq]))

    def split(self, *args, **kwargs):
        return [self.__class__(item) for item in
                unicode.split(self, *args, **kwargs)]

    def rsplit(self, *args, **kwargs):
        return [self.__class__(item) for item in
                unicode.rsplit(self, *args, **kwargs)]

    def splitlines(self, *args, **kwargs):
        return [self.__class__(item) for item in
                unicode.splitlines(self, *args, **kwargs)]

    def make_wrapper(name, wrap_items=False):
        """Build a `Markup` aware version of the unicode method `name`.

        String arguments (and objects with `__html__`) are escaped before
        the original method is called.  The result is converted to
        `Markup`; with `wrap_items` the original method is expected to
        return a sequence and a tuple of `Markup` objects is returned
        instead.
        """
        orig = getattr(unicode, name)
        def func(self, *args, **kwargs):
            args = list(args)
            for idx, arg in enumerate(args):
                if hasattr(arg, '__html__') or isinstance(arg, basestring):
                    args[idx] = escape(arg)
            for key, arg in list(kwargs.items()):
                if hasattr(arg, '__html__') or isinstance(arg, basestring):
                    kwargs[key] = escape(arg)
            rv = orig(self, *args, **kwargs)
            if wrap_items:
                return tuple([self.__class__(item) for item in rv])
            return self.__class__(rv)
        return update_wrapper(func, orig, ('__name__', '__doc__'))

    for method in '__getitem__', '__getslice__', 'capitalize', \
                  'title', 'lower', 'upper', 'replace', 'ljust', \
                  'rjust', 'lstrip', 'rstrip', 'center', \
                  'strip', 'translate', 'expandtabs', \
                  'swapcase', 'zfill':
        # skip methods the unicode type of the running interpreter lacks
        if hasattr(unicode, method):
            locals()[method] = make_wrapper(method)

    # These return a 3-tuple.  Passing that tuple to the constructor would
    # produce the tuple's repr as markup, so every item is wrapped instead.
    for method in 'partition', 'rpartition':
        if hasattr(unicode, method):
            locals()[method] = make_wrapper(method, wrap_items=True)
    del method, make_wrapper


class _MarkupEscapeHelper(object):
    """Helper for Markup.__mod__

    Wraps one formatting argument.  String conversions of the wrapper
    return the escaped form of the wrapped object, numeric conversions are
    handed to the wrapped object directly and item access returns the item
    wrapped in a helper again.
    """

    def __init__(self, obj):
        # the wrapped formatting argument
        self.obj = obj

    def __getitem__(self, key):
        return _MarkupEscapeHelper(self.obj[key])

    def __unicode__(self):
        return unicode(escape(self.obj))

    def __str__(self):
        return str(escape(self.obj))

    def __repr__(self):
        return str(repr(escape(self.obj)))

    def __int__(self):
        return int(self.obj)

    def __float__(self):
        return float(self.obj)