Armin Ronacher | 07bc684 | 2008-03-31 14:18:49 +0200 | [diff] [blame] | 1 | # -*- coding: utf-8 -*- |
| 2 | """ |
| 3 | jinja2.utils |
| 4 | ~~~~~~~~~~~~ |
| 5 | |
| 6 | Utility functions. |
| 7 | |
| 8 | :copyright: 2008 by Armin Ronacher. |
| 9 | :license: BSD, see LICENSE for more details. |
| 10 | """ |
Christoph Hack | 8090986 | 2008-04-14 01:35:10 +0200 | [diff] [blame] | 11 | import re |
| 12 | import string |
Armin Ronacher | 18c6ca0 | 2008-04-17 10:03:29 +0200 | [diff] [blame] | 13 | from functools import update_wrapper |
| 14 | from itertools import imap |
Armin Ronacher | 8edbe49 | 2008-04-10 20:43:43 +0200 | [diff] [blame] | 15 | |
| 16 | |
def escape(obj, attribute=False):
    """HTML escape an object.

    If `obj` provides an `__html__` method the result of calling it is
    returned as is.  Otherwise the object is converted to unicode, the
    characters ``&``, ``>``, ``<`` and ``"`` are replaced with their HTML
    entities and the result is wrapped in `Markup`.

    The `attribute` parameter is accepted but not used by this
    implementation; double quotes are always escaped.
    """
    if hasattr(obj, '__html__'):
        return obj.__html__()
    return Markup(unicode(obj)
        # the ampersand has to be replaced first, otherwise the entities
        # inserted by the following replacements would be escaped again.
        .replace('&', '&amp;')
        .replace('>', '&gt;')
        .replace('<', '&lt;')
        .replace('"', '&quot;')
    )
Christoph Hack | e9e43bb | 2008-04-13 23:35:48 +0200 | [diff] [blame] | 27 | |
| 28 | |
def soft_unicode(s):
    """Return `s` as a unicode object, converting only when necessary.

    Objects that already are unicode instances (including instances of
    unicode subclasses) are handed back untouched, so a markup string
    keeps its type instead of being turned into plain unicode.
    """
    if isinstance(s, unicode):
        return s
    return unicode(s)
| 36 | |
| 37 | |
def pformat(obj, verbose=False):
    """
    Prettyprint an object.  Either use the `pretty` library or the
    builtin `pprint`.

    `verbose` is only forwarded to the `pretty` library; the `pprint`
    fallback ignores it.  Returns the formatted representation.
    """
    # Only the import itself is guarded.  An ImportError raised while the
    # `pretty` library formats the object must not be mistaken for the
    # library being unavailable.
    try:
        from pretty import pretty
    except ImportError:
        from pprint import pformat as _pprint_pformat
        return _pprint_pformat(obj)
    else:
        return pretty(obj, verbose=verbose)
Christoph Hack | 8090986 | 2008-04-14 01:35:10 +0200 | [diff] [blame] | 49 | |
| 50 | |
| 51 | _word_split_re = re.compile(r'(\s+)') |
| 52 | |
| 53 | _punctuation_re = re.compile( |
Armin Ronacher | 18c6ca0 | 2008-04-17 10:03:29 +0200 | [diff] [blame] | 54 | '^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % ( |
| 55 | '|'.join(imap(re.escape, ('(', '<', '<'))), |
| 56 | '|'.join(imap(re.escape, ('.', ',', ')', '>', '\n', '>'))) |
Christoph Hack | 8090986 | 2008-04-14 01:35:10 +0200 | [diff] [blame] | 57 | ) |
| 58 | ) |
| 59 | |
| 60 | _simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$') |
| 61 | |
| 62 | |
def urlize(text, trim_url_limit=None, nofollow=False):
    """
    Converts any URLs in text into clickable links. Works on http://,
    https:// and www. links. Links can have trailing punctuation (periods,
    commas, close-parens) and leading punctuation (opening parens) and
    it'll still do the right thing.

    If trim_url_limit is not None, the URLs in link text will be limited
    to trim_url_limit characters.  Longer URLs are cut off and marked with
    a trailing ``...``; the `href` always contains the full URL.

    If nofollow is True, the URLs in link text will get a rel="nofollow"
    attribute.

    Words that look like a mail address are turned into ``mailto:`` links.
    The text is split on whitespace and the whitespace is preserved in the
    returned unicode string.
    """
    def trim_url(url):
        # only add the ellipsis if something was actually cut off
        if trim_url_limit is not None and len(url) > trim_url_limit:
            return url[:trim_url_limit] + '...'
        return url

    url_schemes = ('http://', 'https://')
    # ascii only on purpose, `string.letters` depends on the locale
    domain_start_chars = string.ascii_letters + string.digits
    nofollow_attr = ' rel="nofollow"' if nofollow else ''

    words = _word_split_re.split(text)
    for i, word in enumerate(words):
        match = _punctuation_re.match(word)
        if not match:
            continue
        lead, middle, trail = match.groups()

        # a bare domain: no scheme, no mail address, starts with a letter
        # or digit and ends in one of the well known top level domains.
        # Words that already carry a scheme (http *or* https) must not end
        # up here, otherwise a second ``http://`` would be prepended.
        looks_like_domain = (
            '@' not in middle and
            not middle.startswith(url_schemes) and
            len(middle) > 0 and
            middle[0] in domain_start_chars and
            middle.endswith(('.org', '.net', '.com'))
        )

        if middle.startswith('www.') or looks_like_domain:
            middle = '<a href="http://%s"%s>%s</a>' % (middle,
                     nofollow_attr, trim_url(middle))
        elif middle.startswith(url_schemes):
            middle = '<a href="%s"%s>%s</a>' % (middle,
                     nofollow_attr, trim_url(middle))
        elif '@' in middle and ':' not in middle and \
             _simple_email_re.match(middle):
            middle = '<a href="mailto:%s">%s</a>' % (middle, middle)

        replacement = lead + middle + trail
        if replacement != word:
            words[i] = replacement
    return u''.join(words)
Armin Ronacher | 18c6ca0 | 2008-04-17 10:03:29 +0200 | [diff] [blame] | 106 | |
| 107 | |
class Markup(unicode):
    """Marks a string as being safe for inclusion in HTML/XML output without
    needing to be escaped.  This implements the `__html__` interface a couple
    of frameworks and web applications use.

    The `escape` function returns markup objects so that double escaping can't
    happen.  If you want to use autoescaping in Jinja just set the finalizer
    of the environment to `escape`.

    Operations that combine a markup string with other strings (adding,
    string formatting, joining and the wrapped unicode methods) escape the
    other operands and return new `Markup` objects.
    """

    __slots__ = ()

    def __html__(self):
        # markup is safe by definition, hand it out unchanged
        return self

    def __add__(self, other):
        # `escape` leaves objects with `__html__` alone and escapes
        # plain strings, so the result is safe again.
        if hasattr(other, '__html__') or isinstance(other, basestring):
            return self.__class__(unicode(self) + unicode(escape(other)))
        return NotImplemented

    def __radd__(self, other):
        if hasattr(other, '__html__') or isinstance(other, basestring):
            return self.__class__(unicode(escape(other)) + unicode(self))
        return NotImplemented

    def __mul__(self, num):
        if not isinstance(num, (int, long)):
            return NotImplemented
        return self.__class__(unicode.__mul__(self, num))
    __rmul__ = __mul__

    def __mod__(self, arg):
        # wrap the arguments so that they are escaped when the string
        # formatting converts them into strings.
        if isinstance(arg, tuple):
            arg = tuple(imap(_MarkupEscapeHelper, arg))
        else:
            arg = _MarkupEscapeHelper(arg)
        return self.__class__(unicode.__mod__(self, arg))

    def __repr__(self):
        return '%s(%s)' % (
            self.__class__.__name__,
            unicode.__repr__(self)
        )

    def join(self, seq):
        """Join the items of `seq`, escaping each of them first."""
        return self.__class__(unicode.join(self, imap(escape, seq)))

    def split(self, *args, **kwargs):
        """Like `unicode.split` but the parts are markup objects."""
        return map(self.__class__, unicode.split(self, *args, **kwargs))

    def rsplit(self, *args, **kwargs):
        """Like `unicode.rsplit` but the parts are markup objects."""
        return map(self.__class__, unicode.rsplit(self, *args, **kwargs))

    def splitlines(self, *args, **kwargs):
        """Like `unicode.splitlines` but the lines are markup objects."""
        return map(self.__class__, unicode.splitlines(self, *args, **kwargs))

    def make_wrapper(name):
        # Creates a method that calls the unicode method of the same name
        # with escaped string arguments and converts the result back
        # into markup.
        orig = getattr(unicode, name)
        def func(self, *args, **kwargs):
            args = list(args)
            for idx, arg in enumerate(args):
                if hasattr(arg, '__html__') or isinstance(arg, basestring):
                    args[idx] = escape(arg)
            # `items()` returns a list here, so updating the dict while
            # looping over it is safe.
            for key, arg in kwargs.items():
                if hasattr(arg, '__html__') or isinstance(arg, basestring):
                    kwargs[key] = escape(arg)
            rv = orig(self, *args, **kwargs)
            # `partition` and `rpartition` return a tuple of strings.
            # Convert every part on its own instead of turning the repr
            # of the whole tuple into markup.
            if isinstance(rv, tuple):
                return tuple(map(self.__class__, rv))
            return self.__class__(rv)
        return update_wrapper(func, orig, ('__name__', '__doc__'))
    for method in '__getitem__', '__getslice__', 'capitalize', \
                  'title', 'lower', 'upper', 'replace', 'ljust', \
                  'rjust', 'lstrip', 'rstrip', 'partition', 'center', \
                  'strip', 'translate', 'expandtabs', 'rpartition', \
                  'swapcase', 'zfill':
        locals()[method] = make_wrapper(method)
    # the helpers must not end up as attributes of the class
    del method, make_wrapper
| 183 | |
| 184 | |
| 185 | class _MarkupEscapeHelper(object): |
| 186 | """Helper for Markup.__mod__""" |
| 187 | |
| 188 | def __init__(self, obj): |
| 189 | self.obj = obj |
| 190 | |
| 191 | __getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x]) |
| 192 | __unicode__ = lambda s: unicode(escape(s.obj)) |
| 193 | __str__ = lambda s: str(escape(s.obj)) |
| 194 | __repr__ = lambda s: str(repr(escape(s.obj))) |
| 195 | __int__ = lambda s: int(s.obj) |
| 196 | __float__ = lambda s: float(s.obj) |