Armin Ronacher | 07bc684 | 2008-03-31 14:18:49 +0200 | [diff] [blame] | 1 | # -*- coding: utf-8 -*- |
| 2 | """ |
| 3 | jinja2.utils |
| 4 | ~~~~~~~~~~~~ |
| 5 | |
| 6 | Utility functions. |
| 7 | |
| 8 | :copyright: 2008 by Armin Ronacher. |
| 9 | :license: BSD, see LICENSE for more details. |
| 10 | """ |
Christoph Hack | 8090986 | 2008-04-14 01:35:10 +0200 | [diff] [blame^] | 11 | import re |
| 12 | import string |
Armin Ronacher | 8edbe49 | 2008-04-10 20:43:43 +0200 | [diff] [blame] | 13 | |
| 14 | |
# Resolve the text type once: ``unicode`` where it exists as a builtin,
# otherwise ``str``.
try:
    _text_type = unicode
except NameError:
    _text_type = str


def escape(obj, attribute=False):
    """
    HTML escape an object.

    Objects that provide an ``__html__`` method are considered to know
    their own markup representation; the result of that method is
    returned as-is.  Everything else is converted to text and has the
    characters ``&``, ``>``, ``<`` and ``"`` replaced by the matching
    HTML entities.

    :param obj: the object to escape.
    :param attribute: accepted for interface compatibility; it is not
                      consulted here, double quotes are always escaped.
    :return: the escaped text, or whatever ``obj.__html__()`` returns.
    """
    if hasattr(obj, '__html__'):
        return obj.__html__()
    # The ampersand has to be handled first, otherwise the ampersands
    # introduced by the other entities would be escaped a second time.
    return _text_type(obj) \
        .replace('&', '&amp;') \
        .replace('>', '&gt;') \
        .replace('<', '&lt;') \
        .replace('"', '&quot;')
Christoph Hack | e9e43bb | 2008-04-13 23:35:48 +0200 | [diff] [blame] | 24 | |
| 25 | |
def pformat(obj, verbose=False):
    """
    Prettyprint an object.  Either use the `pretty` library or the
    builtin `pprint`.

    :param obj: the object to format.
    :param verbose: forwarded to ``pretty.pretty``; the `pprint`
                    fallback does not receive it.
    :return: the formatted representation of `obj`.
    """
    # Only the import is guarded: an ImportError raised while `pretty`
    # is formatting the object is a real error and must not be mistaken
    # for "library not installed".
    try:
        from pretty import pretty
    except ImportError:
        # Aliased so the import does not shadow this function's own name.
        from pprint import pformat as stdlib_pformat
        return stdlib_pformat(obj)
    return pretty(obj, verbose=verbose)
Christoph Hack | 8090986 | 2008-04-14 01:35:10 +0200 | [diff] [blame^] | 37 | |
| 38 | |
# Splits text on runs of whitespace.  The capturing group keeps the
# whitespace runs in the result list, so joining the pieces again
# reproduces the input exactly.
_word_split_re = re.compile(r'(\s+)')

# Breaks a single word into leading punctuation, the link candidate and
# trailing punctuation.  The escaped forms ``&lt;`` / ``&gt;`` are listed
# next to the raw angle brackets so that text which was HTML-escaped
# beforehand is recognized as well.
_punctuation_re = re.compile(
    '^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % (
        '|'.join([re.escape(p) for p in ('(', '<', '&lt;')]),
        '|'.join([re.escape(p) for p in ('.', ',', ')', '>', '\n', '&gt;')])
    )
)

# Deliberately loose check: something@host.tld without any whitespace.
_simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')


def urlize(text, trim_url_limit=None, nofollow=False):
    """
    Converts any URLs in text into clickable links. Works on http://,
    https:// and www. links, on bare domains ending in .org, .net or
    .com, and on simple e-mail addresses (turned into mailto: links).
    Links can have trailing punctuation (periods, commas, close-parens)
    and leading punctuation (opening parens) and it'll still do the
    right thing.

    If trim_url_limit is not None, the URLs in link text will be limited
    to trim_url_limit characters; "..." is appended only when something
    was actually cut off.  The href always holds the full URL.

    If nofollow is True, the URL links will get a rel="nofollow"
    attribute (mailto: links do not).

    The text is not HTML-escaped by this function; link targets and
    link text are inserted verbatim, so escape untrusted input first.

    :param text: the text to scan for links.
    :param trim_url_limit: maximum length of the visible link text.
    :param nofollow: add rel="nofollow" to generated URL links.
    :return: the text with recognized links wrapped in ``<a>`` tags;
             all whitespace is preserved.
    """
    def trim_url(url):
        # Shorten only the visible text, never the href.
        if trim_url_limit is None or len(url) <= trim_url_limit:
            return url
        return url[:trim_url_limit] + '...'

    nofollow_attr = ''
    if nofollow:
        nofollow_attr = ' rel="nofollow"'
    # ASCII only: independent of the process locale.
    domain_start_chars = string.ascii_letters + string.digits

    words = _word_split_re.split(text)
    for i, word in enumerate(words):
        match = _punctuation_re.match(word)
        if not match:
            continue
        lead, middle, trail = match.groups()

        has_scheme = middle.startswith('http://') or \
            middle.startswith('https://')
        looks_like_domain = (
            '@' not in middle and
            len(middle) > 0 and
            middle[0] in domain_start_chars and (
                middle.endswith('.org') or
                middle.endswith('.net') or
                middle.endswith('.com')
            )
        )

        # The scheme check comes first so that an https:// URL ending in
        # .com/.net/.org is not mistaken for a bare domain and prefixed
        # with a second "http://".
        if has_scheme:
            middle = '<a href="%s"%s>%s</a>' % (middle,
                nofollow_attr, trim_url(middle))
        elif middle.startswith('www.') or looks_like_domain:
            middle = '<a href="http://%s"%s>%s</a>' % (middle,
                nofollow_attr, trim_url(middle))
        elif '@' in middle and ':' not in middle and \
                _simple_email_re.match(middle):
            middle = '<a href="mailto:%s">%s</a>' % (middle, middle)

        if lead + middle + trail != word:
            words[i] = lead + middle + trail
    return u''.join(words)