blob: c8ef9afbd27fcedb0ec449b605017f706feaca85 [file] [log] [blame]
Armin Ronacherbd33f112008-04-18 09:17:32 +02001/**
2 * jinja2._speedups
3 * ~~~~~~~~~~~~~~~~
4 *
Armin Ronacher5411ce72008-05-25 11:36:22 +02005 * This module implements functions for automatic escaping in C for better
6 * performance. Additionally it defines a `tb_set_next` function to patch the
7 * debug traceback. If the speedups module is not compiled a ctypes
8 * implementation of `tb_set_next` and Python implementations of the other
9 * functions are used.
Armin Ronacherbd33f112008-04-18 09:17:32 +020010 *
Armin Ronacher62ccd1b2009-01-04 14:26:19 +010011 * :copyright: (c) 2009 by the Jinja Team.
Armin Ronacherbd33f112008-04-18 09:17:32 +020012 * :license: BSD.
13 */
14
15#include <Python.h>
16
Mickaël Guérinc0d40d32008-05-05 17:08:51 +020017#define ESCAPED_CHARS_TABLE_SIZE 63
Armin Ronacher9a1e33c2008-05-05 22:00:46 +020018#define UNICHR(x) (((PyUnicodeObject*)PyUnicode_DecodeASCII(x, strlen(x), NULL))->str);
Mickaël Guérinc0d40d32008-05-05 17:08:51 +020019
Armin Ronacherf4e1fb42008-06-09 18:54:56 +020020#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
21typedef int Py_ssize_t;
22#define PY_SSIZE_T_MAX INT_MAX
23#define PY_SSIZE_T_MIN INT_MIN
24#endif
25
26
Armin Ronacher9a1e33c2008-05-05 22:00:46 +020027static PyObject* markup;
Mickaël Guérinc0d40d32008-05-05 17:08:51 +020028static Py_ssize_t escaped_chars_delta_len[ESCAPED_CHARS_TABLE_SIZE];
29static Py_UNICODE *escaped_chars_repl[ESCAPED_CHARS_TABLE_SIZE];
Armin Ronacherbd33f112008-04-18 09:17:32 +020030
31static int
32init_constants(void)
33{
Armin Ronacher3111f432008-06-09 20:23:28 +020034 PyObject *module;
Armin Ronacherf35e2812008-05-06 16:04:10 +020035 /* happing of characters to replace */
36 escaped_chars_repl['"'] = UNICHR("&#34;");
37 escaped_chars_repl['\''] = UNICHR("&#39;");
Armin Ronacher9a1e33c2008-05-05 22:00:46 +020038 escaped_chars_repl['&'] = UNICHR("&amp;");
Armin Ronacher9a1e33c2008-05-05 22:00:46 +020039 escaped_chars_repl['<'] = UNICHR("&lt;");
Armin Ronacher9a1e33c2008-05-05 22:00:46 +020040 escaped_chars_repl['>'] = UNICHR("&gt;");
Armin Ronacherf35e2812008-05-06 16:04:10 +020041
42 /* lengths of those characters when replaced - 1 */
43 memset(escaped_chars_delta_len, 0, sizeof (escaped_chars_delta_len));
44 escaped_chars_delta_len['"'] = escaped_chars_delta_len['\''] = \
45 escaped_chars_delta_len['&'] = 4;
46 escaped_chars_delta_len['<'] = escaped_chars_delta_len['>'] = 3;
Armin Ronacherbd33f112008-04-18 09:17:32 +020047
Armin Ronacherf35e2812008-05-06 16:04:10 +020048 /* import markup type so that we can mark the return value */
Armin Ronacher3111f432008-06-09 20:23:28 +020049 module = PyImport_ImportModule("jinja2.utils");
Armin Ronacherbd33f112008-04-18 09:17:32 +020050 if (!module)
51 return 0;
52 markup = PyObject_GetAttrString(module, "Markup");
53 Py_DECREF(module);
54
55 return 1;
56}
57
58static PyObject*
59escape_unicode(PyUnicodeObject *in)
60{
61 PyUnicodeObject *out;
Mickaël Guérinc0d40d32008-05-05 17:08:51 +020062 Py_UNICODE *inp = in->str;
63 const Py_UNICODE *inp_end = in->str + in->length;
64 Py_UNICODE *next_escp;
Armin Ronacherbd33f112008-04-18 09:17:32 +020065 Py_UNICODE *outp;
Mickaël Guérinc0d40d32008-05-05 17:08:51 +020066 Py_ssize_t delta=0, erepl=0, delta_len=0;
Armin Ronacherbd33f112008-04-18 09:17:32 +020067
68 /* First we need to figure out how long the escaped string will be */
Mickaël Guérinc0d40d32008-05-05 17:08:51 +020069 while (*(inp) || inp < inp_end) {
70 if (*inp < ESCAPED_CHARS_TABLE_SIZE && escaped_chars_delta_len[*inp]) {
71 delta += escaped_chars_delta_len[*inp];
Armin Ronacherf59bac22008-04-20 13:11:43 +020072 ++erepl;
Armin Ronacherbd33f112008-04-18 09:17:32 +020073 }
Mickaël Guérinc0d40d32008-05-05 17:08:51 +020074 ++inp;
75 }
Armin Ronacherbd33f112008-04-18 09:17:32 +020076
77 /* Do we need to escape anything at all? */
78 if (!erepl) {
79 Py_INCREF(in);
80 return (PyObject*)in;
81 }
82
Mickaël Guérinc0d40d32008-05-05 17:08:51 +020083 out = (PyUnicodeObject*)PyUnicode_FromUnicode(NULL, in->length + delta);
Armin Ronacherbd33f112008-04-18 09:17:32 +020084 if (!out)
85 return NULL;
86
87 outp = out->str;
88 inp = in->str;
Mickaël Guérinc0d40d32008-05-05 17:08:51 +020089 while (erepl-- > 0) {
Armin Ronacher9a1e33c2008-05-05 22:00:46 +020090 /* look for the next substitution */
Mickaël Guérinc0d40d32008-05-05 17:08:51 +020091 next_escp = inp;
92 while (next_escp < inp_end) {
Armin Ronacher9a1e33c2008-05-05 22:00:46 +020093 if (*next_escp < ESCAPED_CHARS_TABLE_SIZE &&
94 (delta_len = escaped_chars_delta_len[*next_escp])) {
Mickaël Guérinc0d40d32008-05-05 17:08:51 +020095 ++delta_len;
96 break;
97 }
98 ++next_escp;
Armin Ronacherbd33f112008-04-18 09:17:32 +020099 }
Mickaël Guérinc0d40d32008-05-05 17:08:51 +0200100
101 if (next_escp > inp) {
102 /* copy unescaped chars between inp and next_escp */
103 Py_UNICODE_COPY(outp, inp, next_escp-inp);
Armin Ronachere62b7ef2008-05-24 20:47:29 +0200104 outp += next_escp - inp;
Mickaël Guérinc0d40d32008-05-05 17:08:51 +0200105 }
106
107 /* escape 'next_escp' */
108 Py_UNICODE_COPY(outp, escaped_chars_repl[*next_escp], delta_len);
109 outp += delta_len;
110
111 inp = next_escp + 1;
112 }
Armin Ronacher9a1e33c2008-05-05 22:00:46 +0200113 if (inp < inp_end)
Mickaël Guérinc0d40d32008-05-05 17:08:51 +0200114 Py_UNICODE_COPY(outp, inp, in->length - (inp - in->str));
Armin Ronacherbd33f112008-04-18 09:17:32 +0200115
116 return (PyObject*)out;
117}
118
119
120static PyObject*
Armin Ronacherf59bac22008-04-20 13:11:43 +0200121escape(PyObject *self, PyObject *text)
122{
Armin Ronacher3111f432008-06-09 20:23:28 +0200123 PyObject *s = NULL, *rv = NULL, *html;
Armin Ronacherbd33f112008-04-18 09:17:32 +0200124
125 /* we don't have to escape integers, bools or floats */
126 if (PyInt_CheckExact(text) || PyLong_CheckExact(text) ||
127 PyFloat_CheckExact(text) || PyBool_Check(text) ||
Armin Ronacher7ceced52008-05-03 10:15:31 +0200128 text == Py_None)
129 return PyObject_CallFunctionObjArgs(markup, text, NULL);
Armin Ronacherbd33f112008-04-18 09:17:32 +0200130
131 /* if the object has an __html__ method that performs the escaping */
Armin Ronacher3111f432008-06-09 20:23:28 +0200132 html = PyObject_GetAttrString(text, "__html__");
Armin Ronacherbd33f112008-04-18 09:17:32 +0200133 if (html) {
134 rv = PyObject_CallObject(html, NULL);
135 Py_DECREF(html);
136 return rv;
137 }
138
139 /* otherwise make the object unicode if it isn't, then escape */
140 PyErr_Clear();
141 if (!PyUnicode_Check(text)) {
142 PyObject *unicode = PyObject_Unicode(text);
143 if (!unicode)
144 return NULL;
145 s = escape_unicode((PyUnicodeObject*)unicode);
146 Py_DECREF(unicode);
147 }
148 else
149 s = escape_unicode((PyUnicodeObject*)text);
150
151 /* convert the unicode string into a markup object. */
Armin Ronacher7ceced52008-05-03 10:15:31 +0200152 rv = PyObject_CallFunctionObjArgs(markup, (PyObject*)s, NULL);
153 Py_DECREF(s);
154 return rv;
Armin Ronacherbd33f112008-04-18 09:17:32 +0200155}
156
157
Armin Ronacherf35e2812008-05-06 16:04:10 +0200158static PyObject*
159soft_unicode(PyObject *self, PyObject *s)
160{
161 if (!PyUnicode_Check(s))
162 return PyObject_Unicode(s);
163 Py_INCREF(s);
164 return s;
165}
166
167
Armin Ronacherd71fff02008-05-26 23:57:07 +0200168static PyObject*
Armin Ronacherbd33f112008-04-18 09:17:32 +0200169tb_set_next(PyObject *self, PyObject *args)
170{
171 PyTracebackObject *tb, *old;
172 PyObject *next;
173
174 if (!PyArg_ParseTuple(args, "O!O:tb_set_next", &PyTraceBack_Type, &tb, &next))
175 return NULL;
176 if (next == Py_None)
177 next = NULL;
178 else if (!PyTraceBack_Check(next)) {
179 PyErr_SetString(PyExc_TypeError,
180 "tb_set_next arg 2 must be traceback or None");
181 return NULL;
182 }
183 else
184 Py_INCREF(next);
185
186 old = tb->tb_next;
187 tb->tb_next = (PyTracebackObject*)next;
188 Py_XDECREF(old);
189
190 Py_INCREF(Py_None);
191 return Py_None;
192}
193
194
195static PyMethodDef module_methods[] = {
Armin Ronacherf59bac22008-04-20 13:11:43 +0200196 {"escape", (PyCFunction)escape, METH_O,
Armin Ronacher9a1e33c2008-05-05 22:00:46 +0200197 "escape(s) -> markup\n\n"
Armin Ronacher9bb7e472008-05-28 11:26:59 +0200198 "Convert the characters &, <, >, ', and \" in string s to HTML-safe\n"
Armin Ronacherf35e2812008-05-06 16:04:10 +0200199 "sequences. Use this if you need to display text that might contain\n"
Armin Ronacher9a1e33c2008-05-05 22:00:46 +0200200 "such characters in HTML. Marks return value as markup string."},
Armin Ronacherf59bac22008-04-20 13:11:43 +0200201 {"soft_unicode", (PyCFunction)soft_unicode, METH_O,
202 "soft_unicode(object) -> string\n\n"
203 "Make a string unicode if it isn't already. That way a markup\n"
204 "string is not converted back to unicode."},
Armin Ronacherbd33f112008-04-18 09:17:32 +0200205 {"tb_set_next", (PyCFunction)tb_set_next, METH_VARARGS,
206 "Set the tb_next member of a traceback object."},
207 {NULL, NULL, 0, NULL} /* Sentinel */
208};
209
210
211#ifndef PyMODINIT_FUNC /* declarations for DLL import/export */
212#define PyMODINIT_FUNC void
213#endif
214PyMODINIT_FUNC
215init_speedups(void)
216{
217 if (!init_constants())
218 return;
219
220 Py_InitModule3("jinja2._speedups", module_methods, "");
221}