blob: a392c973266a5683eb568b1117b98d11e7d12df2 [file] [log] [blame]
Christian Heimes90540002008-05-08 14:29:10 +00001#include "Python.h"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00002#include "structmember.h"
/* Provide Py_TYPE() when building against a Python older than 2.6 that
   does not already define it. */
#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
#  define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif

/* Provide Py_ssize_t and its helpers (as plain int) when building against a
   Python older than 2.5 that does not already define them. */
#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
typedef int Py_ssize_t;
#  define PY_SSIZE_T_MAX INT_MAX
#  define PY_SSIZE_T_MIN INT_MIN
#  define PyInt_FromSsize_t PyInt_FromLong
#  define PyInt_AsSsize_t PyInt_AsLong
#endif

/* A value is finite when it is neither an infinity nor a NaN. */
#ifndef Py_IS_FINITE
#  define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
#endif

/* UNUSED is appended to parameters that are deliberately not referenced;
   it expands to the GCC attribute when available and to nothing otherwise. */
#ifdef __GNUC__
#  define UNUSED __attribute__((__unused__))
#else
#  define UNUSED
#endif
22
23#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
24#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
25#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
26#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
27
28static PyTypeObject PyScannerType;
29static PyTypeObject PyEncoderType;
30
31typedef struct _PyScannerObject {
32 PyObject_HEAD
33 PyObject *strict;
34 PyObject *object_hook;
35 PyObject *object_pairs_hook;
36 PyObject *parse_float;
37 PyObject *parse_int;
38 PyObject *parse_constant;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +000039 PyObject *memo;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000040} PyScannerObject;
41
42static PyMemberDef scanner_members[] = {
43 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
44 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
45 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
46 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
47 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
48 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
49 {NULL}
50};
51
52typedef struct _PyEncoderObject {
53 PyObject_HEAD
54 PyObject *markers;
55 PyObject *defaultfn;
56 PyObject *encoder;
57 PyObject *indent;
58 PyObject *key_separator;
59 PyObject *item_separator;
60 PyObject *sort_keys;
61 PyObject *skipkeys;
62 int fast_encode;
63 int allow_nan;
64} PyEncoderObject;
65
66static PyMemberDef encoder_members[] = {
67 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
68 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
69 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
70 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
71 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
72 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
73 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
74 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
75 {NULL}
76};
77
78static PyObject *
79ascii_escape_unicode(PyObject *pystr);
80static PyObject *
81py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
82void init_json(void);
83static PyObject *
84scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
85static PyObject *
86_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
87static PyObject *
88scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
89static int
90scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
91static void
92scanner_dealloc(PyObject *self);
93static int
94scanner_clear(PyObject *self);
95static PyObject *
96encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
97static int
98encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
99static void
100encoder_dealloc(PyObject *self);
101static int
102encoder_clear(PyObject *self);
103static int
104encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
105static int
106encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
107static int
108encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
109static PyObject *
Hirokazu Yamamotofecf5d12009-05-02 15:55:19 +0000110_encoded_const(PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000111static void
112raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
113static PyObject *
114encoder_encode_string(PyEncoderObject *s, PyObject *obj);
115static int
116_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
117static PyObject *
118_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
119static PyObject *
120encoder_encode_float(PyEncoderObject *s, PyObject *obj);
121
/* S_CHAR(c): true when c may be copied unescaped into an ASCII-only JSON
   string, i.e. it lies in ' '..'~' and is neither a backslash nor a double
   quote.  The argument is parenthesised so that an expression argument
   (for example a conditional expression) is evaluated as a unit; it is
   still evaluated several times, so it must have no side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')

/* IS_WHITESPACE(c): true for space, tab, newline and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* Number of output characters needed for one "\uXXXX" escape.  On wide
   Py_UNICODE builds a single input character may need a surrogate pair,
   i.e. two such escapes. */
#define MIN_EXPANSION 6
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
131
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000132static int
133_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000134{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000135 /* PyObject to Py_ssize_t converter */
136 *size_ptr = PyLong_AsSsize_t(o);
Georg Brandl59682052009-05-05 07:52:05 +0000137 if (*size_ptr == -1 && PyErr_Occurred())
138 return 0;
139 return 1;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000140}
141
142static PyObject *
143_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
144{
145 /* Py_ssize_t to PyObject converter */
146 return PyLong_FromSsize_t(*size_ptr);
147}
148
149static Py_ssize_t
150ascii_escape_unichar(Py_UNICODE c, Py_UNICODE *output, Py_ssize_t chars)
151{
152 /* Escape unicode code point c to ASCII escape sequences
153 in char *output. output must have at least 12 bytes unused to
154 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000155 output[chars++] = '\\';
156 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000157 case '\\': output[chars++] = c; break;
158 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000159 case '\b': output[chars++] = 'b'; break;
160 case '\f': output[chars++] = 'f'; break;
161 case '\n': output[chars++] = 'n'; break;
162 case '\r': output[chars++] = 'r'; break;
163 case '\t': output[chars++] = 't'; break;
164 default:
165#ifdef Py_UNICODE_WIDE
166 if (c >= 0x10000) {
167 /* UTF-16 surrogate pair */
168 Py_UNICODE v = c - 0x10000;
169 c = 0xd800 | ((v >> 10) & 0x3ff);
170 output[chars++] = 'u';
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000171 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
172 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
173 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
174 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000175 c = 0xdc00 | (v & 0x3ff);
176 output[chars++] = '\\';
177 }
178#endif
179 output[chars++] = 'u';
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000180 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
181 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
182 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
183 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000184 }
185 return chars;
186}
187
188static PyObject *
189ascii_escape_unicode(PyObject *pystr)
190{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000191 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000192 Py_ssize_t i;
193 Py_ssize_t input_chars;
194 Py_ssize_t output_size;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000195 Py_ssize_t max_output_size;
Christian Heimes90540002008-05-08 14:29:10 +0000196 Py_ssize_t chars;
197 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000198 Py_UNICODE *output;
Christian Heimes90540002008-05-08 14:29:10 +0000199 Py_UNICODE *input_unicode;
200
201 input_chars = PyUnicode_GET_SIZE(pystr);
202 input_unicode = PyUnicode_AS_UNICODE(pystr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000203
Christian Heimes90540002008-05-08 14:29:10 +0000204 /* One char input can be up to 6 chars output, estimate 4 of these */
205 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000206 max_output_size = 2 + (input_chars * MAX_EXPANSION);
207 rval = PyUnicode_FromStringAndSize(NULL, output_size);
Christian Heimes90540002008-05-08 14:29:10 +0000208 if (rval == NULL) {
209 return NULL;
210 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000211 output = PyUnicode_AS_UNICODE(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000212 chars = 0;
213 output[chars++] = '"';
214 for (i = 0; i < input_chars; i++) {
215 Py_UNICODE c = input_unicode[i];
216 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000217 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000218 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000219 else {
220 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000221 }
222 if (output_size - chars < (1 + MAX_EXPANSION)) {
223 /* There's more than four, so let's resize by a lot */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000224 Py_ssize_t new_output_size = output_size * 2;
Christian Heimes90540002008-05-08 14:29:10 +0000225 /* This is an upper bound */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000226 if (new_output_size > max_output_size) {
227 new_output_size = max_output_size;
Christian Heimes90540002008-05-08 14:29:10 +0000228 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000229 /* Make sure that the output size changed before resizing */
230 if (new_output_size != output_size) {
231 output_size = new_output_size;
232 if (PyUnicode_Resize(&rval, output_size) == -1) {
233 return NULL;
234 }
235 output = PyUnicode_AS_UNICODE(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000236 }
Christian Heimes90540002008-05-08 14:29:10 +0000237 }
238 }
239 output[chars++] = '"';
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000240 if (PyUnicode_Resize(&rval, chars) == -1) {
Christian Heimes90540002008-05-08 14:29:10 +0000241 return NULL;
242 }
243 return rval;
244}
245
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000246static void
Christian Heimes90540002008-05-08 14:29:10 +0000247raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
248{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000249 /* Use the Python function json.decoder.errmsg to raise a nice
250 looking ValueError exception */
Christian Heimes90540002008-05-08 14:29:10 +0000251 static PyObject *errmsg_fn = NULL;
252 PyObject *pymsg;
253 if (errmsg_fn == NULL) {
254 PyObject *decoder = PyImport_ImportModule("json.decoder");
255 if (decoder == NULL)
256 return;
257 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000258 Py_DECREF(decoder);
Christian Heimes90540002008-05-08 14:29:10 +0000259 if (errmsg_fn == NULL)
260 return;
Christian Heimes90540002008-05-08 14:29:10 +0000261 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000262 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000263 if (pymsg) {
264 PyErr_SetObject(PyExc_ValueError, pymsg);
265 Py_DECREF(pymsg);
266 }
Christian Heimes90540002008-05-08 14:29:10 +0000267}
268
269static PyObject *
270join_list_unicode(PyObject *lst)
271{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000272 /* return u''.join(lst) */
273 static PyObject *sep = NULL;
274 if (sep == NULL) {
275 sep = PyUnicode_FromStringAndSize("", 0);
276 if (sep == NULL)
277 return NULL;
Christian Heimes90540002008-05-08 14:29:10 +0000278 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000279 return PyUnicode_Join(sep, lst);
280}
281
282static PyObject *
283_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
284 /* return (rval, idx) tuple, stealing reference to rval */
285 PyObject *tpl;
286 PyObject *pyidx;
287 /*
288 steal a reference to rval, returns (rval, idx)
289 */
290 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000291 return NULL;
292 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000293 pyidx = PyLong_FromSsize_t(idx);
294 if (pyidx == NULL) {
295 Py_DECREF(rval);
296 return NULL;
297 }
298 tpl = PyTuple_New(2);
299 if (tpl == NULL) {
300 Py_DECREF(pyidx);
301 Py_DECREF(rval);
302 return NULL;
303 }
304 PyTuple_SET_ITEM(tpl, 0, rval);
305 PyTuple_SET_ITEM(tpl, 1, pyidx);
306 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000307}
308
/* Flush the pending `chunk` (if any) into the `chunks` list, creating the
   list on first use, and reset `chunk` to NULL.

   Expects the enclosing function to have PyObject *chunk and *chunks
   locals and a `bail` label that releases them.  The macro is written as a
   bare if-statement because its call sites do not add a trailing
   semicolon.

   On PyList_Append failure `chunk` is cleared (released AND set to NULL)
   before jumping to bail, so that the cleanup code at bail does not release
   the same reference a second time. */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        if (chunks == NULL) { \
            chunks = PyList_New(0); \
            if (chunks == NULL) { \
                goto bail; \
            } \
        } \
        if (PyList_Append(chunks, chunk)) { \
            Py_CLEAR(chunk); \
            goto bail; \
        } \
        Py_CLEAR(chunk); \
    }
323
Christian Heimes90540002008-05-08 14:29:10 +0000324static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000325scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000326{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000327 /* Read the JSON string from PyUnicode pystr.
328 end is the index of the first character after the quote.
329 if strict is zero then literal control characters are allowed
330 *next_end_ptr is a return-by-reference index of the character
331 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000332
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000333 Return value is a new PyUnicode
334 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000335 PyObject *rval = NULL;
Christian Heimes90540002008-05-08 14:29:10 +0000336 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
337 Py_ssize_t begin = end - 1;
Brett Cannonb94767f2011-02-22 20:15:44 +0000338 Py_ssize_t next /* = begin */;
Christian Heimes90540002008-05-08 14:29:10 +0000339 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000340 PyObject *chunks = NULL;
341 PyObject *chunk = NULL;
342
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000343 if (end < 0 || len <= end) {
344 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
345 goto bail;
346 }
Christian Heimes90540002008-05-08 14:29:10 +0000347 while (1) {
348 /* Find the end of the string or the next escape */
349 Py_UNICODE c = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000350 for (next = end; next < len; next++) {
351 c = buf[next];
352 if (c == '"' || c == '\\') {
353 break;
354 }
355 else if (strict && c <= 0x1f) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000356 raise_errmsg("Invalid control character at", pystr, next);
Christian Heimes90540002008-05-08 14:29:10 +0000357 goto bail;
358 }
359 }
360 if (!(c == '"' || c == '\\')) {
361 raise_errmsg("Unterminated string starting at", pystr, begin);
362 goto bail;
363 }
364 /* Pick up this chunk if it's not zero length */
365 if (next != end) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000366 APPEND_OLD_CHUNK
Christian Heimes90540002008-05-08 14:29:10 +0000367 chunk = PyUnicode_FromUnicode(&buf[end], next - end);
368 if (chunk == NULL) {
369 goto bail;
370 }
Christian Heimes90540002008-05-08 14:29:10 +0000371 }
372 next++;
373 if (c == '"') {
374 end = next;
375 break;
376 }
377 if (next == len) {
378 raise_errmsg("Unterminated string starting at", pystr, begin);
379 goto bail;
380 }
381 c = buf[next];
382 if (c != 'u') {
383 /* Non-unicode backslash escapes */
384 end = next + 1;
385 switch (c) {
386 case '"': break;
387 case '\\': break;
388 case '/': break;
389 case 'b': c = '\b'; break;
390 case 'f': c = '\f'; break;
391 case 'n': c = '\n'; break;
392 case 'r': c = '\r'; break;
393 case 't': c = '\t'; break;
394 default: c = 0;
395 }
396 if (c == 0) {
397 raise_errmsg("Invalid \\escape", pystr, end - 2);
398 goto bail;
399 }
400 }
401 else {
402 c = 0;
403 next++;
404 end = next + 4;
405 if (end >= len) {
406 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
407 goto bail;
408 }
409 /* Decode 4 hex digits */
410 for (; next < end; next++) {
Christian Heimes90540002008-05-08 14:29:10 +0000411 Py_UNICODE digit = buf[next];
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000412 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000413 switch (digit) {
414 case '0': case '1': case '2': case '3': case '4':
415 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000416 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000417 case 'a': case 'b': case 'c': case 'd': case 'e':
418 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000419 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000420 case 'A': case 'B': case 'C': case 'D': case 'E':
421 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000422 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000423 default:
424 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
425 goto bail;
426 }
427 }
428#ifdef Py_UNICODE_WIDE
429 /* Surrogate pair */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000430 if ((c & 0xfc00) == 0xd800) {
Christian Heimes90540002008-05-08 14:29:10 +0000431 Py_UNICODE c2 = 0;
432 if (end + 6 >= len) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000433 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
434 goto bail;
Christian Heimes90540002008-05-08 14:29:10 +0000435 }
436 if (buf[next++] != '\\' || buf[next++] != 'u') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000437 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
438 goto bail;
Christian Heimes90540002008-05-08 14:29:10 +0000439 }
440 end += 6;
441 /* Decode 4 hex digits */
442 for (; next < end; next++) {
Christian Heimes90540002008-05-08 14:29:10 +0000443 Py_UNICODE digit = buf[next];
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000444 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000445 switch (digit) {
446 case '0': case '1': case '2': case '3': case '4':
447 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000448 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000449 case 'a': case 'b': case 'c': case 'd': case 'e':
450 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000451 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000452 case 'A': case 'B': case 'C': case 'D': case 'E':
453 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000454 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000455 default:
456 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
457 goto bail;
458 }
459 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000460 if ((c2 & 0xfc00) != 0xdc00) {
461 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
462 goto bail;
463 }
Christian Heimes90540002008-05-08 14:29:10 +0000464 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
465 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000466 else if ((c & 0xfc00) == 0xdc00) {
467 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
468 goto bail;
469 }
Christian Heimes90540002008-05-08 14:29:10 +0000470#endif
471 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000472 APPEND_OLD_CHUNK
Christian Heimes90540002008-05-08 14:29:10 +0000473 chunk = PyUnicode_FromUnicode(&c, 1);
474 if (chunk == NULL) {
475 goto bail;
476 }
Christian Heimes90540002008-05-08 14:29:10 +0000477 }
478
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000479 if (chunks == NULL) {
480 if (chunk != NULL)
481 rval = chunk;
482 else
483 rval = PyUnicode_FromStringAndSize("", 0);
Christian Heimes90540002008-05-08 14:29:10 +0000484 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000485 else {
486 APPEND_OLD_CHUNK
487 rval = join_list_unicode(chunks);
488 if (rval == NULL) {
489 goto bail;
490 }
491 Py_CLEAR(chunks);
492 }
493
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000494 *next_end_ptr = end;
495 return rval;
Christian Heimes90540002008-05-08 14:29:10 +0000496bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000497 *next_end_ptr = -1;
Christian Heimes90540002008-05-08 14:29:10 +0000498 Py_XDECREF(chunks);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000499 Py_XDECREF(chunk);
Christian Heimes90540002008-05-08 14:29:10 +0000500 return NULL;
501}
502
503PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000504 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000505 "\n"
506 "Scan the string s for a JSON string. End is the index of the\n"
507 "character in s after the quote that started the JSON string.\n"
508 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
509 "on attempt to decode an invalid string. If strict is False then literal\n"
510 "control characters are allowed in the string.\n"
511 "\n"
512 "Returns a tuple of the decoded string and the index of the character in s\n"
513 "after the end quote."
514);
Christian Heimes90540002008-05-08 14:29:10 +0000515
516static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000517py_scanstring(PyObject* self UNUSED, PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000518{
519 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000520 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000521 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000522 Py_ssize_t next_end = -1;
523 int strict = 1;
524 if (!PyArg_ParseTuple(args, "OO&|i:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000525 return NULL;
526 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000527 if (PyUnicode_Check(pystr)) {
528 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000529 }
530 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000531 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000532 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000533 Py_TYPE(pystr)->tp_name);
534 return NULL;
535 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000536 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000537}
538
539PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000540 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000541 "\n"
542 "Return an ASCII-only JSON representation of a Python string"
543);
Christian Heimes90540002008-05-08 14:29:10 +0000544
545static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000546py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000547{
548 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000549 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000550 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000551 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000552 rval = ascii_escape_unicode(pystr);
553 }
554 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000555 PyErr_Format(PyExc_TypeError,
556 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000557 Py_TYPE(pystr)->tp_name);
558 return NULL;
559 }
Christian Heimes90540002008-05-08 14:29:10 +0000560 return rval;
561}
562
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000563static void
564scanner_dealloc(PyObject *self)
565{
566 /* Deallocate scanner object */
567 scanner_clear(self);
568 Py_TYPE(self)->tp_free(self);
569}
570
571static int
572scanner_traverse(PyObject *self, visitproc visit, void *arg)
573{
574 PyScannerObject *s;
575 assert(PyScanner_Check(self));
576 s = (PyScannerObject *)self;
577 Py_VISIT(s->strict);
578 Py_VISIT(s->object_hook);
579 Py_VISIT(s->object_pairs_hook);
580 Py_VISIT(s->parse_float);
581 Py_VISIT(s->parse_int);
582 Py_VISIT(s->parse_constant);
583 return 0;
584}
585
586static int
587scanner_clear(PyObject *self)
588{
589 PyScannerObject *s;
590 assert(PyScanner_Check(self));
591 s = (PyScannerObject *)self;
592 Py_CLEAR(s->strict);
593 Py_CLEAR(s->object_hook);
594 Py_CLEAR(s->object_pairs_hook);
595 Py_CLEAR(s->parse_float);
596 Py_CLEAR(s->parse_int);
597 Py_CLEAR(s->parse_constant);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000598 Py_CLEAR(s->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000599 return 0;
600}
601
602static PyObject *
603_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
604 /* Read a JSON object from PyUnicode pystr.
605 idx is the index of the first character after the opening curly brace.
606 *next_idx_ptr is a return-by-reference index to the first character after
607 the closing curly brace.
608
609 Returns a new PyObject (usually a dict, but object_hook can change that)
610 */
611 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
612 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
613 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000614 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000615 PyObject *key = NULL;
616 int strict = PyObject_IsTrue(s->strict);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000617 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000618 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000619
620 if (has_pairs_hook)
621 rval = PyList_New(0);
622 else
623 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000624 if (rval == NULL)
625 return NULL;
626
627 /* skip whitespace after { */
628 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
629
630 /* only loop if the object is non-empty */
631 if (idx <= end_idx && str[idx] != '}') {
632 while (idx <= end_idx) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000633 PyObject *memokey;
634
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000635 /* read key */
636 if (str[idx] != '"') {
637 raise_errmsg("Expecting property name", pystr, idx);
638 goto bail;
639 }
640 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
641 if (key == NULL)
642 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000643 memokey = PyDict_GetItem(s->memo, key);
644 if (memokey != NULL) {
645 Py_INCREF(memokey);
646 Py_DECREF(key);
647 key = memokey;
648 }
649 else {
650 if (PyDict_SetItem(s->memo, key, key) < 0)
651 goto bail;
652 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000653 idx = next_idx;
654
655 /* skip whitespace between key and : delimiter, read :, skip whitespace */
656 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
657 if (idx > end_idx || str[idx] != ':') {
658 raise_errmsg("Expecting : delimiter", pystr, idx);
659 goto bail;
660 }
661 idx++;
662 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
663
664 /* read any JSON term */
665 val = scan_once_unicode(s, pystr, idx, &next_idx);
666 if (val == NULL)
667 goto bail;
668
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000669 if (has_pairs_hook) {
670 PyObject *item = PyTuple_Pack(2, key, val);
671 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000672 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000673 Py_CLEAR(key);
674 Py_CLEAR(val);
675 if (PyList_Append(rval, item) == -1) {
676 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000677 goto bail;
678 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000679 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000680 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000681 else {
682 if (PyDict_SetItem(rval, key, val) < 0)
683 goto bail;
684 Py_CLEAR(key);
685 Py_CLEAR(val);
686 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000687 idx = next_idx;
688
689 /* skip whitespace before } or , */
690 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
691
692 /* bail if the object is closed or we didn't get the , delimiter */
693 if (idx > end_idx) break;
694 if (str[idx] == '}') {
695 break;
696 }
697 else if (str[idx] != ',') {
698 raise_errmsg("Expecting , delimiter", pystr, idx);
699 goto bail;
700 }
701 idx++;
702
703 /* skip whitespace after , delimiter */
704 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
705 }
706 }
707
708 /* verify that idx < end_idx, str[idx] should be '}' */
709 if (idx > end_idx || str[idx] != '}') {
710 raise_errmsg("Expecting object", pystr, end_idx);
711 goto bail;
712 }
713
714 *next_idx_ptr = idx + 1;
715
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000716 if (has_pairs_hook) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000717 val = PyObject_CallFunctionObjArgs(s->object_pairs_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000718 Py_DECREF(rval);
719 return val;
720 }
721
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000722 /* if object_hook is not None: rval = object_hook(rval) */
723 if (s->object_hook != Py_None) {
724 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000725 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000726 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000727 }
728 return rval;
729bail:
730 Py_XDECREF(key);
731 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000732 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000733 return NULL;
734}
735
736static PyObject *
737_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
738 /* Read a JSON array from PyString pystr.
739 idx is the index of the first character after the opening brace.
740 *next_idx_ptr is a return-by-reference index to the first character after
741 the closing brace.
742
743 Returns a new PyList
744 */
745 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
746 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
747 PyObject *val = NULL;
748 PyObject *rval = PyList_New(0);
749 Py_ssize_t next_idx;
750 if (rval == NULL)
751 return NULL;
752
753 /* skip whitespace after [ */
754 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
755
756 /* only loop if the array is non-empty */
757 if (idx <= end_idx && str[idx] != ']') {
758 while (idx <= end_idx) {
759
760 /* read any JSON term */
761 val = scan_once_unicode(s, pystr, idx, &next_idx);
762 if (val == NULL)
763 goto bail;
764
765 if (PyList_Append(rval, val) == -1)
766 goto bail;
767
768 Py_CLEAR(val);
769 idx = next_idx;
770
771 /* skip whitespace between term and , */
772 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
773
774 /* bail if the array is closed or we didn't get the , delimiter */
775 if (idx > end_idx) break;
776 if (str[idx] == ']') {
777 break;
778 }
779 else if (str[idx] != ',') {
780 raise_errmsg("Expecting , delimiter", pystr, idx);
781 goto bail;
782 }
783 idx++;
784
785 /* skip whitespace after , */
786 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
787 }
788 }
789
790 /* verify that idx < end_idx, str[idx] should be ']' */
791 if (idx > end_idx || str[idx] != ']') {
792 raise_errmsg("Expecting object", pystr, end_idx);
793 goto bail;
794 }
795 *next_idx_ptr = idx + 1;
796 return rval;
797bail:
798 Py_XDECREF(val);
799 Py_DECREF(rval);
800 return NULL;
801}
802
803static PyObject *
804_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
805 /* Read a JSON constant from PyString pystr.
806 constant is the constant string that was found
807 ("NaN", "Infinity", "-Infinity").
808 idx is the index of the first character of the constant
809 *next_idx_ptr is a return-by-reference index to the first character after
810 the constant.
811
812 Returns the result of parse_constant
813 */
814 PyObject *cstr;
815 PyObject *rval;
816 /* constant is "NaN", "Infinity", or "-Infinity" */
817 cstr = PyUnicode_InternFromString(constant);
818 if (cstr == NULL)
819 return NULL;
820
821 /* rval = parse_constant(constant) */
822 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
823 idx += PyUnicode_GET_SIZE(cstr);
824 Py_DECREF(cstr);
825 *next_idx_ptr = idx;
826 return rval;
827}
828
829static PyObject *
830_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
831 /* Read a JSON number from PyUnicode pystr.
832 idx is the index of the first character of the number
833 *next_idx_ptr is a return-by-reference index to the first character after
834 the number.
835
836 Returns a new PyObject representation of that number:
837 PyInt, PyLong, or PyFloat.
838 May return other types if parse_int or parse_float are set
839 */
840 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
841 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
842 Py_ssize_t idx = start;
843 int is_float = 0;
844 PyObject *rval;
Antoine Pitrouf6454512011-04-25 19:16:06 +0200845 PyObject *numstr = NULL;
846 PyObject *custom_func;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000847
848 /* read a sign if it's there, make sure it's not the end of the string */
849 if (str[idx] == '-') {
850 idx++;
851 if (idx > end_idx) {
852 PyErr_SetNone(PyExc_StopIteration);
853 return NULL;
854 }
855 }
856
857 /* read as many integer digits as we find as long as it doesn't start with 0 */
858 if (str[idx] >= '1' && str[idx] <= '9') {
859 idx++;
860 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
861 }
862 /* if it starts with 0 we only expect one integer digit */
863 else if (str[idx] == '0') {
864 idx++;
865 }
866 /* no integer digits, error */
867 else {
868 PyErr_SetNone(PyExc_StopIteration);
869 return NULL;
870 }
871
872 /* if the next char is '.' followed by a digit then read all float digits */
873 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
874 is_float = 1;
875 idx += 2;
876 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
877 }
878
879 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
880 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
881 Py_ssize_t e_start = idx;
882 idx++;
883
884 /* read an exponent sign if present */
885 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
886
887 /* read all digits */
888 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
889
890 /* if we got a digit, then parse as float. if not, backtrack */
891 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
892 is_float = 1;
893 }
894 else {
895 idx = e_start;
896 }
897 }
898
Antoine Pitrouf6454512011-04-25 19:16:06 +0200899 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
900 custom_func = s->parse_float;
901 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
902 custom_func = s->parse_int;
903 else
904 custom_func = NULL;
905
906 if (custom_func) {
907 /* copy the section we determined to be a number */
908 numstr = PyUnicode_FromUnicode(&str[start], idx - start);
909 if (numstr == NULL)
910 return NULL;
911 rval = PyObject_CallFunctionObjArgs(custom_func, numstr, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000912 }
913 else {
Antoine Pitrouf6454512011-04-25 19:16:06 +0200914 Py_ssize_t i, n;
915 char *buf;
916 /* Straight conversion to ASCII, to avoid costly conversion of
917 decimal unicode digits (which cannot appear here) */
918 n = idx - start;
919 numstr = PyBytes_FromStringAndSize(NULL, n);
920 if (numstr == NULL)
921 return NULL;
922 buf = PyBytes_AS_STRING(numstr);
923 for (i = 0; i < n; i++) {
924 buf[i] = (char) str[i + start];
925 }
926 if (is_float)
927 rval = PyFloat_FromString(numstr);
928 else
929 rval = PyLong_FromString(buf, NULL, 10);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000930 }
931 Py_DECREF(numstr);
932 *next_idx_ptr = idx;
933 return rval;
934}
935
936static PyObject *
937scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
938{
939 /* Read one JSON term (of any kind) from PyUnicode pystr.
940 idx is the index of the first character of the term
941 *next_idx_ptr is a return-by-reference index to the first character after
942 the number.
943
944 Returns a new PyObject representation of the term.
945 */
Ezio Melotti362b9512011-05-07 17:58:09 +0300946 PyObject *res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000947 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
948 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
949 if (idx >= length) {
950 PyErr_SetNone(PyExc_StopIteration);
951 return NULL;
952 }
953 switch (str[idx]) {
954 case '"':
955 /* string */
956 return scanstring_unicode(pystr, idx + 1,
957 PyObject_IsTrue(s->strict),
958 next_idx_ptr);
959 case '{':
960 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +0300961 if (Py_EnterRecursiveCall(" while decoding a JSON object "
962 "from a unicode string"))
963 return NULL;
964 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
965 Py_LeaveRecursiveCall();
966 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000967 case '[':
968 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +0300969 if (Py_EnterRecursiveCall(" while decoding a JSON array "
970 "from a unicode string"))
971 return NULL;
972 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
973 Py_LeaveRecursiveCall();
974 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000975 case 'n':
976 /* null */
977 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
978 Py_INCREF(Py_None);
979 *next_idx_ptr = idx + 4;
980 return Py_None;
981 }
982 break;
983 case 't':
984 /* true */
985 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
986 Py_INCREF(Py_True);
987 *next_idx_ptr = idx + 4;
988 return Py_True;
989 }
990 break;
991 case 'f':
992 /* false */
993 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
994 Py_INCREF(Py_False);
995 *next_idx_ptr = idx + 5;
996 return Py_False;
997 }
998 break;
999 case 'N':
1000 /* NaN */
1001 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1002 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1003 }
1004 break;
1005 case 'I':
1006 /* Infinity */
1007 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1008 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1009 }
1010 break;
1011 case '-':
1012 /* -Infinity */
1013 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1014 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1015 }
1016 break;
1017 }
1018 /* Didn't find a string, object, array, or named constant. Look for a number. */
1019 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1020}
1021
1022static PyObject *
1023scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1024{
1025 /* Python callable interface to scan_once_{str,unicode} */
1026 PyObject *pystr;
1027 PyObject *rval;
1028 Py_ssize_t idx;
1029 Py_ssize_t next_idx = -1;
1030 static char *kwlist[] = {"string", "idx", NULL};
1031 PyScannerObject *s;
1032 assert(PyScanner_Check(self));
1033 s = (PyScannerObject *)self;
1034 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1035 return NULL;
1036
1037 if (PyUnicode_Check(pystr)) {
1038 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1039 }
1040 else {
1041 PyErr_Format(PyExc_TypeError,
1042 "first argument must be a string, not %.80s",
1043 Py_TYPE(pystr)->tp_name);
1044 return NULL;
1045 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001046 PyDict_Clear(s->memo);
1047 if (rval == NULL)
1048 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001049 return _build_rval_index_tuple(rval, next_idx);
1050}
1051
1052static PyObject *
1053scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1054{
1055 PyScannerObject *s;
1056 s = (PyScannerObject *)type->tp_alloc(type, 0);
1057 if (s != NULL) {
1058 s->strict = NULL;
1059 s->object_hook = NULL;
1060 s->object_pairs_hook = NULL;
1061 s->parse_float = NULL;
1062 s->parse_int = NULL;
1063 s->parse_constant = NULL;
1064 }
1065 return (PyObject *)s;
1066}
1067
1068static int
1069scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1070{
1071 /* Initialize Scanner object */
1072 PyObject *ctx;
1073 static char *kwlist[] = {"context", NULL};
1074 PyScannerObject *s;
1075
1076 assert(PyScanner_Check(self));
1077 s = (PyScannerObject *)self;
1078
1079 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1080 return -1;
1081
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001082 if (s->memo == NULL) {
1083 s->memo = PyDict_New();
1084 if (s->memo == NULL)
1085 goto bail;
1086 }
1087
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001088 /* All of these will fail "gracefully" so we don't need to verify them */
1089 s->strict = PyObject_GetAttrString(ctx, "strict");
1090 if (s->strict == NULL)
1091 goto bail;
1092 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1093 if (s->object_hook == NULL)
1094 goto bail;
1095 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1096 if (s->object_pairs_hook == NULL)
1097 goto bail;
1098 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1099 if (s->parse_float == NULL)
1100 goto bail;
1101 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1102 if (s->parse_int == NULL)
1103 goto bail;
1104 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1105 if (s->parse_constant == NULL)
1106 goto bail;
1107
1108 return 0;
1109
1110bail:
1111 Py_CLEAR(s->strict);
1112 Py_CLEAR(s->object_hook);
1113 Py_CLEAR(s->object_pairs_hook);
1114 Py_CLEAR(s->parse_float);
1115 Py_CLEAR(s->parse_int);
1116 Py_CLEAR(s->parse_constant);
1117 return -1;
1118}
1119
PyDoc_STRVAR(scanner_doc, "JSON scanner object");

/* Type object for _json.Scanner.
   Instances are callable (tp_call = scanner_call) and take part in cyclic
   garbage collection (Py_TPFLAGS_HAVE_GC with scanner_traverse and
   scanner_clear).  The slots are given positionally; the trailing comment
   on each line names the slot.  Note that PyInit__json assigns
   PyType_GenericNew to tp_new before calling PyType_Ready, replacing the
   scanner_new entry listed here. */
static
PyTypeObject PyScannerType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_json.Scanner", /* tp_name */
    sizeof(PyScannerObject), /* tp_basicsize */
    0, /* tp_itemsize */
    scanner_dealloc, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_compare */
    0, /* tp_repr */
    0, /* tp_as_number */
    0, /* tp_as_sequence */
    0, /* tp_as_mapping */
    0, /* tp_hash */
    scanner_call, /* tp_call */
    0, /* tp_str */
    0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
    0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
    0, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    scanner_doc, /* tp_doc */
    scanner_traverse, /* tp_traverse */
    scanner_clear, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    0, /* tp_iter */
    0, /* tp_iternext */
    0, /* tp_methods */
    scanner_members, /* tp_members */
    0, /* tp_getset */
    0, /* tp_base */
    0, /* tp_dict */
    0, /* tp_descr_get */
    0, /* tp_descr_set */
    0, /* tp_dictoffset */
    scanner_init, /* tp_init */
    0,/* PyType_GenericAlloc, */ /* tp_alloc */
    scanner_new, /* tp_new */
    0,/* PyObject_GC_Del, */ /* tp_free */
};
1164
1165static PyObject *
1166encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1167{
1168 PyEncoderObject *s;
1169 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1170 if (s != NULL) {
1171 s->markers = NULL;
1172 s->defaultfn = NULL;
1173 s->encoder = NULL;
1174 s->indent = NULL;
1175 s->key_separator = NULL;
1176 s->item_separator = NULL;
1177 s->sort_keys = NULL;
1178 s->skipkeys = NULL;
1179 }
1180 return (PyObject *)s;
1181}
1182
1183static int
1184encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1185{
1186 /* initialize Encoder object */
1187 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1188
1189 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001190 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1191 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001192
1193 assert(PyEncoder_Check(self));
1194 s = (PyEncoderObject *)self;
1195
1196 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001197 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
1198 &sort_keys, &skipkeys, &allow_nan))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001199 return -1;
1200
Antoine Pitrou781eba72009-12-08 15:57:31 +00001201 s->markers = markers;
1202 s->defaultfn = defaultfn;
1203 s->encoder = encoder;
1204 s->indent = indent;
1205 s->key_separator = key_separator;
1206 s->item_separator = item_separator;
1207 s->sort_keys = sort_keys;
1208 s->skipkeys = skipkeys;
1209 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
1210 s->allow_nan = PyObject_IsTrue(allow_nan);
1211
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001212 Py_INCREF(s->markers);
1213 Py_INCREF(s->defaultfn);
1214 Py_INCREF(s->encoder);
1215 Py_INCREF(s->indent);
1216 Py_INCREF(s->key_separator);
1217 Py_INCREF(s->item_separator);
1218 Py_INCREF(s->sort_keys);
1219 Py_INCREF(s->skipkeys);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001220 return 0;
1221}
1222
1223static PyObject *
1224encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1225{
1226 /* Python callable interface to encode_listencode_obj */
1227 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1228 PyObject *obj;
1229 PyObject *rval;
1230 Py_ssize_t indent_level;
1231 PyEncoderObject *s;
1232 assert(PyEncoder_Check(self));
1233 s = (PyEncoderObject *)self;
1234 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1235 &obj, _convertPyInt_AsSsize_t, &indent_level))
1236 return NULL;
1237 rval = PyList_New(0);
1238 if (rval == NULL)
1239 return NULL;
1240 if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1241 Py_DECREF(rval);
1242 return NULL;
1243 }
1244 return rval;
1245}
1246
1247static PyObject *
1248_encoded_const(PyObject *obj)
1249{
1250 /* Return the JSON string representation of None, True, False */
1251 if (obj == Py_None) {
1252 static PyObject *s_null = NULL;
1253 if (s_null == NULL) {
1254 s_null = PyUnicode_InternFromString("null");
1255 }
1256 Py_INCREF(s_null);
1257 return s_null;
1258 }
1259 else if (obj == Py_True) {
1260 static PyObject *s_true = NULL;
1261 if (s_true == NULL) {
1262 s_true = PyUnicode_InternFromString("true");
1263 }
1264 Py_INCREF(s_true);
1265 return s_true;
1266 }
1267 else if (obj == Py_False) {
1268 static PyObject *s_false = NULL;
1269 if (s_false == NULL) {
1270 s_false = PyUnicode_InternFromString("false");
1271 }
1272 Py_INCREF(s_false);
1273 return s_false;
1274 }
1275 else {
1276 PyErr_SetString(PyExc_ValueError, "not a const");
1277 return NULL;
1278 }
1279}
1280
1281static PyObject *
1282encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1283{
1284 /* Return the JSON representation of a PyFloat */
1285 double i = PyFloat_AS_DOUBLE(obj);
1286 if (!Py_IS_FINITE(i)) {
1287 if (!s->allow_nan) {
1288 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1289 return NULL;
1290 }
1291 if (i > 0) {
1292 return PyUnicode_FromString("Infinity");
1293 }
1294 else if (i < 0) {
1295 return PyUnicode_FromString("-Infinity");
1296 }
1297 else {
1298 return PyUnicode_FromString("NaN");
1299 }
1300 }
1301 /* Use a better float format here? */
1302 return PyObject_Repr(obj);
1303}
1304
1305static PyObject *
1306encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1307{
1308 /* Return the JSON representation of a string */
1309 if (s->fast_encode)
1310 return py_encode_basestring_ascii(NULL, obj);
1311 else
1312 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1313}
1314
1315static int
1316_steal_list_append(PyObject *lst, PyObject *stolen)
1317{
1318 /* Append stolen and then decrement its reference count */
1319 int rval = PyList_Append(lst, stolen);
1320 Py_DECREF(stolen);
1321 return rval;
1322}
1323
1324static int
1325encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
1326{
1327 /* Encode Python object obj to a JSON term, rval is a PyList */
1328 PyObject *newobj;
1329 int rv;
1330
1331 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1332 PyObject *cstr = _encoded_const(obj);
1333 if (cstr == NULL)
1334 return -1;
1335 return _steal_list_append(rval, cstr);
1336 }
1337 else if (PyUnicode_Check(obj))
1338 {
1339 PyObject *encoded = encoder_encode_string(s, obj);
1340 if (encoded == NULL)
1341 return -1;
1342 return _steal_list_append(rval, encoded);
1343 }
1344 else if (PyLong_Check(obj)) {
1345 PyObject *encoded = PyObject_Str(obj);
1346 if (encoded == NULL)
1347 return -1;
1348 return _steal_list_append(rval, encoded);
1349 }
1350 else if (PyFloat_Check(obj)) {
1351 PyObject *encoded = encoder_encode_float(s, obj);
1352 if (encoded == NULL)
1353 return -1;
1354 return _steal_list_append(rval, encoded);
1355 }
1356 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001357 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1358 return -1;
1359 rv = encoder_listencode_list(s, rval, obj, indent_level);
1360 Py_LeaveRecursiveCall();
1361 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001362 }
1363 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001364 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1365 return -1;
1366 rv = encoder_listencode_dict(s, rval, obj, indent_level);
1367 Py_LeaveRecursiveCall();
1368 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001369 }
1370 else {
1371 PyObject *ident = NULL;
1372 if (s->markers != Py_None) {
1373 int has_key;
1374 ident = PyLong_FromVoidPtr(obj);
1375 if (ident == NULL)
1376 return -1;
1377 has_key = PyDict_Contains(s->markers, ident);
1378 if (has_key) {
1379 if (has_key != -1)
1380 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1381 Py_DECREF(ident);
1382 return -1;
1383 }
1384 if (PyDict_SetItem(s->markers, ident, obj)) {
1385 Py_DECREF(ident);
1386 return -1;
1387 }
1388 }
1389 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
1390 if (newobj == NULL) {
1391 Py_XDECREF(ident);
1392 return -1;
1393 }
Ezio Melotti13672652011-05-11 01:02:56 +03001394
1395 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1396 return -1;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001397 rv = encoder_listencode_obj(s, rval, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001398 Py_LeaveRecursiveCall();
1399
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001400 Py_DECREF(newobj);
1401 if (rv) {
1402 Py_XDECREF(ident);
1403 return -1;
1404 }
1405 if (ident != NULL) {
1406 if (PyDict_DelItem(s->markers, ident)) {
1407 Py_XDECREF(ident);
1408 return -1;
1409 }
1410 Py_XDECREF(ident);
1411 }
1412 return rv;
1413 }
1414}
1415
1416static int
1417encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
1418{
1419 /* Encode Python dict dct a JSON term, rval is a PyList */
1420 static PyObject *open_dict = NULL;
1421 static PyObject *close_dict = NULL;
1422 static PyObject *empty_dict = NULL;
1423 PyObject *kstr = NULL;
1424 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001425 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001426 PyObject *items;
1427 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001428 int skipkeys;
1429 Py_ssize_t idx;
1430
1431 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1432 open_dict = PyUnicode_InternFromString("{");
1433 close_dict = PyUnicode_InternFromString("}");
1434 empty_dict = PyUnicode_InternFromString("{}");
1435 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1436 return -1;
1437 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001438 if (Py_SIZE(dct) == 0)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001439 return PyList_Append(rval, empty_dict);
1440
1441 if (s->markers != Py_None) {
1442 int has_key;
1443 ident = PyLong_FromVoidPtr(dct);
1444 if (ident == NULL)
1445 goto bail;
1446 has_key = PyDict_Contains(s->markers, ident);
1447 if (has_key) {
1448 if (has_key != -1)
1449 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1450 goto bail;
1451 }
1452 if (PyDict_SetItem(s->markers, ident, dct)) {
1453 goto bail;
1454 }
1455 }
1456
1457 if (PyList_Append(rval, open_dict))
1458 goto bail;
1459
1460 if (s->indent != Py_None) {
1461 /* TODO: DOES NOT RUN */
1462 indent_level += 1;
1463 /*
1464 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1465 separator = _item_separator + newline_indent
1466 buf += newline_indent
1467 */
1468 }
1469
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001470 if (PyObject_IsTrue(s->sort_keys)) {
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001471 /* First sort the keys then replace them with (key, value) tuples. */
1472 Py_ssize_t i, nitems;
1473 items = PyMapping_Keys(dct);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001474 if (items == NULL)
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001475 goto bail;
1476 if (!PyList_Check(items)) {
1477 PyErr_SetString(PyExc_ValueError, "keys must return list");
1478 goto bail;
1479 }
1480 if (PyList_Sort(items) < 0)
1481 goto bail;
1482 nitems = PyList_GET_SIZE(items);
1483 for (i = 0; i < nitems; i++) {
1484 PyObject *key, *value;
1485 key = PyList_GET_ITEM(items, i);
1486 value = PyDict_GetItem(dct, key);
1487 item = PyTuple_Pack(2, key, value);
1488 if (item == NULL)
1489 goto bail;
1490 PyList_SET_ITEM(items, i, item);
1491 Py_DECREF(key);
1492 }
1493 }
1494 else {
1495 items = PyMapping_Items(dct);
1496 }
1497 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001498 goto bail;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001499 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001500 Py_DECREF(items);
1501 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001502 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001503 skipkeys = PyObject_IsTrue(s->skipkeys);
1504 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001505 while ((item = PyIter_Next(it)) != NULL) {
1506 PyObject *encoded, *key, *value;
1507 if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
1508 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1509 goto bail;
1510 }
1511 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001512 if (PyUnicode_Check(key)) {
1513 Py_INCREF(key);
1514 kstr = key;
1515 }
1516 else if (PyFloat_Check(key)) {
1517 kstr = encoder_encode_float(s, key);
1518 if (kstr == NULL)
1519 goto bail;
1520 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001521 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001522 /* This must come before the PyLong_Check because
1523 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001524 kstr = _encoded_const(key);
1525 if (kstr == NULL)
1526 goto bail;
1527 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001528 else if (PyLong_Check(key)) {
1529 kstr = PyObject_Str(key);
1530 if (kstr == NULL)
1531 goto bail;
1532 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001533 else if (skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001534 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001535 continue;
1536 }
1537 else {
1538 /* TODO: include repr of key */
Doug Hellmann1c524752010-07-21 12:29:04 +00001539 PyErr_SetString(PyExc_TypeError, "keys must be a string");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001540 goto bail;
1541 }
1542
1543 if (idx) {
1544 if (PyList_Append(rval, s->item_separator))
1545 goto bail;
1546 }
1547
1548 encoded = encoder_encode_string(s, kstr);
1549 Py_CLEAR(kstr);
1550 if (encoded == NULL)
1551 goto bail;
1552 if (PyList_Append(rval, encoded)) {
1553 Py_DECREF(encoded);
1554 goto bail;
1555 }
1556 Py_DECREF(encoded);
1557 if (PyList_Append(rval, s->key_separator))
1558 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001559
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001560 value = PyTuple_GET_ITEM(item, 1);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001561 if (encoder_listencode_obj(s, rval, value, indent_level))
1562 goto bail;
1563 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001564 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001565 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001566 if (PyErr_Occurred())
1567 goto bail;
1568 Py_CLEAR(it);
1569
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001570 if (ident != NULL) {
1571 if (PyDict_DelItem(s->markers, ident))
1572 goto bail;
1573 Py_CLEAR(ident);
1574 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001575 /* TODO DOES NOT RUN; dead code
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001576 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001577 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001578
1579 yield '\n' + (' ' * (_indent * _current_indent_level))
1580 }*/
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001581 if (PyList_Append(rval, close_dict))
1582 goto bail;
1583 return 0;
1584
1585bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001586 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001587 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001588 Py_XDECREF(kstr);
1589 Py_XDECREF(ident);
1590 return -1;
1591}
1592
1593
1594static int
1595encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
1596{
1597 /* Encode Python list seq to a JSON term, rval is a PyList */
1598 static PyObject *open_array = NULL;
1599 static PyObject *close_array = NULL;
1600 static PyObject *empty_array = NULL;
1601 PyObject *ident = NULL;
1602 PyObject *s_fast = NULL;
1603 Py_ssize_t num_items;
1604 PyObject **seq_items;
1605 Py_ssize_t i;
1606
1607 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1608 open_array = PyUnicode_InternFromString("[");
1609 close_array = PyUnicode_InternFromString("]");
1610 empty_array = PyUnicode_InternFromString("[]");
1611 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1612 return -1;
1613 }
1614 ident = NULL;
1615 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1616 if (s_fast == NULL)
1617 return -1;
1618 num_items = PySequence_Fast_GET_SIZE(s_fast);
1619 if (num_items == 0) {
1620 Py_DECREF(s_fast);
1621 return PyList_Append(rval, empty_array);
1622 }
1623
1624 if (s->markers != Py_None) {
1625 int has_key;
1626 ident = PyLong_FromVoidPtr(seq);
1627 if (ident == NULL)
1628 goto bail;
1629 has_key = PyDict_Contains(s->markers, ident);
1630 if (has_key) {
1631 if (has_key != -1)
1632 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1633 goto bail;
1634 }
1635 if (PyDict_SetItem(s->markers, ident, seq)) {
1636 goto bail;
1637 }
1638 }
1639
1640 seq_items = PySequence_Fast_ITEMS(s_fast);
1641 if (PyList_Append(rval, open_array))
1642 goto bail;
1643 if (s->indent != Py_None) {
1644 /* TODO: DOES NOT RUN */
1645 indent_level += 1;
1646 /*
1647 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1648 separator = _item_separator + newline_indent
1649 buf += newline_indent
1650 */
1651 }
1652 for (i = 0; i < num_items; i++) {
1653 PyObject *obj = seq_items[i];
1654 if (i) {
1655 if (PyList_Append(rval, s->item_separator))
1656 goto bail;
1657 }
1658 if (encoder_listencode_obj(s, rval, obj, indent_level))
1659 goto bail;
1660 }
1661 if (ident != NULL) {
1662 if (PyDict_DelItem(s->markers, ident))
1663 goto bail;
1664 Py_CLEAR(ident);
1665 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001666
1667 /* TODO: DOES NOT RUN
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001668 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001669 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001670
1671 yield '\n' + (' ' * (_indent * _current_indent_level))
1672 }*/
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001673 if (PyList_Append(rval, close_array))
1674 goto bail;
1675 Py_DECREF(s_fast);
1676 return 0;
1677
1678bail:
1679 Py_XDECREF(ident);
1680 Py_DECREF(s_fast);
1681 return -1;
1682}
1683
1684static void
1685encoder_dealloc(PyObject *self)
1686{
1687 /* Deallocate Encoder */
1688 encoder_clear(self);
1689 Py_TYPE(self)->tp_free(self);
1690}
1691
1692static int
1693encoder_traverse(PyObject *self, visitproc visit, void *arg)
1694{
1695 PyEncoderObject *s;
1696 assert(PyEncoder_Check(self));
1697 s = (PyEncoderObject *)self;
1698 Py_VISIT(s->markers);
1699 Py_VISIT(s->defaultfn);
1700 Py_VISIT(s->encoder);
1701 Py_VISIT(s->indent);
1702 Py_VISIT(s->key_separator);
1703 Py_VISIT(s->item_separator);
1704 Py_VISIT(s->sort_keys);
1705 Py_VISIT(s->skipkeys);
1706 return 0;
1707}
1708
1709static int
1710encoder_clear(PyObject *self)
1711{
1712 /* Deallocate Encoder */
1713 PyEncoderObject *s;
1714 assert(PyEncoder_Check(self));
1715 s = (PyEncoderObject *)self;
1716 Py_CLEAR(s->markers);
1717 Py_CLEAR(s->defaultfn);
1718 Py_CLEAR(s->encoder);
1719 Py_CLEAR(s->indent);
1720 Py_CLEAR(s->key_separator);
1721 Py_CLEAR(s->item_separator);
1722 Py_CLEAR(s->sort_keys);
1723 Py_CLEAR(s->skipkeys);
1724 return 0;
1725}
1726
PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");

/* Type object for _json.Encoder.
   Instances are callable (tp_call = encoder_call) and take part in cyclic
   garbage collection (Py_TPFLAGS_HAVE_GC with encoder_traverse and
   encoder_clear).  The slots are given positionally; the trailing comment
   on each line names the slot.  Note that PyInit__json assigns
   PyType_GenericNew to tp_new before calling PyType_Ready, replacing the
   encoder_new entry listed here. */
static
PyTypeObject PyEncoderType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_json.Encoder", /* tp_name */
    sizeof(PyEncoderObject), /* tp_basicsize */
    0, /* tp_itemsize */
    encoder_dealloc, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_compare */
    0, /* tp_repr */
    0, /* tp_as_number */
    0, /* tp_as_sequence */
    0, /* tp_as_mapping */
    0, /* tp_hash */
    encoder_call, /* tp_call */
    0, /* tp_str */
    0, /* tp_getattro */
    0, /* tp_setattro */
    0, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    encoder_doc, /* tp_doc */
    encoder_traverse, /* tp_traverse */
    encoder_clear, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    0, /* tp_iter */
    0, /* tp_iternext */
    0, /* tp_methods */
    encoder_members, /* tp_members */
    0, /* tp_getset */
    0, /* tp_base */
    0, /* tp_dict */
    0, /* tp_descr_get */
    0, /* tp_descr_set */
    0, /* tp_dictoffset */
    encoder_init, /* tp_init */
    0, /* tp_alloc */
    encoder_new, /* tp_new */
    0, /* tp_free */
};
1771
/* Module-level functions exported by _json.  Each entry gives the Python
   name, the C implementation, the calling convention and the docstring;
   the all-NULL entry terminates the table. */
static PyMethodDef speedups_methods[] = {
    /* METH_O: called with exactly one positional argument */
    {"encode_basestring_ascii",
        (PyCFunction)py_encode_basestring_ascii,
        METH_O,
        pydoc_encode_basestring_ascii},
    /* METH_VARARGS: called with a tuple of positional arguments */
    {"scanstring",
        (PyCFunction)py_scanstring,
        METH_VARARGS,
        pydoc_scanstring},
    {NULL, NULL, 0, NULL}
};
1783
PyDoc_STRVAR(module_doc,
"json speedups\n");

/* Definition of the _json module, consumed by PyModule_Create in
   PyInit__json.  Fields are positional; the comments name the ones that
   are set. */
static struct PyModuleDef jsonmodule = {
    PyModuleDef_HEAD_INIT,
    "_json",            /* module name */
    module_doc,         /* module docstring */
    -1,                 /* size of per-module state; -1 here */
    speedups_methods,   /* module-level functions */
    NULL,
    NULL,
    NULL,
    NULL
};
1798
1799PyObject*
1800PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00001801{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001802 PyObject *m = PyModule_Create(&jsonmodule);
1803 if (!m)
1804 return NULL;
1805 PyScannerType.tp_new = PyType_GenericNew;
1806 if (PyType_Ready(&PyScannerType) < 0)
1807 goto fail;
1808 PyEncoderType.tp_new = PyType_GenericNew;
1809 if (PyType_Ready(&PyEncoderType) < 0)
1810 goto fail;
1811 Py_INCREF((PyObject*)&PyScannerType);
1812 if (PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType) < 0) {
1813 Py_DECREF((PyObject*)&PyScannerType);
1814 goto fail;
1815 }
1816 Py_INCREF((PyObject*)&PyEncoderType);
1817 if (PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType) < 0) {
1818 Py_DECREF((PyObject*)&PyEncoderType);
1819 goto fail;
1820 }
1821 return m;
1822 fail:
1823 Py_DECREF(m);
1824 return NULL;
Christian Heimes90540002008-05-08 14:29:10 +00001825}