blob: 7c5e5e43ed81c11212afb2457a7d2f17db72cd58 [file] [log] [blame]
Christian Heimes90540002008-05-08 14:29:10 +00001#include "Python.h"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00002#include "structmember.h"
3#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
4#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
5#endif
6#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
7typedef int Py_ssize_t;
8#define PY_SSIZE_T_MAX INT_MAX
9#define PY_SSIZE_T_MIN INT_MIN
10#define PyInt_FromSsize_t PyInt_FromLong
11#define PyInt_AsSsize_t PyInt_AsLong
12#endif
13#ifndef Py_IS_FINITE
14#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
15#endif
Christian Heimes90540002008-05-08 14:29:10 +000016
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000017#ifdef __GNUC__
18#define UNUSED __attribute__((__unused__))
19#else
20#define UNUSED
21#endif
22
23#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
24#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
25#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
26#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
27
28static PyTypeObject PyScannerType;
29static PyTypeObject PyEncoderType;
30
31typedef struct _PyScannerObject {
32 PyObject_HEAD
33 PyObject *strict;
34 PyObject *object_hook;
35 PyObject *object_pairs_hook;
36 PyObject *parse_float;
37 PyObject *parse_int;
38 PyObject *parse_constant;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +000039 PyObject *memo;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000040} PyScannerObject;
41
42static PyMemberDef scanner_members[] = {
43 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
44 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
45 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
46 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
47 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
48 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
49 {NULL}
50};
51
52typedef struct _PyEncoderObject {
53 PyObject_HEAD
54 PyObject *markers;
55 PyObject *defaultfn;
56 PyObject *encoder;
57 PyObject *indent;
58 PyObject *key_separator;
59 PyObject *item_separator;
60 PyObject *sort_keys;
61 PyObject *skipkeys;
62 int fast_encode;
63 int allow_nan;
64} PyEncoderObject;
65
66static PyMemberDef encoder_members[] = {
67 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
68 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
69 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
70 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
71 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
72 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
73 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
74 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
75 {NULL}
76};
77
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020078/*
79 * A two-level accumulator of unicode objects that avoids both the overhead
80 * of keeping a huge number of small separate objects, and the quadratic
81 * behaviour of using a naive repeated concatenation scheme.
82 */
83
84typedef struct {
85 PyObject *large; /* A list of previously accumulated large strings */
86 PyObject *small; /* Pending small strings */
87} accumulator;
88
89static PyObject *
90join_list_unicode(PyObject *lst)
91{
92 /* return u''.join(lst) */
93 static PyObject *sep = NULL;
94 if (sep == NULL) {
95 sep = PyUnicode_FromStringAndSize("", 0);
96 if (sep == NULL)
97 return NULL;
98 }
99 return PyUnicode_Join(sep, lst);
100}
101
102static int
103init_accumulator(accumulator *acc)
104{
105 acc->large = PyList_New(0);
106 if (acc->large == NULL)
107 return -1;
108 acc->small = PyList_New(0);
109 if (acc->small == NULL) {
110 Py_CLEAR(acc->large);
111 return -1;
112 }
113 return 0;
114}
115
116static int
117flush_accumulator(accumulator *acc)
118{
119 Py_ssize_t nsmall = PyList_GET_SIZE(acc->small);
120 if (nsmall) {
121 int ret;
122 PyObject *joined = join_list_unicode(acc->small);
123 if (joined == NULL)
124 return -1;
125 if (PyList_SetSlice(acc->small, 0, nsmall, NULL)) {
126 Py_DECREF(joined);
127 return -1;
128 }
129 ret = PyList_Append(acc->large, joined);
130 Py_DECREF(joined);
131 return ret;
132 }
133 return 0;
134}
135
136static int
137accumulate_unicode(accumulator *acc, PyObject *obj)
138{
139 int ret;
140 Py_ssize_t nsmall;
141 assert(PyUnicode_Check(obj));
142
143 if (PyList_Append(acc->small, obj))
144 return -1;
145 nsmall = PyList_GET_SIZE(acc->small);
146 /* Each item in a list of unicode objects has an overhead (in 64-bit
147 * builds) of:
148 * - 8 bytes for the list slot
149 * - 56 bytes for the header of the unicode object
150 * that is, 64 bytes. 100000 such objects waste more than 6MB
151 * compared to a single concatenated string.
152 */
153 if (nsmall < 100000)
154 return 0;
155 PyObject *joined = join_list_unicode(acc->small);
156 if (joined == NULL)
157 return -1;
158 if (PyList_SetSlice(acc->small, 0, nsmall, NULL)) {
159 Py_DECREF(joined);
160 return -1;
161 }
162 ret = PyList_Append(acc->large, joined);
163 Py_DECREF(joined);
164 return ret;
165}
166
167static PyObject *
168finish_accumulator(accumulator *acc)
169{
170 int ret;
171 PyObject *res;
172
173 ret = flush_accumulator(acc);
174 Py_CLEAR(acc->small);
175 if (ret) {
176 Py_CLEAR(acc->large);
177 return NULL;
178 }
179 res = acc->large;
180 acc->large = NULL;
181 return res;
182}
183
184static void
185destroy_accumulator(accumulator *acc)
186{
187 Py_CLEAR(acc->small);
188 Py_CLEAR(acc->large);
189}
190
191/* Forward decls */
192
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000193static PyObject *
194ascii_escape_unicode(PyObject *pystr);
195static PyObject *
196py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
197void init_json(void);
198static PyObject *
199scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
200static PyObject *
201_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
202static PyObject *
203scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
204static int
205scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
206static void
207scanner_dealloc(PyObject *self);
208static int
209scanner_clear(PyObject *self);
210static PyObject *
211encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
212static int
213encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
214static void
215encoder_dealloc(PyObject *self);
216static int
217encoder_clear(PyObject *self);
218static int
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +0200219encoder_listencode_list(PyEncoderObject *s, accumulator *acc, PyObject *seq, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000220static int
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +0200221encoder_listencode_obj(PyEncoderObject *s, accumulator *acc, PyObject *obj, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000222static int
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +0200223encoder_listencode_dict(PyEncoderObject *s, accumulator *acc, PyObject *dct, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000224static PyObject *
Hirokazu Yamamotofecf5d12009-05-02 15:55:19 +0000225_encoded_const(PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000226static void
227raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
228static PyObject *
229encoder_encode_string(PyEncoderObject *s, PyObject *obj);
230static int
231_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
232static PyObject *
233_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
234static PyObject *
235encoder_encode_float(PyEncoderObject *s, PyObject *obj);
236
/* True when c can be copied verbatim into an ASCII-only JSON string:
   printable ASCII (' ' through '~') other than backslash and double quote.
   Every use of the argument is parenthesized so that compound expressions
   such as `x & 0x7f` or `a ? b : c` are classified correctly.  The argument
   is evaluated several times, so it must be free of side effects. */
#define S_CHAR(c) (((c) >= ' ') && ((c) <= '~') && ((c) != '\\') && ((c) != '"'))
/* True for space, tab, line feed and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* Length of one "\uXXXX" escape; a wide Py_UNICODE may need two of them
   (a surrogate pair), hence the doubled upper bound. */
#define MIN_EXPANSION 6
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
246
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000247static int
248_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000249{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000250 /* PyObject to Py_ssize_t converter */
251 *size_ptr = PyLong_AsSsize_t(o);
Georg Brandl59682052009-05-05 07:52:05 +0000252 if (*size_ptr == -1 && PyErr_Occurred())
253 return 0;
254 return 1;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000255}
256
257static PyObject *
258_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
259{
260 /* Py_ssize_t to PyObject converter */
261 return PyLong_FromSsize_t(*size_ptr);
262}
263
264static Py_ssize_t
265ascii_escape_unichar(Py_UNICODE c, Py_UNICODE *output, Py_ssize_t chars)
266{
267 /* Escape unicode code point c to ASCII escape sequences
268 in char *output. output must have at least 12 bytes unused to
269 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000270 output[chars++] = '\\';
271 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000272 case '\\': output[chars++] = c; break;
273 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000274 case '\b': output[chars++] = 'b'; break;
275 case '\f': output[chars++] = 'f'; break;
276 case '\n': output[chars++] = 'n'; break;
277 case '\r': output[chars++] = 'r'; break;
278 case '\t': output[chars++] = 't'; break;
279 default:
280#ifdef Py_UNICODE_WIDE
281 if (c >= 0x10000) {
282 /* UTF-16 surrogate pair */
283 Py_UNICODE v = c - 0x10000;
284 c = 0xd800 | ((v >> 10) & 0x3ff);
285 output[chars++] = 'u';
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000286 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
287 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
288 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
289 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000290 c = 0xdc00 | (v & 0x3ff);
291 output[chars++] = '\\';
292 }
293#endif
294 output[chars++] = 'u';
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000295 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
296 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
297 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
298 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000299 }
300 return chars;
301}
302
303static PyObject *
304ascii_escape_unicode(PyObject *pystr)
305{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000306 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000307 Py_ssize_t i;
308 Py_ssize_t input_chars;
309 Py_ssize_t output_size;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000310 Py_ssize_t max_output_size;
Christian Heimes90540002008-05-08 14:29:10 +0000311 Py_ssize_t chars;
312 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000313 Py_UNICODE *output;
Christian Heimes90540002008-05-08 14:29:10 +0000314 Py_UNICODE *input_unicode;
315
316 input_chars = PyUnicode_GET_SIZE(pystr);
317 input_unicode = PyUnicode_AS_UNICODE(pystr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000318
Christian Heimes90540002008-05-08 14:29:10 +0000319 /* One char input can be up to 6 chars output, estimate 4 of these */
320 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000321 max_output_size = 2 + (input_chars * MAX_EXPANSION);
322 rval = PyUnicode_FromStringAndSize(NULL, output_size);
Christian Heimes90540002008-05-08 14:29:10 +0000323 if (rval == NULL) {
324 return NULL;
325 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000326 output = PyUnicode_AS_UNICODE(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000327 chars = 0;
328 output[chars++] = '"';
329 for (i = 0; i < input_chars; i++) {
330 Py_UNICODE c = input_unicode[i];
331 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000332 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000333 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000334 else {
335 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000336 }
337 if (output_size - chars < (1 + MAX_EXPANSION)) {
338 /* There's more than four, so let's resize by a lot */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000339 Py_ssize_t new_output_size = output_size * 2;
Christian Heimes90540002008-05-08 14:29:10 +0000340 /* This is an upper bound */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000341 if (new_output_size > max_output_size) {
342 new_output_size = max_output_size;
Christian Heimes90540002008-05-08 14:29:10 +0000343 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000344 /* Make sure that the output size changed before resizing */
345 if (new_output_size != output_size) {
346 output_size = new_output_size;
347 if (PyUnicode_Resize(&rval, output_size) == -1) {
348 return NULL;
349 }
350 output = PyUnicode_AS_UNICODE(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000351 }
Christian Heimes90540002008-05-08 14:29:10 +0000352 }
353 }
354 output[chars++] = '"';
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000355 if (PyUnicode_Resize(&rval, chars) == -1) {
Christian Heimes90540002008-05-08 14:29:10 +0000356 return NULL;
357 }
358 return rval;
359}
360
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000361static void
Christian Heimes90540002008-05-08 14:29:10 +0000362raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
363{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000364 /* Use the Python function json.decoder.errmsg to raise a nice
365 looking ValueError exception */
Christian Heimes90540002008-05-08 14:29:10 +0000366 static PyObject *errmsg_fn = NULL;
367 PyObject *pymsg;
368 if (errmsg_fn == NULL) {
369 PyObject *decoder = PyImport_ImportModule("json.decoder");
370 if (decoder == NULL)
371 return;
372 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000373 Py_DECREF(decoder);
Christian Heimes90540002008-05-08 14:29:10 +0000374 if (errmsg_fn == NULL)
375 return;
Christian Heimes90540002008-05-08 14:29:10 +0000376 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000377 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000378 if (pymsg) {
379 PyErr_SetObject(PyExc_ValueError, pymsg);
380 Py_DECREF(pymsg);
381 }
Christian Heimes90540002008-05-08 14:29:10 +0000382}
383
384static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000385_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
386 /* return (rval, idx) tuple, stealing reference to rval */
387 PyObject *tpl;
388 PyObject *pyidx;
389 /*
390 steal a reference to rval, returns (rval, idx)
391 */
392 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000393 return NULL;
394 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000395 pyidx = PyLong_FromSsize_t(idx);
396 if (pyidx == NULL) {
397 Py_DECREF(rval);
398 return NULL;
399 }
400 tpl = PyTuple_New(2);
401 if (tpl == NULL) {
402 Py_DECREF(pyidx);
403 Py_DECREF(rval);
404 return NULL;
405 }
406 PyTuple_SET_ITEM(tpl, 0, rval);
407 PyTuple_SET_ITEM(tpl, 1, pyidx);
408 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000409}
410
/* Move the pending `chunk` (if any) into the `chunks` list, creating the
   list on first use.  Expects locals `chunk` and `chunks` and a `bail`
   label in the enclosing function.

   On append failure the chunk is released with Py_CLEAR rather than
   Py_DECREF: the bail path in scanstring_unicode() runs Py_XDECREF(chunk),
   so leaving a dangling non-NULL pointer behind would decref the same
   object twice. */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        if (chunks == NULL) { \
            chunks = PyList_New(0); \
            if (chunks == NULL) { \
                goto bail; \
            } \
        } \
        if (PyList_Append(chunks, chunk)) { \
            Py_CLEAR(chunk); \
            goto bail; \
        } \
        Py_CLEAR(chunk); \
    }
425
Christian Heimes90540002008-05-08 14:29:10 +0000426static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000427scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000428{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000429 /* Read the JSON string from PyUnicode pystr.
430 end is the index of the first character after the quote.
431 if strict is zero then literal control characters are allowed
432 *next_end_ptr is a return-by-reference index of the character
433 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000434
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000435 Return value is a new PyUnicode
436 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000437 PyObject *rval = NULL;
Christian Heimes90540002008-05-08 14:29:10 +0000438 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
439 Py_ssize_t begin = end - 1;
Brett Cannonb94767f2011-02-22 20:15:44 +0000440 Py_ssize_t next /* = begin */;
Christian Heimes90540002008-05-08 14:29:10 +0000441 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000442 PyObject *chunks = NULL;
443 PyObject *chunk = NULL;
444
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000445 if (end < 0 || len <= end) {
446 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
447 goto bail;
448 }
Christian Heimes90540002008-05-08 14:29:10 +0000449 while (1) {
450 /* Find the end of the string or the next escape */
451 Py_UNICODE c = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000452 for (next = end; next < len; next++) {
453 c = buf[next];
454 if (c == '"' || c == '\\') {
455 break;
456 }
457 else if (strict && c <= 0x1f) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000458 raise_errmsg("Invalid control character at", pystr, next);
Christian Heimes90540002008-05-08 14:29:10 +0000459 goto bail;
460 }
461 }
462 if (!(c == '"' || c == '\\')) {
463 raise_errmsg("Unterminated string starting at", pystr, begin);
464 goto bail;
465 }
466 /* Pick up this chunk if it's not zero length */
467 if (next != end) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000468 APPEND_OLD_CHUNK
Christian Heimes90540002008-05-08 14:29:10 +0000469 chunk = PyUnicode_FromUnicode(&buf[end], next - end);
470 if (chunk == NULL) {
471 goto bail;
472 }
Christian Heimes90540002008-05-08 14:29:10 +0000473 }
474 next++;
475 if (c == '"') {
476 end = next;
477 break;
478 }
479 if (next == len) {
480 raise_errmsg("Unterminated string starting at", pystr, begin);
481 goto bail;
482 }
483 c = buf[next];
484 if (c != 'u') {
485 /* Non-unicode backslash escapes */
486 end = next + 1;
487 switch (c) {
488 case '"': break;
489 case '\\': break;
490 case '/': break;
491 case 'b': c = '\b'; break;
492 case 'f': c = '\f'; break;
493 case 'n': c = '\n'; break;
494 case 'r': c = '\r'; break;
495 case 't': c = '\t'; break;
496 default: c = 0;
497 }
498 if (c == 0) {
499 raise_errmsg("Invalid \\escape", pystr, end - 2);
500 goto bail;
501 }
502 }
503 else {
504 c = 0;
505 next++;
506 end = next + 4;
507 if (end >= len) {
508 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
509 goto bail;
510 }
511 /* Decode 4 hex digits */
512 for (; next < end; next++) {
Christian Heimes90540002008-05-08 14:29:10 +0000513 Py_UNICODE digit = buf[next];
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000514 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000515 switch (digit) {
516 case '0': case '1': case '2': case '3': case '4':
517 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000518 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000519 case 'a': case 'b': case 'c': case 'd': case 'e':
520 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000521 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000522 case 'A': case 'B': case 'C': case 'D': case 'E':
523 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000524 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000525 default:
526 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
527 goto bail;
528 }
529 }
530#ifdef Py_UNICODE_WIDE
531 /* Surrogate pair */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000532 if ((c & 0xfc00) == 0xd800) {
Christian Heimes90540002008-05-08 14:29:10 +0000533 Py_UNICODE c2 = 0;
534 if (end + 6 >= len) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000535 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
536 goto bail;
Christian Heimes90540002008-05-08 14:29:10 +0000537 }
538 if (buf[next++] != '\\' || buf[next++] != 'u') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000539 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
540 goto bail;
Christian Heimes90540002008-05-08 14:29:10 +0000541 }
542 end += 6;
543 /* Decode 4 hex digits */
544 for (; next < end; next++) {
Christian Heimes90540002008-05-08 14:29:10 +0000545 Py_UNICODE digit = buf[next];
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000546 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000547 switch (digit) {
548 case '0': case '1': case '2': case '3': case '4':
549 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000550 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000551 case 'a': case 'b': case 'c': case 'd': case 'e':
552 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000553 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000554 case 'A': case 'B': case 'C': case 'D': case 'E':
555 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000556 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000557 default:
558 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
559 goto bail;
560 }
561 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000562 if ((c2 & 0xfc00) != 0xdc00) {
563 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
564 goto bail;
565 }
Christian Heimes90540002008-05-08 14:29:10 +0000566 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
567 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000568 else if ((c & 0xfc00) == 0xdc00) {
569 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
570 goto bail;
571 }
Christian Heimes90540002008-05-08 14:29:10 +0000572#endif
573 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000574 APPEND_OLD_CHUNK
Christian Heimes90540002008-05-08 14:29:10 +0000575 chunk = PyUnicode_FromUnicode(&c, 1);
576 if (chunk == NULL) {
577 goto bail;
578 }
Christian Heimes90540002008-05-08 14:29:10 +0000579 }
580
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000581 if (chunks == NULL) {
582 if (chunk != NULL)
583 rval = chunk;
584 else
585 rval = PyUnicode_FromStringAndSize("", 0);
Christian Heimes90540002008-05-08 14:29:10 +0000586 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000587 else {
588 APPEND_OLD_CHUNK
589 rval = join_list_unicode(chunks);
590 if (rval == NULL) {
591 goto bail;
592 }
593 Py_CLEAR(chunks);
594 }
595
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000596 *next_end_ptr = end;
597 return rval;
Christian Heimes90540002008-05-08 14:29:10 +0000598bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000599 *next_end_ptr = -1;
Christian Heimes90540002008-05-08 14:29:10 +0000600 Py_XDECREF(chunks);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000601 Py_XDECREF(chunk);
Christian Heimes90540002008-05-08 14:29:10 +0000602 return NULL;
603}
604
605PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000606 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000607 "\n"
608 "Scan the string s for a JSON string. End is the index of the\n"
609 "character in s after the quote that started the JSON string.\n"
610 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
611 "on attempt to decode an invalid string. If strict is False then literal\n"
612 "control characters are allowed in the string.\n"
613 "\n"
614 "Returns a tuple of the decoded string and the index of the character in s\n"
615 "after the end quote."
616);
Christian Heimes90540002008-05-08 14:29:10 +0000617
618static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000619py_scanstring(PyObject* self UNUSED, PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000620{
621 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000622 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000623 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000624 Py_ssize_t next_end = -1;
625 int strict = 1;
626 if (!PyArg_ParseTuple(args, "OO&|i:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000627 return NULL;
628 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000629 if (PyUnicode_Check(pystr)) {
630 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000631 }
632 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000633 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000634 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000635 Py_TYPE(pystr)->tp_name);
636 return NULL;
637 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000638 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000639}
640
641PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000642 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000643 "\n"
644 "Return an ASCII-only JSON representation of a Python string"
645);
Christian Heimes90540002008-05-08 14:29:10 +0000646
647static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000648py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000649{
650 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000651 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000652 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000653 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000654 rval = ascii_escape_unicode(pystr);
655 }
656 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000657 PyErr_Format(PyExc_TypeError,
658 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000659 Py_TYPE(pystr)->tp_name);
660 return NULL;
661 }
Christian Heimes90540002008-05-08 14:29:10 +0000662 return rval;
663}
664
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000665static void
666scanner_dealloc(PyObject *self)
667{
668 /* Deallocate scanner object */
669 scanner_clear(self);
670 Py_TYPE(self)->tp_free(self);
671}
672
673static int
674scanner_traverse(PyObject *self, visitproc visit, void *arg)
675{
676 PyScannerObject *s;
677 assert(PyScanner_Check(self));
678 s = (PyScannerObject *)self;
679 Py_VISIT(s->strict);
680 Py_VISIT(s->object_hook);
681 Py_VISIT(s->object_pairs_hook);
682 Py_VISIT(s->parse_float);
683 Py_VISIT(s->parse_int);
684 Py_VISIT(s->parse_constant);
685 return 0;
686}
687
688static int
689scanner_clear(PyObject *self)
690{
691 PyScannerObject *s;
692 assert(PyScanner_Check(self));
693 s = (PyScannerObject *)self;
694 Py_CLEAR(s->strict);
695 Py_CLEAR(s->object_hook);
696 Py_CLEAR(s->object_pairs_hook);
697 Py_CLEAR(s->parse_float);
698 Py_CLEAR(s->parse_int);
699 Py_CLEAR(s->parse_constant);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000700 Py_CLEAR(s->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000701 return 0;
702}
703
704static PyObject *
705_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
706 /* Read a JSON object from PyUnicode pystr.
707 idx is the index of the first character after the opening curly brace.
708 *next_idx_ptr is a return-by-reference index to the first character after
709 the closing curly brace.
710
711 Returns a new PyObject (usually a dict, but object_hook can change that)
712 */
713 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
714 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
715 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000716 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000717 PyObject *key = NULL;
718 int strict = PyObject_IsTrue(s->strict);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000719 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000720 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000721
722 if (has_pairs_hook)
723 rval = PyList_New(0);
724 else
725 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000726 if (rval == NULL)
727 return NULL;
728
729 /* skip whitespace after { */
730 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
731
732 /* only loop if the object is non-empty */
733 if (idx <= end_idx && str[idx] != '}') {
734 while (idx <= end_idx) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000735 PyObject *memokey;
736
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000737 /* read key */
738 if (str[idx] != '"') {
739 raise_errmsg("Expecting property name", pystr, idx);
740 goto bail;
741 }
742 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
743 if (key == NULL)
744 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000745 memokey = PyDict_GetItem(s->memo, key);
746 if (memokey != NULL) {
747 Py_INCREF(memokey);
748 Py_DECREF(key);
749 key = memokey;
750 }
751 else {
752 if (PyDict_SetItem(s->memo, key, key) < 0)
753 goto bail;
754 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000755 idx = next_idx;
756
757 /* skip whitespace between key and : delimiter, read :, skip whitespace */
758 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
759 if (idx > end_idx || str[idx] != ':') {
760 raise_errmsg("Expecting : delimiter", pystr, idx);
761 goto bail;
762 }
763 idx++;
764 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
765
766 /* read any JSON term */
767 val = scan_once_unicode(s, pystr, idx, &next_idx);
768 if (val == NULL)
769 goto bail;
770
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000771 if (has_pairs_hook) {
772 PyObject *item = PyTuple_Pack(2, key, val);
773 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000774 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000775 Py_CLEAR(key);
776 Py_CLEAR(val);
777 if (PyList_Append(rval, item) == -1) {
778 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000779 goto bail;
780 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000781 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000782 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000783 else {
784 if (PyDict_SetItem(rval, key, val) < 0)
785 goto bail;
786 Py_CLEAR(key);
787 Py_CLEAR(val);
788 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000789 idx = next_idx;
790
791 /* skip whitespace before } or , */
792 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
793
794 /* bail if the object is closed or we didn't get the , delimiter */
795 if (idx > end_idx) break;
796 if (str[idx] == '}') {
797 break;
798 }
799 else if (str[idx] != ',') {
800 raise_errmsg("Expecting , delimiter", pystr, idx);
801 goto bail;
802 }
803 idx++;
804
805 /* skip whitespace after , delimiter */
806 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
807 }
808 }
809
810 /* verify that idx < end_idx, str[idx] should be '}' */
811 if (idx > end_idx || str[idx] != '}') {
812 raise_errmsg("Expecting object", pystr, end_idx);
813 goto bail;
814 }
815
816 *next_idx_ptr = idx + 1;
817
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000818 if (has_pairs_hook) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000819 val = PyObject_CallFunctionObjArgs(s->object_pairs_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000820 Py_DECREF(rval);
821 return val;
822 }
823
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000824 /* if object_hook is not None: rval = object_hook(rval) */
825 if (s->object_hook != Py_None) {
826 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000827 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000828 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000829 }
830 return rval;
831bail:
832 Py_XDECREF(key);
833 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000834 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000835 return NULL;
836}
837
838static PyObject *
839_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
840 /* Read a JSON array from PyString pystr.
841 idx is the index of the first character after the opening brace.
842 *next_idx_ptr is a return-by-reference index to the first character after
843 the closing brace.
844
845 Returns a new PyList
846 */
847 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
848 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
849 PyObject *val = NULL;
850 PyObject *rval = PyList_New(0);
851 Py_ssize_t next_idx;
852 if (rval == NULL)
853 return NULL;
854
855 /* skip whitespace after [ */
856 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
857
858 /* only loop if the array is non-empty */
859 if (idx <= end_idx && str[idx] != ']') {
860 while (idx <= end_idx) {
861
862 /* read any JSON term */
863 val = scan_once_unicode(s, pystr, idx, &next_idx);
864 if (val == NULL)
865 goto bail;
866
867 if (PyList_Append(rval, val) == -1)
868 goto bail;
869
870 Py_CLEAR(val);
871 idx = next_idx;
872
873 /* skip whitespace between term and , */
874 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
875
876 /* bail if the array is closed or we didn't get the , delimiter */
877 if (idx > end_idx) break;
878 if (str[idx] == ']') {
879 break;
880 }
881 else if (str[idx] != ',') {
882 raise_errmsg("Expecting , delimiter", pystr, idx);
883 goto bail;
884 }
885 idx++;
886
887 /* skip whitespace after , */
888 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
889 }
890 }
891
892 /* verify that idx < end_idx, str[idx] should be ']' */
893 if (idx > end_idx || str[idx] != ']') {
894 raise_errmsg("Expecting object", pystr, end_idx);
895 goto bail;
896 }
897 *next_idx_ptr = idx + 1;
898 return rval;
899bail:
900 Py_XDECREF(val);
901 Py_DECREF(rval);
902 return NULL;
903}
904
905static PyObject *
906_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
907 /* Read a JSON constant from PyString pystr.
908 constant is the constant string that was found
909 ("NaN", "Infinity", "-Infinity").
910 idx is the index of the first character of the constant
911 *next_idx_ptr is a return-by-reference index to the first character after
912 the constant.
913
914 Returns the result of parse_constant
915 */
916 PyObject *cstr;
917 PyObject *rval;
918 /* constant is "NaN", "Infinity", or "-Infinity" */
919 cstr = PyUnicode_InternFromString(constant);
920 if (cstr == NULL)
921 return NULL;
922
923 /* rval = parse_constant(constant) */
924 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
925 idx += PyUnicode_GET_SIZE(cstr);
926 Py_DECREF(cstr);
927 *next_idx_ptr = idx;
928 return rval;
929}
930
931static PyObject *
932_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
933 /* Read a JSON number from PyUnicode pystr.
934 idx is the index of the first character of the number
935 *next_idx_ptr is a return-by-reference index to the first character after
936 the number.
937
938 Returns a new PyObject representation of that number:
939 PyInt, PyLong, or PyFloat.
940 May return other types if parse_int or parse_float are set
941 */
942 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
943 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
944 Py_ssize_t idx = start;
945 int is_float = 0;
946 PyObject *rval;
Antoine Pitrouf6454512011-04-25 19:16:06 +0200947 PyObject *numstr = NULL;
948 PyObject *custom_func;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000949
950 /* read a sign if it's there, make sure it's not the end of the string */
951 if (str[idx] == '-') {
952 idx++;
953 if (idx > end_idx) {
954 PyErr_SetNone(PyExc_StopIteration);
955 return NULL;
956 }
957 }
958
959 /* read as many integer digits as we find as long as it doesn't start with 0 */
960 if (str[idx] >= '1' && str[idx] <= '9') {
961 idx++;
962 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
963 }
964 /* if it starts with 0 we only expect one integer digit */
965 else if (str[idx] == '0') {
966 idx++;
967 }
968 /* no integer digits, error */
969 else {
970 PyErr_SetNone(PyExc_StopIteration);
971 return NULL;
972 }
973
974 /* if the next char is '.' followed by a digit then read all float digits */
975 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
976 is_float = 1;
977 idx += 2;
978 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
979 }
980
981 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
982 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
983 Py_ssize_t e_start = idx;
984 idx++;
985
986 /* read an exponent sign if present */
987 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
988
989 /* read all digits */
990 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
991
992 /* if we got a digit, then parse as float. if not, backtrack */
993 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
994 is_float = 1;
995 }
996 else {
997 idx = e_start;
998 }
999 }
1000
Antoine Pitrouf6454512011-04-25 19:16:06 +02001001 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
1002 custom_func = s->parse_float;
1003 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
1004 custom_func = s->parse_int;
1005 else
1006 custom_func = NULL;
1007
1008 if (custom_func) {
1009 /* copy the section we determined to be a number */
1010 numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1011 if (numstr == NULL)
1012 return NULL;
1013 rval = PyObject_CallFunctionObjArgs(custom_func, numstr, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001014 }
1015 else {
Antoine Pitrouf6454512011-04-25 19:16:06 +02001016 Py_ssize_t i, n;
1017 char *buf;
1018 /* Straight conversion to ASCII, to avoid costly conversion of
1019 decimal unicode digits (which cannot appear here) */
1020 n = idx - start;
1021 numstr = PyBytes_FromStringAndSize(NULL, n);
1022 if (numstr == NULL)
1023 return NULL;
1024 buf = PyBytes_AS_STRING(numstr);
1025 for (i = 0; i < n; i++) {
1026 buf[i] = (char) str[i + start];
1027 }
1028 if (is_float)
1029 rval = PyFloat_FromString(numstr);
1030 else
1031 rval = PyLong_FromString(buf, NULL, 10);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001032 }
1033 Py_DECREF(numstr);
1034 *next_idx_ptr = idx;
1035 return rval;
1036}
1037
1038static PyObject *
1039scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1040{
1041 /* Read one JSON term (of any kind) from PyUnicode pystr.
1042 idx is the index of the first character of the term
1043 *next_idx_ptr is a return-by-reference index to the first character after
1044 the number.
1045
1046 Returns a new PyObject representation of the term.
1047 */
Ezio Melotti362b9512011-05-07 17:58:09 +03001048 PyObject *res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001049 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1050 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1051 if (idx >= length) {
1052 PyErr_SetNone(PyExc_StopIteration);
1053 return NULL;
1054 }
1055 switch (str[idx]) {
1056 case '"':
1057 /* string */
1058 return scanstring_unicode(pystr, idx + 1,
1059 PyObject_IsTrue(s->strict),
1060 next_idx_ptr);
1061 case '{':
1062 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +03001063 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1064 "from a unicode string"))
1065 return NULL;
1066 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1067 Py_LeaveRecursiveCall();
1068 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001069 case '[':
1070 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +03001071 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1072 "from a unicode string"))
1073 return NULL;
1074 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1075 Py_LeaveRecursiveCall();
1076 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001077 case 'n':
1078 /* null */
1079 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1080 Py_INCREF(Py_None);
1081 *next_idx_ptr = idx + 4;
1082 return Py_None;
1083 }
1084 break;
1085 case 't':
1086 /* true */
1087 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1088 Py_INCREF(Py_True);
1089 *next_idx_ptr = idx + 4;
1090 return Py_True;
1091 }
1092 break;
1093 case 'f':
1094 /* false */
1095 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1096 Py_INCREF(Py_False);
1097 *next_idx_ptr = idx + 5;
1098 return Py_False;
1099 }
1100 break;
1101 case 'N':
1102 /* NaN */
1103 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1104 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1105 }
1106 break;
1107 case 'I':
1108 /* Infinity */
1109 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1110 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1111 }
1112 break;
1113 case '-':
1114 /* -Infinity */
1115 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1116 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1117 }
1118 break;
1119 }
1120 /* Didn't find a string, object, array, or named constant. Look for a number. */
1121 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1122}
1123
1124static PyObject *
1125scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1126{
1127 /* Python callable interface to scan_once_{str,unicode} */
1128 PyObject *pystr;
1129 PyObject *rval;
1130 Py_ssize_t idx;
1131 Py_ssize_t next_idx = -1;
1132 static char *kwlist[] = {"string", "idx", NULL};
1133 PyScannerObject *s;
1134 assert(PyScanner_Check(self));
1135 s = (PyScannerObject *)self;
1136 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1137 return NULL;
1138
1139 if (PyUnicode_Check(pystr)) {
1140 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1141 }
1142 else {
1143 PyErr_Format(PyExc_TypeError,
1144 "first argument must be a string, not %.80s",
1145 Py_TYPE(pystr)->tp_name);
1146 return NULL;
1147 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001148 PyDict_Clear(s->memo);
1149 if (rval == NULL)
1150 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001151 return _build_rval_index_tuple(rval, next_idx);
1152}
1153
1154static PyObject *
1155scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1156{
1157 PyScannerObject *s;
1158 s = (PyScannerObject *)type->tp_alloc(type, 0);
1159 if (s != NULL) {
1160 s->strict = NULL;
1161 s->object_hook = NULL;
1162 s->object_pairs_hook = NULL;
1163 s->parse_float = NULL;
1164 s->parse_int = NULL;
1165 s->parse_constant = NULL;
1166 }
1167 return (PyObject *)s;
1168}
1169
1170static int
1171scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1172{
1173 /* Initialize Scanner object */
1174 PyObject *ctx;
1175 static char *kwlist[] = {"context", NULL};
1176 PyScannerObject *s;
1177
1178 assert(PyScanner_Check(self));
1179 s = (PyScannerObject *)self;
1180
1181 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1182 return -1;
1183
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001184 if (s->memo == NULL) {
1185 s->memo = PyDict_New();
1186 if (s->memo == NULL)
1187 goto bail;
1188 }
1189
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001190 /* All of these will fail "gracefully" so we don't need to verify them */
1191 s->strict = PyObject_GetAttrString(ctx, "strict");
1192 if (s->strict == NULL)
1193 goto bail;
1194 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1195 if (s->object_hook == NULL)
1196 goto bail;
1197 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1198 if (s->object_pairs_hook == NULL)
1199 goto bail;
1200 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1201 if (s->parse_float == NULL)
1202 goto bail;
1203 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1204 if (s->parse_int == NULL)
1205 goto bail;
1206 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1207 if (s->parse_constant == NULL)
1208 goto bail;
1209
1210 return 0;
1211
1212bail:
1213 Py_CLEAR(s->strict);
1214 Py_CLEAR(s->object_hook);
1215 Py_CLEAR(s->object_pairs_hook);
1216 Py_CLEAR(s->parse_float);
1217 Py_CLEAR(s->parse_int);
1218 Py_CLEAR(s->parse_constant);
1219 return -1;
1220}
1221
/* Docstring and static type object for the scanner, exposed under the
   name "_json.Scanner".
   The type takes part in garbage collection (Py_TPFLAGS_HAVE_GC together
   with scanner_traverse / scanner_clear), is callable through
   scanner_call, exposes its fields through scanner_members, and is
   created and initialized by scanner_new / scanner_init. Slots left at 0
   are not set by this table. */
1222PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1223
1224static
1225PyTypeObject PyScannerType = {
1226 PyVarObject_HEAD_INIT(NULL, 0)
1227 "_json.Scanner", /* tp_name */
1228 sizeof(PyScannerObject), /* tp_basicsize */
1229 0, /* tp_itemsize */
1230 scanner_dealloc, /* tp_dealloc */
1231 0, /* tp_print */
1232 0, /* tp_getattr */
1233 0, /* tp_setattr */
1234 0, /* tp_compare */
1235 0, /* tp_repr */
1236 0, /* tp_as_number */
1237 0, /* tp_as_sequence */
1238 0, /* tp_as_mapping */
1239 0, /* tp_hash */
1240 scanner_call, /* tp_call */
1241 0, /* tp_str */
1242 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1243 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1244 0, /* tp_as_buffer */
1245 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1246 scanner_doc, /* tp_doc */
1247 scanner_traverse, /* tp_traverse */
1248 scanner_clear, /* tp_clear */
1249 0, /* tp_richcompare */
1250 0, /* tp_weaklistoffset */
1251 0, /* tp_iter */
1252 0, /* tp_iternext */
1253 0, /* tp_methods */
1254 scanner_members, /* tp_members */
1255 0, /* tp_getset */
1256 0, /* tp_base */
1257 0, /* tp_dict */
1258 0, /* tp_descr_get */
1259 0, /* tp_descr_set */
1260 0, /* tp_dictoffset */
1261 scanner_init, /* tp_init */
1262 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1263 scanner_new, /* tp_new */
1264 0,/* PyObject_GC_Del, */ /* tp_free */
1265};
1266
1267static PyObject *
1268encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1269{
1270 PyEncoderObject *s;
1271 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1272 if (s != NULL) {
1273 s->markers = NULL;
1274 s->defaultfn = NULL;
1275 s->encoder = NULL;
1276 s->indent = NULL;
1277 s->key_separator = NULL;
1278 s->item_separator = NULL;
1279 s->sort_keys = NULL;
1280 s->skipkeys = NULL;
1281 }
1282 return (PyObject *)s;
1283}
1284
1285static int
1286encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1287{
1288 /* initialize Encoder object */
1289 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1290
1291 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001292 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1293 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001294
1295 assert(PyEncoder_Check(self));
1296 s = (PyEncoderObject *)self;
1297
1298 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001299 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
1300 &sort_keys, &skipkeys, &allow_nan))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001301 return -1;
1302
Antoine Pitrou781eba72009-12-08 15:57:31 +00001303 s->markers = markers;
1304 s->defaultfn = defaultfn;
1305 s->encoder = encoder;
1306 s->indent = indent;
1307 s->key_separator = key_separator;
1308 s->item_separator = item_separator;
1309 s->sort_keys = sort_keys;
1310 s->skipkeys = skipkeys;
1311 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
1312 s->allow_nan = PyObject_IsTrue(allow_nan);
1313
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001314 Py_INCREF(s->markers);
1315 Py_INCREF(s->defaultfn);
1316 Py_INCREF(s->encoder);
1317 Py_INCREF(s->indent);
1318 Py_INCREF(s->key_separator);
1319 Py_INCREF(s->item_separator);
1320 Py_INCREF(s->sort_keys);
1321 Py_INCREF(s->skipkeys);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001322 return 0;
1323}
1324
1325static PyObject *
1326encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1327{
1328 /* Python callable interface to encode_listencode_obj */
1329 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1330 PyObject *obj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001331 Py_ssize_t indent_level;
1332 PyEncoderObject *s;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001333 accumulator acc;
1334
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001335 assert(PyEncoder_Check(self));
1336 s = (PyEncoderObject *)self;
1337 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1338 &obj, _convertPyInt_AsSsize_t, &indent_level))
1339 return NULL;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001340 if (init_accumulator(&acc))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001341 return NULL;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001342 if (encoder_listencode_obj(s, &acc, obj, indent_level)) {
1343 destroy_accumulator(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001344 return NULL;
1345 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001346 return finish_accumulator(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001347}
1348
1349static PyObject *
1350_encoded_const(PyObject *obj)
1351{
1352 /* Return the JSON string representation of None, True, False */
1353 if (obj == Py_None) {
1354 static PyObject *s_null = NULL;
1355 if (s_null == NULL) {
1356 s_null = PyUnicode_InternFromString("null");
1357 }
1358 Py_INCREF(s_null);
1359 return s_null;
1360 }
1361 else if (obj == Py_True) {
1362 static PyObject *s_true = NULL;
1363 if (s_true == NULL) {
1364 s_true = PyUnicode_InternFromString("true");
1365 }
1366 Py_INCREF(s_true);
1367 return s_true;
1368 }
1369 else if (obj == Py_False) {
1370 static PyObject *s_false = NULL;
1371 if (s_false == NULL) {
1372 s_false = PyUnicode_InternFromString("false");
1373 }
1374 Py_INCREF(s_false);
1375 return s_false;
1376 }
1377 else {
1378 PyErr_SetString(PyExc_ValueError, "not a const");
1379 return NULL;
1380 }
1381}
1382
1383static PyObject *
1384encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1385{
1386 /* Return the JSON representation of a PyFloat */
1387 double i = PyFloat_AS_DOUBLE(obj);
1388 if (!Py_IS_FINITE(i)) {
1389 if (!s->allow_nan) {
1390 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1391 return NULL;
1392 }
1393 if (i > 0) {
1394 return PyUnicode_FromString("Infinity");
1395 }
1396 else if (i < 0) {
1397 return PyUnicode_FromString("-Infinity");
1398 }
1399 else {
1400 return PyUnicode_FromString("NaN");
1401 }
1402 }
1403 /* Use a better float format here? */
1404 return PyObject_Repr(obj);
1405}
1406
1407static PyObject *
1408encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1409{
1410 /* Return the JSON representation of a string */
1411 if (s->fast_encode)
1412 return py_encode_basestring_ascii(NULL, obj);
1413 else
1414 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1415}
1416
1417static int
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001418_steal_accumulate(accumulator *acc, PyObject *stolen)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001419{
1420 /* Append stolen and then decrement its reference count */
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001421 int rval = accumulate_unicode(acc, stolen);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001422 Py_DECREF(stolen);
1423 return rval;
1424}
1425
1426static int
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001427encoder_listencode_obj(PyEncoderObject *s, accumulator *acc,
1428 PyObject *obj, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001429{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001430 /* Encode Python object obj to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001431 PyObject *newobj;
1432 int rv;
1433
1434 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1435 PyObject *cstr = _encoded_const(obj);
1436 if (cstr == NULL)
1437 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001438 return _steal_accumulate(acc, cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001439 }
1440 else if (PyUnicode_Check(obj))
1441 {
1442 PyObject *encoded = encoder_encode_string(s, obj);
1443 if (encoded == NULL)
1444 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001445 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001446 }
1447 else if (PyLong_Check(obj)) {
1448 PyObject *encoded = PyObject_Str(obj);
1449 if (encoded == NULL)
1450 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001451 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001452 }
1453 else if (PyFloat_Check(obj)) {
1454 PyObject *encoded = encoder_encode_float(s, obj);
1455 if (encoded == NULL)
1456 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001457 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001458 }
1459 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001460 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1461 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001462 rv = encoder_listencode_list(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001463 Py_LeaveRecursiveCall();
1464 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001465 }
1466 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001467 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1468 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001469 rv = encoder_listencode_dict(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001470 Py_LeaveRecursiveCall();
1471 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001472 }
1473 else {
1474 PyObject *ident = NULL;
1475 if (s->markers != Py_None) {
1476 int has_key;
1477 ident = PyLong_FromVoidPtr(obj);
1478 if (ident == NULL)
1479 return -1;
1480 has_key = PyDict_Contains(s->markers, ident);
1481 if (has_key) {
1482 if (has_key != -1)
1483 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1484 Py_DECREF(ident);
1485 return -1;
1486 }
1487 if (PyDict_SetItem(s->markers, ident, obj)) {
1488 Py_DECREF(ident);
1489 return -1;
1490 }
1491 }
1492 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
1493 if (newobj == NULL) {
1494 Py_XDECREF(ident);
1495 return -1;
1496 }
Ezio Melotti13672652011-05-11 01:02:56 +03001497
1498 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1499 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001500 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001501 Py_LeaveRecursiveCall();
1502
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001503 Py_DECREF(newobj);
1504 if (rv) {
1505 Py_XDECREF(ident);
1506 return -1;
1507 }
1508 if (ident != NULL) {
1509 if (PyDict_DelItem(s->markers, ident)) {
1510 Py_XDECREF(ident);
1511 return -1;
1512 }
1513 Py_XDECREF(ident);
1514 }
1515 return rv;
1516 }
1517}
1518
1519static int
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001520encoder_listencode_dict(PyEncoderObject *s, accumulator *acc,
1521 PyObject *dct, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001522{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001523 /* Encode Python dict dct a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001524 static PyObject *open_dict = NULL;
1525 static PyObject *close_dict = NULL;
1526 static PyObject *empty_dict = NULL;
1527 PyObject *kstr = NULL;
1528 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001529 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001530 PyObject *items;
1531 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001532 int skipkeys;
1533 Py_ssize_t idx;
1534
1535 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1536 open_dict = PyUnicode_InternFromString("{");
1537 close_dict = PyUnicode_InternFromString("}");
1538 empty_dict = PyUnicode_InternFromString("{}");
1539 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1540 return -1;
1541 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001542 if (Py_SIZE(dct) == 0)
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001543 return accumulate_unicode(acc, empty_dict);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001544
1545 if (s->markers != Py_None) {
1546 int has_key;
1547 ident = PyLong_FromVoidPtr(dct);
1548 if (ident == NULL)
1549 goto bail;
1550 has_key = PyDict_Contains(s->markers, ident);
1551 if (has_key) {
1552 if (has_key != -1)
1553 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1554 goto bail;
1555 }
1556 if (PyDict_SetItem(s->markers, ident, dct)) {
1557 goto bail;
1558 }
1559 }
1560
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001561 if (accumulate_unicode(acc, open_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001562 goto bail;
1563
1564 if (s->indent != Py_None) {
1565 /* TODO: DOES NOT RUN */
1566 indent_level += 1;
1567 /*
1568 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1569 separator = _item_separator + newline_indent
1570 buf += newline_indent
1571 */
1572 }
1573
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001574 if (PyObject_IsTrue(s->sort_keys)) {
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001575 /* First sort the keys then replace them with (key, value) tuples. */
1576 Py_ssize_t i, nitems;
1577 items = PyMapping_Keys(dct);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001578 if (items == NULL)
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001579 goto bail;
1580 if (!PyList_Check(items)) {
1581 PyErr_SetString(PyExc_ValueError, "keys must return list");
1582 goto bail;
1583 }
1584 if (PyList_Sort(items) < 0)
1585 goto bail;
1586 nitems = PyList_GET_SIZE(items);
1587 for (i = 0; i < nitems; i++) {
1588 PyObject *key, *value;
1589 key = PyList_GET_ITEM(items, i);
1590 value = PyDict_GetItem(dct, key);
1591 item = PyTuple_Pack(2, key, value);
1592 if (item == NULL)
1593 goto bail;
1594 PyList_SET_ITEM(items, i, item);
1595 Py_DECREF(key);
1596 }
1597 }
1598 else {
1599 items = PyMapping_Items(dct);
1600 }
1601 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001602 goto bail;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001603 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001604 Py_DECREF(items);
1605 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001606 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001607 skipkeys = PyObject_IsTrue(s->skipkeys);
1608 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001609 while ((item = PyIter_Next(it)) != NULL) {
1610 PyObject *encoded, *key, *value;
1611 if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
1612 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1613 goto bail;
1614 }
1615 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001616 if (PyUnicode_Check(key)) {
1617 Py_INCREF(key);
1618 kstr = key;
1619 }
1620 else if (PyFloat_Check(key)) {
1621 kstr = encoder_encode_float(s, key);
1622 if (kstr == NULL)
1623 goto bail;
1624 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001625 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001626 /* This must come before the PyLong_Check because
1627 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001628 kstr = _encoded_const(key);
1629 if (kstr == NULL)
1630 goto bail;
1631 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001632 else if (PyLong_Check(key)) {
1633 kstr = PyObject_Str(key);
1634 if (kstr == NULL)
1635 goto bail;
1636 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001637 else if (skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001638 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001639 continue;
1640 }
1641 else {
1642 /* TODO: include repr of key */
Doug Hellmann1c524752010-07-21 12:29:04 +00001643 PyErr_SetString(PyExc_TypeError, "keys must be a string");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001644 goto bail;
1645 }
1646
1647 if (idx) {
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001648 if (accumulate_unicode(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001649 goto bail;
1650 }
1651
1652 encoded = encoder_encode_string(s, kstr);
1653 Py_CLEAR(kstr);
1654 if (encoded == NULL)
1655 goto bail;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001656 if (accumulate_unicode(acc, encoded)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001657 Py_DECREF(encoded);
1658 goto bail;
1659 }
1660 Py_DECREF(encoded);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001661 if (accumulate_unicode(acc, s->key_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001662 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001663
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001664 value = PyTuple_GET_ITEM(item, 1);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001665 if (encoder_listencode_obj(s, acc, value, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001666 goto bail;
1667 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001668 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001669 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001670 if (PyErr_Occurred())
1671 goto bail;
1672 Py_CLEAR(it);
1673
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001674 if (ident != NULL) {
1675 if (PyDict_DelItem(s->markers, ident))
1676 goto bail;
1677 Py_CLEAR(ident);
1678 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001679 /* TODO DOES NOT RUN; dead code
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001680 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001681 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001682
1683 yield '\n' + (' ' * (_indent * _current_indent_level))
1684 }*/
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001685 if (accumulate_unicode(acc, close_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001686 goto bail;
1687 return 0;
1688
1689bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001690 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001691 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001692 Py_XDECREF(kstr);
1693 Py_XDECREF(ident);
1694 return -1;
1695}
1696
1697
1698static int
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001699encoder_listencode_list(PyEncoderObject *s, accumulator *acc,
1700 PyObject *seq, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001701{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001702 /* Encode Python list seq to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001703 static PyObject *open_array = NULL;
1704 static PyObject *close_array = NULL;
1705 static PyObject *empty_array = NULL;
1706 PyObject *ident = NULL;
1707 PyObject *s_fast = NULL;
1708 Py_ssize_t num_items;
1709 PyObject **seq_items;
1710 Py_ssize_t i;
1711
1712 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1713 open_array = PyUnicode_InternFromString("[");
1714 close_array = PyUnicode_InternFromString("]");
1715 empty_array = PyUnicode_InternFromString("[]");
1716 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1717 return -1;
1718 }
1719 ident = NULL;
1720 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1721 if (s_fast == NULL)
1722 return -1;
1723 num_items = PySequence_Fast_GET_SIZE(s_fast);
1724 if (num_items == 0) {
1725 Py_DECREF(s_fast);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001726 return accumulate_unicode(acc, empty_array);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001727 }
1728
1729 if (s->markers != Py_None) {
1730 int has_key;
1731 ident = PyLong_FromVoidPtr(seq);
1732 if (ident == NULL)
1733 goto bail;
1734 has_key = PyDict_Contains(s->markers, ident);
1735 if (has_key) {
1736 if (has_key != -1)
1737 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1738 goto bail;
1739 }
1740 if (PyDict_SetItem(s->markers, ident, seq)) {
1741 goto bail;
1742 }
1743 }
1744
1745 seq_items = PySequence_Fast_ITEMS(s_fast);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001746 if (accumulate_unicode(acc, open_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001747 goto bail;
1748 if (s->indent != Py_None) {
1749 /* TODO: DOES NOT RUN */
1750 indent_level += 1;
1751 /*
1752 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1753 separator = _item_separator + newline_indent
1754 buf += newline_indent
1755 */
1756 }
1757 for (i = 0; i < num_items; i++) {
1758 PyObject *obj = seq_items[i];
1759 if (i) {
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001760 if (accumulate_unicode(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001761 goto bail;
1762 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001763 if (encoder_listencode_obj(s, acc, obj, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001764 goto bail;
1765 }
1766 if (ident != NULL) {
1767 if (PyDict_DelItem(s->markers, ident))
1768 goto bail;
1769 Py_CLEAR(ident);
1770 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001771
1772 /* TODO: DOES NOT RUN
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001773 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001774 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001775
1776 yield '\n' + (' ' * (_indent * _current_indent_level))
1777 }*/
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001778 if (accumulate_unicode(acc, close_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001779 goto bail;
1780 Py_DECREF(s_fast);
1781 return 0;
1782
1783bail:
1784 Py_XDECREF(ident);
1785 Py_DECREF(s_fast);
1786 return -1;
1787}
1788
1789static void
1790encoder_dealloc(PyObject *self)
1791{
1792 /* Deallocate Encoder */
1793 encoder_clear(self);
1794 Py_TYPE(self)->tp_free(self);
1795}
1796
1797static int
1798encoder_traverse(PyObject *self, visitproc visit, void *arg)
1799{
1800 PyEncoderObject *s;
1801 assert(PyEncoder_Check(self));
1802 s = (PyEncoderObject *)self;
1803 Py_VISIT(s->markers);
1804 Py_VISIT(s->defaultfn);
1805 Py_VISIT(s->encoder);
1806 Py_VISIT(s->indent);
1807 Py_VISIT(s->key_separator);
1808 Py_VISIT(s->item_separator);
1809 Py_VISIT(s->sort_keys);
1810 Py_VISIT(s->skipkeys);
1811 return 0;
1812}
1813
1814static int
1815encoder_clear(PyObject *self)
1816{
1817 /* Deallocate Encoder */
1818 PyEncoderObject *s;
1819 assert(PyEncoder_Check(self));
1820 s = (PyEncoderObject *)self;
1821 Py_CLEAR(s->markers);
1822 Py_CLEAR(s->defaultfn);
1823 Py_CLEAR(s->encoder);
1824 Py_CLEAR(s->indent);
1825 Py_CLEAR(s->key_separator);
1826 Py_CLEAR(s->item_separator);
1827 Py_CLEAR(s->sort_keys);
1828 Py_CLEAR(s->skipkeys);
1829 return 0;
1830}
1831
1832PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1833
1834static
1835PyTypeObject PyEncoderType = {
1836 PyVarObject_HEAD_INIT(NULL, 0)
1837 "_json.Encoder", /* tp_name */
1838 sizeof(PyEncoderObject), /* tp_basicsize */
1839 0, /* tp_itemsize */
1840 encoder_dealloc, /* tp_dealloc */
1841 0, /* tp_print */
1842 0, /* tp_getattr */
1843 0, /* tp_setattr */
1844 0, /* tp_compare */
1845 0, /* tp_repr */
1846 0, /* tp_as_number */
1847 0, /* tp_as_sequence */
1848 0, /* tp_as_mapping */
1849 0, /* tp_hash */
1850 encoder_call, /* tp_call */
1851 0, /* tp_str */
1852 0, /* tp_getattro */
1853 0, /* tp_setattro */
1854 0, /* tp_as_buffer */
1855 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1856 encoder_doc, /* tp_doc */
1857 encoder_traverse, /* tp_traverse */
1858 encoder_clear, /* tp_clear */
1859 0, /* tp_richcompare */
1860 0, /* tp_weaklistoffset */
1861 0, /* tp_iter */
1862 0, /* tp_iternext */
1863 0, /* tp_methods */
1864 encoder_members, /* tp_members */
1865 0, /* tp_getset */
1866 0, /* tp_base */
1867 0, /* tp_dict */
1868 0, /* tp_descr_get */
1869 0, /* tp_descr_set */
1870 0, /* tp_dictoffset */
1871 encoder_init, /* tp_init */
1872 0, /* tp_alloc */
1873 encoder_new, /* tp_new */
1874 0, /* tp_free */
1875};
1876
1877static PyMethodDef speedups_methods[] = {
1878 {"encode_basestring_ascii",
1879 (PyCFunction)py_encode_basestring_ascii,
1880 METH_O,
1881 pydoc_encode_basestring_ascii},
1882 {"scanstring",
1883 (PyCFunction)py_scanstring,
1884 METH_VARARGS,
1885 pydoc_scanstring},
Christian Heimes90540002008-05-08 14:29:10 +00001886 {NULL, NULL, 0, NULL}
1887};
1888
1889PyDoc_STRVAR(module_doc,
1890"json speedups\n");
1891
Martin v. Löwis1a214512008-06-11 05:26:20 +00001892static struct PyModuleDef jsonmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001893 PyModuleDef_HEAD_INIT,
1894 "_json",
1895 module_doc,
1896 -1,
1897 speedups_methods,
1898 NULL,
1899 NULL,
1900 NULL,
1901 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001902};
1903
1904PyObject*
1905PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00001906{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001907 PyObject *m = PyModule_Create(&jsonmodule);
1908 if (!m)
1909 return NULL;
1910 PyScannerType.tp_new = PyType_GenericNew;
1911 if (PyType_Ready(&PyScannerType) < 0)
1912 goto fail;
1913 PyEncoderType.tp_new = PyType_GenericNew;
1914 if (PyType_Ready(&PyEncoderType) < 0)
1915 goto fail;
1916 Py_INCREF((PyObject*)&PyScannerType);
1917 if (PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType) < 0) {
1918 Py_DECREF((PyObject*)&PyScannerType);
1919 goto fail;
1920 }
1921 Py_INCREF((PyObject*)&PyEncoderType);
1922 if (PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType) < 0) {
1923 Py_DECREF((PyObject*)&PyEncoderType);
1924 goto fail;
1925 }
1926 return m;
1927 fail:
1928 Py_DECREF(m);
1929 return NULL;
Christian Heimes90540002008-05-08 14:29:10 +00001930}