blob: d5120fa0c10e05a55e8d04e58d84792fdfcdeac7 [file] [log] [blame]
Christian Heimes90540002008-05-08 14:29:10 +00001#include "Python.h"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00002#include "structmember.h"
/* Compatibility shims: supply names that older Python headers lack. */
#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
#define Py_TYPE(ob)     (((PyObject*)(ob))->ob_type)
#endif

#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
typedef int Py_ssize_t;
#define PY_SSIZE_T_MAX INT_MAX
#define PY_SSIZE_T_MIN INT_MIN
#define PyInt_FromSsize_t PyInt_FromLong
#define PyInt_AsSsize_t PyInt_AsLong
#endif

#ifndef Py_IS_FINITE
#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
#endif

/* UNUSED marks a parameter as intentionally unused; it expands to nothing
   on compilers that do not define __GNUC__. */
#ifdef __GNUC__
#define UNUSED __attribute__((__unused__))
#else
#define UNUSED
#endif
22
23#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
24#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
25#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
26#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
27
28static PyTypeObject PyScannerType;
29static PyTypeObject PyEncoderType;
30
31typedef struct _PyScannerObject {
32 PyObject_HEAD
33 PyObject *strict;
34 PyObject *object_hook;
35 PyObject *object_pairs_hook;
36 PyObject *parse_float;
37 PyObject *parse_int;
38 PyObject *parse_constant;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +000039 PyObject *memo;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000040} PyScannerObject;
41
42static PyMemberDef scanner_members[] = {
43 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
44 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
45 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
46 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
47 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
48 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
49 {NULL}
50};
51
52typedef struct _PyEncoderObject {
53 PyObject_HEAD
54 PyObject *markers;
55 PyObject *defaultfn;
56 PyObject *encoder;
57 PyObject *indent;
58 PyObject *key_separator;
59 PyObject *item_separator;
60 PyObject *sort_keys;
61 PyObject *skipkeys;
62 int fast_encode;
63 int allow_nan;
64} PyEncoderObject;
65
66static PyMemberDef encoder_members[] = {
67 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
68 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
69 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
70 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
71 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
72 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
73 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
74 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
75 {NULL}
76};
77
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020078/*
79 * A two-level accumulator of unicode objects that avoids both the overhead
80 * of keeping a huge number of small separate objects, and the quadratic
81 * behaviour of using a naive repeated concatenation scheme.
82 */
83
84typedef struct {
85 PyObject *large; /* A list of previously accumulated large strings */
86 PyObject *small; /* Pending small strings */
87} accumulator;
88
89static PyObject *
90join_list_unicode(PyObject *lst)
91{
92 /* return u''.join(lst) */
93 static PyObject *sep = NULL;
94 if (sep == NULL) {
95 sep = PyUnicode_FromStringAndSize("", 0);
96 if (sep == NULL)
97 return NULL;
98 }
99 return PyUnicode_Join(sep, lst);
100}
101
102static int
103init_accumulator(accumulator *acc)
104{
105 acc->large = PyList_New(0);
106 if (acc->large == NULL)
107 return -1;
108 acc->small = PyList_New(0);
109 if (acc->small == NULL) {
110 Py_CLEAR(acc->large);
111 return -1;
112 }
113 return 0;
114}
115
116static int
117flush_accumulator(accumulator *acc)
118{
119 Py_ssize_t nsmall = PyList_GET_SIZE(acc->small);
120 if (nsmall) {
121 int ret;
122 PyObject *joined = join_list_unicode(acc->small);
123 if (joined == NULL)
124 return -1;
125 if (PyList_SetSlice(acc->small, 0, nsmall, NULL)) {
126 Py_DECREF(joined);
127 return -1;
128 }
129 ret = PyList_Append(acc->large, joined);
130 Py_DECREF(joined);
131 return ret;
132 }
133 return 0;
134}
135
136static int
137accumulate_unicode(accumulator *acc, PyObject *obj)
138{
139 int ret;
140 Py_ssize_t nsmall;
Antoine Pitrou18bb3302011-08-20 03:19:34 +0200141 PyObject *joined;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +0200142 assert(PyUnicode_Check(obj));
143
144 if (PyList_Append(acc->small, obj))
145 return -1;
146 nsmall = PyList_GET_SIZE(acc->small);
147 /* Each item in a list of unicode objects has an overhead (in 64-bit
148 * builds) of:
149 * - 8 bytes for the list slot
150 * - 56 bytes for the header of the unicode object
151 * that is, 64 bytes. 100000 such objects waste more than 6MB
152 * compared to a single concatenated string.
153 */
154 if (nsmall < 100000)
155 return 0;
Antoine Pitrou18bb3302011-08-20 03:19:34 +0200156 joined = join_list_unicode(acc->small);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +0200157 if (joined == NULL)
158 return -1;
159 if (PyList_SetSlice(acc->small, 0, nsmall, NULL)) {
160 Py_DECREF(joined);
161 return -1;
162 }
163 ret = PyList_Append(acc->large, joined);
164 Py_DECREF(joined);
165 return ret;
166}
167
168static PyObject *
169finish_accumulator(accumulator *acc)
170{
171 int ret;
172 PyObject *res;
173
174 ret = flush_accumulator(acc);
175 Py_CLEAR(acc->small);
176 if (ret) {
177 Py_CLEAR(acc->large);
178 return NULL;
179 }
180 res = acc->large;
181 acc->large = NULL;
182 return res;
183}
184
185static void
186destroy_accumulator(accumulator *acc)
187{
188 Py_CLEAR(acc->small);
189 Py_CLEAR(acc->large);
190}
191
192/* Forward decls */
193
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000194static PyObject *
195ascii_escape_unicode(PyObject *pystr);
196static PyObject *
197py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
198void init_json(void);
199static PyObject *
200scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
201static PyObject *
202_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
203static PyObject *
204scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
205static int
206scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
207static void
208scanner_dealloc(PyObject *self);
209static int
210scanner_clear(PyObject *self);
211static PyObject *
212encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
213static int
214encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
215static void
216encoder_dealloc(PyObject *self);
217static int
218encoder_clear(PyObject *self);
219static int
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +0200220encoder_listencode_list(PyEncoderObject *s, accumulator *acc, PyObject *seq, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000221static int
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +0200222encoder_listencode_obj(PyEncoderObject *s, accumulator *acc, PyObject *obj, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000223static int
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +0200224encoder_listencode_dict(PyEncoderObject *s, accumulator *acc, PyObject *dct, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000225static PyObject *
Hirokazu Yamamotofecf5d12009-05-02 15:55:19 +0000226_encoded_const(PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000227static void
228raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
229static PyObject *
230encoder_encode_string(PyEncoderObject *s, PyObject *obj);
231static int
232_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
233static PyObject *
234_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
235static PyObject *
236encoder_encode_float(PyEncoderObject *s, PyObject *obj);
237
/* True for printable ASCII characters that may be copied into a JSON string
   unescaped (everything from ' ' to '~' except backslash and double quote).
   The argument is parenthesized so that expression arguments such as
   S_CHAR(x & 0xff) are grouped correctly; note that it is still evaluated
   more than once. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* True for the four whitespace characters skipped by the parser. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* Length of one "\uXXXX" escape.  With a wide Py_UNICODE a single code point
   may need a surrogate pair, i.e. two such escapes. */
#define MIN_EXPANSION 6
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
247
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000248static int
249_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000250{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000251 /* PyObject to Py_ssize_t converter */
252 *size_ptr = PyLong_AsSsize_t(o);
Georg Brandl59682052009-05-05 07:52:05 +0000253 if (*size_ptr == -1 && PyErr_Occurred())
254 return 0;
255 return 1;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000256}
257
258static PyObject *
259_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
260{
261 /* Py_ssize_t to PyObject converter */
262 return PyLong_FromSsize_t(*size_ptr);
263}
264
265static Py_ssize_t
266ascii_escape_unichar(Py_UNICODE c, Py_UNICODE *output, Py_ssize_t chars)
267{
268 /* Escape unicode code point c to ASCII escape sequences
269 in char *output. output must have at least 12 bytes unused to
270 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000271 output[chars++] = '\\';
272 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000273 case '\\': output[chars++] = c; break;
274 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000275 case '\b': output[chars++] = 'b'; break;
276 case '\f': output[chars++] = 'f'; break;
277 case '\n': output[chars++] = 'n'; break;
278 case '\r': output[chars++] = 'r'; break;
279 case '\t': output[chars++] = 't'; break;
280 default:
281#ifdef Py_UNICODE_WIDE
282 if (c >= 0x10000) {
283 /* UTF-16 surrogate pair */
284 Py_UNICODE v = c - 0x10000;
285 c = 0xd800 | ((v >> 10) & 0x3ff);
286 output[chars++] = 'u';
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000287 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
288 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
289 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
290 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000291 c = 0xdc00 | (v & 0x3ff);
292 output[chars++] = '\\';
293 }
294#endif
295 output[chars++] = 'u';
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000296 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
297 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
298 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
299 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000300 }
301 return chars;
302}
303
304static PyObject *
305ascii_escape_unicode(PyObject *pystr)
306{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000307 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000308 Py_ssize_t i;
309 Py_ssize_t input_chars;
310 Py_ssize_t output_size;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000311 Py_ssize_t max_output_size;
Christian Heimes90540002008-05-08 14:29:10 +0000312 Py_ssize_t chars;
313 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000314 Py_UNICODE *output;
Christian Heimes90540002008-05-08 14:29:10 +0000315 Py_UNICODE *input_unicode;
316
317 input_chars = PyUnicode_GET_SIZE(pystr);
318 input_unicode = PyUnicode_AS_UNICODE(pystr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000319
Christian Heimes90540002008-05-08 14:29:10 +0000320 /* One char input can be up to 6 chars output, estimate 4 of these */
321 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000322 max_output_size = 2 + (input_chars * MAX_EXPANSION);
323 rval = PyUnicode_FromStringAndSize(NULL, output_size);
Christian Heimes90540002008-05-08 14:29:10 +0000324 if (rval == NULL) {
325 return NULL;
326 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000327 output = PyUnicode_AS_UNICODE(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000328 chars = 0;
329 output[chars++] = '"';
330 for (i = 0; i < input_chars; i++) {
331 Py_UNICODE c = input_unicode[i];
332 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000333 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000334 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000335 else {
336 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000337 }
338 if (output_size - chars < (1 + MAX_EXPANSION)) {
339 /* There's more than four, so let's resize by a lot */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000340 Py_ssize_t new_output_size = output_size * 2;
Christian Heimes90540002008-05-08 14:29:10 +0000341 /* This is an upper bound */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000342 if (new_output_size > max_output_size) {
343 new_output_size = max_output_size;
Christian Heimes90540002008-05-08 14:29:10 +0000344 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000345 /* Make sure that the output size changed before resizing */
346 if (new_output_size != output_size) {
347 output_size = new_output_size;
348 if (PyUnicode_Resize(&rval, output_size) == -1) {
349 return NULL;
350 }
351 output = PyUnicode_AS_UNICODE(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000352 }
Christian Heimes90540002008-05-08 14:29:10 +0000353 }
354 }
355 output[chars++] = '"';
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000356 if (PyUnicode_Resize(&rval, chars) == -1) {
Christian Heimes90540002008-05-08 14:29:10 +0000357 return NULL;
358 }
359 return rval;
360}
361
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000362static void
Christian Heimes90540002008-05-08 14:29:10 +0000363raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
364{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000365 /* Use the Python function json.decoder.errmsg to raise a nice
366 looking ValueError exception */
Christian Heimes90540002008-05-08 14:29:10 +0000367 static PyObject *errmsg_fn = NULL;
368 PyObject *pymsg;
369 if (errmsg_fn == NULL) {
370 PyObject *decoder = PyImport_ImportModule("json.decoder");
371 if (decoder == NULL)
372 return;
373 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000374 Py_DECREF(decoder);
Christian Heimes90540002008-05-08 14:29:10 +0000375 if (errmsg_fn == NULL)
376 return;
Christian Heimes90540002008-05-08 14:29:10 +0000377 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000378 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000379 if (pymsg) {
380 PyErr_SetObject(PyExc_ValueError, pymsg);
381 Py_DECREF(pymsg);
382 }
Christian Heimes90540002008-05-08 14:29:10 +0000383}
384
385static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000386_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
387 /* return (rval, idx) tuple, stealing reference to rval */
388 PyObject *tpl;
389 PyObject *pyidx;
390 /*
391 steal a reference to rval, returns (rval, idx)
392 */
393 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000394 return NULL;
395 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000396 pyidx = PyLong_FromSsize_t(idx);
397 if (pyidx == NULL) {
398 Py_DECREF(rval);
399 return NULL;
400 }
401 tpl = PyTuple_New(2);
402 if (tpl == NULL) {
403 Py_DECREF(pyidx);
404 Py_DECREF(rval);
405 return NULL;
406 }
407 PyTuple_SET_ITEM(tpl, 0, rval);
408 PyTuple_SET_ITEM(tpl, 1, pyidx);
409 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000410}
411
/* Move the pending `chunk` (if any) onto the `chunks` list, creating the
   list on first use.  Expects locals `chunk` and `chunks` and a `bail`
   label in the enclosing function.  On failure `chunk` is released with
   Py_CLEAR (not Py_DECREF) so that cleanup code at `bail` which also
   releases `chunk` cannot decref it a second time.  The expansion is a
   complete if-statement, so it is used without a trailing semicolon. */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        if (chunks == NULL) { \
            chunks = PyList_New(0); \
            if (chunks == NULL) { \
                goto bail; \
            } \
        } \
        if (PyList_Append(chunks, chunk)) { \
            Py_CLEAR(chunk); \
            goto bail; \
        } \
        Py_CLEAR(chunk); \
    }
426
Christian Heimes90540002008-05-08 14:29:10 +0000427static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000428scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000429{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000430 /* Read the JSON string from PyUnicode pystr.
431 end is the index of the first character after the quote.
432 if strict is zero then literal control characters are allowed
433 *next_end_ptr is a return-by-reference index of the character
434 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000435
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000436 Return value is a new PyUnicode
437 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000438 PyObject *rval = NULL;
Christian Heimes90540002008-05-08 14:29:10 +0000439 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
440 Py_ssize_t begin = end - 1;
Brett Cannonb94767f2011-02-22 20:15:44 +0000441 Py_ssize_t next /* = begin */;
Christian Heimes90540002008-05-08 14:29:10 +0000442 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000443 PyObject *chunks = NULL;
444 PyObject *chunk = NULL;
445
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000446 if (end < 0 || len <= end) {
447 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
448 goto bail;
449 }
Christian Heimes90540002008-05-08 14:29:10 +0000450 while (1) {
451 /* Find the end of the string or the next escape */
452 Py_UNICODE c = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000453 for (next = end; next < len; next++) {
454 c = buf[next];
455 if (c == '"' || c == '\\') {
456 break;
457 }
458 else if (strict && c <= 0x1f) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000459 raise_errmsg("Invalid control character at", pystr, next);
Christian Heimes90540002008-05-08 14:29:10 +0000460 goto bail;
461 }
462 }
463 if (!(c == '"' || c == '\\')) {
464 raise_errmsg("Unterminated string starting at", pystr, begin);
465 goto bail;
466 }
467 /* Pick up this chunk if it's not zero length */
468 if (next != end) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000469 APPEND_OLD_CHUNK
Christian Heimes90540002008-05-08 14:29:10 +0000470 chunk = PyUnicode_FromUnicode(&buf[end], next - end);
471 if (chunk == NULL) {
472 goto bail;
473 }
Christian Heimes90540002008-05-08 14:29:10 +0000474 }
475 next++;
476 if (c == '"') {
477 end = next;
478 break;
479 }
480 if (next == len) {
481 raise_errmsg("Unterminated string starting at", pystr, begin);
482 goto bail;
483 }
484 c = buf[next];
485 if (c != 'u') {
486 /* Non-unicode backslash escapes */
487 end = next + 1;
488 switch (c) {
489 case '"': break;
490 case '\\': break;
491 case '/': break;
492 case 'b': c = '\b'; break;
493 case 'f': c = '\f'; break;
494 case 'n': c = '\n'; break;
495 case 'r': c = '\r'; break;
496 case 't': c = '\t'; break;
497 default: c = 0;
498 }
499 if (c == 0) {
500 raise_errmsg("Invalid \\escape", pystr, end - 2);
501 goto bail;
502 }
503 }
504 else {
505 c = 0;
506 next++;
507 end = next + 4;
508 if (end >= len) {
509 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
510 goto bail;
511 }
512 /* Decode 4 hex digits */
513 for (; next < end; next++) {
Christian Heimes90540002008-05-08 14:29:10 +0000514 Py_UNICODE digit = buf[next];
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000515 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000516 switch (digit) {
517 case '0': case '1': case '2': case '3': case '4':
518 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000519 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000520 case 'a': case 'b': case 'c': case 'd': case 'e':
521 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000522 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000523 case 'A': case 'B': case 'C': case 'D': case 'E':
524 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000525 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000526 default:
527 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
528 goto bail;
529 }
530 }
531#ifdef Py_UNICODE_WIDE
532 /* Surrogate pair */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000533 if ((c & 0xfc00) == 0xd800) {
Christian Heimes90540002008-05-08 14:29:10 +0000534 Py_UNICODE c2 = 0;
535 if (end + 6 >= len) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000536 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
537 goto bail;
Christian Heimes90540002008-05-08 14:29:10 +0000538 }
539 if (buf[next++] != '\\' || buf[next++] != 'u') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000540 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
541 goto bail;
Christian Heimes90540002008-05-08 14:29:10 +0000542 }
543 end += 6;
544 /* Decode 4 hex digits */
545 for (; next < end; next++) {
Christian Heimes90540002008-05-08 14:29:10 +0000546 Py_UNICODE digit = buf[next];
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000547 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000548 switch (digit) {
549 case '0': case '1': case '2': case '3': case '4':
550 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000551 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000552 case 'a': case 'b': case 'c': case 'd': case 'e':
553 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000554 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000555 case 'A': case 'B': case 'C': case 'D': case 'E':
556 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000557 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000558 default:
559 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
560 goto bail;
561 }
562 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000563 if ((c2 & 0xfc00) != 0xdc00) {
564 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
565 goto bail;
566 }
Christian Heimes90540002008-05-08 14:29:10 +0000567 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
568 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000569 else if ((c & 0xfc00) == 0xdc00) {
570 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
571 goto bail;
572 }
Christian Heimes90540002008-05-08 14:29:10 +0000573#endif
574 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000575 APPEND_OLD_CHUNK
Christian Heimes90540002008-05-08 14:29:10 +0000576 chunk = PyUnicode_FromUnicode(&c, 1);
577 if (chunk == NULL) {
578 goto bail;
579 }
Christian Heimes90540002008-05-08 14:29:10 +0000580 }
581
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000582 if (chunks == NULL) {
583 if (chunk != NULL)
584 rval = chunk;
585 else
586 rval = PyUnicode_FromStringAndSize("", 0);
Christian Heimes90540002008-05-08 14:29:10 +0000587 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000588 else {
589 APPEND_OLD_CHUNK
590 rval = join_list_unicode(chunks);
591 if (rval == NULL) {
592 goto bail;
593 }
594 Py_CLEAR(chunks);
595 }
596
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000597 *next_end_ptr = end;
598 return rval;
Christian Heimes90540002008-05-08 14:29:10 +0000599bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000600 *next_end_ptr = -1;
Christian Heimes90540002008-05-08 14:29:10 +0000601 Py_XDECREF(chunks);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000602 Py_XDECREF(chunk);
Christian Heimes90540002008-05-08 14:29:10 +0000603 return NULL;
604}
605
606PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000607 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000608 "\n"
609 "Scan the string s for a JSON string. End is the index of the\n"
610 "character in s after the quote that started the JSON string.\n"
611 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
612 "on attempt to decode an invalid string. If strict is False then literal\n"
613 "control characters are allowed in the string.\n"
614 "\n"
615 "Returns a tuple of the decoded string and the index of the character in s\n"
616 "after the end quote."
617);
Christian Heimes90540002008-05-08 14:29:10 +0000618
619static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000620py_scanstring(PyObject* self UNUSED, PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000621{
622 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000623 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000624 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000625 Py_ssize_t next_end = -1;
626 int strict = 1;
627 if (!PyArg_ParseTuple(args, "OO&|i:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000628 return NULL;
629 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000630 if (PyUnicode_Check(pystr)) {
631 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000632 }
633 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000634 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000635 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000636 Py_TYPE(pystr)->tp_name);
637 return NULL;
638 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000639 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000640}
641
642PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000643 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000644 "\n"
645 "Return an ASCII-only JSON representation of a Python string"
646);
Christian Heimes90540002008-05-08 14:29:10 +0000647
648static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000649py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000650{
651 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000652 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000653 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000654 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000655 rval = ascii_escape_unicode(pystr);
656 }
657 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000658 PyErr_Format(PyExc_TypeError,
659 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000660 Py_TYPE(pystr)->tp_name);
661 return NULL;
662 }
Christian Heimes90540002008-05-08 14:29:10 +0000663 return rval;
664}
665
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000666static void
667scanner_dealloc(PyObject *self)
668{
669 /* Deallocate scanner object */
670 scanner_clear(self);
671 Py_TYPE(self)->tp_free(self);
672}
673
674static int
675scanner_traverse(PyObject *self, visitproc visit, void *arg)
676{
677 PyScannerObject *s;
678 assert(PyScanner_Check(self));
679 s = (PyScannerObject *)self;
680 Py_VISIT(s->strict);
681 Py_VISIT(s->object_hook);
682 Py_VISIT(s->object_pairs_hook);
683 Py_VISIT(s->parse_float);
684 Py_VISIT(s->parse_int);
685 Py_VISIT(s->parse_constant);
686 return 0;
687}
688
689static int
690scanner_clear(PyObject *self)
691{
692 PyScannerObject *s;
693 assert(PyScanner_Check(self));
694 s = (PyScannerObject *)self;
695 Py_CLEAR(s->strict);
696 Py_CLEAR(s->object_hook);
697 Py_CLEAR(s->object_pairs_hook);
698 Py_CLEAR(s->parse_float);
699 Py_CLEAR(s->parse_int);
700 Py_CLEAR(s->parse_constant);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000701 Py_CLEAR(s->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000702 return 0;
703}
704
705static PyObject *
706_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
707 /* Read a JSON object from PyUnicode pystr.
708 idx is the index of the first character after the opening curly brace.
709 *next_idx_ptr is a return-by-reference index to the first character after
710 the closing curly brace.
711
712 Returns a new PyObject (usually a dict, but object_hook can change that)
713 */
714 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
715 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
716 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000717 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000718 PyObject *key = NULL;
719 int strict = PyObject_IsTrue(s->strict);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000720 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000721 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000722
723 if (has_pairs_hook)
724 rval = PyList_New(0);
725 else
726 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000727 if (rval == NULL)
728 return NULL;
729
730 /* skip whitespace after { */
731 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
732
733 /* only loop if the object is non-empty */
734 if (idx <= end_idx && str[idx] != '}') {
735 while (idx <= end_idx) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000736 PyObject *memokey;
737
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000738 /* read key */
739 if (str[idx] != '"') {
740 raise_errmsg("Expecting property name", pystr, idx);
741 goto bail;
742 }
743 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
744 if (key == NULL)
745 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000746 memokey = PyDict_GetItem(s->memo, key);
747 if (memokey != NULL) {
748 Py_INCREF(memokey);
749 Py_DECREF(key);
750 key = memokey;
751 }
752 else {
753 if (PyDict_SetItem(s->memo, key, key) < 0)
754 goto bail;
755 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000756 idx = next_idx;
757
758 /* skip whitespace between key and : delimiter, read :, skip whitespace */
759 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
760 if (idx > end_idx || str[idx] != ':') {
761 raise_errmsg("Expecting : delimiter", pystr, idx);
762 goto bail;
763 }
764 idx++;
765 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
766
767 /* read any JSON term */
768 val = scan_once_unicode(s, pystr, idx, &next_idx);
769 if (val == NULL)
770 goto bail;
771
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000772 if (has_pairs_hook) {
773 PyObject *item = PyTuple_Pack(2, key, val);
774 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000775 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000776 Py_CLEAR(key);
777 Py_CLEAR(val);
778 if (PyList_Append(rval, item) == -1) {
779 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000780 goto bail;
781 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000782 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000783 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000784 else {
785 if (PyDict_SetItem(rval, key, val) < 0)
786 goto bail;
787 Py_CLEAR(key);
788 Py_CLEAR(val);
789 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000790 idx = next_idx;
791
792 /* skip whitespace before } or , */
793 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
794
795 /* bail if the object is closed or we didn't get the , delimiter */
796 if (idx > end_idx) break;
797 if (str[idx] == '}') {
798 break;
799 }
800 else if (str[idx] != ',') {
801 raise_errmsg("Expecting , delimiter", pystr, idx);
802 goto bail;
803 }
804 idx++;
805
806 /* skip whitespace after , delimiter */
807 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
808 }
809 }
810
811 /* verify that idx < end_idx, str[idx] should be '}' */
812 if (idx > end_idx || str[idx] != '}') {
813 raise_errmsg("Expecting object", pystr, end_idx);
814 goto bail;
815 }
816
817 *next_idx_ptr = idx + 1;
818
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000819 if (has_pairs_hook) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000820 val = PyObject_CallFunctionObjArgs(s->object_pairs_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000821 Py_DECREF(rval);
822 return val;
823 }
824
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000825 /* if object_hook is not None: rval = object_hook(rval) */
826 if (s->object_hook != Py_None) {
827 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000828 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000829 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000830 }
831 return rval;
832bail:
833 Py_XDECREF(key);
834 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000835 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000836 return NULL;
837}
838
839static PyObject *
840_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
841 /* Read a JSON array from PyString pystr.
842 idx is the index of the first character after the opening brace.
843 *next_idx_ptr is a return-by-reference index to the first character after
844 the closing brace.
845
846 Returns a new PyList
847 */
848 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
849 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
850 PyObject *val = NULL;
851 PyObject *rval = PyList_New(0);
852 Py_ssize_t next_idx;
853 if (rval == NULL)
854 return NULL;
855
856 /* skip whitespace after [ */
857 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
858
859 /* only loop if the array is non-empty */
860 if (idx <= end_idx && str[idx] != ']') {
861 while (idx <= end_idx) {
862
863 /* read any JSON term */
864 val = scan_once_unicode(s, pystr, idx, &next_idx);
865 if (val == NULL)
866 goto bail;
867
868 if (PyList_Append(rval, val) == -1)
869 goto bail;
870
871 Py_CLEAR(val);
872 idx = next_idx;
873
874 /* skip whitespace between term and , */
875 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
876
877 /* bail if the array is closed or we didn't get the , delimiter */
878 if (idx > end_idx) break;
879 if (str[idx] == ']') {
880 break;
881 }
882 else if (str[idx] != ',') {
883 raise_errmsg("Expecting , delimiter", pystr, idx);
884 goto bail;
885 }
886 idx++;
887
888 /* skip whitespace after , */
889 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
890 }
891 }
892
893 /* verify that idx < end_idx, str[idx] should be ']' */
894 if (idx > end_idx || str[idx] != ']') {
895 raise_errmsg("Expecting object", pystr, end_idx);
896 goto bail;
897 }
898 *next_idx_ptr = idx + 1;
899 return rval;
900bail:
901 Py_XDECREF(val);
902 Py_DECREF(rval);
903 return NULL;
904}
905
906static PyObject *
907_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
908 /* Read a JSON constant from PyString pystr.
909 constant is the constant string that was found
910 ("NaN", "Infinity", "-Infinity").
911 idx is the index of the first character of the constant
912 *next_idx_ptr is a return-by-reference index to the first character after
913 the constant.
914
915 Returns the result of parse_constant
916 */
917 PyObject *cstr;
918 PyObject *rval;
919 /* constant is "NaN", "Infinity", or "-Infinity" */
920 cstr = PyUnicode_InternFromString(constant);
921 if (cstr == NULL)
922 return NULL;
923
924 /* rval = parse_constant(constant) */
925 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
926 idx += PyUnicode_GET_SIZE(cstr);
927 Py_DECREF(cstr);
928 *next_idx_ptr = idx;
929 return rval;
930}
931
932static PyObject *
933_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
934 /* Read a JSON number from PyUnicode pystr.
935 idx is the index of the first character of the number
936 *next_idx_ptr is a return-by-reference index to the first character after
937 the number.
938
939 Returns a new PyObject representation of that number:
940 PyInt, PyLong, or PyFloat.
941 May return other types if parse_int or parse_float are set
942 */
943 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
944 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
945 Py_ssize_t idx = start;
946 int is_float = 0;
947 PyObject *rval;
Antoine Pitrouf6454512011-04-25 19:16:06 +0200948 PyObject *numstr = NULL;
949 PyObject *custom_func;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000950
951 /* read a sign if it's there, make sure it's not the end of the string */
952 if (str[idx] == '-') {
953 idx++;
954 if (idx > end_idx) {
955 PyErr_SetNone(PyExc_StopIteration);
956 return NULL;
957 }
958 }
959
960 /* read as many integer digits as we find as long as it doesn't start with 0 */
961 if (str[idx] >= '1' && str[idx] <= '9') {
962 idx++;
963 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
964 }
965 /* if it starts with 0 we only expect one integer digit */
966 else if (str[idx] == '0') {
967 idx++;
968 }
969 /* no integer digits, error */
970 else {
971 PyErr_SetNone(PyExc_StopIteration);
972 return NULL;
973 }
974
975 /* if the next char is '.' followed by a digit then read all float digits */
976 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
977 is_float = 1;
978 idx += 2;
979 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
980 }
981
982 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
983 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
984 Py_ssize_t e_start = idx;
985 idx++;
986
987 /* read an exponent sign if present */
988 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
989
990 /* read all digits */
991 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
992
993 /* if we got a digit, then parse as float. if not, backtrack */
994 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
995 is_float = 1;
996 }
997 else {
998 idx = e_start;
999 }
1000 }
1001
Antoine Pitrouf6454512011-04-25 19:16:06 +02001002 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
1003 custom_func = s->parse_float;
1004 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
1005 custom_func = s->parse_int;
1006 else
1007 custom_func = NULL;
1008
1009 if (custom_func) {
1010 /* copy the section we determined to be a number */
1011 numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1012 if (numstr == NULL)
1013 return NULL;
1014 rval = PyObject_CallFunctionObjArgs(custom_func, numstr, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001015 }
1016 else {
Antoine Pitrouf6454512011-04-25 19:16:06 +02001017 Py_ssize_t i, n;
1018 char *buf;
1019 /* Straight conversion to ASCII, to avoid costly conversion of
1020 decimal unicode digits (which cannot appear here) */
1021 n = idx - start;
1022 numstr = PyBytes_FromStringAndSize(NULL, n);
1023 if (numstr == NULL)
1024 return NULL;
1025 buf = PyBytes_AS_STRING(numstr);
1026 for (i = 0; i < n; i++) {
1027 buf[i] = (char) str[i + start];
1028 }
1029 if (is_float)
1030 rval = PyFloat_FromString(numstr);
1031 else
1032 rval = PyLong_FromString(buf, NULL, 10);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001033 }
1034 Py_DECREF(numstr);
1035 *next_idx_ptr = idx;
1036 return rval;
1037}
1038
1039static PyObject *
1040scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1041{
1042 /* Read one JSON term (of any kind) from PyUnicode pystr.
1043 idx is the index of the first character of the term
1044 *next_idx_ptr is a return-by-reference index to the first character after
1045 the number.
1046
1047 Returns a new PyObject representation of the term.
1048 */
Ezio Melotti362b9512011-05-07 17:58:09 +03001049 PyObject *res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001050 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1051 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1052 if (idx >= length) {
1053 PyErr_SetNone(PyExc_StopIteration);
1054 return NULL;
1055 }
1056 switch (str[idx]) {
1057 case '"':
1058 /* string */
1059 return scanstring_unicode(pystr, idx + 1,
1060 PyObject_IsTrue(s->strict),
1061 next_idx_ptr);
1062 case '{':
1063 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +03001064 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1065 "from a unicode string"))
1066 return NULL;
1067 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1068 Py_LeaveRecursiveCall();
1069 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001070 case '[':
1071 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +03001072 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1073 "from a unicode string"))
1074 return NULL;
1075 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1076 Py_LeaveRecursiveCall();
1077 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001078 case 'n':
1079 /* null */
1080 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1081 Py_INCREF(Py_None);
1082 *next_idx_ptr = idx + 4;
1083 return Py_None;
1084 }
1085 break;
1086 case 't':
1087 /* true */
1088 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1089 Py_INCREF(Py_True);
1090 *next_idx_ptr = idx + 4;
1091 return Py_True;
1092 }
1093 break;
1094 case 'f':
1095 /* false */
1096 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1097 Py_INCREF(Py_False);
1098 *next_idx_ptr = idx + 5;
1099 return Py_False;
1100 }
1101 break;
1102 case 'N':
1103 /* NaN */
1104 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1105 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1106 }
1107 break;
1108 case 'I':
1109 /* Infinity */
1110 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1111 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1112 }
1113 break;
1114 case '-':
1115 /* -Infinity */
1116 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1117 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1118 }
1119 break;
1120 }
1121 /* Didn't find a string, object, array, or named constant. Look for a number. */
1122 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1123}
1124
1125static PyObject *
1126scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1127{
1128 /* Python callable interface to scan_once_{str,unicode} */
1129 PyObject *pystr;
1130 PyObject *rval;
1131 Py_ssize_t idx;
1132 Py_ssize_t next_idx = -1;
1133 static char *kwlist[] = {"string", "idx", NULL};
1134 PyScannerObject *s;
1135 assert(PyScanner_Check(self));
1136 s = (PyScannerObject *)self;
1137 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1138 return NULL;
1139
1140 if (PyUnicode_Check(pystr)) {
1141 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1142 }
1143 else {
1144 PyErr_Format(PyExc_TypeError,
1145 "first argument must be a string, not %.80s",
1146 Py_TYPE(pystr)->tp_name);
1147 return NULL;
1148 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001149 PyDict_Clear(s->memo);
1150 if (rval == NULL)
1151 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001152 return _build_rval_index_tuple(rval, next_idx);
1153}
1154
1155static PyObject *
1156scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1157{
1158 PyScannerObject *s;
1159 s = (PyScannerObject *)type->tp_alloc(type, 0);
1160 if (s != NULL) {
1161 s->strict = NULL;
1162 s->object_hook = NULL;
1163 s->object_pairs_hook = NULL;
1164 s->parse_float = NULL;
1165 s->parse_int = NULL;
1166 s->parse_constant = NULL;
1167 }
1168 return (PyObject *)s;
1169}
1170
1171static int
1172scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1173{
1174 /* Initialize Scanner object */
1175 PyObject *ctx;
1176 static char *kwlist[] = {"context", NULL};
1177 PyScannerObject *s;
1178
1179 assert(PyScanner_Check(self));
1180 s = (PyScannerObject *)self;
1181
1182 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1183 return -1;
1184
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001185 if (s->memo == NULL) {
1186 s->memo = PyDict_New();
1187 if (s->memo == NULL)
1188 goto bail;
1189 }
1190
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001191 /* All of these will fail "gracefully" so we don't need to verify them */
1192 s->strict = PyObject_GetAttrString(ctx, "strict");
1193 if (s->strict == NULL)
1194 goto bail;
1195 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1196 if (s->object_hook == NULL)
1197 goto bail;
1198 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1199 if (s->object_pairs_hook == NULL)
1200 goto bail;
1201 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1202 if (s->parse_float == NULL)
1203 goto bail;
1204 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1205 if (s->parse_int == NULL)
1206 goto bail;
1207 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1208 if (s->parse_constant == NULL)
1209 goto bail;
1210
1211 return 0;
1212
1213bail:
1214 Py_CLEAR(s->strict);
1215 Py_CLEAR(s->object_hook);
1216 Py_CLEAR(s->object_pairs_hook);
1217 Py_CLEAR(s->parse_float);
1218 Py_CLEAR(s->parse_int);
1219 Py_CLEAR(s->parse_constant);
1220 return -1;
1221}
1222
1223PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1224
1225static
1226PyTypeObject PyScannerType = {
1227 PyVarObject_HEAD_INIT(NULL, 0)
1228 "_json.Scanner", /* tp_name */
1229 sizeof(PyScannerObject), /* tp_basicsize */
1230 0, /* tp_itemsize */
1231 scanner_dealloc, /* tp_dealloc */
1232 0, /* tp_print */
1233 0, /* tp_getattr */
1234 0, /* tp_setattr */
1235 0, /* tp_compare */
1236 0, /* tp_repr */
1237 0, /* tp_as_number */
1238 0, /* tp_as_sequence */
1239 0, /* tp_as_mapping */
1240 0, /* tp_hash */
1241 scanner_call, /* tp_call */
1242 0, /* tp_str */
1243 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1244 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1245 0, /* tp_as_buffer */
1246 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1247 scanner_doc, /* tp_doc */
1248 scanner_traverse, /* tp_traverse */
1249 scanner_clear, /* tp_clear */
1250 0, /* tp_richcompare */
1251 0, /* tp_weaklistoffset */
1252 0, /* tp_iter */
1253 0, /* tp_iternext */
1254 0, /* tp_methods */
1255 scanner_members, /* tp_members */
1256 0, /* tp_getset */
1257 0, /* tp_base */
1258 0, /* tp_dict */
1259 0, /* tp_descr_get */
1260 0, /* tp_descr_set */
1261 0, /* tp_dictoffset */
1262 scanner_init, /* tp_init */
1263 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1264 scanner_new, /* tp_new */
1265 0,/* PyObject_GC_Del, */ /* tp_free */
1266};
1267
1268static PyObject *
1269encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1270{
1271 PyEncoderObject *s;
1272 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1273 if (s != NULL) {
1274 s->markers = NULL;
1275 s->defaultfn = NULL;
1276 s->encoder = NULL;
1277 s->indent = NULL;
1278 s->key_separator = NULL;
1279 s->item_separator = NULL;
1280 s->sort_keys = NULL;
1281 s->skipkeys = NULL;
1282 }
1283 return (PyObject *)s;
1284}
1285
1286static int
1287encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1288{
1289 /* initialize Encoder object */
1290 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1291
1292 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001293 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1294 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001295
1296 assert(PyEncoder_Check(self));
1297 s = (PyEncoderObject *)self;
1298
1299 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001300 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
1301 &sort_keys, &skipkeys, &allow_nan))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001302 return -1;
1303
Antoine Pitrou781eba72009-12-08 15:57:31 +00001304 s->markers = markers;
1305 s->defaultfn = defaultfn;
1306 s->encoder = encoder;
1307 s->indent = indent;
1308 s->key_separator = key_separator;
1309 s->item_separator = item_separator;
1310 s->sort_keys = sort_keys;
1311 s->skipkeys = skipkeys;
1312 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
1313 s->allow_nan = PyObject_IsTrue(allow_nan);
1314
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001315 Py_INCREF(s->markers);
1316 Py_INCREF(s->defaultfn);
1317 Py_INCREF(s->encoder);
1318 Py_INCREF(s->indent);
1319 Py_INCREF(s->key_separator);
1320 Py_INCREF(s->item_separator);
1321 Py_INCREF(s->sort_keys);
1322 Py_INCREF(s->skipkeys);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001323 return 0;
1324}
1325
1326static PyObject *
1327encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1328{
1329 /* Python callable interface to encode_listencode_obj */
1330 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1331 PyObject *obj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001332 Py_ssize_t indent_level;
1333 PyEncoderObject *s;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001334 accumulator acc;
1335
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001336 assert(PyEncoder_Check(self));
1337 s = (PyEncoderObject *)self;
1338 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1339 &obj, _convertPyInt_AsSsize_t, &indent_level))
1340 return NULL;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001341 if (init_accumulator(&acc))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001342 return NULL;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001343 if (encoder_listencode_obj(s, &acc, obj, indent_level)) {
1344 destroy_accumulator(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001345 return NULL;
1346 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001347 return finish_accumulator(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001348}
1349
1350static PyObject *
1351_encoded_const(PyObject *obj)
1352{
1353 /* Return the JSON string representation of None, True, False */
1354 if (obj == Py_None) {
1355 static PyObject *s_null = NULL;
1356 if (s_null == NULL) {
1357 s_null = PyUnicode_InternFromString("null");
1358 }
1359 Py_INCREF(s_null);
1360 return s_null;
1361 }
1362 else if (obj == Py_True) {
1363 static PyObject *s_true = NULL;
1364 if (s_true == NULL) {
1365 s_true = PyUnicode_InternFromString("true");
1366 }
1367 Py_INCREF(s_true);
1368 return s_true;
1369 }
1370 else if (obj == Py_False) {
1371 static PyObject *s_false = NULL;
1372 if (s_false == NULL) {
1373 s_false = PyUnicode_InternFromString("false");
1374 }
1375 Py_INCREF(s_false);
1376 return s_false;
1377 }
1378 else {
1379 PyErr_SetString(PyExc_ValueError, "not a const");
1380 return NULL;
1381 }
1382}
1383
1384static PyObject *
1385encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1386{
1387 /* Return the JSON representation of a PyFloat */
1388 double i = PyFloat_AS_DOUBLE(obj);
1389 if (!Py_IS_FINITE(i)) {
1390 if (!s->allow_nan) {
1391 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1392 return NULL;
1393 }
1394 if (i > 0) {
1395 return PyUnicode_FromString("Infinity");
1396 }
1397 else if (i < 0) {
1398 return PyUnicode_FromString("-Infinity");
1399 }
1400 else {
1401 return PyUnicode_FromString("NaN");
1402 }
1403 }
1404 /* Use a better float format here? */
1405 return PyObject_Repr(obj);
1406}
1407
1408static PyObject *
1409encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1410{
1411 /* Return the JSON representation of a string */
1412 if (s->fast_encode)
1413 return py_encode_basestring_ascii(NULL, obj);
1414 else
1415 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1416}
1417
1418static int
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001419_steal_accumulate(accumulator *acc, PyObject *stolen)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001420{
1421 /* Append stolen and then decrement its reference count */
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001422 int rval = accumulate_unicode(acc, stolen);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001423 Py_DECREF(stolen);
1424 return rval;
1425}
1426
1427static int
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001428encoder_listencode_obj(PyEncoderObject *s, accumulator *acc,
1429 PyObject *obj, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001430{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001431 /* Encode Python object obj to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001432 PyObject *newobj;
1433 int rv;
1434
1435 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1436 PyObject *cstr = _encoded_const(obj);
1437 if (cstr == NULL)
1438 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001439 return _steal_accumulate(acc, cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001440 }
1441 else if (PyUnicode_Check(obj))
1442 {
1443 PyObject *encoded = encoder_encode_string(s, obj);
1444 if (encoded == NULL)
1445 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001446 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001447 }
1448 else if (PyLong_Check(obj)) {
1449 PyObject *encoded = PyObject_Str(obj);
1450 if (encoded == NULL)
1451 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001452 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001453 }
1454 else if (PyFloat_Check(obj)) {
1455 PyObject *encoded = encoder_encode_float(s, obj);
1456 if (encoded == NULL)
1457 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001458 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001459 }
1460 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001461 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1462 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001463 rv = encoder_listencode_list(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001464 Py_LeaveRecursiveCall();
1465 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001466 }
1467 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001468 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1469 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001470 rv = encoder_listencode_dict(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001471 Py_LeaveRecursiveCall();
1472 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001473 }
1474 else {
1475 PyObject *ident = NULL;
1476 if (s->markers != Py_None) {
1477 int has_key;
1478 ident = PyLong_FromVoidPtr(obj);
1479 if (ident == NULL)
1480 return -1;
1481 has_key = PyDict_Contains(s->markers, ident);
1482 if (has_key) {
1483 if (has_key != -1)
1484 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1485 Py_DECREF(ident);
1486 return -1;
1487 }
1488 if (PyDict_SetItem(s->markers, ident, obj)) {
1489 Py_DECREF(ident);
1490 return -1;
1491 }
1492 }
1493 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
1494 if (newobj == NULL) {
1495 Py_XDECREF(ident);
1496 return -1;
1497 }
Ezio Melotti13672652011-05-11 01:02:56 +03001498
1499 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1500 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001501 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001502 Py_LeaveRecursiveCall();
1503
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001504 Py_DECREF(newobj);
1505 if (rv) {
1506 Py_XDECREF(ident);
1507 return -1;
1508 }
1509 if (ident != NULL) {
1510 if (PyDict_DelItem(s->markers, ident)) {
1511 Py_XDECREF(ident);
1512 return -1;
1513 }
1514 Py_XDECREF(ident);
1515 }
1516 return rv;
1517 }
1518}
1519
1520static int
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001521encoder_listencode_dict(PyEncoderObject *s, accumulator *acc,
1522 PyObject *dct, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001523{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001524 /* Encode Python dict dct a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001525 static PyObject *open_dict = NULL;
1526 static PyObject *close_dict = NULL;
1527 static PyObject *empty_dict = NULL;
1528 PyObject *kstr = NULL;
1529 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001530 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001531 PyObject *items;
1532 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001533 int skipkeys;
1534 Py_ssize_t idx;
1535
1536 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1537 open_dict = PyUnicode_InternFromString("{");
1538 close_dict = PyUnicode_InternFromString("}");
1539 empty_dict = PyUnicode_InternFromString("{}");
1540 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1541 return -1;
1542 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001543 if (Py_SIZE(dct) == 0)
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001544 return accumulate_unicode(acc, empty_dict);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001545
1546 if (s->markers != Py_None) {
1547 int has_key;
1548 ident = PyLong_FromVoidPtr(dct);
1549 if (ident == NULL)
1550 goto bail;
1551 has_key = PyDict_Contains(s->markers, ident);
1552 if (has_key) {
1553 if (has_key != -1)
1554 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1555 goto bail;
1556 }
1557 if (PyDict_SetItem(s->markers, ident, dct)) {
1558 goto bail;
1559 }
1560 }
1561
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001562 if (accumulate_unicode(acc, open_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001563 goto bail;
1564
1565 if (s->indent != Py_None) {
1566 /* TODO: DOES NOT RUN */
1567 indent_level += 1;
1568 /*
1569 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1570 separator = _item_separator + newline_indent
1571 buf += newline_indent
1572 */
1573 }
1574
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001575 if (PyObject_IsTrue(s->sort_keys)) {
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001576 /* First sort the keys then replace them with (key, value) tuples. */
1577 Py_ssize_t i, nitems;
1578 items = PyMapping_Keys(dct);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001579 if (items == NULL)
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001580 goto bail;
1581 if (!PyList_Check(items)) {
1582 PyErr_SetString(PyExc_ValueError, "keys must return list");
1583 goto bail;
1584 }
1585 if (PyList_Sort(items) < 0)
1586 goto bail;
1587 nitems = PyList_GET_SIZE(items);
1588 for (i = 0; i < nitems; i++) {
1589 PyObject *key, *value;
1590 key = PyList_GET_ITEM(items, i);
1591 value = PyDict_GetItem(dct, key);
1592 item = PyTuple_Pack(2, key, value);
1593 if (item == NULL)
1594 goto bail;
1595 PyList_SET_ITEM(items, i, item);
1596 Py_DECREF(key);
1597 }
1598 }
1599 else {
1600 items = PyMapping_Items(dct);
1601 }
1602 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001603 goto bail;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001604 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001605 Py_DECREF(items);
1606 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001607 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001608 skipkeys = PyObject_IsTrue(s->skipkeys);
1609 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001610 while ((item = PyIter_Next(it)) != NULL) {
1611 PyObject *encoded, *key, *value;
1612 if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
1613 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1614 goto bail;
1615 }
1616 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001617 if (PyUnicode_Check(key)) {
1618 Py_INCREF(key);
1619 kstr = key;
1620 }
1621 else if (PyFloat_Check(key)) {
1622 kstr = encoder_encode_float(s, key);
1623 if (kstr == NULL)
1624 goto bail;
1625 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001626 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001627 /* This must come before the PyLong_Check because
1628 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001629 kstr = _encoded_const(key);
1630 if (kstr == NULL)
1631 goto bail;
1632 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001633 else if (PyLong_Check(key)) {
1634 kstr = PyObject_Str(key);
1635 if (kstr == NULL)
1636 goto bail;
1637 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001638 else if (skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001639 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001640 continue;
1641 }
1642 else {
1643 /* TODO: include repr of key */
Doug Hellmann1c524752010-07-21 12:29:04 +00001644 PyErr_SetString(PyExc_TypeError, "keys must be a string");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001645 goto bail;
1646 }
1647
1648 if (idx) {
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001649 if (accumulate_unicode(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001650 goto bail;
1651 }
1652
1653 encoded = encoder_encode_string(s, kstr);
1654 Py_CLEAR(kstr);
1655 if (encoded == NULL)
1656 goto bail;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001657 if (accumulate_unicode(acc, encoded)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001658 Py_DECREF(encoded);
1659 goto bail;
1660 }
1661 Py_DECREF(encoded);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001662 if (accumulate_unicode(acc, s->key_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001663 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001664
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001665 value = PyTuple_GET_ITEM(item, 1);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001666 if (encoder_listencode_obj(s, acc, value, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001667 goto bail;
1668 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001669 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001670 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001671 if (PyErr_Occurred())
1672 goto bail;
1673 Py_CLEAR(it);
1674
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001675 if (ident != NULL) {
1676 if (PyDict_DelItem(s->markers, ident))
1677 goto bail;
1678 Py_CLEAR(ident);
1679 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001680 /* TODO DOES NOT RUN; dead code
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001681 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001682 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001683
1684 yield '\n' + (' ' * (_indent * _current_indent_level))
1685 }*/
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001686 if (accumulate_unicode(acc, close_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001687 goto bail;
1688 return 0;
1689
1690bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001691 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001692 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001693 Py_XDECREF(kstr);
1694 Py_XDECREF(ident);
1695 return -1;
1696}
1697
1698
1699static int
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001700encoder_listencode_list(PyEncoderObject *s, accumulator *acc,
1701 PyObject *seq, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001702{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001703 /* Encode Python list seq to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001704 static PyObject *open_array = NULL;
1705 static PyObject *close_array = NULL;
1706 static PyObject *empty_array = NULL;
1707 PyObject *ident = NULL;
1708 PyObject *s_fast = NULL;
1709 Py_ssize_t num_items;
1710 PyObject **seq_items;
1711 Py_ssize_t i;
1712
1713 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1714 open_array = PyUnicode_InternFromString("[");
1715 close_array = PyUnicode_InternFromString("]");
1716 empty_array = PyUnicode_InternFromString("[]");
1717 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1718 return -1;
1719 }
1720 ident = NULL;
1721 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1722 if (s_fast == NULL)
1723 return -1;
1724 num_items = PySequence_Fast_GET_SIZE(s_fast);
1725 if (num_items == 0) {
1726 Py_DECREF(s_fast);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001727 return accumulate_unicode(acc, empty_array);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001728 }
1729
1730 if (s->markers != Py_None) {
1731 int has_key;
1732 ident = PyLong_FromVoidPtr(seq);
1733 if (ident == NULL)
1734 goto bail;
1735 has_key = PyDict_Contains(s->markers, ident);
1736 if (has_key) {
1737 if (has_key != -1)
1738 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1739 goto bail;
1740 }
1741 if (PyDict_SetItem(s->markers, ident, seq)) {
1742 goto bail;
1743 }
1744 }
1745
1746 seq_items = PySequence_Fast_ITEMS(s_fast);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001747 if (accumulate_unicode(acc, open_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001748 goto bail;
1749 if (s->indent != Py_None) {
1750 /* TODO: DOES NOT RUN */
1751 indent_level += 1;
1752 /*
1753 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1754 separator = _item_separator + newline_indent
1755 buf += newline_indent
1756 */
1757 }
1758 for (i = 0; i < num_items; i++) {
1759 PyObject *obj = seq_items[i];
1760 if (i) {
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001761 if (accumulate_unicode(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001762 goto bail;
1763 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001764 if (encoder_listencode_obj(s, acc, obj, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001765 goto bail;
1766 }
1767 if (ident != NULL) {
1768 if (PyDict_DelItem(s->markers, ident))
1769 goto bail;
1770 Py_CLEAR(ident);
1771 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001772
1773 /* TODO: DOES NOT RUN
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001774 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001775 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001776
1777 yield '\n' + (' ' * (_indent * _current_indent_level))
1778 }*/
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001779 if (accumulate_unicode(acc, close_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001780 goto bail;
1781 Py_DECREF(s_fast);
1782 return 0;
1783
1784bail:
1785 Py_XDECREF(ident);
1786 Py_DECREF(s_fast);
1787 return -1;
1788}
1789
1790static void
1791encoder_dealloc(PyObject *self)
1792{
1793 /* Deallocate Encoder */
1794 encoder_clear(self);
1795 Py_TYPE(self)->tp_free(self);
1796}
1797
1798static int
1799encoder_traverse(PyObject *self, visitproc visit, void *arg)
1800{
1801 PyEncoderObject *s;
1802 assert(PyEncoder_Check(self));
1803 s = (PyEncoderObject *)self;
1804 Py_VISIT(s->markers);
1805 Py_VISIT(s->defaultfn);
1806 Py_VISIT(s->encoder);
1807 Py_VISIT(s->indent);
1808 Py_VISIT(s->key_separator);
1809 Py_VISIT(s->item_separator);
1810 Py_VISIT(s->sort_keys);
1811 Py_VISIT(s->skipkeys);
1812 return 0;
1813}
1814
1815static int
1816encoder_clear(PyObject *self)
1817{
1818 /* Deallocate Encoder */
1819 PyEncoderObject *s;
1820 assert(PyEncoder_Check(self));
1821 s = (PyEncoderObject *)self;
1822 Py_CLEAR(s->markers);
1823 Py_CLEAR(s->defaultfn);
1824 Py_CLEAR(s->encoder);
1825 Py_CLEAR(s->indent);
1826 Py_CLEAR(s->key_separator);
1827 Py_CLEAR(s->item_separator);
1828 Py_CLEAR(s->sort_keys);
1829 Py_CLEAR(s->skipkeys);
1830 return 0;
1831}
1832
1833PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1834
1835static
1836PyTypeObject PyEncoderType = {
1837 PyVarObject_HEAD_INIT(NULL, 0)
1838 "_json.Encoder", /* tp_name */
1839 sizeof(PyEncoderObject), /* tp_basicsize */
1840 0, /* tp_itemsize */
1841 encoder_dealloc, /* tp_dealloc */
1842 0, /* tp_print */
1843 0, /* tp_getattr */
1844 0, /* tp_setattr */
1845 0, /* tp_compare */
1846 0, /* tp_repr */
1847 0, /* tp_as_number */
1848 0, /* tp_as_sequence */
1849 0, /* tp_as_mapping */
1850 0, /* tp_hash */
1851 encoder_call, /* tp_call */
1852 0, /* tp_str */
1853 0, /* tp_getattro */
1854 0, /* tp_setattro */
1855 0, /* tp_as_buffer */
1856 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1857 encoder_doc, /* tp_doc */
1858 encoder_traverse, /* tp_traverse */
1859 encoder_clear, /* tp_clear */
1860 0, /* tp_richcompare */
1861 0, /* tp_weaklistoffset */
1862 0, /* tp_iter */
1863 0, /* tp_iternext */
1864 0, /* tp_methods */
1865 encoder_members, /* tp_members */
1866 0, /* tp_getset */
1867 0, /* tp_base */
1868 0, /* tp_dict */
1869 0, /* tp_descr_get */
1870 0, /* tp_descr_set */
1871 0, /* tp_dictoffset */
1872 encoder_init, /* tp_init */
1873 0, /* tp_alloc */
1874 encoder_new, /* tp_new */
1875 0, /* tp_free */
1876};
1877
1878static PyMethodDef speedups_methods[] = {
1879 {"encode_basestring_ascii",
1880 (PyCFunction)py_encode_basestring_ascii,
1881 METH_O,
1882 pydoc_encode_basestring_ascii},
1883 {"scanstring",
1884 (PyCFunction)py_scanstring,
1885 METH_VARARGS,
1886 pydoc_scanstring},
Christian Heimes90540002008-05-08 14:29:10 +00001887 {NULL, NULL, 0, NULL}
1888};
1889
1890PyDoc_STRVAR(module_doc,
1891"json speedups\n");
1892
Martin v. Löwis1a214512008-06-11 05:26:20 +00001893static struct PyModuleDef jsonmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001894 PyModuleDef_HEAD_INIT,
1895 "_json",
1896 module_doc,
1897 -1,
1898 speedups_methods,
1899 NULL,
1900 NULL,
1901 NULL,
1902 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001903};
1904
1905PyObject*
1906PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00001907{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001908 PyObject *m = PyModule_Create(&jsonmodule);
1909 if (!m)
1910 return NULL;
1911 PyScannerType.tp_new = PyType_GenericNew;
1912 if (PyType_Ready(&PyScannerType) < 0)
1913 goto fail;
1914 PyEncoderType.tp_new = PyType_GenericNew;
1915 if (PyType_Ready(&PyEncoderType) < 0)
1916 goto fail;
1917 Py_INCREF((PyObject*)&PyScannerType);
1918 if (PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType) < 0) {
1919 Py_DECREF((PyObject*)&PyScannerType);
1920 goto fail;
1921 }
1922 Py_INCREF((PyObject*)&PyEncoderType);
1923 if (PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType) < 0) {
1924 Py_DECREF((PyObject*)&PyEncoderType);
1925 goto fail;
1926 }
1927 return m;
1928 fail:
1929 Py_DECREF(m);
1930 return NULL;
Christian Heimes90540002008-05-08 14:29:10 +00001931}