blob: f63d758348d3681a792daf0ef82a9c089c1dc74f [file] [log] [blame]
Christian Heimes90540002008-05-08 14:29:10 +00001#include "Python.h"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00002#include "structmember.h"
Antoine Pitroud0acb412012-03-22 14:42:18 +01003#include "accu.h"
4
/* UNUSED annotates a parameter or variable that is deliberately never read.
   On GCC-compatible compilers it expands to the "unused" attribute; on any
   other compiler it expands to nothing. */
#if defined(__GNUC__)
#  define UNUSED __attribute__((__unused__))
#else
#  define UNUSED
#endif
10
11#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
12#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
13#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
14#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
15
16static PyTypeObject PyScannerType;
17static PyTypeObject PyEncoderType;
18
19typedef struct _PyScannerObject {
20 PyObject_HEAD
21 PyObject *strict;
22 PyObject *object_hook;
23 PyObject *object_pairs_hook;
24 PyObject *parse_float;
25 PyObject *parse_int;
26 PyObject *parse_constant;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +000027 PyObject *memo;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000028} PyScannerObject;
29
30static PyMemberDef scanner_members[] = {
31 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
32 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
33 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
34 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
35 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
36 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
37 {NULL}
38};
39
40typedef struct _PyEncoderObject {
41 PyObject_HEAD
42 PyObject *markers;
43 PyObject *defaultfn;
44 PyObject *encoder;
45 PyObject *indent;
46 PyObject *key_separator;
47 PyObject *item_separator;
48 PyObject *sort_keys;
49 PyObject *skipkeys;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +010050 PyCFunction fast_encode;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000051 int allow_nan;
52} PyEncoderObject;
53
54static PyMemberDef encoder_members[] = {
55 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
56 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
57 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
58 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
59 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
60 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
61 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
62 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
63 {NULL}
64};
65
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020066static PyObject *
67join_list_unicode(PyObject *lst)
68{
69 /* return u''.join(lst) */
70 static PyObject *sep = NULL;
71 if (sep == NULL) {
72 sep = PyUnicode_FromStringAndSize("", 0);
73 if (sep == NULL)
74 return NULL;
75 }
76 return PyUnicode_Join(sep, lst);
77}
78
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020079/* Forward decls */
80
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000081static PyObject *
82ascii_escape_unicode(PyObject *pystr);
83static PyObject *
84py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
85void init_json(void);
86static PyObject *
87scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
88static PyObject *
89_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
90static PyObject *
91scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
92static int
93scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
94static void
95scanner_dealloc(PyObject *self);
96static int
97scanner_clear(PyObject *self);
98static PyObject *
99encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
100static int
101encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
102static void
103encoder_dealloc(PyObject *self);
104static int
105encoder_clear(PyObject *self);
106static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200107encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000108static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200109encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000110static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200111encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000112static PyObject *
Hirokazu Yamamotofecf5d12009-05-02 15:55:19 +0000113_encoded_const(PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000114static void
115raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
116static PyObject *
117encoder_encode_string(PyEncoderObject *s, PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000118static PyObject *
Ethan Furmana4998a72013-08-10 13:01:45 -0700119encoder_encode_long(PyEncoderObject* s UNUSED, PyObject *obj);
120static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000121encoder_encode_float(PyEncoderObject *s, PyObject *obj);
122
/* S_CHAR(c): true when c is a printable ASCII character (' ' through '~')
   other than backslash and double quote, i.e. one that is copied into the
   escaped output unchanged.
   IS_WHITESPACE(c): true for space, tab, newline and carriage return.
   Every use of the argument is parenthesized so that an expression argument
   keeps its meaning; the argument is still evaluated more than once, so it
   must not have side effects. */
#define S_CHAR(c) (((c) >= ' ') && ((c) <= '~') && ((c) != '\\') && ((c) != '"'))
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
Christian Heimes90540002008-05-08 14:29:10 +0000125
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000126static Py_ssize_t
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200127ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000128{
129 /* Escape unicode code point c to ASCII escape sequences
130 in char *output. output must have at least 12 bytes unused to
131 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000132 output[chars++] = '\\';
133 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000134 case '\\': output[chars++] = c; break;
135 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000136 case '\b': output[chars++] = 'b'; break;
137 case '\f': output[chars++] = 'f'; break;
138 case '\n': output[chars++] = 'n'; break;
139 case '\r': output[chars++] = 'r'; break;
140 case '\t': output[chars++] = 't'; break;
141 default:
Christian Heimes90540002008-05-08 14:29:10 +0000142 if (c >= 0x10000) {
143 /* UTF-16 surrogate pair */
Victor Stinner76df43d2012-10-30 01:42:39 +0100144 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000145 output[chars++] = 'u';
Victor Stinner76df43d2012-10-30 01:42:39 +0100146 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
147 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
148 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
149 output[chars++] = Py_hexdigits[(v ) & 0xf];
150 c = Py_UNICODE_LOW_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000151 output[chars++] = '\\';
152 }
Christian Heimes90540002008-05-08 14:29:10 +0000153 output[chars++] = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200154 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
155 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
156 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
157 output[chars++] = Py_hexdigits[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000158 }
159 return chars;
160}
161
162static PyObject *
163ascii_escape_unicode(PyObject *pystr)
164{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000165 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000166 Py_ssize_t i;
167 Py_ssize_t input_chars;
168 Py_ssize_t output_size;
169 Py_ssize_t chars;
170 PyObject *rval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200171 void *input;
172 unsigned char *output;
173 int kind;
Christian Heimes90540002008-05-08 14:29:10 +0000174
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200175 if (PyUnicode_READY(pystr) == -1)
176 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000177
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200178 input_chars = PyUnicode_GET_LENGTH(pystr);
179 input = PyUnicode_DATA(pystr);
180 kind = PyUnicode_KIND(pystr);
181
182 /* Compute the output size */
183 for (i = 0, output_size = 2; i < input_chars; i++) {
184 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500185 Py_ssize_t d;
186 if (S_CHAR(c)) {
187 d = 1;
188 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200189 else {
190 switch(c) {
Victor Stinnerd9c06312011-10-11 21:56:19 +0200191 case '\\': case '"': case '\b': case '\f':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200192 case '\n': case '\r': case '\t':
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500193 d = 2; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200194 default:
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500195 d = c >= 0x10000 ? 12 : 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200196 }
197 }
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500198 if (output_size > PY_SSIZE_T_MAX - d) {
199 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
200 return NULL;
201 }
202 output_size += d;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200203 }
204
205 rval = PyUnicode_New(output_size, 127);
Christian Heimes90540002008-05-08 14:29:10 +0000206 if (rval == NULL) {
207 return NULL;
208 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200209 output = PyUnicode_1BYTE_DATA(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000210 chars = 0;
211 output[chars++] = '"';
212 for (i = 0; i < input_chars; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200213 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Christian Heimes90540002008-05-08 14:29:10 +0000214 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000215 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000216 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000217 else {
218 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000219 }
Christian Heimes90540002008-05-08 14:29:10 +0000220 }
221 output[chars++] = '"';
Christian Heimesf402e922013-01-03 09:21:55 +0100222#ifdef Py_DEBUG
Victor Stinner8f825062012-04-27 13:55:39 +0200223 assert(_PyUnicode_CheckConsistency(rval, 1));
Christian Heimesf402e922013-01-03 09:21:55 +0100224#endif
Christian Heimes90540002008-05-08 14:29:10 +0000225 return rval;
226}
227
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100228static PyObject *
229escape_unicode(PyObject *pystr)
230{
231 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
232 Py_ssize_t i;
233 Py_ssize_t input_chars;
234 Py_ssize_t output_size;
235 Py_ssize_t chars;
236 PyObject *rval;
237 void *input;
238 int kind;
239 Py_UCS4 maxchar;
240
241 if (PyUnicode_READY(pystr) == -1)
242 return NULL;
243
244 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
245 input_chars = PyUnicode_GET_LENGTH(pystr);
246 input = PyUnicode_DATA(pystr);
247 kind = PyUnicode_KIND(pystr);
248
249 /* Compute the output size */
250 for (i = 0, output_size = 2; i < input_chars; i++) {
251 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500252 Py_ssize_t d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100253 switch (c) {
254 case '\\': case '"': case '\b': case '\f':
255 case '\n': case '\r': case '\t':
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500256 d = 2;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100257 break;
258 default:
259 if (c <= 0x1f)
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500260 d = 6;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100261 else
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500262 d = 1;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100263 }
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500264 if (output_size > PY_SSIZE_T_MAX - d) {
265 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
266 return NULL;
267 }
268 output_size += d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100269 }
270
271 rval = PyUnicode_New(output_size, maxchar);
272 if (rval == NULL)
273 return NULL;
274
275 kind = PyUnicode_KIND(rval);
276
277#define ENCODE_OUTPUT do { \
278 chars = 0; \
279 output[chars++] = '"'; \
280 for (i = 0; i < input_chars; i++) { \
281 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
282 switch (c) { \
283 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
284 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
285 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
286 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
287 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
288 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
289 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
290 default: \
291 if (c <= 0x1f) { \
292 output[chars++] = '\\'; \
293 output[chars++] = 'u'; \
294 output[chars++] = '0'; \
295 output[chars++] = '0'; \
296 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
297 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
298 } else { \
299 output[chars++] = c; \
300 } \
301 } \
302 } \
303 output[chars++] = '"'; \
304 } while (0)
305
306 if (kind == PyUnicode_1BYTE_KIND) {
307 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
308 ENCODE_OUTPUT;
309 } else if (kind == PyUnicode_2BYTE_KIND) {
310 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
311 ENCODE_OUTPUT;
312 } else {
313 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
314 assert(kind == PyUnicode_4BYTE_KIND);
315 ENCODE_OUTPUT;
316 }
317#undef ENCODE_OUTPUT
318
319#ifdef Py_DEBUG
320 assert(_PyUnicode_CheckConsistency(rval, 1));
321#endif
322 return rval;
323}
324
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000325static void
Christian Heimes90540002008-05-08 14:29:10 +0000326raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
327{
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200328 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
329 static PyObject *JSONDecodeError = NULL;
330 PyObject *exc;
331 if (JSONDecodeError == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000332 PyObject *decoder = PyImport_ImportModule("json.decoder");
333 if (decoder == NULL)
334 return;
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200335 JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000336 Py_DECREF(decoder);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200337 if (JSONDecodeError == NULL)
Christian Heimes90540002008-05-08 14:29:10 +0000338 return;
Christian Heimes90540002008-05-08 14:29:10 +0000339 }
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200340 exc = PyObject_CallFunction(JSONDecodeError, "(zOn)", msg, s, end);
341 if (exc) {
342 PyErr_SetObject(JSONDecodeError, exc);
343 Py_DECREF(exc);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000344 }
Christian Heimes90540002008-05-08 14:29:10 +0000345}
346
Ezio Melotti37623ab2013-01-03 08:44:15 +0200347static void
348raise_stop_iteration(Py_ssize_t idx)
349{
350 PyObject *value = PyLong_FromSsize_t(idx);
351 if (value != NULL) {
352 PyErr_SetObject(PyExc_StopIteration, value);
353 Py_DECREF(value);
354 }
355}
356
Christian Heimes90540002008-05-08 14:29:10 +0000357static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000358_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
359 /* return (rval, idx) tuple, stealing reference to rval */
360 PyObject *tpl;
361 PyObject *pyidx;
362 /*
363 steal a reference to rval, returns (rval, idx)
364 */
365 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000366 return NULL;
367 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000368 pyidx = PyLong_FromSsize_t(idx);
369 if (pyidx == NULL) {
370 Py_DECREF(rval);
371 return NULL;
372 }
373 tpl = PyTuple_New(2);
374 if (tpl == NULL) {
375 Py_DECREF(pyidx);
376 Py_DECREF(rval);
377 return NULL;
378 }
379 PyTuple_SET_ITEM(tpl, 0, rval);
380 PyTuple_SET_ITEM(tpl, 1, pyidx);
381 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000382}
383
/* If a pending `chunk` exists, append it to the `chunks` list (creating the
   list on first use) and drop the local reference.  Jumps to `bail` if the
   list cannot be created or the append fails.  Expects `chunk`, `chunks`
   and a `bail` label in the enclosing function.  This expands to a plain
   `if` block because it is used without a trailing semicolon. */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        int append_failed_; \
        if (chunks == NULL && (chunks = PyList_New(0)) == NULL) { \
            goto bail; \
        } \
        append_failed_ = PyList_Append(chunks, chunk); \
        Py_CLEAR(chunk); \
        if (append_failed_) { \
            goto bail; \
        } \
    }
398
Christian Heimes90540002008-05-08 14:29:10 +0000399static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000400scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000401{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000402 /* Read the JSON string from PyUnicode pystr.
403 end is the index of the first character after the quote.
404 if strict is zero then literal control characters are allowed
405 *next_end_ptr is a return-by-reference index of the character
406 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000407
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000408 Return value is a new PyUnicode
409 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000410 PyObject *rval = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200411 Py_ssize_t len;
Christian Heimes90540002008-05-08 14:29:10 +0000412 Py_ssize_t begin = end - 1;
Brett Cannonb94767f2011-02-22 20:15:44 +0000413 Py_ssize_t next /* = begin */;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200414 const void *buf;
415 int kind;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000416 PyObject *chunks = NULL;
417 PyObject *chunk = NULL;
418
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200419 if (PyUnicode_READY(pystr) == -1)
420 return 0;
421
422 len = PyUnicode_GET_LENGTH(pystr);
423 buf = PyUnicode_DATA(pystr);
424 kind = PyUnicode_KIND(pystr);
425
Ezio Melotti37623ab2013-01-03 08:44:15 +0200426 if (end < 0 || len < end) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000427 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
428 goto bail;
429 }
Christian Heimes90540002008-05-08 14:29:10 +0000430 while (1) {
431 /* Find the end of the string or the next escape */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200432 Py_UCS4 c = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000433 for (next = end; next < len; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200434 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000435 if (c == '"' || c == '\\') {
436 break;
437 }
438 else if (strict && c <= 0x1f) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000439 raise_errmsg("Invalid control character at", pystr, next);
Christian Heimes90540002008-05-08 14:29:10 +0000440 goto bail;
441 }
442 }
443 if (!(c == '"' || c == '\\')) {
444 raise_errmsg("Unterminated string starting at", pystr, begin);
445 goto bail;
446 }
447 /* Pick up this chunk if it's not zero length */
448 if (next != end) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000449 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200450 chunk = PyUnicode_FromKindAndData(
451 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200452 (char*)buf + kind * end,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200453 next - end);
Christian Heimes90540002008-05-08 14:29:10 +0000454 if (chunk == NULL) {
455 goto bail;
456 }
Christian Heimes90540002008-05-08 14:29:10 +0000457 }
458 next++;
459 if (c == '"') {
460 end = next;
461 break;
462 }
463 if (next == len) {
464 raise_errmsg("Unterminated string starting at", pystr, begin);
465 goto bail;
466 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200467 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000468 if (c != 'u') {
469 /* Non-unicode backslash escapes */
470 end = next + 1;
471 switch (c) {
472 case '"': break;
473 case '\\': break;
474 case '/': break;
475 case 'b': c = '\b'; break;
476 case 'f': c = '\f'; break;
477 case 'n': c = '\n'; break;
478 case 'r': c = '\r'; break;
479 case 't': c = '\t'; break;
480 default: c = 0;
481 }
482 if (c == 0) {
483 raise_errmsg("Invalid \\escape", pystr, end - 2);
484 goto bail;
485 }
486 }
487 else {
488 c = 0;
489 next++;
490 end = next + 4;
491 if (end >= len) {
492 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
493 goto bail;
494 }
495 /* Decode 4 hex digits */
496 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200497 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000498 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000499 switch (digit) {
500 case '0': case '1': case '2': case '3': case '4':
501 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000502 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000503 case 'a': case 'b': case 'c': case 'd': case 'e':
504 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000505 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000506 case 'A': case 'B': case 'C': case 'D': case 'E':
507 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000508 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000509 default:
510 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
511 goto bail;
512 }
513 }
Christian Heimes90540002008-05-08 14:29:10 +0000514 /* Surrogate pair */
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200515 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
516 PyUnicode_READ(kind, buf, next++) == '\\' &&
517 PyUnicode_READ(kind, buf, next++) == 'u') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200518 Py_UCS4 c2 = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000519 end += 6;
520 /* Decode 4 hex digits */
521 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200522 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000523 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000524 switch (digit) {
525 case '0': case '1': case '2': case '3': case '4':
526 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000527 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000528 case 'a': case 'b': case 'c': case 'd': case 'e':
529 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000530 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000531 case 'A': case 'B': case 'C': case 'D': case 'E':
532 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000533 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000534 default:
535 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
536 goto bail;
537 }
538 }
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200539 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
540 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
541 else
542 end -= 6;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000543 }
Christian Heimes90540002008-05-08 14:29:10 +0000544 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000545 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200546 chunk = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &c, 1);
Christian Heimes90540002008-05-08 14:29:10 +0000547 if (chunk == NULL) {
548 goto bail;
549 }
Christian Heimes90540002008-05-08 14:29:10 +0000550 }
551
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000552 if (chunks == NULL) {
553 if (chunk != NULL)
554 rval = chunk;
555 else
556 rval = PyUnicode_FromStringAndSize("", 0);
Christian Heimes90540002008-05-08 14:29:10 +0000557 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000558 else {
559 APPEND_OLD_CHUNK
560 rval = join_list_unicode(chunks);
561 if (rval == NULL) {
562 goto bail;
563 }
564 Py_CLEAR(chunks);
565 }
566
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000567 *next_end_ptr = end;
568 return rval;
Christian Heimes90540002008-05-08 14:29:10 +0000569bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000570 *next_end_ptr = -1;
Christian Heimes90540002008-05-08 14:29:10 +0000571 Py_XDECREF(chunks);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000572 Py_XDECREF(chunk);
Christian Heimes90540002008-05-08 14:29:10 +0000573 return NULL;
574}
575
576PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000577 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000578 "\n"
579 "Scan the string s for a JSON string. End is the index of the\n"
580 "character in s after the quote that started the JSON string.\n"
581 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
582 "on attempt to decode an invalid string. If strict is False then literal\n"
583 "control characters are allowed in the string.\n"
584 "\n"
585 "Returns a tuple of the decoded string and the index of the character in s\n"
586 "after the end quote."
587);
Christian Heimes90540002008-05-08 14:29:10 +0000588
589static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000590py_scanstring(PyObject* self UNUSED, PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000591{
592 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000593 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000594 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000595 Py_ssize_t next_end = -1;
596 int strict = 1;
Antoine Pitroucbb02842012-12-01 19:34:16 +0100597 if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000598 return NULL;
599 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000600 if (PyUnicode_Check(pystr)) {
601 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000602 }
603 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000604 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000605 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000606 Py_TYPE(pystr)->tp_name);
607 return NULL;
608 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000609 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000610}
611
612PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000613 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000614 "\n"
615 "Return an ASCII-only JSON representation of a Python string"
616);
Christian Heimes90540002008-05-08 14:29:10 +0000617
618static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000619py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000620{
621 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000622 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000623 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000624 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000625 rval = ascii_escape_unicode(pystr);
626 }
627 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000628 PyErr_Format(PyExc_TypeError,
629 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000630 Py_TYPE(pystr)->tp_name);
631 return NULL;
632 }
Christian Heimes90540002008-05-08 14:29:10 +0000633 return rval;
634}
635
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100636
637PyDoc_STRVAR(pydoc_encode_basestring,
638 "encode_basestring(string) -> string\n"
639 "\n"
640 "Return a JSON representation of a Python string"
641);
642
643static PyObject *
644py_encode_basestring(PyObject* self UNUSED, PyObject *pystr)
645{
646 PyObject *rval;
647 /* Return a JSON representation of a Python string */
648 /* METH_O */
649 if (PyUnicode_Check(pystr)) {
650 rval = escape_unicode(pystr);
651 }
652 else {
653 PyErr_Format(PyExc_TypeError,
654 "first argument must be a string, not %.80s",
655 Py_TYPE(pystr)->tp_name);
656 return NULL;
657 }
658 return rval;
659}
660
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000661static void
662scanner_dealloc(PyObject *self)
663{
664 /* Deallocate scanner object */
665 scanner_clear(self);
666 Py_TYPE(self)->tp_free(self);
667}
668
669static int
670scanner_traverse(PyObject *self, visitproc visit, void *arg)
671{
672 PyScannerObject *s;
673 assert(PyScanner_Check(self));
674 s = (PyScannerObject *)self;
675 Py_VISIT(s->strict);
676 Py_VISIT(s->object_hook);
677 Py_VISIT(s->object_pairs_hook);
678 Py_VISIT(s->parse_float);
679 Py_VISIT(s->parse_int);
680 Py_VISIT(s->parse_constant);
681 return 0;
682}
683
684static int
685scanner_clear(PyObject *self)
686{
687 PyScannerObject *s;
688 assert(PyScanner_Check(self));
689 s = (PyScannerObject *)self;
690 Py_CLEAR(s->strict);
691 Py_CLEAR(s->object_hook);
692 Py_CLEAR(s->object_pairs_hook);
693 Py_CLEAR(s->parse_float);
694 Py_CLEAR(s->parse_int);
695 Py_CLEAR(s->parse_constant);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000696 Py_CLEAR(s->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000697 return 0;
698}
699
700static PyObject *
701_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
702 /* Read a JSON object from PyUnicode pystr.
703 idx is the index of the first character after the opening curly brace.
704 *next_idx_ptr is a return-by-reference index to the first character after
705 the closing curly brace.
706
707 Returns a new PyObject (usually a dict, but object_hook can change that)
708 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200709 void *str;
710 int kind;
711 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000712 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000713 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000714 PyObject *key = NULL;
715 int strict = PyObject_IsTrue(s->strict);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000716 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000717 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000718
Serhiy Storchakafa494fd2015-05-30 17:45:22 +0300719 if (strict < 0)
720 return NULL;
721
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200722 if (PyUnicode_READY(pystr) == -1)
723 return NULL;
724
725 str = PyUnicode_DATA(pystr);
726 kind = PyUnicode_KIND(pystr);
727 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
728
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000729 if (has_pairs_hook)
730 rval = PyList_New(0);
731 else
732 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000733 if (rval == NULL)
734 return NULL;
735
736 /* skip whitespace after { */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200737 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000738
739 /* only loop if the object is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200740 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
741 while (1) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000742 PyObject *memokey;
743
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000744 /* read key */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200745 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200746 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000747 goto bail;
748 }
749 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
750 if (key == NULL)
751 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000752 memokey = PyDict_GetItem(s->memo, key);
753 if (memokey != NULL) {
754 Py_INCREF(memokey);
755 Py_DECREF(key);
756 key = memokey;
757 }
758 else {
759 if (PyDict_SetItem(s->memo, key, key) < 0)
760 goto bail;
761 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000762 idx = next_idx;
763
764 /* skip whitespace between key and : delimiter, read :, skip whitespace */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200765 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
766 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200767 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000768 goto bail;
769 }
770 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200771 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000772
773 /* read any JSON term */
774 val = scan_once_unicode(s, pystr, idx, &next_idx);
775 if (val == NULL)
776 goto bail;
777
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000778 if (has_pairs_hook) {
779 PyObject *item = PyTuple_Pack(2, key, val);
780 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000781 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000782 Py_CLEAR(key);
783 Py_CLEAR(val);
784 if (PyList_Append(rval, item) == -1) {
785 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000786 goto bail;
787 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000788 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000789 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000790 else {
791 if (PyDict_SetItem(rval, key, val) < 0)
792 goto bail;
793 Py_CLEAR(key);
794 Py_CLEAR(val);
795 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000796 idx = next_idx;
797
798 /* skip whitespace before } or , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200799 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000800
801 /* bail if the object is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200802 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000803 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200804 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200805 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000806 goto bail;
807 }
808 idx++;
809
810 /* skip whitespace after , delimiter */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200811 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000812 }
813 }
814
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000815 *next_idx_ptr = idx + 1;
816
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000817 if (has_pairs_hook) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000818 val = PyObject_CallFunctionObjArgs(s->object_pairs_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000819 Py_DECREF(rval);
820 return val;
821 }
822
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000823 /* if object_hook is not None: rval = object_hook(rval) */
824 if (s->object_hook != Py_None) {
825 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000826 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000827 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000828 }
829 return rval;
830bail:
831 Py_XDECREF(key);
832 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000833 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000834 return NULL;
835}
836
837static PyObject *
838_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200839 /* Read a JSON array from PyUnicode pystr.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000840 idx is the index of the first character after the opening brace.
841 *next_idx_ptr is a return-by-reference index to the first character after
842 the closing brace.
843
844 Returns a new PyList
845 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200846 void *str;
847 int kind;
848 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000849 PyObject *val = NULL;
850 PyObject *rval = PyList_New(0);
851 Py_ssize_t next_idx;
852 if (rval == NULL)
853 return NULL;
854
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200855 if (PyUnicode_READY(pystr) == -1)
856 return NULL;
857
858 str = PyUnicode_DATA(pystr);
859 kind = PyUnicode_KIND(pystr);
860 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
861
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000862 /* skip whitespace after [ */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200863 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000864
865 /* only loop if the array is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200866 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
867 while (1) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000868
869 /* read any JSON term */
870 val = scan_once_unicode(s, pystr, idx, &next_idx);
871 if (val == NULL)
872 goto bail;
873
874 if (PyList_Append(rval, val) == -1)
875 goto bail;
876
877 Py_CLEAR(val);
878 idx = next_idx;
879
880 /* skip whitespace between term and , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200881 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000882
883 /* bail if the array is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200884 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000885 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200886 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200887 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000888 goto bail;
889 }
890 idx++;
891
892 /* skip whitespace after , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200893 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000894 }
895 }
896
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200897 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
898 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200899 raise_errmsg("Expecting value", pystr, end_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000900 goto bail;
901 }
902 *next_idx_ptr = idx + 1;
903 return rval;
904bail:
905 Py_XDECREF(val);
906 Py_DECREF(rval);
907 return NULL;
908}
909
910static PyObject *
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200911_parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
912 /* Read a JSON constant.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000913 constant is the constant string that was found
914 ("NaN", "Infinity", "-Infinity").
915 idx is the index of the first character of the constant
916 *next_idx_ptr is a return-by-reference index to the first character after
917 the constant.
918
919 Returns the result of parse_constant
920 */
921 PyObject *cstr;
922 PyObject *rval;
923 /* constant is "NaN", "Infinity", or "-Infinity" */
924 cstr = PyUnicode_InternFromString(constant);
925 if (cstr == NULL)
926 return NULL;
927
928 /* rval = parse_constant(constant) */
929 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200930 idx += PyUnicode_GET_LENGTH(cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000931 Py_DECREF(cstr);
932 *next_idx_ptr = idx;
933 return rval;
934}
935
936static PyObject *
937_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
938 /* Read a JSON number from PyUnicode pystr.
939 idx is the index of the first character of the number
940 *next_idx_ptr is a return-by-reference index to the first character after
941 the number.
942
943 Returns a new PyObject representation of that number:
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200944 PyLong, or PyFloat.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000945 May return other types if parse_int or parse_float are set
946 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200947 void *str;
948 int kind;
949 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000950 Py_ssize_t idx = start;
951 int is_float = 0;
952 PyObject *rval;
Antoine Pitrouf6454512011-04-25 19:16:06 +0200953 PyObject *numstr = NULL;
954 PyObject *custom_func;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000955
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200956 if (PyUnicode_READY(pystr) == -1)
957 return NULL;
958
959 str = PyUnicode_DATA(pystr);
960 kind = PyUnicode_KIND(pystr);
961 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
962
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000963 /* read a sign if it's there, make sure it's not the end of the string */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200964 if (PyUnicode_READ(kind, str, idx) == '-') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000965 idx++;
966 if (idx > end_idx) {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200967 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000968 return NULL;
969 }
970 }
971
972 /* read as many integer digits as we find as long as it doesn't start with 0 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200973 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000974 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200975 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000976 }
977 /* if it starts with 0 we only expect one integer digit */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200978 else if (PyUnicode_READ(kind, str, idx) == '0') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000979 idx++;
980 }
981 /* no integer digits, error */
982 else {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200983 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000984 return NULL;
985 }
986
987 /* if the next char is '.' followed by a digit then read all float digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200988 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000989 is_float = 1;
990 idx += 2;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200991 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000992 }
993
994 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200995 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000996 Py_ssize_t e_start = idx;
997 idx++;
998
999 /* read an exponent sign if present */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001000 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001001
1002 /* read all digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001003 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001004
1005 /* if we got a digit, then parse as float. if not, backtrack */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001006 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001007 is_float = 1;
1008 }
1009 else {
1010 idx = e_start;
1011 }
1012 }
1013
Antoine Pitrouf6454512011-04-25 19:16:06 +02001014 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
1015 custom_func = s->parse_float;
1016 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
1017 custom_func = s->parse_int;
1018 else
1019 custom_func = NULL;
1020
1021 if (custom_func) {
1022 /* copy the section we determined to be a number */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001023 numstr = PyUnicode_FromKindAndData(kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001024 (char*)str + kind * start,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001025 idx - start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001026 if (numstr == NULL)
1027 return NULL;
1028 rval = PyObject_CallFunctionObjArgs(custom_func, numstr, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001029 }
1030 else {
Antoine Pitrouf6454512011-04-25 19:16:06 +02001031 Py_ssize_t i, n;
1032 char *buf;
1033 /* Straight conversion to ASCII, to avoid costly conversion of
1034 decimal unicode digits (which cannot appear here) */
1035 n = idx - start;
1036 numstr = PyBytes_FromStringAndSize(NULL, n);
1037 if (numstr == NULL)
1038 return NULL;
1039 buf = PyBytes_AS_STRING(numstr);
1040 for (i = 0; i < n; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001041 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001042 }
1043 if (is_float)
1044 rval = PyFloat_FromString(numstr);
1045 else
1046 rval = PyLong_FromString(buf, NULL, 10);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001047 }
1048 Py_DECREF(numstr);
1049 *next_idx_ptr = idx;
1050 return rval;
1051}
1052
1053static PyObject *
1054scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1055{
1056 /* Read one JSON term (of any kind) from PyUnicode pystr.
1057 idx is the index of the first character of the term
1058 *next_idx_ptr is a return-by-reference index to the first character after
1059 the number.
1060
1061 Returns a new PyObject representation of the term.
1062 */
Ezio Melotti362b9512011-05-07 17:58:09 +03001063 PyObject *res;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001064 void *str;
1065 int kind;
1066 Py_ssize_t length;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001067 int strict;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001068
1069 if (PyUnicode_READY(pystr) == -1)
1070 return NULL;
1071
1072 str = PyUnicode_DATA(pystr);
1073 kind = PyUnicode_KIND(pystr);
1074 length = PyUnicode_GET_LENGTH(pystr);
1075
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001076 if (idx < 0) {
Benjamin Peterson9beee042014-04-14 11:46:51 -04001077 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001078 return NULL;
1079 }
1080 if (idx >= length) {
Ezio Melotti37623ab2013-01-03 08:44:15 +02001081 raise_stop_iteration(idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001082 return NULL;
1083 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001084
1085 switch (PyUnicode_READ(kind, str, idx)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001086 case '"':
1087 /* string */
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001088 strict = PyObject_IsTrue(s->strict);
1089 if (strict < 0)
1090 return NULL;
1091 return scanstring_unicode(pystr, idx + 1, strict, next_idx_ptr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001092 case '{':
1093 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +03001094 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1095 "from a unicode string"))
1096 return NULL;
1097 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1098 Py_LeaveRecursiveCall();
1099 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001100 case '[':
1101 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +03001102 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1103 "from a unicode string"))
1104 return NULL;
1105 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1106 Py_LeaveRecursiveCall();
1107 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001108 case 'n':
1109 /* null */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001110 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001111 Py_INCREF(Py_None);
1112 *next_idx_ptr = idx + 4;
1113 return Py_None;
1114 }
1115 break;
1116 case 't':
1117 /* true */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001118 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001119 Py_INCREF(Py_True);
1120 *next_idx_ptr = idx + 4;
1121 return Py_True;
1122 }
1123 break;
1124 case 'f':
1125 /* false */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001126 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1127 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1128 PyUnicode_READ(kind, str, idx + 3) == 's' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001129 PyUnicode_READ(kind, str, idx + 4) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001130 Py_INCREF(Py_False);
1131 *next_idx_ptr = idx + 5;
1132 return Py_False;
1133 }
1134 break;
1135 case 'N':
1136 /* NaN */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001137 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001138 PyUnicode_READ(kind, str, idx + 2) == 'N') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001139 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1140 }
1141 break;
1142 case 'I':
1143 /* Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001144 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1145 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1146 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001147 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001148 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1149 PyUnicode_READ(kind, str, idx + 6) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001150 PyUnicode_READ(kind, str, idx + 7) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001151 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1152 }
1153 break;
1154 case '-':
1155 /* -Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001156 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001157 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1158 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001159 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001160 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001161 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1162 PyUnicode_READ(kind, str, idx + 7) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001163 PyUnicode_READ(kind, str, idx + 8) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001164 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1165 }
1166 break;
1167 }
1168 /* Didn't find a string, object, array, or named constant. Look for a number. */
1169 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1170}
1171
1172static PyObject *
1173scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1174{
1175 /* Python callable interface to scan_once_{str,unicode} */
1176 PyObject *pystr;
1177 PyObject *rval;
1178 Py_ssize_t idx;
1179 Py_ssize_t next_idx = -1;
1180 static char *kwlist[] = {"string", "idx", NULL};
1181 PyScannerObject *s;
1182 assert(PyScanner_Check(self));
1183 s = (PyScannerObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001184 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001185 return NULL;
1186
1187 if (PyUnicode_Check(pystr)) {
1188 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1189 }
1190 else {
1191 PyErr_Format(PyExc_TypeError,
1192 "first argument must be a string, not %.80s",
1193 Py_TYPE(pystr)->tp_name);
1194 return NULL;
1195 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001196 PyDict_Clear(s->memo);
1197 if (rval == NULL)
1198 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001199 return _build_rval_index_tuple(rval, next_idx);
1200}
1201
1202static PyObject *
1203scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1204{
1205 PyScannerObject *s;
1206 s = (PyScannerObject *)type->tp_alloc(type, 0);
1207 if (s != NULL) {
1208 s->strict = NULL;
1209 s->object_hook = NULL;
1210 s->object_pairs_hook = NULL;
1211 s->parse_float = NULL;
1212 s->parse_int = NULL;
1213 s->parse_constant = NULL;
1214 }
1215 return (PyObject *)s;
1216}
1217
1218static int
1219scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1220{
1221 /* Initialize Scanner object */
1222 PyObject *ctx;
1223 static char *kwlist[] = {"context", NULL};
1224 PyScannerObject *s;
1225
1226 assert(PyScanner_Check(self));
1227 s = (PyScannerObject *)self;
1228
1229 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1230 return -1;
1231
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001232 if (s->memo == NULL) {
1233 s->memo = PyDict_New();
1234 if (s->memo == NULL)
1235 goto bail;
1236 }
1237
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001238 /* All of these will fail "gracefully" so we don't need to verify them */
1239 s->strict = PyObject_GetAttrString(ctx, "strict");
1240 if (s->strict == NULL)
1241 goto bail;
1242 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1243 if (s->object_hook == NULL)
1244 goto bail;
1245 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1246 if (s->object_pairs_hook == NULL)
1247 goto bail;
1248 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1249 if (s->parse_float == NULL)
1250 goto bail;
1251 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1252 if (s->parse_int == NULL)
1253 goto bail;
1254 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1255 if (s->parse_constant == NULL)
1256 goto bail;
1257
1258 return 0;
1259
1260bail:
1261 Py_CLEAR(s->strict);
1262 Py_CLEAR(s->object_hook);
1263 Py_CLEAR(s->object_pairs_hook);
1264 Py_CLEAR(s->parse_float);
1265 Py_CLEAR(s->parse_int);
1266 Py_CLEAR(s->parse_constant);
1267 return -1;
1268}
1269
1270PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1271
1272static
1273PyTypeObject PyScannerType = {
1274 PyVarObject_HEAD_INIT(NULL, 0)
1275 "_json.Scanner", /* tp_name */
1276 sizeof(PyScannerObject), /* tp_basicsize */
1277 0, /* tp_itemsize */
1278 scanner_dealloc, /* tp_dealloc */
1279 0, /* tp_print */
1280 0, /* tp_getattr */
1281 0, /* tp_setattr */
1282 0, /* tp_compare */
1283 0, /* tp_repr */
1284 0, /* tp_as_number */
1285 0, /* tp_as_sequence */
1286 0, /* tp_as_mapping */
1287 0, /* tp_hash */
1288 scanner_call, /* tp_call */
1289 0, /* tp_str */
1290 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1291 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1292 0, /* tp_as_buffer */
1293 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1294 scanner_doc, /* tp_doc */
1295 scanner_traverse, /* tp_traverse */
1296 scanner_clear, /* tp_clear */
1297 0, /* tp_richcompare */
1298 0, /* tp_weaklistoffset */
1299 0, /* tp_iter */
1300 0, /* tp_iternext */
1301 0, /* tp_methods */
1302 scanner_members, /* tp_members */
1303 0, /* tp_getset */
1304 0, /* tp_base */
1305 0, /* tp_dict */
1306 0, /* tp_descr_get */
1307 0, /* tp_descr_set */
1308 0, /* tp_dictoffset */
1309 scanner_init, /* tp_init */
1310 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1311 scanner_new, /* tp_new */
1312 0,/* PyObject_GC_Del, */ /* tp_free */
1313};
1314
1315static PyObject *
1316encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1317{
1318 PyEncoderObject *s;
1319 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1320 if (s != NULL) {
1321 s->markers = NULL;
1322 s->defaultfn = NULL;
1323 s->encoder = NULL;
1324 s->indent = NULL;
1325 s->key_separator = NULL;
1326 s->item_separator = NULL;
1327 s->sort_keys = NULL;
1328 s->skipkeys = NULL;
1329 }
1330 return (PyObject *)s;
1331}
1332
1333static int
1334encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1335{
1336 /* initialize Encoder object */
1337 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1338
1339 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001340 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001341 PyObject *item_separator, *sort_keys, *skipkeys;
1342 int allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001343
1344 assert(PyEncoder_Check(self));
1345 s = (PyEncoderObject *)self;
1346
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001347 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUOOp:make_encoder", kwlist,
1348 &markers, &defaultfn, &encoder, &indent,
1349 &key_separator, &item_separator,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001350 &sort_keys, &skipkeys, &allow_nan))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001351 return -1;
1352
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001353 if (markers != Py_None && !PyDict_Check(markers)) {
1354 PyErr_Format(PyExc_TypeError,
1355 "make_encoder() argument 1 must be dict or None, "
1356 "not %.200s", Py_TYPE(markers)->tp_name);
1357 return -1;
1358 }
1359
Antoine Pitrou781eba72009-12-08 15:57:31 +00001360 s->markers = markers;
1361 s->defaultfn = defaultfn;
1362 s->encoder = encoder;
1363 s->indent = indent;
1364 s->key_separator = key_separator;
1365 s->item_separator = item_separator;
1366 s->sort_keys = sort_keys;
1367 s->skipkeys = skipkeys;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001368 s->fast_encode = NULL;
1369 if (PyCFunction_Check(s->encoder)) {
1370 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1371 if (f == (PyCFunction)py_encode_basestring_ascii ||
1372 f == (PyCFunction)py_encode_basestring) {
1373 s->fast_encode = f;
1374 }
1375 }
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001376 s->allow_nan = allow_nan;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001377
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001378 Py_INCREF(s->markers);
1379 Py_INCREF(s->defaultfn);
1380 Py_INCREF(s->encoder);
1381 Py_INCREF(s->indent);
1382 Py_INCREF(s->key_separator);
1383 Py_INCREF(s->item_separator);
1384 Py_INCREF(s->sort_keys);
1385 Py_INCREF(s->skipkeys);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001386 return 0;
1387}
1388
1389static PyObject *
1390encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1391{
1392 /* Python callable interface to encode_listencode_obj */
1393 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1394 PyObject *obj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001395 Py_ssize_t indent_level;
1396 PyEncoderObject *s;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001397 _PyAccu acc;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001398
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001399 assert(PyEncoder_Check(self));
1400 s = (PyEncoderObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001401 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1402 &obj, &indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001403 return NULL;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001404 if (_PyAccu_Init(&acc))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001405 return NULL;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001406 if (encoder_listencode_obj(s, &acc, obj, indent_level)) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001407 _PyAccu_Destroy(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001408 return NULL;
1409 }
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001410 return _PyAccu_FinishAsList(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001411}
1412
1413static PyObject *
1414_encoded_const(PyObject *obj)
1415{
1416 /* Return the JSON string representation of None, True, False */
1417 if (obj == Py_None) {
1418 static PyObject *s_null = NULL;
1419 if (s_null == NULL) {
1420 s_null = PyUnicode_InternFromString("null");
1421 }
1422 Py_INCREF(s_null);
1423 return s_null;
1424 }
1425 else if (obj == Py_True) {
1426 static PyObject *s_true = NULL;
1427 if (s_true == NULL) {
1428 s_true = PyUnicode_InternFromString("true");
1429 }
1430 Py_INCREF(s_true);
1431 return s_true;
1432 }
1433 else if (obj == Py_False) {
1434 static PyObject *s_false = NULL;
1435 if (s_false == NULL) {
1436 s_false = PyUnicode_InternFromString("false");
1437 }
1438 Py_INCREF(s_false);
1439 return s_false;
1440 }
1441 else {
1442 PyErr_SetString(PyExc_ValueError, "not a const");
1443 return NULL;
1444 }
1445}
1446
1447static PyObject *
Ethan Furmana4998a72013-08-10 13:01:45 -07001448encoder_encode_long(PyEncoderObject* s UNUSED, PyObject *obj)
1449{
1450 /* Return the JSON representation of a PyLong and PyLong subclasses.
1451 Calls int() on PyLong subclasses in case the str() was changed.
1452 Added specifically to deal with IntEnum. See Issue18264. */
1453 PyObject *encoded, *longobj;
1454 if (PyLong_CheckExact(obj)) {
1455 encoded = PyObject_Str(obj);
1456 }
1457 else {
1458 longobj = PyNumber_Long(obj);
1459 if (longobj == NULL) {
1460 PyErr_SetString(
1461 PyExc_ValueError,
1462 "Unable to coerce int subclass to int"
1463 );
1464 return NULL;
1465 }
1466 encoded = PyObject_Str(longobj);
1467 Py_DECREF(longobj);
1468 }
1469 return encoded;
1470}
1471
1472
1473static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001474encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1475{
Ethan Furmana4998a72013-08-10 13:01:45 -07001476 /* Return the JSON representation of a PyFloat.
1477 Modified to call float() on float subclasses in case the subclass
1478 changes the repr. See Issue18264. */
1479 PyObject *encoded, *floatobj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001480 double i = PyFloat_AS_DOUBLE(obj);
1481 if (!Py_IS_FINITE(i)) {
1482 if (!s->allow_nan) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001483 PyErr_SetString(
1484 PyExc_ValueError,
1485 "Out of range float values are not JSON compliant"
1486 );
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001487 return NULL;
1488 }
1489 if (i > 0) {
1490 return PyUnicode_FromString("Infinity");
1491 }
1492 else if (i < 0) {
1493 return PyUnicode_FromString("-Infinity");
1494 }
1495 else {
1496 return PyUnicode_FromString("NaN");
1497 }
1498 }
Ethan Furmana4998a72013-08-10 13:01:45 -07001499 /* coerce float subclasses to float (primarily for Enum) */
1500 if (PyFloat_CheckExact(obj)) {
1501 /* Use a better float format here? */
1502 encoded = PyObject_Repr(obj);
1503 }
1504 else {
1505 floatobj = PyNumber_Float(obj);
1506 if (floatobj == NULL) {
1507 PyErr_SetString(
1508 PyExc_ValueError,
1509 "Unable to coerce float subclass to float"
1510 );
1511 return NULL;
1512 }
1513 encoded = PyObject_Repr(floatobj);
1514 Py_DECREF(floatobj);
1515 }
1516 return encoded;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001517}
1518
1519static PyObject *
1520encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1521{
1522 /* Return the JSON representation of a string */
1523 if (s->fast_encode)
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001524 return s->fast_encode(NULL, obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001525 else
1526 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1527}
1528
1529static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001530_steal_accumulate(_PyAccu *acc, PyObject *stolen)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001531{
1532 /* Append stolen and then decrement its reference count */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001533 int rval = _PyAccu_Accumulate(acc, stolen);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001534 Py_DECREF(stolen);
1535 return rval;
1536}
1537
1538static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001539encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001540 PyObject *obj, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001541{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001542 /* Encode Python object obj to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001543 PyObject *newobj;
1544 int rv;
1545
1546 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1547 PyObject *cstr = _encoded_const(obj);
1548 if (cstr == NULL)
1549 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001550 return _steal_accumulate(acc, cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001551 }
1552 else if (PyUnicode_Check(obj))
1553 {
1554 PyObject *encoded = encoder_encode_string(s, obj);
1555 if (encoded == NULL)
1556 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001557 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001558 }
1559 else if (PyLong_Check(obj)) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001560 PyObject *encoded = encoder_encode_long(s, obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001561 if (encoded == NULL)
1562 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001563 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001564 }
1565 else if (PyFloat_Check(obj)) {
1566 PyObject *encoded = encoder_encode_float(s, obj);
1567 if (encoded == NULL)
1568 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001569 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001570 }
1571 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001572 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1573 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001574 rv = encoder_listencode_list(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001575 Py_LeaveRecursiveCall();
1576 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001577 }
1578 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001579 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1580 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001581 rv = encoder_listencode_dict(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001582 Py_LeaveRecursiveCall();
1583 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001584 }
1585 else {
1586 PyObject *ident = NULL;
1587 if (s->markers != Py_None) {
1588 int has_key;
1589 ident = PyLong_FromVoidPtr(obj);
1590 if (ident == NULL)
1591 return -1;
1592 has_key = PyDict_Contains(s->markers, ident);
1593 if (has_key) {
1594 if (has_key != -1)
1595 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1596 Py_DECREF(ident);
1597 return -1;
1598 }
1599 if (PyDict_SetItem(s->markers, ident, obj)) {
1600 Py_DECREF(ident);
1601 return -1;
1602 }
1603 }
1604 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
1605 if (newobj == NULL) {
1606 Py_XDECREF(ident);
1607 return -1;
1608 }
Ezio Melotti13672652011-05-11 01:02:56 +03001609
1610 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1611 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001612 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001613 Py_LeaveRecursiveCall();
1614
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001615 Py_DECREF(newobj);
1616 if (rv) {
1617 Py_XDECREF(ident);
1618 return -1;
1619 }
1620 if (ident != NULL) {
1621 if (PyDict_DelItem(s->markers, ident)) {
1622 Py_XDECREF(ident);
1623 return -1;
1624 }
1625 Py_XDECREF(ident);
1626 }
1627 return rv;
1628 }
1629}
1630
1631static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001632encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001633 PyObject *dct, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001634{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001635 /* Encode Python dict dct a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001636 static PyObject *open_dict = NULL;
1637 static PyObject *close_dict = NULL;
1638 static PyObject *empty_dict = NULL;
1639 PyObject *kstr = NULL;
1640 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001641 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001642 PyObject *items;
1643 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001644 int skipkeys;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001645 int sortkeys;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001646 Py_ssize_t idx;
1647
1648 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1649 open_dict = PyUnicode_InternFromString("{");
1650 close_dict = PyUnicode_InternFromString("}");
1651 empty_dict = PyUnicode_InternFromString("{}");
1652 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1653 return -1;
1654 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001655 if (Py_SIZE(dct) == 0)
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001656 return _PyAccu_Accumulate(acc, empty_dict);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001657
1658 if (s->markers != Py_None) {
1659 int has_key;
1660 ident = PyLong_FromVoidPtr(dct);
1661 if (ident == NULL)
1662 goto bail;
1663 has_key = PyDict_Contains(s->markers, ident);
1664 if (has_key) {
1665 if (has_key != -1)
1666 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1667 goto bail;
1668 }
1669 if (PyDict_SetItem(s->markers, ident, dct)) {
1670 goto bail;
1671 }
1672 }
1673
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001674 if (_PyAccu_Accumulate(acc, open_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001675 goto bail;
1676
1677 if (s->indent != Py_None) {
1678 /* TODO: DOES NOT RUN */
1679 indent_level += 1;
1680 /*
1681 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1682 separator = _item_separator + newline_indent
1683 buf += newline_indent
1684 */
1685 }
1686
Benjamin Peterson501182a2015-05-02 22:28:04 -04001687 items = PyMapping_Items(dct);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001688 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001689 goto bail;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001690 sortkeys = PyObject_IsTrue(s->sort_keys);
1691 if (sortkeys < 0 || (sortkeys && PyList_Sort(items) < 0))
Benjamin Peterson501182a2015-05-02 22:28:04 -04001692 goto bail;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001693 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001694 Py_DECREF(items);
1695 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001696 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001697 skipkeys = PyObject_IsTrue(s->skipkeys);
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001698 if (skipkeys < 0)
1699 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001700 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001701 while ((item = PyIter_Next(it)) != NULL) {
1702 PyObject *encoded, *key, *value;
1703 if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
1704 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1705 goto bail;
1706 }
1707 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001708 if (PyUnicode_Check(key)) {
1709 Py_INCREF(key);
1710 kstr = key;
1711 }
1712 else if (PyFloat_Check(key)) {
1713 kstr = encoder_encode_float(s, key);
1714 if (kstr == NULL)
1715 goto bail;
1716 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001717 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001718 /* This must come before the PyLong_Check because
1719 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001720 kstr = _encoded_const(key);
1721 if (kstr == NULL)
1722 goto bail;
1723 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001724 else if (PyLong_Check(key)) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001725 kstr = encoder_encode_long(s, key);
1726 if (kstr == NULL) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001727 goto bail;
Ethan Furmana4998a72013-08-10 13:01:45 -07001728 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001729 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001730 else if (skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001731 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001732 continue;
1733 }
1734 else {
1735 /* TODO: include repr of key */
Doug Hellmann1c524752010-07-21 12:29:04 +00001736 PyErr_SetString(PyExc_TypeError, "keys must be a string");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001737 goto bail;
1738 }
1739
1740 if (idx) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001741 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001742 goto bail;
1743 }
1744
1745 encoded = encoder_encode_string(s, kstr);
1746 Py_CLEAR(kstr);
1747 if (encoded == NULL)
1748 goto bail;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001749 if (_PyAccu_Accumulate(acc, encoded)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001750 Py_DECREF(encoded);
1751 goto bail;
1752 }
1753 Py_DECREF(encoded);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001754 if (_PyAccu_Accumulate(acc, s->key_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001755 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001756
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001757 value = PyTuple_GET_ITEM(item, 1);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001758 if (encoder_listencode_obj(s, acc, value, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001759 goto bail;
1760 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001761 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001762 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001763 if (PyErr_Occurred())
1764 goto bail;
1765 Py_CLEAR(it);
1766
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001767 if (ident != NULL) {
1768 if (PyDict_DelItem(s->markers, ident))
1769 goto bail;
1770 Py_CLEAR(ident);
1771 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001772 /* TODO DOES NOT RUN; dead code
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001773 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001774 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001775
1776 yield '\n' + (' ' * (_indent * _current_indent_level))
1777 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001778 if (_PyAccu_Accumulate(acc, close_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001779 goto bail;
1780 return 0;
1781
1782bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001783 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001784 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001785 Py_XDECREF(kstr);
1786 Py_XDECREF(ident);
1787 return -1;
1788}
1789
1790
1791static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001792encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001793 PyObject *seq, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001794{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001795 /* Encode Python list seq to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001796 static PyObject *open_array = NULL;
1797 static PyObject *close_array = NULL;
1798 static PyObject *empty_array = NULL;
1799 PyObject *ident = NULL;
1800 PyObject *s_fast = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001801 Py_ssize_t i;
1802
1803 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1804 open_array = PyUnicode_InternFromString("[");
1805 close_array = PyUnicode_InternFromString("]");
1806 empty_array = PyUnicode_InternFromString("[]");
1807 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1808 return -1;
1809 }
1810 ident = NULL;
1811 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1812 if (s_fast == NULL)
1813 return -1;
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001814 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001815 Py_DECREF(s_fast);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001816 return _PyAccu_Accumulate(acc, empty_array);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001817 }
1818
1819 if (s->markers != Py_None) {
1820 int has_key;
1821 ident = PyLong_FromVoidPtr(seq);
1822 if (ident == NULL)
1823 goto bail;
1824 has_key = PyDict_Contains(s->markers, ident);
1825 if (has_key) {
1826 if (has_key != -1)
1827 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1828 goto bail;
1829 }
1830 if (PyDict_SetItem(s->markers, ident, seq)) {
1831 goto bail;
1832 }
1833 }
1834
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001835 if (_PyAccu_Accumulate(acc, open_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001836 goto bail;
1837 if (s->indent != Py_None) {
1838 /* TODO: DOES NOT RUN */
1839 indent_level += 1;
1840 /*
1841 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1842 separator = _item_separator + newline_indent
1843 buf += newline_indent
1844 */
1845 }
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001846 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1847 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001848 if (i) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001849 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001850 goto bail;
1851 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001852 if (encoder_listencode_obj(s, acc, obj, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001853 goto bail;
1854 }
1855 if (ident != NULL) {
1856 if (PyDict_DelItem(s->markers, ident))
1857 goto bail;
1858 Py_CLEAR(ident);
1859 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001860
1861 /* TODO: DOES NOT RUN
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001862 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001863 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001864
1865 yield '\n' + (' ' * (_indent * _current_indent_level))
1866 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001867 if (_PyAccu_Accumulate(acc, close_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001868 goto bail;
1869 Py_DECREF(s_fast);
1870 return 0;
1871
1872bail:
1873 Py_XDECREF(ident);
1874 Py_DECREF(s_fast);
1875 return -1;
1876}
1877
1878static void
1879encoder_dealloc(PyObject *self)
1880{
1881 /* Deallocate Encoder */
1882 encoder_clear(self);
1883 Py_TYPE(self)->tp_free(self);
1884}
1885
1886static int
1887encoder_traverse(PyObject *self, visitproc visit, void *arg)
1888{
1889 PyEncoderObject *s;
1890 assert(PyEncoder_Check(self));
1891 s = (PyEncoderObject *)self;
1892 Py_VISIT(s->markers);
1893 Py_VISIT(s->defaultfn);
1894 Py_VISIT(s->encoder);
1895 Py_VISIT(s->indent);
1896 Py_VISIT(s->key_separator);
1897 Py_VISIT(s->item_separator);
1898 Py_VISIT(s->sort_keys);
1899 Py_VISIT(s->skipkeys);
1900 return 0;
1901}
1902
1903static int
1904encoder_clear(PyObject *self)
1905{
1906 /* Deallocate Encoder */
1907 PyEncoderObject *s;
1908 assert(PyEncoder_Check(self));
1909 s = (PyEncoderObject *)self;
1910 Py_CLEAR(s->markers);
1911 Py_CLEAR(s->defaultfn);
1912 Py_CLEAR(s->encoder);
1913 Py_CLEAR(s->indent);
1914 Py_CLEAR(s->key_separator);
1915 Py_CLEAR(s->item_separator);
1916 Py_CLEAR(s->sort_keys);
1917 Py_CLEAR(s->skipkeys);
1918 return 0;
1919}
1920
/* Docstring attached to the encoder type through tp_doc below. */
PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");

/* Type object for "_json.Encoder"; PyInit__json() publishes it under the
   module attribute "make_encoder".  Instances are callable (tp_call) and
   take part in cyclic GC (Py_TPFLAGS_HAVE_GC with traverse/clear).
   The slots are initialised positionally, so their order must not change.
   NOTE(review): PyInit__json() assigns PyType_GenericNew to tp_new before
   PyType_Ready(), replacing the encoder_new entry given here - confirm
   that this is intended. */
static
PyTypeObject PyEncoderType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_json.Encoder",       /* tp_name */
    sizeof(PyEncoderObject), /* tp_basicsize */
    0,                    /* tp_itemsize */
    encoder_dealloc, /* tp_dealloc */
    0,                    /* tp_print */
    0,                    /* tp_getattr */
    0,                    /* tp_setattr */
    0,                    /* tp_compare (reserved slot in Python 3) */
    0,                    /* tp_repr */
    0,                    /* tp_as_number */
    0,                    /* tp_as_sequence */
    0,                    /* tp_as_mapping */
    0,                    /* tp_hash */
    encoder_call,         /* tp_call */
    0,                    /* tp_str */
    0,                    /* tp_getattro */
    0,                    /* tp_setattro */
    0,                    /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
    encoder_doc,          /* tp_doc */
    encoder_traverse,     /* tp_traverse */
    encoder_clear,        /* tp_clear */
    0,                    /* tp_richcompare */
    0,                    /* tp_weaklistoffset */
    0,                    /* tp_iter */
    0,                    /* tp_iternext */
    0,                    /* tp_methods */
    encoder_members,      /* tp_members */
    0,                    /* tp_getset */
    0,                    /* tp_base */
    0,                    /* tp_dict */
    0,                    /* tp_descr_get */
    0,                    /* tp_descr_set */
    0,                    /* tp_dictoffset */
    encoder_init,         /* tp_init */
    0,                    /* tp_alloc */
    encoder_new,          /* tp_new */
    0,                    /* tp_free */
};
1965
1966static PyMethodDef speedups_methods[] = {
1967 {"encode_basestring_ascii",
1968 (PyCFunction)py_encode_basestring_ascii,
1969 METH_O,
1970 pydoc_encode_basestring_ascii},
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001971 {"encode_basestring",
1972 (PyCFunction)py_encode_basestring,
1973 METH_O,
1974 pydoc_encode_basestring},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001975 {"scanstring",
1976 (PyCFunction)py_scanstring,
1977 METH_VARARGS,
1978 pydoc_scanstring},
Christian Heimes90540002008-05-08 14:29:10 +00001979 {NULL, NULL, 0, NULL}
1980};
1981
1982PyDoc_STRVAR(module_doc,
1983"json speedups\n");
1984
Martin v. Löwis1a214512008-06-11 05:26:20 +00001985static struct PyModuleDef jsonmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001986 PyModuleDef_HEAD_INIT,
1987 "_json",
1988 module_doc,
1989 -1,
1990 speedups_methods,
1991 NULL,
1992 NULL,
1993 NULL,
1994 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001995};
1996
Victor Stinnerf024d262015-03-17 17:48:27 +01001997PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001998PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00001999{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00002000 PyObject *m = PyModule_Create(&jsonmodule);
2001 if (!m)
2002 return NULL;
2003 PyScannerType.tp_new = PyType_GenericNew;
2004 if (PyType_Ready(&PyScannerType) < 0)
2005 goto fail;
2006 PyEncoderType.tp_new = PyType_GenericNew;
2007 if (PyType_Ready(&PyEncoderType) < 0)
2008 goto fail;
2009 Py_INCREF((PyObject*)&PyScannerType);
2010 if (PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType) < 0) {
2011 Py_DECREF((PyObject*)&PyScannerType);
2012 goto fail;
2013 }
2014 Py_INCREF((PyObject*)&PyEncoderType);
2015 if (PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType) < 0) {
2016 Py_DECREF((PyObject*)&PyEncoderType);
2017 goto fail;
2018 }
2019 return m;
2020 fail:
2021 Py_DECREF(m);
2022 return NULL;
Christian Heimes90540002008-05-08 14:29:10 +00002023}