blob: f4000f83a55891181929118786bf037cb2df2e38 [file] [log] [blame]
Christian Heimes90540002008-05-08 14:29:10 +00001#include "Python.h"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00002#include "structmember.h"
Antoine Pitroud0acb412012-03-22 14:42:18 +01003#include "accu.h"
4
/* UNUSED marks a declaration as intentionally unused so GCC-compatible
   compilers do not warn about it; elsewhere it expands to nothing. */
#if defined(__GNUC__)
#  define UNUSED __attribute__((__unused__))
#else
#  define UNUSED
#endif
10
11#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
12#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
13#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
14#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
15
16static PyTypeObject PyScannerType;
17static PyTypeObject PyEncoderType;
18
19typedef struct _PyScannerObject {
20 PyObject_HEAD
21 PyObject *strict;
22 PyObject *object_hook;
23 PyObject *object_pairs_hook;
24 PyObject *parse_float;
25 PyObject *parse_int;
26 PyObject *parse_constant;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +000027 PyObject *memo;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000028} PyScannerObject;
29
30static PyMemberDef scanner_members[] = {
31 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
32 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
33 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
34 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
35 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
36 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
37 {NULL}
38};
39
40typedef struct _PyEncoderObject {
41 PyObject_HEAD
42 PyObject *markers;
43 PyObject *defaultfn;
44 PyObject *encoder;
45 PyObject *indent;
46 PyObject *key_separator;
47 PyObject *item_separator;
48 PyObject *sort_keys;
49 PyObject *skipkeys;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +010050 PyCFunction fast_encode;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000051 int allow_nan;
52} PyEncoderObject;
53
54static PyMemberDef encoder_members[] = {
55 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
56 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
57 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
58 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
59 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
60 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
61 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
62 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
63 {NULL}
64};
65
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020066static PyObject *
67join_list_unicode(PyObject *lst)
68{
69 /* return u''.join(lst) */
70 static PyObject *sep = NULL;
71 if (sep == NULL) {
72 sep = PyUnicode_FromStringAndSize("", 0);
73 if (sep == NULL)
74 return NULL;
75 }
76 return PyUnicode_Join(sep, lst);
77}
78
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020079/* Forward decls */
80
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000081static PyObject *
82ascii_escape_unicode(PyObject *pystr);
83static PyObject *
84py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
85void init_json(void);
86static PyObject *
87scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
88static PyObject *
89_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
90static PyObject *
91scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
92static int
93scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
94static void
95scanner_dealloc(PyObject *self);
96static int
97scanner_clear(PyObject *self);
98static PyObject *
99encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
100static int
101encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
102static void
103encoder_dealloc(PyObject *self);
104static int
105encoder_clear(PyObject *self);
106static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200107encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000108static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200109encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000110static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200111encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000112static PyObject *
Hirokazu Yamamotofecf5d12009-05-02 15:55:19 +0000113_encoded_const(PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000114static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200115raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000116static PyObject *
117encoder_encode_string(PyEncoderObject *s, PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000118static PyObject *
119encoder_encode_float(PyEncoderObject *s, PyObject *obj);
120
/* S_CHAR: true for printable ASCII (' ' through '~') that can be copied into a
   JSON string unescaped, i.e. everything in that range except backslash and
   double quote.
   The argument is parenthesized so that compound expressions (for example a
   ternary) expand correctly. It is still evaluated several times, so pass an
   expression without side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* IS_WHITESPACE: true for space, tab, line feed and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
Christian Heimes90540002008-05-08 14:29:10 +0000123
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000124static Py_ssize_t
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200125ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000126{
127 /* Escape unicode code point c to ASCII escape sequences
128 in char *output. output must have at least 12 bytes unused to
129 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000130 output[chars++] = '\\';
131 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000132 case '\\': output[chars++] = c; break;
133 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000134 case '\b': output[chars++] = 'b'; break;
135 case '\f': output[chars++] = 'f'; break;
136 case '\n': output[chars++] = 'n'; break;
137 case '\r': output[chars++] = 'r'; break;
138 case '\t': output[chars++] = 't'; break;
139 default:
Christian Heimes90540002008-05-08 14:29:10 +0000140 if (c >= 0x10000) {
141 /* UTF-16 surrogate pair */
Victor Stinner76df43d2012-10-30 01:42:39 +0100142 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000143 output[chars++] = 'u';
Victor Stinner76df43d2012-10-30 01:42:39 +0100144 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
145 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
146 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
147 output[chars++] = Py_hexdigits[(v ) & 0xf];
148 c = Py_UNICODE_LOW_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000149 output[chars++] = '\\';
150 }
Christian Heimes90540002008-05-08 14:29:10 +0000151 output[chars++] = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200152 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
153 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
154 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
155 output[chars++] = Py_hexdigits[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000156 }
157 return chars;
158}
159
160static PyObject *
161ascii_escape_unicode(PyObject *pystr)
162{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000163 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000164 Py_ssize_t i;
165 Py_ssize_t input_chars;
166 Py_ssize_t output_size;
167 Py_ssize_t chars;
168 PyObject *rval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200169 void *input;
170 unsigned char *output;
171 int kind;
Christian Heimes90540002008-05-08 14:29:10 +0000172
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200173 if (PyUnicode_READY(pystr) == -1)
174 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000175
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200176 input_chars = PyUnicode_GET_LENGTH(pystr);
177 input = PyUnicode_DATA(pystr);
178 kind = PyUnicode_KIND(pystr);
179
180 /* Compute the output size */
181 for (i = 0, output_size = 2; i < input_chars; i++) {
182 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500183 Py_ssize_t d;
184 if (S_CHAR(c)) {
185 d = 1;
186 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200187 else {
188 switch(c) {
Victor Stinnerd9c06312011-10-11 21:56:19 +0200189 case '\\': case '"': case '\b': case '\f':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200190 case '\n': case '\r': case '\t':
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500191 d = 2; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200192 default:
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500193 d = c >= 0x10000 ? 12 : 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200194 }
195 }
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500196 if (output_size > PY_SSIZE_T_MAX - d) {
197 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
198 return NULL;
199 }
200 output_size += d;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200201 }
202
203 rval = PyUnicode_New(output_size, 127);
Christian Heimes90540002008-05-08 14:29:10 +0000204 if (rval == NULL) {
205 return NULL;
206 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200207 output = PyUnicode_1BYTE_DATA(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000208 chars = 0;
209 output[chars++] = '"';
210 for (i = 0; i < input_chars; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200211 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Christian Heimes90540002008-05-08 14:29:10 +0000212 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000213 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000214 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000215 else {
216 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000217 }
Christian Heimes90540002008-05-08 14:29:10 +0000218 }
219 output[chars++] = '"';
Christian Heimesf402e922013-01-03 09:21:55 +0100220#ifdef Py_DEBUG
Victor Stinner8f825062012-04-27 13:55:39 +0200221 assert(_PyUnicode_CheckConsistency(rval, 1));
Christian Heimesf402e922013-01-03 09:21:55 +0100222#endif
Christian Heimes90540002008-05-08 14:29:10 +0000223 return rval;
224}
225
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100226static PyObject *
227escape_unicode(PyObject *pystr)
228{
229 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
230 Py_ssize_t i;
231 Py_ssize_t input_chars;
232 Py_ssize_t output_size;
233 Py_ssize_t chars;
234 PyObject *rval;
235 void *input;
236 int kind;
237 Py_UCS4 maxchar;
238
239 if (PyUnicode_READY(pystr) == -1)
240 return NULL;
241
242 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
243 input_chars = PyUnicode_GET_LENGTH(pystr);
244 input = PyUnicode_DATA(pystr);
245 kind = PyUnicode_KIND(pystr);
246
247 /* Compute the output size */
248 for (i = 0, output_size = 2; i < input_chars; i++) {
249 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500250 Py_ssize_t d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100251 switch (c) {
252 case '\\': case '"': case '\b': case '\f':
253 case '\n': case '\r': case '\t':
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500254 d = 2;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100255 break;
256 default:
257 if (c <= 0x1f)
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500258 d = 6;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100259 else
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500260 d = 1;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100261 }
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500262 if (output_size > PY_SSIZE_T_MAX - d) {
263 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
264 return NULL;
265 }
266 output_size += d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100267 }
268
269 rval = PyUnicode_New(output_size, maxchar);
270 if (rval == NULL)
271 return NULL;
272
273 kind = PyUnicode_KIND(rval);
274
275#define ENCODE_OUTPUT do { \
276 chars = 0; \
277 output[chars++] = '"'; \
278 for (i = 0; i < input_chars; i++) { \
279 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
280 switch (c) { \
281 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
282 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
283 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
284 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
285 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
286 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
287 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
288 default: \
289 if (c <= 0x1f) { \
290 output[chars++] = '\\'; \
291 output[chars++] = 'u'; \
292 output[chars++] = '0'; \
293 output[chars++] = '0'; \
294 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
295 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
296 } else { \
297 output[chars++] = c; \
298 } \
299 } \
300 } \
301 output[chars++] = '"'; \
302 } while (0)
303
304 if (kind == PyUnicode_1BYTE_KIND) {
305 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
306 ENCODE_OUTPUT;
307 } else if (kind == PyUnicode_2BYTE_KIND) {
308 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
309 ENCODE_OUTPUT;
310 } else {
311 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
312 assert(kind == PyUnicode_4BYTE_KIND);
313 ENCODE_OUTPUT;
314 }
315#undef ENCODE_OUTPUT
316
317#ifdef Py_DEBUG
318 assert(_PyUnicode_CheckConsistency(rval, 1));
319#endif
320 return rval;
321}
322
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000323static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200324raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
Christian Heimes90540002008-05-08 14:29:10 +0000325{
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200326 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
327 static PyObject *JSONDecodeError = NULL;
328 PyObject *exc;
329 if (JSONDecodeError == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000330 PyObject *decoder = PyImport_ImportModule("json.decoder");
331 if (decoder == NULL)
332 return;
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200333 JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000334 Py_DECREF(decoder);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200335 if (JSONDecodeError == NULL)
Christian Heimes90540002008-05-08 14:29:10 +0000336 return;
Christian Heimes90540002008-05-08 14:29:10 +0000337 }
Victor Stinner4c381542016-12-09 00:33:39 +0100338 exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200339 if (exc) {
340 PyErr_SetObject(JSONDecodeError, exc);
341 Py_DECREF(exc);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000342 }
Christian Heimes90540002008-05-08 14:29:10 +0000343}
344
Ezio Melotti37623ab2013-01-03 08:44:15 +0200345static void
346raise_stop_iteration(Py_ssize_t idx)
347{
348 PyObject *value = PyLong_FromSsize_t(idx);
349 if (value != NULL) {
350 PyErr_SetObject(PyExc_StopIteration, value);
351 Py_DECREF(value);
352 }
353}
354
Christian Heimes90540002008-05-08 14:29:10 +0000355static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000356_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
357 /* return (rval, idx) tuple, stealing reference to rval */
358 PyObject *tpl;
359 PyObject *pyidx;
360 /*
361 steal a reference to rval, returns (rval, idx)
362 */
363 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000364 return NULL;
365 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000366 pyidx = PyLong_FromSsize_t(idx);
367 if (pyidx == NULL) {
368 Py_DECREF(rval);
369 return NULL;
370 }
371 tpl = PyTuple_New(2);
372 if (tpl == NULL) {
373 Py_DECREF(pyidx);
374 Py_DECREF(rval);
375 return NULL;
376 }
377 PyTuple_SET_ITEM(tpl, 0, rval);
378 PyTuple_SET_ITEM(tpl, 1, pyidx);
379 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000380}
381
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000382#define APPEND_OLD_CHUNK \
383 if (chunk != NULL) { \
384 if (chunks == NULL) { \
385 chunks = PyList_New(0); \
386 if (chunks == NULL) { \
387 goto bail; \
388 } \
389 } \
390 if (PyList_Append(chunks, chunk)) { \
Victor Stinner31a3ec32014-09-10 23:31:42 +0200391 Py_CLEAR(chunk); \
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000392 goto bail; \
393 } \
394 Py_CLEAR(chunk); \
395 }
396
Christian Heimes90540002008-05-08 14:29:10 +0000397static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000398scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000399{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000400 /* Read the JSON string from PyUnicode pystr.
401 end is the index of the first character after the quote.
402 if strict is zero then literal control characters are allowed
403 *next_end_ptr is a return-by-reference index of the character
404 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000405
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000406 Return value is a new PyUnicode
407 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000408 PyObject *rval = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200409 Py_ssize_t len;
Christian Heimes90540002008-05-08 14:29:10 +0000410 Py_ssize_t begin = end - 1;
Brett Cannonb94767f2011-02-22 20:15:44 +0000411 Py_ssize_t next /* = begin */;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200412 const void *buf;
413 int kind;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000414 PyObject *chunks = NULL;
415 PyObject *chunk = NULL;
416
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200417 if (PyUnicode_READY(pystr) == -1)
418 return 0;
419
420 len = PyUnicode_GET_LENGTH(pystr);
421 buf = PyUnicode_DATA(pystr);
422 kind = PyUnicode_KIND(pystr);
423
Ezio Melotti37623ab2013-01-03 08:44:15 +0200424 if (end < 0 || len < end) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000425 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
426 goto bail;
427 }
Christian Heimes90540002008-05-08 14:29:10 +0000428 while (1) {
429 /* Find the end of the string or the next escape */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200430 Py_UCS4 c = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000431 for (next = end; next < len; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200432 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000433 if (c == '"' || c == '\\') {
434 break;
435 }
436 else if (strict && c <= 0x1f) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000437 raise_errmsg("Invalid control character at", pystr, next);
Christian Heimes90540002008-05-08 14:29:10 +0000438 goto bail;
439 }
440 }
441 if (!(c == '"' || c == '\\')) {
442 raise_errmsg("Unterminated string starting at", pystr, begin);
443 goto bail;
444 }
445 /* Pick up this chunk if it's not zero length */
446 if (next != end) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000447 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200448 chunk = PyUnicode_FromKindAndData(
449 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200450 (char*)buf + kind * end,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200451 next - end);
Christian Heimes90540002008-05-08 14:29:10 +0000452 if (chunk == NULL) {
453 goto bail;
454 }
Christian Heimes90540002008-05-08 14:29:10 +0000455 }
456 next++;
457 if (c == '"') {
458 end = next;
459 break;
460 }
461 if (next == len) {
462 raise_errmsg("Unterminated string starting at", pystr, begin);
463 goto bail;
464 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200465 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000466 if (c != 'u') {
467 /* Non-unicode backslash escapes */
468 end = next + 1;
469 switch (c) {
470 case '"': break;
471 case '\\': break;
472 case '/': break;
473 case 'b': c = '\b'; break;
474 case 'f': c = '\f'; break;
475 case 'n': c = '\n'; break;
476 case 'r': c = '\r'; break;
477 case 't': c = '\t'; break;
478 default: c = 0;
479 }
480 if (c == 0) {
481 raise_errmsg("Invalid \\escape", pystr, end - 2);
482 goto bail;
483 }
484 }
485 else {
486 c = 0;
487 next++;
488 end = next + 4;
489 if (end >= len) {
490 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
491 goto bail;
492 }
493 /* Decode 4 hex digits */
494 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200495 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000496 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000497 switch (digit) {
498 case '0': case '1': case '2': case '3': case '4':
499 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000500 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000501 case 'a': case 'b': case 'c': case 'd': case 'e':
502 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000503 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000504 case 'A': case 'B': case 'C': case 'D': case 'E':
505 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000506 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000507 default:
508 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
509 goto bail;
510 }
511 }
Christian Heimes90540002008-05-08 14:29:10 +0000512 /* Surrogate pair */
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200513 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
514 PyUnicode_READ(kind, buf, next++) == '\\' &&
515 PyUnicode_READ(kind, buf, next++) == 'u') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200516 Py_UCS4 c2 = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000517 end += 6;
518 /* Decode 4 hex digits */
519 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200520 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000521 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000522 switch (digit) {
523 case '0': case '1': case '2': case '3': case '4':
524 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000525 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000526 case 'a': case 'b': case 'c': case 'd': case 'e':
527 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000528 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000529 case 'A': case 'B': case 'C': case 'D': case 'E':
530 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000531 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000532 default:
533 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
534 goto bail;
535 }
536 }
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200537 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
538 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
539 else
540 end -= 6;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000541 }
Christian Heimes90540002008-05-08 14:29:10 +0000542 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000543 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200544 chunk = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &c, 1);
Christian Heimes90540002008-05-08 14:29:10 +0000545 if (chunk == NULL) {
546 goto bail;
547 }
Christian Heimes90540002008-05-08 14:29:10 +0000548 }
549
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000550 if (chunks == NULL) {
551 if (chunk != NULL)
552 rval = chunk;
553 else
554 rval = PyUnicode_FromStringAndSize("", 0);
Christian Heimes90540002008-05-08 14:29:10 +0000555 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000556 else {
557 APPEND_OLD_CHUNK
558 rval = join_list_unicode(chunks);
559 if (rval == NULL) {
560 goto bail;
561 }
562 Py_CLEAR(chunks);
563 }
564
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000565 *next_end_ptr = end;
566 return rval;
Christian Heimes90540002008-05-08 14:29:10 +0000567bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000568 *next_end_ptr = -1;
Christian Heimes90540002008-05-08 14:29:10 +0000569 Py_XDECREF(chunks);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000570 Py_XDECREF(chunk);
Christian Heimes90540002008-05-08 14:29:10 +0000571 return NULL;
572}
573
574PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000575 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000576 "\n"
577 "Scan the string s for a JSON string. End is the index of the\n"
578 "character in s after the quote that started the JSON string.\n"
579 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
580 "on attempt to decode an invalid string. If strict is False then literal\n"
581 "control characters are allowed in the string.\n"
582 "\n"
583 "Returns a tuple of the decoded string and the index of the character in s\n"
584 "after the end quote."
585);
Christian Heimes90540002008-05-08 14:29:10 +0000586
587static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000588py_scanstring(PyObject* self UNUSED, PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000589{
590 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000591 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000592 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000593 Py_ssize_t next_end = -1;
594 int strict = 1;
Antoine Pitroucbb02842012-12-01 19:34:16 +0100595 if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000596 return NULL;
597 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000598 if (PyUnicode_Check(pystr)) {
599 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000600 }
601 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000602 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000603 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000604 Py_TYPE(pystr)->tp_name);
605 return NULL;
606 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000607 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000608}
609
610PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000611 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000612 "\n"
613 "Return an ASCII-only JSON representation of a Python string"
614);
Christian Heimes90540002008-05-08 14:29:10 +0000615
616static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000617py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000618{
619 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000620 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000621 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000622 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000623 rval = ascii_escape_unicode(pystr);
624 }
625 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000626 PyErr_Format(PyExc_TypeError,
627 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000628 Py_TYPE(pystr)->tp_name);
629 return NULL;
630 }
Christian Heimes90540002008-05-08 14:29:10 +0000631 return rval;
632}
633
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100634
635PyDoc_STRVAR(pydoc_encode_basestring,
636 "encode_basestring(string) -> string\n"
637 "\n"
638 "Return a JSON representation of a Python string"
639);
640
641static PyObject *
642py_encode_basestring(PyObject* self UNUSED, PyObject *pystr)
643{
644 PyObject *rval;
645 /* Return a JSON representation of a Python string */
646 /* METH_O */
647 if (PyUnicode_Check(pystr)) {
648 rval = escape_unicode(pystr);
649 }
650 else {
651 PyErr_Format(PyExc_TypeError,
652 "first argument must be a string, not %.80s",
653 Py_TYPE(pystr)->tp_name);
654 return NULL;
655 }
656 return rval;
657}
658
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000659static void
660scanner_dealloc(PyObject *self)
661{
662 /* Deallocate scanner object */
663 scanner_clear(self);
664 Py_TYPE(self)->tp_free(self);
665}
666
667static int
668scanner_traverse(PyObject *self, visitproc visit, void *arg)
669{
670 PyScannerObject *s;
671 assert(PyScanner_Check(self));
672 s = (PyScannerObject *)self;
673 Py_VISIT(s->strict);
674 Py_VISIT(s->object_hook);
675 Py_VISIT(s->object_pairs_hook);
676 Py_VISIT(s->parse_float);
677 Py_VISIT(s->parse_int);
678 Py_VISIT(s->parse_constant);
679 return 0;
680}
681
682static int
683scanner_clear(PyObject *self)
684{
685 PyScannerObject *s;
686 assert(PyScanner_Check(self));
687 s = (PyScannerObject *)self;
688 Py_CLEAR(s->strict);
689 Py_CLEAR(s->object_hook);
690 Py_CLEAR(s->object_pairs_hook);
691 Py_CLEAR(s->parse_float);
692 Py_CLEAR(s->parse_int);
693 Py_CLEAR(s->parse_constant);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000694 Py_CLEAR(s->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000695 return 0;
696}
697
698static PyObject *
699_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
700 /* Read a JSON object from PyUnicode pystr.
701 idx is the index of the first character after the opening curly brace.
702 *next_idx_ptr is a return-by-reference index to the first character after
703 the closing curly brace.
704
705 Returns a new PyObject (usually a dict, but object_hook can change that)
706 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200707 void *str;
708 int kind;
709 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000710 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000711 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000712 PyObject *key = NULL;
713 int strict = PyObject_IsTrue(s->strict);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000714 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000715 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000716
Serhiy Storchakafa494fd2015-05-30 17:45:22 +0300717 if (strict < 0)
718 return NULL;
719
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200720 if (PyUnicode_READY(pystr) == -1)
721 return NULL;
722
723 str = PyUnicode_DATA(pystr);
724 kind = PyUnicode_KIND(pystr);
725 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
726
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000727 if (has_pairs_hook)
728 rval = PyList_New(0);
729 else
730 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000731 if (rval == NULL)
732 return NULL;
733
734 /* skip whitespace after { */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200735 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000736
737 /* only loop if the object is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200738 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
739 while (1) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000740 PyObject *memokey;
741
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000742 /* read key */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200743 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200744 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000745 goto bail;
746 }
747 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
748 if (key == NULL)
749 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000750 memokey = PyDict_GetItem(s->memo, key);
751 if (memokey != NULL) {
752 Py_INCREF(memokey);
753 Py_DECREF(key);
754 key = memokey;
755 }
756 else {
757 if (PyDict_SetItem(s->memo, key, key) < 0)
758 goto bail;
759 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000760 idx = next_idx;
761
762 /* skip whitespace between key and : delimiter, read :, skip whitespace */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200763 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
764 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200765 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000766 goto bail;
767 }
768 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200769 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000770
771 /* read any JSON term */
772 val = scan_once_unicode(s, pystr, idx, &next_idx);
773 if (val == NULL)
774 goto bail;
775
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000776 if (has_pairs_hook) {
777 PyObject *item = PyTuple_Pack(2, key, val);
778 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000779 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000780 Py_CLEAR(key);
781 Py_CLEAR(val);
782 if (PyList_Append(rval, item) == -1) {
783 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000784 goto bail;
785 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000786 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000787 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000788 else {
789 if (PyDict_SetItem(rval, key, val) < 0)
790 goto bail;
791 Py_CLEAR(key);
792 Py_CLEAR(val);
793 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000794 idx = next_idx;
795
796 /* skip whitespace before } or , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200797 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000798
799 /* bail if the object is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200800 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000801 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200802 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200803 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000804 goto bail;
805 }
806 idx++;
807
808 /* skip whitespace after , delimiter */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200809 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000810 }
811 }
812
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000813 *next_idx_ptr = idx + 1;
814
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000815 if (has_pairs_hook) {
Victor Stinnerde4ae3d2016-12-04 22:59:09 +0100816 val = PyObject_CallFunctionObjArgs(s->object_pairs_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000817 Py_DECREF(rval);
818 return val;
819 }
820
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000821 /* if object_hook is not None: rval = object_hook(rval) */
822 if (s->object_hook != Py_None) {
Victor Stinnerde4ae3d2016-12-04 22:59:09 +0100823 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000824 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000825 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000826 }
827 return rval;
828bail:
829 Py_XDECREF(key);
830 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000831 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000832 return NULL;
833}
834
835static PyObject *
836_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200837 /* Read a JSON array from PyUnicode pystr.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000838 idx is the index of the first character after the opening brace.
839 *next_idx_ptr is a return-by-reference index to the first character after
840 the closing brace.
841
842 Returns a new PyList
843 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200844 void *str;
845 int kind;
846 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000847 PyObject *val = NULL;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200848 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000849 Py_ssize_t next_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000850
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200851 if (PyUnicode_READY(pystr) == -1)
852 return NULL;
853
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200854 rval = PyList_New(0);
855 if (rval == NULL)
856 return NULL;
857
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200858 str = PyUnicode_DATA(pystr);
859 kind = PyUnicode_KIND(pystr);
860 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
861
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000862 /* skip whitespace after [ */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200863 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000864
865 /* only loop if the array is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200866 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
867 while (1) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000868
869 /* read any JSON term */
870 val = scan_once_unicode(s, pystr, idx, &next_idx);
871 if (val == NULL)
872 goto bail;
873
874 if (PyList_Append(rval, val) == -1)
875 goto bail;
876
877 Py_CLEAR(val);
878 idx = next_idx;
879
880 /* skip whitespace between term and , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200881 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000882
883 /* bail if the array is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200884 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000885 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200886 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200887 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000888 goto bail;
889 }
890 idx++;
891
892 /* skip whitespace after , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200893 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000894 }
895 }
896
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200897 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
898 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200899 raise_errmsg("Expecting value", pystr, end_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000900 goto bail;
901 }
902 *next_idx_ptr = idx + 1;
903 return rval;
904bail:
905 Py_XDECREF(val);
906 Py_DECREF(rval);
907 return NULL;
908}
909
910static PyObject *
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200911_parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
912 /* Read a JSON constant.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000913 constant is the constant string that was found
914 ("NaN", "Infinity", "-Infinity").
915 idx is the index of the first character of the constant
916 *next_idx_ptr is a return-by-reference index to the first character after
917 the constant.
918
919 Returns the result of parse_constant
920 */
921 PyObject *cstr;
922 PyObject *rval;
923 /* constant is "NaN", "Infinity", or "-Infinity" */
924 cstr = PyUnicode_InternFromString(constant);
925 if (cstr == NULL)
926 return NULL;
927
928 /* rval = parse_constant(constant) */
Victor Stinnerde4ae3d2016-12-04 22:59:09 +0100929 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200930 idx += PyUnicode_GET_LENGTH(cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000931 Py_DECREF(cstr);
932 *next_idx_ptr = idx;
933 return rval;
934}
935
936static PyObject *
937_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
938 /* Read a JSON number from PyUnicode pystr.
939 idx is the index of the first character of the number
940 *next_idx_ptr is a return-by-reference index to the first character after
941 the number.
942
943 Returns a new PyObject representation of that number:
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200944 PyLong, or PyFloat.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000945 May return other types if parse_int or parse_float are set
946 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200947 void *str;
948 int kind;
949 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000950 Py_ssize_t idx = start;
951 int is_float = 0;
952 PyObject *rval;
Antoine Pitrouf6454512011-04-25 19:16:06 +0200953 PyObject *numstr = NULL;
954 PyObject *custom_func;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000955
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200956 if (PyUnicode_READY(pystr) == -1)
957 return NULL;
958
959 str = PyUnicode_DATA(pystr);
960 kind = PyUnicode_KIND(pystr);
961 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
962
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000963 /* read a sign if it's there, make sure it's not the end of the string */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200964 if (PyUnicode_READ(kind, str, idx) == '-') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000965 idx++;
966 if (idx > end_idx) {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200967 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000968 return NULL;
969 }
970 }
971
972 /* read as many integer digits as we find as long as it doesn't start with 0 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200973 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000974 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200975 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000976 }
977 /* if it starts with 0 we only expect one integer digit */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200978 else if (PyUnicode_READ(kind, str, idx) == '0') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000979 idx++;
980 }
981 /* no integer digits, error */
982 else {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200983 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000984 return NULL;
985 }
986
987 /* if the next char is '.' followed by a digit then read all float digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200988 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000989 is_float = 1;
990 idx += 2;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200991 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000992 }
993
994 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200995 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000996 Py_ssize_t e_start = idx;
997 idx++;
998
999 /* read an exponent sign if present */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001000 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001001
1002 /* read all digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001003 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001004
1005 /* if we got a digit, then parse as float. if not, backtrack */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001006 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001007 is_float = 1;
1008 }
1009 else {
1010 idx = e_start;
1011 }
1012 }
1013
Antoine Pitrouf6454512011-04-25 19:16:06 +02001014 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
1015 custom_func = s->parse_float;
1016 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
1017 custom_func = s->parse_int;
1018 else
1019 custom_func = NULL;
1020
1021 if (custom_func) {
1022 /* copy the section we determined to be a number */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001023 numstr = PyUnicode_FromKindAndData(kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001024 (char*)str + kind * start,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001025 idx - start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001026 if (numstr == NULL)
1027 return NULL;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001028 rval = PyObject_CallFunctionObjArgs(custom_func, numstr, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001029 }
1030 else {
Antoine Pitrouf6454512011-04-25 19:16:06 +02001031 Py_ssize_t i, n;
1032 char *buf;
1033 /* Straight conversion to ASCII, to avoid costly conversion of
1034 decimal unicode digits (which cannot appear here) */
1035 n = idx - start;
1036 numstr = PyBytes_FromStringAndSize(NULL, n);
1037 if (numstr == NULL)
1038 return NULL;
1039 buf = PyBytes_AS_STRING(numstr);
1040 for (i = 0; i < n; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001041 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001042 }
1043 if (is_float)
1044 rval = PyFloat_FromString(numstr);
1045 else
1046 rval = PyLong_FromString(buf, NULL, 10);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001047 }
1048 Py_DECREF(numstr);
1049 *next_idx_ptr = idx;
1050 return rval;
1051}
1052
1053static PyObject *
1054scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1055{
1056 /* Read one JSON term (of any kind) from PyUnicode pystr.
1057 idx is the index of the first character of the term
1058 *next_idx_ptr is a return-by-reference index to the first character after
1059 the number.
1060
1061 Returns a new PyObject representation of the term.
1062 */
Ezio Melotti362b9512011-05-07 17:58:09 +03001063 PyObject *res;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001064 void *str;
1065 int kind;
1066 Py_ssize_t length;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001067 int strict;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001068
1069 if (PyUnicode_READY(pystr) == -1)
1070 return NULL;
1071
1072 str = PyUnicode_DATA(pystr);
1073 kind = PyUnicode_KIND(pystr);
1074 length = PyUnicode_GET_LENGTH(pystr);
1075
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001076 if (idx < 0) {
Benjamin Peterson9beee042014-04-14 11:46:51 -04001077 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001078 return NULL;
1079 }
1080 if (idx >= length) {
Ezio Melotti37623ab2013-01-03 08:44:15 +02001081 raise_stop_iteration(idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001082 return NULL;
1083 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001084
1085 switch (PyUnicode_READ(kind, str, idx)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001086 case '"':
1087 /* string */
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001088 strict = PyObject_IsTrue(s->strict);
1089 if (strict < 0)
1090 return NULL;
1091 return scanstring_unicode(pystr, idx + 1, strict, next_idx_ptr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001092 case '{':
1093 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +03001094 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1095 "from a unicode string"))
1096 return NULL;
1097 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1098 Py_LeaveRecursiveCall();
1099 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001100 case '[':
1101 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +03001102 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1103 "from a unicode string"))
1104 return NULL;
1105 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1106 Py_LeaveRecursiveCall();
1107 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001108 case 'n':
1109 /* null */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001110 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001111 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001112 Py_RETURN_NONE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001113 }
1114 break;
1115 case 't':
1116 /* true */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001117 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001118 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001119 Py_RETURN_TRUE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001120 }
1121 break;
1122 case 'f':
1123 /* false */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001124 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1125 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1126 PyUnicode_READ(kind, str, idx + 3) == 's' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001127 PyUnicode_READ(kind, str, idx + 4) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001128 *next_idx_ptr = idx + 5;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001129 Py_RETURN_FALSE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001130 }
1131 break;
1132 case 'N':
1133 /* NaN */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001134 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001135 PyUnicode_READ(kind, str, idx + 2) == 'N') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001136 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1137 }
1138 break;
1139 case 'I':
1140 /* Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001141 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1142 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1143 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001144 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001145 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1146 PyUnicode_READ(kind, str, idx + 6) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001147 PyUnicode_READ(kind, str, idx + 7) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001148 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1149 }
1150 break;
1151 case '-':
1152 /* -Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001153 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001154 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1155 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001156 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001157 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001158 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1159 PyUnicode_READ(kind, str, idx + 7) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001160 PyUnicode_READ(kind, str, idx + 8) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001161 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1162 }
1163 break;
1164 }
1165 /* Didn't find a string, object, array, or named constant. Look for a number. */
1166 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1167}
1168
1169static PyObject *
1170scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1171{
1172 /* Python callable interface to scan_once_{str,unicode} */
1173 PyObject *pystr;
1174 PyObject *rval;
1175 Py_ssize_t idx;
1176 Py_ssize_t next_idx = -1;
1177 static char *kwlist[] = {"string", "idx", NULL};
1178 PyScannerObject *s;
1179 assert(PyScanner_Check(self));
1180 s = (PyScannerObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001181 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001182 return NULL;
1183
1184 if (PyUnicode_Check(pystr)) {
1185 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1186 }
1187 else {
1188 PyErr_Format(PyExc_TypeError,
1189 "first argument must be a string, not %.80s",
1190 Py_TYPE(pystr)->tp_name);
1191 return NULL;
1192 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001193 PyDict_Clear(s->memo);
1194 if (rval == NULL)
1195 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001196 return _build_rval_index_tuple(rval, next_idx);
1197}
1198
1199static PyObject *
1200scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1201{
1202 PyScannerObject *s;
1203 s = (PyScannerObject *)type->tp_alloc(type, 0);
1204 if (s != NULL) {
1205 s->strict = NULL;
1206 s->object_hook = NULL;
1207 s->object_pairs_hook = NULL;
1208 s->parse_float = NULL;
1209 s->parse_int = NULL;
1210 s->parse_constant = NULL;
1211 }
1212 return (PyObject *)s;
1213}
1214
1215static int
1216scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1217{
1218 /* Initialize Scanner object */
1219 PyObject *ctx;
1220 static char *kwlist[] = {"context", NULL};
1221 PyScannerObject *s;
1222
1223 assert(PyScanner_Check(self));
1224 s = (PyScannerObject *)self;
1225
1226 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1227 return -1;
1228
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001229 if (s->memo == NULL) {
1230 s->memo = PyDict_New();
1231 if (s->memo == NULL)
1232 goto bail;
1233 }
1234
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001235 /* All of these will fail "gracefully" so we don't need to verify them */
1236 s->strict = PyObject_GetAttrString(ctx, "strict");
1237 if (s->strict == NULL)
1238 goto bail;
1239 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1240 if (s->object_hook == NULL)
1241 goto bail;
1242 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1243 if (s->object_pairs_hook == NULL)
1244 goto bail;
1245 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1246 if (s->parse_float == NULL)
1247 goto bail;
1248 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1249 if (s->parse_int == NULL)
1250 goto bail;
1251 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1252 if (s->parse_constant == NULL)
1253 goto bail;
1254
1255 return 0;
1256
1257bail:
1258 Py_CLEAR(s->strict);
1259 Py_CLEAR(s->object_hook);
1260 Py_CLEAR(s->object_pairs_hook);
1261 Py_CLEAR(s->parse_float);
1262 Py_CLEAR(s->parse_int);
1263 Py_CLEAR(s->parse_constant);
1264 return -1;
1265}
1266
1267PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1268
1269static
1270PyTypeObject PyScannerType = {
1271 PyVarObject_HEAD_INIT(NULL, 0)
1272 "_json.Scanner", /* tp_name */
1273 sizeof(PyScannerObject), /* tp_basicsize */
1274 0, /* tp_itemsize */
1275 scanner_dealloc, /* tp_dealloc */
1276 0, /* tp_print */
1277 0, /* tp_getattr */
1278 0, /* tp_setattr */
1279 0, /* tp_compare */
1280 0, /* tp_repr */
1281 0, /* tp_as_number */
1282 0, /* tp_as_sequence */
1283 0, /* tp_as_mapping */
1284 0, /* tp_hash */
1285 scanner_call, /* tp_call */
1286 0, /* tp_str */
1287 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1288 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1289 0, /* tp_as_buffer */
1290 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1291 scanner_doc, /* tp_doc */
1292 scanner_traverse, /* tp_traverse */
1293 scanner_clear, /* tp_clear */
1294 0, /* tp_richcompare */
1295 0, /* tp_weaklistoffset */
1296 0, /* tp_iter */
1297 0, /* tp_iternext */
1298 0, /* tp_methods */
1299 scanner_members, /* tp_members */
1300 0, /* tp_getset */
1301 0, /* tp_base */
1302 0, /* tp_dict */
1303 0, /* tp_descr_get */
1304 0, /* tp_descr_set */
1305 0, /* tp_dictoffset */
1306 scanner_init, /* tp_init */
1307 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1308 scanner_new, /* tp_new */
1309 0,/* PyObject_GC_Del, */ /* tp_free */
1310};
1311
1312static PyObject *
1313encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1314{
1315 PyEncoderObject *s;
1316 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1317 if (s != NULL) {
1318 s->markers = NULL;
1319 s->defaultfn = NULL;
1320 s->encoder = NULL;
1321 s->indent = NULL;
1322 s->key_separator = NULL;
1323 s->item_separator = NULL;
1324 s->sort_keys = NULL;
1325 s->skipkeys = NULL;
1326 }
1327 return (PyObject *)s;
1328}
1329
1330static int
1331encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1332{
1333 /* initialize Encoder object */
1334 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1335
1336 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001337 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001338 PyObject *item_separator, *sort_keys, *skipkeys;
1339 int allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001340
1341 assert(PyEncoder_Check(self));
1342 s = (PyEncoderObject *)self;
1343
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001344 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUOOp:make_encoder", kwlist,
1345 &markers, &defaultfn, &encoder, &indent,
1346 &key_separator, &item_separator,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001347 &sort_keys, &skipkeys, &allow_nan))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001348 return -1;
1349
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001350 if (markers != Py_None && !PyDict_Check(markers)) {
1351 PyErr_Format(PyExc_TypeError,
1352 "make_encoder() argument 1 must be dict or None, "
1353 "not %.200s", Py_TYPE(markers)->tp_name);
1354 return -1;
1355 }
1356
Antoine Pitrou781eba72009-12-08 15:57:31 +00001357 s->markers = markers;
1358 s->defaultfn = defaultfn;
1359 s->encoder = encoder;
1360 s->indent = indent;
1361 s->key_separator = key_separator;
1362 s->item_separator = item_separator;
1363 s->sort_keys = sort_keys;
1364 s->skipkeys = skipkeys;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001365 s->fast_encode = NULL;
1366 if (PyCFunction_Check(s->encoder)) {
1367 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1368 if (f == (PyCFunction)py_encode_basestring_ascii ||
1369 f == (PyCFunction)py_encode_basestring) {
1370 s->fast_encode = f;
1371 }
1372 }
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001373 s->allow_nan = allow_nan;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001374
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001375 Py_INCREF(s->markers);
1376 Py_INCREF(s->defaultfn);
1377 Py_INCREF(s->encoder);
1378 Py_INCREF(s->indent);
1379 Py_INCREF(s->key_separator);
1380 Py_INCREF(s->item_separator);
1381 Py_INCREF(s->sort_keys);
1382 Py_INCREF(s->skipkeys);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001383 return 0;
1384}
1385
1386static PyObject *
1387encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1388{
1389 /* Python callable interface to encode_listencode_obj */
1390 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1391 PyObject *obj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001392 Py_ssize_t indent_level;
1393 PyEncoderObject *s;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001394 _PyAccu acc;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001395
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001396 assert(PyEncoder_Check(self));
1397 s = (PyEncoderObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001398 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1399 &obj, &indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001400 return NULL;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001401 if (_PyAccu_Init(&acc))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001402 return NULL;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001403 if (encoder_listencode_obj(s, &acc, obj, indent_level)) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001404 _PyAccu_Destroy(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001405 return NULL;
1406 }
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001407 return _PyAccu_FinishAsList(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001408}
1409
1410static PyObject *
1411_encoded_const(PyObject *obj)
1412{
1413 /* Return the JSON string representation of None, True, False */
1414 if (obj == Py_None) {
1415 static PyObject *s_null = NULL;
1416 if (s_null == NULL) {
1417 s_null = PyUnicode_InternFromString("null");
1418 }
1419 Py_INCREF(s_null);
1420 return s_null;
1421 }
1422 else if (obj == Py_True) {
1423 static PyObject *s_true = NULL;
1424 if (s_true == NULL) {
1425 s_true = PyUnicode_InternFromString("true");
1426 }
1427 Py_INCREF(s_true);
1428 return s_true;
1429 }
1430 else if (obj == Py_False) {
1431 static PyObject *s_false = NULL;
1432 if (s_false == NULL) {
1433 s_false = PyUnicode_InternFromString("false");
1434 }
1435 Py_INCREF(s_false);
1436 return s_false;
1437 }
1438 else {
1439 PyErr_SetString(PyExc_ValueError, "not a const");
1440 return NULL;
1441 }
1442}
1443
1444static PyObject *
1445encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1446{
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001447 /* Return the JSON representation of a PyFloat. */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001448 double i = PyFloat_AS_DOUBLE(obj);
1449 if (!Py_IS_FINITE(i)) {
1450 if (!s->allow_nan) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001451 PyErr_SetString(
1452 PyExc_ValueError,
1453 "Out of range float values are not JSON compliant"
1454 );
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001455 return NULL;
1456 }
1457 if (i > 0) {
1458 return PyUnicode_FromString("Infinity");
1459 }
1460 else if (i < 0) {
1461 return PyUnicode_FromString("-Infinity");
1462 }
1463 else {
1464 return PyUnicode_FromString("NaN");
1465 }
1466 }
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001467 return PyFloat_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001468}
1469
1470static PyObject *
1471encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1472{
1473 /* Return the JSON representation of a string */
1474 if (s->fast_encode)
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001475 return s->fast_encode(NULL, obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001476 else
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001477 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001478}
1479
1480static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001481_steal_accumulate(_PyAccu *acc, PyObject *stolen)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001482{
1483 /* Append stolen and then decrement its reference count */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001484 int rval = _PyAccu_Accumulate(acc, stolen);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001485 Py_DECREF(stolen);
1486 return rval;
1487}
1488
1489static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001490encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001491 PyObject *obj, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001492{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001493 /* Encode Python object obj to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001494 PyObject *newobj;
1495 int rv;
1496
1497 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1498 PyObject *cstr = _encoded_const(obj);
1499 if (cstr == NULL)
1500 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001501 return _steal_accumulate(acc, cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001502 }
1503 else if (PyUnicode_Check(obj))
1504 {
1505 PyObject *encoded = encoder_encode_string(s, obj);
1506 if (encoded == NULL)
1507 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001508 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001509 }
1510 else if (PyLong_Check(obj)) {
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001511 PyObject *encoded = PyLong_Type.tp_str(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001512 if (encoded == NULL)
1513 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001514 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001515 }
1516 else if (PyFloat_Check(obj)) {
1517 PyObject *encoded = encoder_encode_float(s, obj);
1518 if (encoded == NULL)
1519 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001520 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001521 }
1522 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001523 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1524 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001525 rv = encoder_listencode_list(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001526 Py_LeaveRecursiveCall();
1527 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001528 }
1529 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001530 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1531 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001532 rv = encoder_listencode_dict(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001533 Py_LeaveRecursiveCall();
1534 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001535 }
1536 else {
1537 PyObject *ident = NULL;
1538 if (s->markers != Py_None) {
1539 int has_key;
1540 ident = PyLong_FromVoidPtr(obj);
1541 if (ident == NULL)
1542 return -1;
1543 has_key = PyDict_Contains(s->markers, ident);
1544 if (has_key) {
1545 if (has_key != -1)
1546 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1547 Py_DECREF(ident);
1548 return -1;
1549 }
1550 if (PyDict_SetItem(s->markers, ident, obj)) {
1551 Py_DECREF(ident);
1552 return -1;
1553 }
1554 }
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001555 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001556 if (newobj == NULL) {
1557 Py_XDECREF(ident);
1558 return -1;
1559 }
Ezio Melotti13672652011-05-11 01:02:56 +03001560
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001561 if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
1562 Py_DECREF(newobj);
1563 Py_XDECREF(ident);
Ezio Melotti13672652011-05-11 01:02:56 +03001564 return -1;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001565 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001566 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001567 Py_LeaveRecursiveCall();
1568
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001569 Py_DECREF(newobj);
1570 if (rv) {
1571 Py_XDECREF(ident);
1572 return -1;
1573 }
1574 if (ident != NULL) {
1575 if (PyDict_DelItem(s->markers, ident)) {
1576 Py_XDECREF(ident);
1577 return -1;
1578 }
1579 Py_XDECREF(ident);
1580 }
1581 return rv;
1582 }
1583}
1584
1585static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001586encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001587 PyObject *dct, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001588{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001589 /* Encode Python dict dct a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001590 static PyObject *open_dict = NULL;
1591 static PyObject *close_dict = NULL;
1592 static PyObject *empty_dict = NULL;
1593 PyObject *kstr = NULL;
1594 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001595 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001596 PyObject *items;
1597 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001598 int skipkeys;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001599 int sortkeys;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001600 Py_ssize_t idx;
1601
1602 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1603 open_dict = PyUnicode_InternFromString("{");
1604 close_dict = PyUnicode_InternFromString("}");
1605 empty_dict = PyUnicode_InternFromString("{}");
1606 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1607 return -1;
1608 }
Serhiy Storchaka8cbc51a2017-01-13 08:38:15 +02001609 if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001610 return _PyAccu_Accumulate(acc, empty_dict);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001611
1612 if (s->markers != Py_None) {
1613 int has_key;
1614 ident = PyLong_FromVoidPtr(dct);
1615 if (ident == NULL)
1616 goto bail;
1617 has_key = PyDict_Contains(s->markers, ident);
1618 if (has_key) {
1619 if (has_key != -1)
1620 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1621 goto bail;
1622 }
1623 if (PyDict_SetItem(s->markers, ident, dct)) {
1624 goto bail;
1625 }
1626 }
1627
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001628 if (_PyAccu_Accumulate(acc, open_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001629 goto bail;
1630
1631 if (s->indent != Py_None) {
1632 /* TODO: DOES NOT RUN */
1633 indent_level += 1;
1634 /*
1635 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1636 separator = _item_separator + newline_indent
1637 buf += newline_indent
1638 */
1639 }
1640
Benjamin Peterson501182a2015-05-02 22:28:04 -04001641 items = PyMapping_Items(dct);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001642 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001643 goto bail;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001644 sortkeys = PyObject_IsTrue(s->sort_keys);
1645 if (sortkeys < 0 || (sortkeys && PyList_Sort(items) < 0))
Benjamin Peterson501182a2015-05-02 22:28:04 -04001646 goto bail;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001647 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001648 Py_DECREF(items);
1649 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001650 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001651 skipkeys = PyObject_IsTrue(s->skipkeys);
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001652 if (skipkeys < 0)
1653 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001654 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001655 while ((item = PyIter_Next(it)) != NULL) {
1656 PyObject *encoded, *key, *value;
1657 if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
1658 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1659 goto bail;
1660 }
1661 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001662 if (PyUnicode_Check(key)) {
1663 Py_INCREF(key);
1664 kstr = key;
1665 }
1666 else if (PyFloat_Check(key)) {
1667 kstr = encoder_encode_float(s, key);
1668 if (kstr == NULL)
1669 goto bail;
1670 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001671 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001672 /* This must come before the PyLong_Check because
1673 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001674 kstr = _encoded_const(key);
1675 if (kstr == NULL)
1676 goto bail;
1677 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001678 else if (PyLong_Check(key)) {
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001679 kstr = PyLong_Type.tp_str(key);
Ethan Furmana4998a72013-08-10 13:01:45 -07001680 if (kstr == NULL) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001681 goto bail;
Ethan Furmana4998a72013-08-10 13:01:45 -07001682 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001683 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001684 else if (skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001685 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001686 continue;
1687 }
1688 else {
1689 /* TODO: include repr of key */
Doug Hellmann1c524752010-07-21 12:29:04 +00001690 PyErr_SetString(PyExc_TypeError, "keys must be a string");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001691 goto bail;
1692 }
1693
1694 if (idx) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001695 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001696 goto bail;
1697 }
1698
1699 encoded = encoder_encode_string(s, kstr);
1700 Py_CLEAR(kstr);
1701 if (encoded == NULL)
1702 goto bail;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001703 if (_PyAccu_Accumulate(acc, encoded)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001704 Py_DECREF(encoded);
1705 goto bail;
1706 }
1707 Py_DECREF(encoded);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001708 if (_PyAccu_Accumulate(acc, s->key_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001709 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001710
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001711 value = PyTuple_GET_ITEM(item, 1);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001712 if (encoder_listencode_obj(s, acc, value, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001713 goto bail;
1714 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001715 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001716 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001717 if (PyErr_Occurred())
1718 goto bail;
1719 Py_CLEAR(it);
1720
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001721 if (ident != NULL) {
1722 if (PyDict_DelItem(s->markers, ident))
1723 goto bail;
1724 Py_CLEAR(ident);
1725 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001726 /* TODO DOES NOT RUN; dead code
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001727 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001728 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001729
1730 yield '\n' + (' ' * (_indent * _current_indent_level))
1731 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001732 if (_PyAccu_Accumulate(acc, close_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001733 goto bail;
1734 return 0;
1735
1736bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001737 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001738 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001739 Py_XDECREF(kstr);
1740 Py_XDECREF(ident);
1741 return -1;
1742}
1743
1744
1745static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001746encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001747 PyObject *seq, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001748{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001749 /* Encode Python list seq to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001750 static PyObject *open_array = NULL;
1751 static PyObject *close_array = NULL;
1752 static PyObject *empty_array = NULL;
1753 PyObject *ident = NULL;
1754 PyObject *s_fast = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001755 Py_ssize_t i;
1756
1757 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1758 open_array = PyUnicode_InternFromString("[");
1759 close_array = PyUnicode_InternFromString("]");
1760 empty_array = PyUnicode_InternFromString("[]");
1761 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1762 return -1;
1763 }
1764 ident = NULL;
1765 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1766 if (s_fast == NULL)
1767 return -1;
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001768 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001769 Py_DECREF(s_fast);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001770 return _PyAccu_Accumulate(acc, empty_array);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001771 }
1772
1773 if (s->markers != Py_None) {
1774 int has_key;
1775 ident = PyLong_FromVoidPtr(seq);
1776 if (ident == NULL)
1777 goto bail;
1778 has_key = PyDict_Contains(s->markers, ident);
1779 if (has_key) {
1780 if (has_key != -1)
1781 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1782 goto bail;
1783 }
1784 if (PyDict_SetItem(s->markers, ident, seq)) {
1785 goto bail;
1786 }
1787 }
1788
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001789 if (_PyAccu_Accumulate(acc, open_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001790 goto bail;
1791 if (s->indent != Py_None) {
1792 /* TODO: DOES NOT RUN */
1793 indent_level += 1;
1794 /*
1795 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1796 separator = _item_separator + newline_indent
1797 buf += newline_indent
1798 */
1799 }
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001800 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1801 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001802 if (i) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001803 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001804 goto bail;
1805 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001806 if (encoder_listencode_obj(s, acc, obj, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001807 goto bail;
1808 }
1809 if (ident != NULL) {
1810 if (PyDict_DelItem(s->markers, ident))
1811 goto bail;
1812 Py_CLEAR(ident);
1813 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001814
1815 /* TODO: DOES NOT RUN
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001816 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001817 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001818
1819 yield '\n' + (' ' * (_indent * _current_indent_level))
1820 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001821 if (_PyAccu_Accumulate(acc, close_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001822 goto bail;
1823 Py_DECREF(s_fast);
1824 return 0;
1825
1826bail:
1827 Py_XDECREF(ident);
1828 Py_DECREF(s_fast);
1829 return -1;
1830}
1831
1832static void
1833encoder_dealloc(PyObject *self)
1834{
1835 /* Deallocate Encoder */
1836 encoder_clear(self);
1837 Py_TYPE(self)->tp_free(self);
1838}
1839
1840static int
1841encoder_traverse(PyObject *self, visitproc visit, void *arg)
1842{
1843 PyEncoderObject *s;
1844 assert(PyEncoder_Check(self));
1845 s = (PyEncoderObject *)self;
1846 Py_VISIT(s->markers);
1847 Py_VISIT(s->defaultfn);
1848 Py_VISIT(s->encoder);
1849 Py_VISIT(s->indent);
1850 Py_VISIT(s->key_separator);
1851 Py_VISIT(s->item_separator);
1852 Py_VISIT(s->sort_keys);
1853 Py_VISIT(s->skipkeys);
1854 return 0;
1855}
1856
1857static int
1858encoder_clear(PyObject *self)
1859{
1860 /* Deallocate Encoder */
1861 PyEncoderObject *s;
1862 assert(PyEncoder_Check(self));
1863 s = (PyEncoderObject *)self;
1864 Py_CLEAR(s->markers);
1865 Py_CLEAR(s->defaultfn);
1866 Py_CLEAR(s->encoder);
1867 Py_CLEAR(s->indent);
1868 Py_CLEAR(s->key_separator);
1869 Py_CLEAR(s->item_separator);
1870 Py_CLEAR(s->sort_keys);
1871 Py_CLEAR(s->skipkeys);
1872 return 0;
1873}
1874
1875PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1876
1877static
1878PyTypeObject PyEncoderType = {
1879 PyVarObject_HEAD_INIT(NULL, 0)
1880 "_json.Encoder", /* tp_name */
1881 sizeof(PyEncoderObject), /* tp_basicsize */
1882 0, /* tp_itemsize */
1883 encoder_dealloc, /* tp_dealloc */
1884 0, /* tp_print */
1885 0, /* tp_getattr */
1886 0, /* tp_setattr */
1887 0, /* tp_compare */
1888 0, /* tp_repr */
1889 0, /* tp_as_number */
1890 0, /* tp_as_sequence */
1891 0, /* tp_as_mapping */
1892 0, /* tp_hash */
1893 encoder_call, /* tp_call */
1894 0, /* tp_str */
1895 0, /* tp_getattro */
1896 0, /* tp_setattro */
1897 0, /* tp_as_buffer */
1898 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1899 encoder_doc, /* tp_doc */
1900 encoder_traverse, /* tp_traverse */
1901 encoder_clear, /* tp_clear */
1902 0, /* tp_richcompare */
1903 0, /* tp_weaklistoffset */
1904 0, /* tp_iter */
1905 0, /* tp_iternext */
1906 0, /* tp_methods */
1907 encoder_members, /* tp_members */
1908 0, /* tp_getset */
1909 0, /* tp_base */
1910 0, /* tp_dict */
1911 0, /* tp_descr_get */
1912 0, /* tp_descr_set */
1913 0, /* tp_dictoffset */
1914 encoder_init, /* tp_init */
1915 0, /* tp_alloc */
1916 encoder_new, /* tp_new */
1917 0, /* tp_free */
1918};
1919
1920static PyMethodDef speedups_methods[] = {
1921 {"encode_basestring_ascii",
1922 (PyCFunction)py_encode_basestring_ascii,
1923 METH_O,
1924 pydoc_encode_basestring_ascii},
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001925 {"encode_basestring",
1926 (PyCFunction)py_encode_basestring,
1927 METH_O,
1928 pydoc_encode_basestring},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001929 {"scanstring",
1930 (PyCFunction)py_scanstring,
1931 METH_VARARGS,
1932 pydoc_scanstring},
Christian Heimes90540002008-05-08 14:29:10 +00001933 {NULL, NULL, 0, NULL}
1934};
1935
1936PyDoc_STRVAR(module_doc,
1937"json speedups\n");
1938
Martin v. Löwis1a214512008-06-11 05:26:20 +00001939static struct PyModuleDef jsonmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001940 PyModuleDef_HEAD_INIT,
1941 "_json",
1942 module_doc,
1943 -1,
1944 speedups_methods,
1945 NULL,
1946 NULL,
1947 NULL,
1948 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001949};
1950
Victor Stinnerf024d262015-03-17 17:48:27 +01001951PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001952PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00001953{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001954 PyObject *m = PyModule_Create(&jsonmodule);
1955 if (!m)
1956 return NULL;
1957 PyScannerType.tp_new = PyType_GenericNew;
1958 if (PyType_Ready(&PyScannerType) < 0)
1959 goto fail;
1960 PyEncoderType.tp_new = PyType_GenericNew;
1961 if (PyType_Ready(&PyEncoderType) < 0)
1962 goto fail;
1963 Py_INCREF((PyObject*)&PyScannerType);
1964 if (PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType) < 0) {
1965 Py_DECREF((PyObject*)&PyScannerType);
1966 goto fail;
1967 }
1968 Py_INCREF((PyObject*)&PyEncoderType);
1969 if (PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType) < 0) {
1970 Py_DECREF((PyObject*)&PyEncoderType);
1971 goto fail;
1972 }
1973 return m;
1974 fail:
1975 Py_DECREF(m);
1976 return NULL;
Christian Heimes90540002008-05-08 14:29:10 +00001977}