blob: d3dbf98805d414624d76278be7e34a33c0600251 [file] [log] [blame]
Christian Heimes90540002008-05-08 14:29:10 +00001#include "Python.h"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00002#include "structmember.h"
Antoine Pitroud0acb412012-03-22 14:42:18 +01003#include "accu.h"
4
/* UNUSED marks a parameter or variable as intentionally unused.  When
   __GNUC__ is defined it expands to the `unused` attribute; otherwise it
   expands to nothing. */
#if defined(__GNUC__)
#  define UNUSED __attribute__((__unused__))
#else
#  define UNUSED
#endif
10
11#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
12#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
13#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
14#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
15
16static PyTypeObject PyScannerType;
17static PyTypeObject PyEncoderType;
18
19typedef struct _PyScannerObject {
20 PyObject_HEAD
21 PyObject *strict;
22 PyObject *object_hook;
23 PyObject *object_pairs_hook;
24 PyObject *parse_float;
25 PyObject *parse_int;
26 PyObject *parse_constant;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +000027 PyObject *memo;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000028} PyScannerObject;
29
30static PyMemberDef scanner_members[] = {
31 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
32 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
33 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
34 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
35 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
36 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
37 {NULL}
38};
39
40typedef struct _PyEncoderObject {
41 PyObject_HEAD
42 PyObject *markers;
43 PyObject *defaultfn;
44 PyObject *encoder;
45 PyObject *indent;
46 PyObject *key_separator;
47 PyObject *item_separator;
48 PyObject *sort_keys;
49 PyObject *skipkeys;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +010050 PyCFunction fast_encode;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000051 int allow_nan;
52} PyEncoderObject;
53
54static PyMemberDef encoder_members[] = {
55 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
56 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
57 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
58 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
59 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
60 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
61 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
62 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
63 {NULL}
64};
65
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020066static PyObject *
67join_list_unicode(PyObject *lst)
68{
69 /* return u''.join(lst) */
70 static PyObject *sep = NULL;
71 if (sep == NULL) {
72 sep = PyUnicode_FromStringAndSize("", 0);
73 if (sep == NULL)
74 return NULL;
75 }
76 return PyUnicode_Join(sep, lst);
77}
78
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020079/* Forward decls */
80
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000081static PyObject *
82ascii_escape_unicode(PyObject *pystr);
83static PyObject *
84py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
85void init_json(void);
86static PyObject *
87scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
88static PyObject *
89_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
90static PyObject *
91scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
92static int
93scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
94static void
95scanner_dealloc(PyObject *self);
96static int
97scanner_clear(PyObject *self);
98static PyObject *
99encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
100static int
101encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
102static void
103encoder_dealloc(PyObject *self);
104static int
105encoder_clear(PyObject *self);
106static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200107encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000108static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200109encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000110static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200111encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000112static PyObject *
Hirokazu Yamamotofecf5d12009-05-02 15:55:19 +0000113_encoded_const(PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000114static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200115raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000116static PyObject *
117encoder_encode_string(PyEncoderObject *s, PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000118static PyObject *
119encoder_encode_float(PyEncoderObject *s, PyObject *obj);
120
/* S_CHAR(c): true when c is a printable ASCII character (' ' .. '~') other
   than the backslash and the double quote, i.e. a character the ASCII
   escaper copies to its output unchanged.
   IS_WHITESPACE(c): true for space, tab, line feed and carriage return.
   Every use of the argument is parenthesised so that an arbitrary
   expression (e.g. a conditional expression) can be passed safely.  The
   argument is still evaluated more than once, so it must not have side
   effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
Christian Heimes90540002008-05-08 14:29:10 +0000123
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000124static Py_ssize_t
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200125ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000126{
127 /* Escape unicode code point c to ASCII escape sequences
128 in char *output. output must have at least 12 bytes unused to
129 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000130 output[chars++] = '\\';
131 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000132 case '\\': output[chars++] = c; break;
133 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000134 case '\b': output[chars++] = 'b'; break;
135 case '\f': output[chars++] = 'f'; break;
136 case '\n': output[chars++] = 'n'; break;
137 case '\r': output[chars++] = 'r'; break;
138 case '\t': output[chars++] = 't'; break;
139 default:
Christian Heimes90540002008-05-08 14:29:10 +0000140 if (c >= 0x10000) {
141 /* UTF-16 surrogate pair */
Victor Stinner76df43d2012-10-30 01:42:39 +0100142 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000143 output[chars++] = 'u';
Victor Stinner76df43d2012-10-30 01:42:39 +0100144 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
145 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
146 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
147 output[chars++] = Py_hexdigits[(v ) & 0xf];
148 c = Py_UNICODE_LOW_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000149 output[chars++] = '\\';
150 }
Christian Heimes90540002008-05-08 14:29:10 +0000151 output[chars++] = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200152 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
153 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
154 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
155 output[chars++] = Py_hexdigits[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000156 }
157 return chars;
158}
159
160static PyObject *
161ascii_escape_unicode(PyObject *pystr)
162{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000163 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000164 Py_ssize_t i;
165 Py_ssize_t input_chars;
166 Py_ssize_t output_size;
167 Py_ssize_t chars;
168 PyObject *rval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200169 void *input;
170 unsigned char *output;
171 int kind;
Christian Heimes90540002008-05-08 14:29:10 +0000172
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200173 if (PyUnicode_READY(pystr) == -1)
174 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000175
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200176 input_chars = PyUnicode_GET_LENGTH(pystr);
177 input = PyUnicode_DATA(pystr);
178 kind = PyUnicode_KIND(pystr);
179
180 /* Compute the output size */
181 for (i = 0, output_size = 2; i < input_chars; i++) {
182 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500183 Py_ssize_t d;
184 if (S_CHAR(c)) {
185 d = 1;
186 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200187 else {
188 switch(c) {
Victor Stinnerd9c06312011-10-11 21:56:19 +0200189 case '\\': case '"': case '\b': case '\f':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200190 case '\n': case '\r': case '\t':
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500191 d = 2; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200192 default:
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500193 d = c >= 0x10000 ? 12 : 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200194 }
195 }
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500196 if (output_size > PY_SSIZE_T_MAX - d) {
197 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
198 return NULL;
199 }
200 output_size += d;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200201 }
202
203 rval = PyUnicode_New(output_size, 127);
Christian Heimes90540002008-05-08 14:29:10 +0000204 if (rval == NULL) {
205 return NULL;
206 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200207 output = PyUnicode_1BYTE_DATA(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000208 chars = 0;
209 output[chars++] = '"';
210 for (i = 0; i < input_chars; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200211 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Christian Heimes90540002008-05-08 14:29:10 +0000212 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000213 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000214 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000215 else {
216 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000217 }
Christian Heimes90540002008-05-08 14:29:10 +0000218 }
219 output[chars++] = '"';
Christian Heimesf402e922013-01-03 09:21:55 +0100220#ifdef Py_DEBUG
Victor Stinner8f825062012-04-27 13:55:39 +0200221 assert(_PyUnicode_CheckConsistency(rval, 1));
Christian Heimesf402e922013-01-03 09:21:55 +0100222#endif
Christian Heimes90540002008-05-08 14:29:10 +0000223 return rval;
224}
225
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100226static PyObject *
227escape_unicode(PyObject *pystr)
228{
229 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
230 Py_ssize_t i;
231 Py_ssize_t input_chars;
232 Py_ssize_t output_size;
233 Py_ssize_t chars;
234 PyObject *rval;
235 void *input;
236 int kind;
237 Py_UCS4 maxchar;
238
239 if (PyUnicode_READY(pystr) == -1)
240 return NULL;
241
242 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
243 input_chars = PyUnicode_GET_LENGTH(pystr);
244 input = PyUnicode_DATA(pystr);
245 kind = PyUnicode_KIND(pystr);
246
247 /* Compute the output size */
248 for (i = 0, output_size = 2; i < input_chars; i++) {
249 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500250 Py_ssize_t d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100251 switch (c) {
252 case '\\': case '"': case '\b': case '\f':
253 case '\n': case '\r': case '\t':
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500254 d = 2;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100255 break;
256 default:
257 if (c <= 0x1f)
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500258 d = 6;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100259 else
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500260 d = 1;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100261 }
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500262 if (output_size > PY_SSIZE_T_MAX - d) {
263 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
264 return NULL;
265 }
266 output_size += d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100267 }
268
269 rval = PyUnicode_New(output_size, maxchar);
270 if (rval == NULL)
271 return NULL;
272
273 kind = PyUnicode_KIND(rval);
274
275#define ENCODE_OUTPUT do { \
276 chars = 0; \
277 output[chars++] = '"'; \
278 for (i = 0; i < input_chars; i++) { \
279 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
280 switch (c) { \
281 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
282 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
283 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
284 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
285 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
286 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
287 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
288 default: \
289 if (c <= 0x1f) { \
290 output[chars++] = '\\'; \
291 output[chars++] = 'u'; \
292 output[chars++] = '0'; \
293 output[chars++] = '0'; \
294 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
295 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
296 } else { \
297 output[chars++] = c; \
298 } \
299 } \
300 } \
301 output[chars++] = '"'; \
302 } while (0)
303
304 if (kind == PyUnicode_1BYTE_KIND) {
305 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
306 ENCODE_OUTPUT;
307 } else if (kind == PyUnicode_2BYTE_KIND) {
308 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
309 ENCODE_OUTPUT;
310 } else {
311 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
312 assert(kind == PyUnicode_4BYTE_KIND);
313 ENCODE_OUTPUT;
314 }
315#undef ENCODE_OUTPUT
316
317#ifdef Py_DEBUG
318 assert(_PyUnicode_CheckConsistency(rval, 1));
319#endif
320 return rval;
321}
322
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000323static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200324raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
Christian Heimes90540002008-05-08 14:29:10 +0000325{
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200326 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
327 static PyObject *JSONDecodeError = NULL;
328 PyObject *exc;
329 if (JSONDecodeError == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000330 PyObject *decoder = PyImport_ImportModule("json.decoder");
331 if (decoder == NULL)
332 return;
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200333 JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000334 Py_DECREF(decoder);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200335 if (JSONDecodeError == NULL)
Christian Heimes90540002008-05-08 14:29:10 +0000336 return;
Christian Heimes90540002008-05-08 14:29:10 +0000337 }
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200338 exc = PyObject_CallFunction(JSONDecodeError, "(zOn)", msg, s, end);
339 if (exc) {
340 PyErr_SetObject(JSONDecodeError, exc);
341 Py_DECREF(exc);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000342 }
Christian Heimes90540002008-05-08 14:29:10 +0000343}
344
Ezio Melotti37623ab2013-01-03 08:44:15 +0200345static void
346raise_stop_iteration(Py_ssize_t idx)
347{
348 PyObject *value = PyLong_FromSsize_t(idx);
349 if (value != NULL) {
350 PyErr_SetObject(PyExc_StopIteration, value);
351 Py_DECREF(value);
352 }
353}
354
Christian Heimes90540002008-05-08 14:29:10 +0000355static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000356_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
357 /* return (rval, idx) tuple, stealing reference to rval */
358 PyObject *tpl;
359 PyObject *pyidx;
360 /*
361 steal a reference to rval, returns (rval, idx)
362 */
363 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000364 return NULL;
365 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000366 pyidx = PyLong_FromSsize_t(idx);
367 if (pyidx == NULL) {
368 Py_DECREF(rval);
369 return NULL;
370 }
371 tpl = PyTuple_New(2);
372 if (tpl == NULL) {
373 Py_DECREF(pyidx);
374 Py_DECREF(rval);
375 return NULL;
376 }
377 PyTuple_SET_ITEM(tpl, 0, rval);
378 PyTuple_SET_ITEM(tpl, 1, pyidx);
379 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000380}
381
/* APPEND_OLD_CHUNK: if `chunk` is non-NULL, append it to the `chunks` list
   (created on demand) and reset `chunk` to NULL.  On any failure control
   jumps to the enclosing function's `bail` label.  The expansion is a bare
   `if` block, not do { } while (0), because it is used as a statement
   without a trailing semicolon. */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        int append_failed_; \
        if (chunks == NULL) { \
            chunks = PyList_New(0); \
            if (chunks == NULL) { \
                goto bail; \
            } \
        } \
        append_failed_ = PyList_Append(chunks, chunk); \
        Py_CLEAR(chunk); \
        if (append_failed_) { \
            goto bail; \
        } \
    }
396
Christian Heimes90540002008-05-08 14:29:10 +0000397static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000398scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000399{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000400 /* Read the JSON string from PyUnicode pystr.
401 end is the index of the first character after the quote.
402 if strict is zero then literal control characters are allowed
403 *next_end_ptr is a return-by-reference index of the character
404 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000405
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000406 Return value is a new PyUnicode
407 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000408 PyObject *rval = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200409 Py_ssize_t len;
Christian Heimes90540002008-05-08 14:29:10 +0000410 Py_ssize_t begin = end - 1;
Brett Cannonb94767f2011-02-22 20:15:44 +0000411 Py_ssize_t next /* = begin */;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200412 const void *buf;
413 int kind;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000414 PyObject *chunks = NULL;
415 PyObject *chunk = NULL;
416
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200417 if (PyUnicode_READY(pystr) == -1)
418 return 0;
419
420 len = PyUnicode_GET_LENGTH(pystr);
421 buf = PyUnicode_DATA(pystr);
422 kind = PyUnicode_KIND(pystr);
423
Ezio Melotti37623ab2013-01-03 08:44:15 +0200424 if (end < 0 || len < end) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000425 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
426 goto bail;
427 }
Christian Heimes90540002008-05-08 14:29:10 +0000428 while (1) {
429 /* Find the end of the string or the next escape */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200430 Py_UCS4 c = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000431 for (next = end; next < len; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200432 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000433 if (c == '"' || c == '\\') {
434 break;
435 }
436 else if (strict && c <= 0x1f) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000437 raise_errmsg("Invalid control character at", pystr, next);
Christian Heimes90540002008-05-08 14:29:10 +0000438 goto bail;
439 }
440 }
441 if (!(c == '"' || c == '\\')) {
442 raise_errmsg("Unterminated string starting at", pystr, begin);
443 goto bail;
444 }
445 /* Pick up this chunk if it's not zero length */
446 if (next != end) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000447 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200448 chunk = PyUnicode_FromKindAndData(
449 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200450 (char*)buf + kind * end,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200451 next - end);
Christian Heimes90540002008-05-08 14:29:10 +0000452 if (chunk == NULL) {
453 goto bail;
454 }
Christian Heimes90540002008-05-08 14:29:10 +0000455 }
456 next++;
457 if (c == '"') {
458 end = next;
459 break;
460 }
461 if (next == len) {
462 raise_errmsg("Unterminated string starting at", pystr, begin);
463 goto bail;
464 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200465 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000466 if (c != 'u') {
467 /* Non-unicode backslash escapes */
468 end = next + 1;
469 switch (c) {
470 case '"': break;
471 case '\\': break;
472 case '/': break;
473 case 'b': c = '\b'; break;
474 case 'f': c = '\f'; break;
475 case 'n': c = '\n'; break;
476 case 'r': c = '\r'; break;
477 case 't': c = '\t'; break;
478 default: c = 0;
479 }
480 if (c == 0) {
481 raise_errmsg("Invalid \\escape", pystr, end - 2);
482 goto bail;
483 }
484 }
485 else {
486 c = 0;
487 next++;
488 end = next + 4;
489 if (end >= len) {
490 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
491 goto bail;
492 }
493 /* Decode 4 hex digits */
494 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200495 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000496 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000497 switch (digit) {
498 case '0': case '1': case '2': case '3': case '4':
499 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000500 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000501 case 'a': case 'b': case 'c': case 'd': case 'e':
502 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000503 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000504 case 'A': case 'B': case 'C': case 'D': case 'E':
505 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000506 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000507 default:
508 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
509 goto bail;
510 }
511 }
Christian Heimes90540002008-05-08 14:29:10 +0000512 /* Surrogate pair */
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200513 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
514 PyUnicode_READ(kind, buf, next++) == '\\' &&
515 PyUnicode_READ(kind, buf, next++) == 'u') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200516 Py_UCS4 c2 = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000517 end += 6;
518 /* Decode 4 hex digits */
519 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200520 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000521 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000522 switch (digit) {
523 case '0': case '1': case '2': case '3': case '4':
524 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000525 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000526 case 'a': case 'b': case 'c': case 'd': case 'e':
527 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000528 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000529 case 'A': case 'B': case 'C': case 'D': case 'E':
530 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000531 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000532 default:
533 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
534 goto bail;
535 }
536 }
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200537 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
538 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
539 else
540 end -= 6;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000541 }
Christian Heimes90540002008-05-08 14:29:10 +0000542 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000543 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200544 chunk = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &c, 1);
Christian Heimes90540002008-05-08 14:29:10 +0000545 if (chunk == NULL) {
546 goto bail;
547 }
Christian Heimes90540002008-05-08 14:29:10 +0000548 }
549
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000550 if (chunks == NULL) {
551 if (chunk != NULL)
552 rval = chunk;
553 else
554 rval = PyUnicode_FromStringAndSize("", 0);
Christian Heimes90540002008-05-08 14:29:10 +0000555 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000556 else {
557 APPEND_OLD_CHUNK
558 rval = join_list_unicode(chunks);
559 if (rval == NULL) {
560 goto bail;
561 }
562 Py_CLEAR(chunks);
563 }
564
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000565 *next_end_ptr = end;
566 return rval;
Christian Heimes90540002008-05-08 14:29:10 +0000567bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000568 *next_end_ptr = -1;
Christian Heimes90540002008-05-08 14:29:10 +0000569 Py_XDECREF(chunks);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000570 Py_XDECREF(chunk);
Christian Heimes90540002008-05-08 14:29:10 +0000571 return NULL;
572}
573
574PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000575 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000576 "\n"
577 "Scan the string s for a JSON string. End is the index of the\n"
578 "character in s after the quote that started the JSON string.\n"
579 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
580 "on attempt to decode an invalid string. If strict is False then literal\n"
581 "control characters are allowed in the string.\n"
582 "\n"
583 "Returns a tuple of the decoded string and the index of the character in s\n"
584 "after the end quote."
585);
Christian Heimes90540002008-05-08 14:29:10 +0000586
587static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000588py_scanstring(PyObject* self UNUSED, PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000589{
590 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000591 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000592 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000593 Py_ssize_t next_end = -1;
594 int strict = 1;
Antoine Pitroucbb02842012-12-01 19:34:16 +0100595 if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000596 return NULL;
597 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000598 if (PyUnicode_Check(pystr)) {
599 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000600 }
601 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000602 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000603 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000604 Py_TYPE(pystr)->tp_name);
605 return NULL;
606 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000607 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000608}
609
610PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000611 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000612 "\n"
613 "Return an ASCII-only JSON representation of a Python string"
614);
Christian Heimes90540002008-05-08 14:29:10 +0000615
616static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000617py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000618{
619 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000620 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000621 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000622 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000623 rval = ascii_escape_unicode(pystr);
624 }
625 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000626 PyErr_Format(PyExc_TypeError,
627 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000628 Py_TYPE(pystr)->tp_name);
629 return NULL;
630 }
Christian Heimes90540002008-05-08 14:29:10 +0000631 return rval;
632}
633
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100634
635PyDoc_STRVAR(pydoc_encode_basestring,
636 "encode_basestring(string) -> string\n"
637 "\n"
638 "Return a JSON representation of a Python string"
639);
640
641static PyObject *
642py_encode_basestring(PyObject* self UNUSED, PyObject *pystr)
643{
644 PyObject *rval;
645 /* Return a JSON representation of a Python string */
646 /* METH_O */
647 if (PyUnicode_Check(pystr)) {
648 rval = escape_unicode(pystr);
649 }
650 else {
651 PyErr_Format(PyExc_TypeError,
652 "first argument must be a string, not %.80s",
653 Py_TYPE(pystr)->tp_name);
654 return NULL;
655 }
656 return rval;
657}
658
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000659static void
660scanner_dealloc(PyObject *self)
661{
662 /* Deallocate scanner object */
663 scanner_clear(self);
664 Py_TYPE(self)->tp_free(self);
665}
666
667static int
668scanner_traverse(PyObject *self, visitproc visit, void *arg)
669{
670 PyScannerObject *s;
671 assert(PyScanner_Check(self));
672 s = (PyScannerObject *)self;
673 Py_VISIT(s->strict);
674 Py_VISIT(s->object_hook);
675 Py_VISIT(s->object_pairs_hook);
676 Py_VISIT(s->parse_float);
677 Py_VISIT(s->parse_int);
678 Py_VISIT(s->parse_constant);
679 return 0;
680}
681
682static int
683scanner_clear(PyObject *self)
684{
685 PyScannerObject *s;
686 assert(PyScanner_Check(self));
687 s = (PyScannerObject *)self;
688 Py_CLEAR(s->strict);
689 Py_CLEAR(s->object_hook);
690 Py_CLEAR(s->object_pairs_hook);
691 Py_CLEAR(s->parse_float);
692 Py_CLEAR(s->parse_int);
693 Py_CLEAR(s->parse_constant);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000694 Py_CLEAR(s->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000695 return 0;
696}
697
698static PyObject *
699_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
700 /* Read a JSON object from PyUnicode pystr.
701 idx is the index of the first character after the opening curly brace.
702 *next_idx_ptr is a return-by-reference index to the first character after
703 the closing curly brace.
704
705 Returns a new PyObject (usually a dict, but object_hook can change that)
706 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200707 void *str;
708 int kind;
709 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000710 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000711 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000712 PyObject *key = NULL;
713 int strict = PyObject_IsTrue(s->strict);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000714 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000715 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000716
Serhiy Storchakafa494fd2015-05-30 17:45:22 +0300717 if (strict < 0)
718 return NULL;
719
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200720 if (PyUnicode_READY(pystr) == -1)
721 return NULL;
722
723 str = PyUnicode_DATA(pystr);
724 kind = PyUnicode_KIND(pystr);
725 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
726
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000727 if (has_pairs_hook)
728 rval = PyList_New(0);
729 else
730 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000731 if (rval == NULL)
732 return NULL;
733
734 /* skip whitespace after { */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200735 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000736
737 /* only loop if the object is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200738 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
739 while (1) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000740 PyObject *memokey;
741
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000742 /* read key */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200743 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200744 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000745 goto bail;
746 }
747 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
748 if (key == NULL)
749 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000750 memokey = PyDict_GetItem(s->memo, key);
751 if (memokey != NULL) {
752 Py_INCREF(memokey);
753 Py_DECREF(key);
754 key = memokey;
755 }
756 else {
757 if (PyDict_SetItem(s->memo, key, key) < 0)
758 goto bail;
759 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000760 idx = next_idx;
761
762 /* skip whitespace between key and : delimiter, read :, skip whitespace */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200763 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
764 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200765 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000766 goto bail;
767 }
768 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200769 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000770
771 /* read any JSON term */
772 val = scan_once_unicode(s, pystr, idx, &next_idx);
773 if (val == NULL)
774 goto bail;
775
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000776 if (has_pairs_hook) {
777 PyObject *item = PyTuple_Pack(2, key, val);
778 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000779 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000780 Py_CLEAR(key);
781 Py_CLEAR(val);
782 if (PyList_Append(rval, item) == -1) {
783 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000784 goto bail;
785 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000786 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000787 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000788 else {
789 if (PyDict_SetItem(rval, key, val) < 0)
790 goto bail;
791 Py_CLEAR(key);
792 Py_CLEAR(val);
793 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000794 idx = next_idx;
795
796 /* skip whitespace before } or , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200797 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000798
799 /* bail if the object is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200800 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000801 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200802 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200803 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000804 goto bail;
805 }
806 idx++;
807
808 /* skip whitespace after , delimiter */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200809 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000810 }
811 }
812
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000813 *next_idx_ptr = idx + 1;
814
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000815 if (has_pairs_hook) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000816 val = PyObject_CallFunctionObjArgs(s->object_pairs_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000817 Py_DECREF(rval);
818 return val;
819 }
820
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000821 /* if object_hook is not None: rval = object_hook(rval) */
822 if (s->object_hook != Py_None) {
823 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000824 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000825 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000826 }
827 return rval;
828bail:
829 Py_XDECREF(key);
830 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000831 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000832 return NULL;
833}
834
835static PyObject *
836_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200837 /* Read a JSON array from PyUnicode pystr.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000838 idx is the index of the first character after the opening brace.
839 *next_idx_ptr is a return-by-reference index to the first character after
840 the closing brace.
841
842 Returns a new PyList
843 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200844 void *str;
845 int kind;
846 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000847 PyObject *val = NULL;
848 PyObject *rval = PyList_New(0);
849 Py_ssize_t next_idx;
850 if (rval == NULL)
851 return NULL;
852
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200853 if (PyUnicode_READY(pystr) == -1)
854 return NULL;
855
856 str = PyUnicode_DATA(pystr);
857 kind = PyUnicode_KIND(pystr);
858 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
859
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000860 /* skip whitespace after [ */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200861 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000862
863 /* only loop if the array is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200864 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
865 while (1) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000866
867 /* read any JSON term */
868 val = scan_once_unicode(s, pystr, idx, &next_idx);
869 if (val == NULL)
870 goto bail;
871
872 if (PyList_Append(rval, val) == -1)
873 goto bail;
874
875 Py_CLEAR(val);
876 idx = next_idx;
877
878 /* skip whitespace between term and , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200879 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000880
881 /* bail if the array is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200882 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000883 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200884 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200885 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000886 goto bail;
887 }
888 idx++;
889
890 /* skip whitespace after , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200891 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000892 }
893 }
894
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200895 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
896 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200897 raise_errmsg("Expecting value", pystr, end_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000898 goto bail;
899 }
900 *next_idx_ptr = idx + 1;
901 return rval;
902bail:
903 Py_XDECREF(val);
904 Py_DECREF(rval);
905 return NULL;
906}
907
908static PyObject *
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200909_parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
910 /* Read a JSON constant.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000911 constant is the constant string that was found
912 ("NaN", "Infinity", "-Infinity").
913 idx is the index of the first character of the constant
914 *next_idx_ptr is a return-by-reference index to the first character after
915 the constant.
916
917 Returns the result of parse_constant
918 */
919 PyObject *cstr;
920 PyObject *rval;
921 /* constant is "NaN", "Infinity", or "-Infinity" */
922 cstr = PyUnicode_InternFromString(constant);
923 if (cstr == NULL)
924 return NULL;
925
926 /* rval = parse_constant(constant) */
927 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200928 idx += PyUnicode_GET_LENGTH(cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000929 Py_DECREF(cstr);
930 *next_idx_ptr = idx;
931 return rval;
932}
933
934static PyObject *
935_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
936 /* Read a JSON number from PyUnicode pystr.
937 idx is the index of the first character of the number
938 *next_idx_ptr is a return-by-reference index to the first character after
939 the number.
940
941 Returns a new PyObject representation of that number:
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200942 PyLong, or PyFloat.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000943 May return other types if parse_int or parse_float are set
944 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200945 void *str;
946 int kind;
947 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000948 Py_ssize_t idx = start;
949 int is_float = 0;
950 PyObject *rval;
Antoine Pitrouf6454512011-04-25 19:16:06 +0200951 PyObject *numstr = NULL;
952 PyObject *custom_func;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000953
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200954 if (PyUnicode_READY(pystr) == -1)
955 return NULL;
956
957 str = PyUnicode_DATA(pystr);
958 kind = PyUnicode_KIND(pystr);
959 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
960
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000961 /* read a sign if it's there, make sure it's not the end of the string */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200962 if (PyUnicode_READ(kind, str, idx) == '-') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000963 idx++;
964 if (idx > end_idx) {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200965 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000966 return NULL;
967 }
968 }
969
970 /* read as many integer digits as we find as long as it doesn't start with 0 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200971 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000972 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200973 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000974 }
975 /* if it starts with 0 we only expect one integer digit */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200976 else if (PyUnicode_READ(kind, str, idx) == '0') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000977 idx++;
978 }
979 /* no integer digits, error */
980 else {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200981 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000982 return NULL;
983 }
984
985 /* if the next char is '.' followed by a digit then read all float digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200986 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000987 is_float = 1;
988 idx += 2;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200989 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000990 }
991
992 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200993 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000994 Py_ssize_t e_start = idx;
995 idx++;
996
997 /* read an exponent sign if present */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200998 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000999
1000 /* read all digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001001 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001002
1003 /* if we got a digit, then parse as float. if not, backtrack */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001004 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001005 is_float = 1;
1006 }
1007 else {
1008 idx = e_start;
1009 }
1010 }
1011
Antoine Pitrouf6454512011-04-25 19:16:06 +02001012 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
1013 custom_func = s->parse_float;
1014 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
1015 custom_func = s->parse_int;
1016 else
1017 custom_func = NULL;
1018
1019 if (custom_func) {
1020 /* copy the section we determined to be a number */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001021 numstr = PyUnicode_FromKindAndData(kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001022 (char*)str + kind * start,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001023 idx - start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001024 if (numstr == NULL)
1025 return NULL;
1026 rval = PyObject_CallFunctionObjArgs(custom_func, numstr, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001027 }
1028 else {
Antoine Pitrouf6454512011-04-25 19:16:06 +02001029 Py_ssize_t i, n;
1030 char *buf;
1031 /* Straight conversion to ASCII, to avoid costly conversion of
1032 decimal unicode digits (which cannot appear here) */
1033 n = idx - start;
1034 numstr = PyBytes_FromStringAndSize(NULL, n);
1035 if (numstr == NULL)
1036 return NULL;
1037 buf = PyBytes_AS_STRING(numstr);
1038 for (i = 0; i < n; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001039 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001040 }
1041 if (is_float)
1042 rval = PyFloat_FromString(numstr);
1043 else
1044 rval = PyLong_FromString(buf, NULL, 10);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001045 }
1046 Py_DECREF(numstr);
1047 *next_idx_ptr = idx;
1048 return rval;
1049}
1050
1051static PyObject *
1052scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1053{
1054 /* Read one JSON term (of any kind) from PyUnicode pystr.
1055 idx is the index of the first character of the term
1056 *next_idx_ptr is a return-by-reference index to the first character after
1057 the number.
1058
1059 Returns a new PyObject representation of the term.
1060 */
Ezio Melotti362b9512011-05-07 17:58:09 +03001061 PyObject *res;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001062 void *str;
1063 int kind;
1064 Py_ssize_t length;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001065 int strict;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001066
1067 if (PyUnicode_READY(pystr) == -1)
1068 return NULL;
1069
1070 str = PyUnicode_DATA(pystr);
1071 kind = PyUnicode_KIND(pystr);
1072 length = PyUnicode_GET_LENGTH(pystr);
1073
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001074 if (idx < 0) {
Benjamin Peterson9beee042014-04-14 11:46:51 -04001075 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001076 return NULL;
1077 }
1078 if (idx >= length) {
Ezio Melotti37623ab2013-01-03 08:44:15 +02001079 raise_stop_iteration(idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001080 return NULL;
1081 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001082
1083 switch (PyUnicode_READ(kind, str, idx)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001084 case '"':
1085 /* string */
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001086 strict = PyObject_IsTrue(s->strict);
1087 if (strict < 0)
1088 return NULL;
1089 return scanstring_unicode(pystr, idx + 1, strict, next_idx_ptr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001090 case '{':
1091 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +03001092 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1093 "from a unicode string"))
1094 return NULL;
1095 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1096 Py_LeaveRecursiveCall();
1097 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001098 case '[':
1099 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +03001100 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1101 "from a unicode string"))
1102 return NULL;
1103 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1104 Py_LeaveRecursiveCall();
1105 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001106 case 'n':
1107 /* null */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001108 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001109 Py_INCREF(Py_None);
1110 *next_idx_ptr = idx + 4;
1111 return Py_None;
1112 }
1113 break;
1114 case 't':
1115 /* true */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001116 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001117 Py_INCREF(Py_True);
1118 *next_idx_ptr = idx + 4;
1119 return Py_True;
1120 }
1121 break;
1122 case 'f':
1123 /* false */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001124 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1125 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1126 PyUnicode_READ(kind, str, idx + 3) == 's' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001127 PyUnicode_READ(kind, str, idx + 4) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001128 Py_INCREF(Py_False);
1129 *next_idx_ptr = idx + 5;
1130 return Py_False;
1131 }
1132 break;
1133 case 'N':
1134 /* NaN */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001135 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001136 PyUnicode_READ(kind, str, idx + 2) == 'N') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001137 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1138 }
1139 break;
1140 case 'I':
1141 /* Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001142 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1143 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1144 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001145 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001146 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1147 PyUnicode_READ(kind, str, idx + 6) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001148 PyUnicode_READ(kind, str, idx + 7) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001149 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1150 }
1151 break;
1152 case '-':
1153 /* -Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001154 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001155 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1156 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001157 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001158 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001159 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1160 PyUnicode_READ(kind, str, idx + 7) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001161 PyUnicode_READ(kind, str, idx + 8) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001162 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1163 }
1164 break;
1165 }
1166 /* Didn't find a string, object, array, or named constant. Look for a number. */
1167 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1168}
1169
1170static PyObject *
1171scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1172{
1173 /* Python callable interface to scan_once_{str,unicode} */
1174 PyObject *pystr;
1175 PyObject *rval;
1176 Py_ssize_t idx;
1177 Py_ssize_t next_idx = -1;
1178 static char *kwlist[] = {"string", "idx", NULL};
1179 PyScannerObject *s;
1180 assert(PyScanner_Check(self));
1181 s = (PyScannerObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001182 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001183 return NULL;
1184
1185 if (PyUnicode_Check(pystr)) {
1186 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1187 }
1188 else {
1189 PyErr_Format(PyExc_TypeError,
1190 "first argument must be a string, not %.80s",
1191 Py_TYPE(pystr)->tp_name);
1192 return NULL;
1193 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001194 PyDict_Clear(s->memo);
1195 if (rval == NULL)
1196 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001197 return _build_rval_index_tuple(rval, next_idx);
1198}
1199
1200static PyObject *
1201scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1202{
1203 PyScannerObject *s;
1204 s = (PyScannerObject *)type->tp_alloc(type, 0);
1205 if (s != NULL) {
1206 s->strict = NULL;
1207 s->object_hook = NULL;
1208 s->object_pairs_hook = NULL;
1209 s->parse_float = NULL;
1210 s->parse_int = NULL;
1211 s->parse_constant = NULL;
1212 }
1213 return (PyObject *)s;
1214}
1215
1216static int
1217scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1218{
1219 /* Initialize Scanner object */
1220 PyObject *ctx;
1221 static char *kwlist[] = {"context", NULL};
1222 PyScannerObject *s;
1223
1224 assert(PyScanner_Check(self));
1225 s = (PyScannerObject *)self;
1226
1227 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1228 return -1;
1229
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001230 if (s->memo == NULL) {
1231 s->memo = PyDict_New();
1232 if (s->memo == NULL)
1233 goto bail;
1234 }
1235
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001236 /* All of these will fail "gracefully" so we don't need to verify them */
1237 s->strict = PyObject_GetAttrString(ctx, "strict");
1238 if (s->strict == NULL)
1239 goto bail;
1240 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1241 if (s->object_hook == NULL)
1242 goto bail;
1243 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1244 if (s->object_pairs_hook == NULL)
1245 goto bail;
1246 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1247 if (s->parse_float == NULL)
1248 goto bail;
1249 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1250 if (s->parse_int == NULL)
1251 goto bail;
1252 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1253 if (s->parse_constant == NULL)
1254 goto bail;
1255
1256 return 0;
1257
1258bail:
1259 Py_CLEAR(s->strict);
1260 Py_CLEAR(s->object_hook);
1261 Py_CLEAR(s->object_pairs_hook);
1262 Py_CLEAR(s->parse_float);
1263 Py_CLEAR(s->parse_int);
1264 Py_CLEAR(s->parse_constant);
1265 return -1;
1266}
1267
1268PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1269
1270static
1271PyTypeObject PyScannerType = {
1272 PyVarObject_HEAD_INIT(NULL, 0)
1273 "_json.Scanner", /* tp_name */
1274 sizeof(PyScannerObject), /* tp_basicsize */
1275 0, /* tp_itemsize */
1276 scanner_dealloc, /* tp_dealloc */
1277 0, /* tp_print */
1278 0, /* tp_getattr */
1279 0, /* tp_setattr */
1280 0, /* tp_compare */
1281 0, /* tp_repr */
1282 0, /* tp_as_number */
1283 0, /* tp_as_sequence */
1284 0, /* tp_as_mapping */
1285 0, /* tp_hash */
1286 scanner_call, /* tp_call */
1287 0, /* tp_str */
1288 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1289 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1290 0, /* tp_as_buffer */
1291 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1292 scanner_doc, /* tp_doc */
1293 scanner_traverse, /* tp_traverse */
1294 scanner_clear, /* tp_clear */
1295 0, /* tp_richcompare */
1296 0, /* tp_weaklistoffset */
1297 0, /* tp_iter */
1298 0, /* tp_iternext */
1299 0, /* tp_methods */
1300 scanner_members, /* tp_members */
1301 0, /* tp_getset */
1302 0, /* tp_base */
1303 0, /* tp_dict */
1304 0, /* tp_descr_get */
1305 0, /* tp_descr_set */
1306 0, /* tp_dictoffset */
1307 scanner_init, /* tp_init */
1308 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1309 scanner_new, /* tp_new */
1310 0,/* PyObject_GC_Del, */ /* tp_free */
1311};
1312
1313static PyObject *
1314encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1315{
1316 PyEncoderObject *s;
1317 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1318 if (s != NULL) {
1319 s->markers = NULL;
1320 s->defaultfn = NULL;
1321 s->encoder = NULL;
1322 s->indent = NULL;
1323 s->key_separator = NULL;
1324 s->item_separator = NULL;
1325 s->sort_keys = NULL;
1326 s->skipkeys = NULL;
1327 }
1328 return (PyObject *)s;
1329}
1330
1331static int
1332encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1333{
1334 /* initialize Encoder object */
1335 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1336
1337 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001338 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001339 PyObject *item_separator, *sort_keys, *skipkeys;
1340 int allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001341
1342 assert(PyEncoder_Check(self));
1343 s = (PyEncoderObject *)self;
1344
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001345 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUOOp:make_encoder", kwlist,
1346 &markers, &defaultfn, &encoder, &indent,
1347 &key_separator, &item_separator,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001348 &sort_keys, &skipkeys, &allow_nan))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001349 return -1;
1350
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001351 if (markers != Py_None && !PyDict_Check(markers)) {
1352 PyErr_Format(PyExc_TypeError,
1353 "make_encoder() argument 1 must be dict or None, "
1354 "not %.200s", Py_TYPE(markers)->tp_name);
1355 return -1;
1356 }
1357
Antoine Pitrou781eba72009-12-08 15:57:31 +00001358 s->markers = markers;
1359 s->defaultfn = defaultfn;
1360 s->encoder = encoder;
1361 s->indent = indent;
1362 s->key_separator = key_separator;
1363 s->item_separator = item_separator;
1364 s->sort_keys = sort_keys;
1365 s->skipkeys = skipkeys;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001366 s->fast_encode = NULL;
1367 if (PyCFunction_Check(s->encoder)) {
1368 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1369 if (f == (PyCFunction)py_encode_basestring_ascii ||
1370 f == (PyCFunction)py_encode_basestring) {
1371 s->fast_encode = f;
1372 }
1373 }
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001374 s->allow_nan = allow_nan;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001375
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001376 Py_INCREF(s->markers);
1377 Py_INCREF(s->defaultfn);
1378 Py_INCREF(s->encoder);
1379 Py_INCREF(s->indent);
1380 Py_INCREF(s->key_separator);
1381 Py_INCREF(s->item_separator);
1382 Py_INCREF(s->sort_keys);
1383 Py_INCREF(s->skipkeys);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001384 return 0;
1385}
1386
1387static PyObject *
1388encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1389{
1390 /* Python callable interface to encode_listencode_obj */
1391 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1392 PyObject *obj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001393 Py_ssize_t indent_level;
1394 PyEncoderObject *s;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001395 _PyAccu acc;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001396
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001397 assert(PyEncoder_Check(self));
1398 s = (PyEncoderObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001399 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1400 &obj, &indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001401 return NULL;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001402 if (_PyAccu_Init(&acc))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001403 return NULL;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001404 if (encoder_listencode_obj(s, &acc, obj, indent_level)) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001405 _PyAccu_Destroy(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001406 return NULL;
1407 }
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001408 return _PyAccu_FinishAsList(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001409}
1410
1411static PyObject *
1412_encoded_const(PyObject *obj)
1413{
1414 /* Return the JSON string representation of None, True, False */
1415 if (obj == Py_None) {
1416 static PyObject *s_null = NULL;
1417 if (s_null == NULL) {
1418 s_null = PyUnicode_InternFromString("null");
1419 }
1420 Py_INCREF(s_null);
1421 return s_null;
1422 }
1423 else if (obj == Py_True) {
1424 static PyObject *s_true = NULL;
1425 if (s_true == NULL) {
1426 s_true = PyUnicode_InternFromString("true");
1427 }
1428 Py_INCREF(s_true);
1429 return s_true;
1430 }
1431 else if (obj == Py_False) {
1432 static PyObject *s_false = NULL;
1433 if (s_false == NULL) {
1434 s_false = PyUnicode_InternFromString("false");
1435 }
1436 Py_INCREF(s_false);
1437 return s_false;
1438 }
1439 else {
1440 PyErr_SetString(PyExc_ValueError, "not a const");
1441 return NULL;
1442 }
1443}
1444
1445static PyObject *
1446encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1447{
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001448 /* Return the JSON representation of a PyFloat. */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001449 double i = PyFloat_AS_DOUBLE(obj);
1450 if (!Py_IS_FINITE(i)) {
1451 if (!s->allow_nan) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001452 PyErr_SetString(
1453 PyExc_ValueError,
1454 "Out of range float values are not JSON compliant"
1455 );
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001456 return NULL;
1457 }
1458 if (i > 0) {
1459 return PyUnicode_FromString("Infinity");
1460 }
1461 else if (i < 0) {
1462 return PyUnicode_FromString("-Infinity");
1463 }
1464 else {
1465 return PyUnicode_FromString("NaN");
1466 }
1467 }
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001468 return PyFloat_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001469}
1470
1471static PyObject *
1472encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1473{
1474 /* Return the JSON representation of a string */
1475 if (s->fast_encode)
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001476 return s->fast_encode(NULL, obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001477 else
1478 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1479}
1480
1481static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001482_steal_accumulate(_PyAccu *acc, PyObject *stolen)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001483{
1484 /* Append stolen and then decrement its reference count */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001485 int rval = _PyAccu_Accumulate(acc, stolen);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001486 Py_DECREF(stolen);
1487 return rval;
1488}
1489
1490static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001491encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001492 PyObject *obj, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001493{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001494 /* Encode Python object obj to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001495 PyObject *newobj;
1496 int rv;
1497
1498 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1499 PyObject *cstr = _encoded_const(obj);
1500 if (cstr == NULL)
1501 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001502 return _steal_accumulate(acc, cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001503 }
1504 else if (PyUnicode_Check(obj))
1505 {
1506 PyObject *encoded = encoder_encode_string(s, obj);
1507 if (encoded == NULL)
1508 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001509 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001510 }
1511 else if (PyLong_Check(obj)) {
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001512 PyObject *encoded = PyLong_Type.tp_str(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001513 if (encoded == NULL)
1514 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001515 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001516 }
1517 else if (PyFloat_Check(obj)) {
1518 PyObject *encoded = encoder_encode_float(s, obj);
1519 if (encoded == NULL)
1520 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001521 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001522 }
1523 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001524 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1525 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001526 rv = encoder_listencode_list(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001527 Py_LeaveRecursiveCall();
1528 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001529 }
1530 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001531 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1532 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001533 rv = encoder_listencode_dict(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001534 Py_LeaveRecursiveCall();
1535 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001536 }
1537 else {
1538 PyObject *ident = NULL;
1539 if (s->markers != Py_None) {
1540 int has_key;
1541 ident = PyLong_FromVoidPtr(obj);
1542 if (ident == NULL)
1543 return -1;
1544 has_key = PyDict_Contains(s->markers, ident);
1545 if (has_key) {
1546 if (has_key != -1)
1547 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1548 Py_DECREF(ident);
1549 return -1;
1550 }
1551 if (PyDict_SetItem(s->markers, ident, obj)) {
1552 Py_DECREF(ident);
1553 return -1;
1554 }
1555 }
1556 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
1557 if (newobj == NULL) {
1558 Py_XDECREF(ident);
1559 return -1;
1560 }
Ezio Melotti13672652011-05-11 01:02:56 +03001561
1562 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1563 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001564 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001565 Py_LeaveRecursiveCall();
1566
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001567 Py_DECREF(newobj);
1568 if (rv) {
1569 Py_XDECREF(ident);
1570 return -1;
1571 }
1572 if (ident != NULL) {
1573 if (PyDict_DelItem(s->markers, ident)) {
1574 Py_XDECREF(ident);
1575 return -1;
1576 }
1577 Py_XDECREF(ident);
1578 }
1579 return rv;
1580 }
1581}
1582
1583static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001584encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001585 PyObject *dct, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001586{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001587 /* Encode Python dict dct a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001588 static PyObject *open_dict = NULL;
1589 static PyObject *close_dict = NULL;
1590 static PyObject *empty_dict = NULL;
1591 PyObject *kstr = NULL;
1592 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001593 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001594 PyObject *items;
1595 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001596 int skipkeys;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001597 int sortkeys;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001598 Py_ssize_t idx;
1599
1600 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1601 open_dict = PyUnicode_InternFromString("{");
1602 close_dict = PyUnicode_InternFromString("}");
1603 empty_dict = PyUnicode_InternFromString("{}");
1604 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1605 return -1;
1606 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001607 if (Py_SIZE(dct) == 0)
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001608 return _PyAccu_Accumulate(acc, empty_dict);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001609
1610 if (s->markers != Py_None) {
1611 int has_key;
1612 ident = PyLong_FromVoidPtr(dct);
1613 if (ident == NULL)
1614 goto bail;
1615 has_key = PyDict_Contains(s->markers, ident);
1616 if (has_key) {
1617 if (has_key != -1)
1618 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1619 goto bail;
1620 }
1621 if (PyDict_SetItem(s->markers, ident, dct)) {
1622 goto bail;
1623 }
1624 }
1625
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001626 if (_PyAccu_Accumulate(acc, open_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001627 goto bail;
1628
1629 if (s->indent != Py_None) {
1630 /* TODO: DOES NOT RUN */
1631 indent_level += 1;
1632 /*
1633 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1634 separator = _item_separator + newline_indent
1635 buf += newline_indent
1636 */
1637 }
1638
Benjamin Peterson501182a2015-05-02 22:28:04 -04001639 items = PyMapping_Items(dct);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001640 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001641 goto bail;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001642 sortkeys = PyObject_IsTrue(s->sort_keys);
1643 if (sortkeys < 0 || (sortkeys && PyList_Sort(items) < 0))
Benjamin Peterson501182a2015-05-02 22:28:04 -04001644 goto bail;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001645 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001646 Py_DECREF(items);
1647 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001648 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001649 skipkeys = PyObject_IsTrue(s->skipkeys);
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001650 if (skipkeys < 0)
1651 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001652 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001653 while ((item = PyIter_Next(it)) != NULL) {
1654 PyObject *encoded, *key, *value;
1655 if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
1656 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1657 goto bail;
1658 }
1659 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001660 if (PyUnicode_Check(key)) {
1661 Py_INCREF(key);
1662 kstr = key;
1663 }
1664 else if (PyFloat_Check(key)) {
1665 kstr = encoder_encode_float(s, key);
1666 if (kstr == NULL)
1667 goto bail;
1668 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001669 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001670 /* This must come before the PyLong_Check because
1671 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001672 kstr = _encoded_const(key);
1673 if (kstr == NULL)
1674 goto bail;
1675 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001676 else if (PyLong_Check(key)) {
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001677 kstr = PyLong_Type.tp_str(key);
Ethan Furmana4998a72013-08-10 13:01:45 -07001678 if (kstr == NULL) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001679 goto bail;
Ethan Furmana4998a72013-08-10 13:01:45 -07001680 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001681 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001682 else if (skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001683 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001684 continue;
1685 }
1686 else {
1687 /* TODO: include repr of key */
Doug Hellmann1c524752010-07-21 12:29:04 +00001688 PyErr_SetString(PyExc_TypeError, "keys must be a string");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001689 goto bail;
1690 }
1691
1692 if (idx) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001693 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001694 goto bail;
1695 }
1696
1697 encoded = encoder_encode_string(s, kstr);
1698 Py_CLEAR(kstr);
1699 if (encoded == NULL)
1700 goto bail;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001701 if (_PyAccu_Accumulate(acc, encoded)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001702 Py_DECREF(encoded);
1703 goto bail;
1704 }
1705 Py_DECREF(encoded);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001706 if (_PyAccu_Accumulate(acc, s->key_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001707 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001708
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001709 value = PyTuple_GET_ITEM(item, 1);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001710 if (encoder_listencode_obj(s, acc, value, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001711 goto bail;
1712 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001713 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001714 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001715 if (PyErr_Occurred())
1716 goto bail;
1717 Py_CLEAR(it);
1718
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001719 if (ident != NULL) {
1720 if (PyDict_DelItem(s->markers, ident))
1721 goto bail;
1722 Py_CLEAR(ident);
1723 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001724 /* TODO DOES NOT RUN; dead code
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001725 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001726 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001727
1728 yield '\n' + (' ' * (_indent * _current_indent_level))
1729 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001730 if (_PyAccu_Accumulate(acc, close_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001731 goto bail;
1732 return 0;
1733
1734bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001735 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001736 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001737 Py_XDECREF(kstr);
1738 Py_XDECREF(ident);
1739 return -1;
1740}
1741
1742
1743static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001744encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001745 PyObject *seq, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001746{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001747 /* Encode Python list seq to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001748 static PyObject *open_array = NULL;
1749 static PyObject *close_array = NULL;
1750 static PyObject *empty_array = NULL;
1751 PyObject *ident = NULL;
1752 PyObject *s_fast = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001753 Py_ssize_t i;
1754
1755 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1756 open_array = PyUnicode_InternFromString("[");
1757 close_array = PyUnicode_InternFromString("]");
1758 empty_array = PyUnicode_InternFromString("[]");
1759 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1760 return -1;
1761 }
1762 ident = NULL;
1763 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1764 if (s_fast == NULL)
1765 return -1;
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001766 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001767 Py_DECREF(s_fast);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001768 return _PyAccu_Accumulate(acc, empty_array);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001769 }
1770
1771 if (s->markers != Py_None) {
1772 int has_key;
1773 ident = PyLong_FromVoidPtr(seq);
1774 if (ident == NULL)
1775 goto bail;
1776 has_key = PyDict_Contains(s->markers, ident);
1777 if (has_key) {
1778 if (has_key != -1)
1779 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1780 goto bail;
1781 }
1782 if (PyDict_SetItem(s->markers, ident, seq)) {
1783 goto bail;
1784 }
1785 }
1786
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001787 if (_PyAccu_Accumulate(acc, open_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001788 goto bail;
1789 if (s->indent != Py_None) {
1790 /* TODO: DOES NOT RUN */
1791 indent_level += 1;
1792 /*
1793 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1794 separator = _item_separator + newline_indent
1795 buf += newline_indent
1796 */
1797 }
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001798 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1799 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001800 if (i) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001801 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001802 goto bail;
1803 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001804 if (encoder_listencode_obj(s, acc, obj, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001805 goto bail;
1806 }
1807 if (ident != NULL) {
1808 if (PyDict_DelItem(s->markers, ident))
1809 goto bail;
1810 Py_CLEAR(ident);
1811 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001812
1813 /* TODO: DOES NOT RUN
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001814 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001815 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001816
1817 yield '\n' + (' ' * (_indent * _current_indent_level))
1818 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001819 if (_PyAccu_Accumulate(acc, close_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001820 goto bail;
1821 Py_DECREF(s_fast);
1822 return 0;
1823
1824bail:
1825 Py_XDECREF(ident);
1826 Py_DECREF(s_fast);
1827 return -1;
1828}
1829
1830static void
1831encoder_dealloc(PyObject *self)
1832{
1833 /* Deallocate Encoder */
1834 encoder_clear(self);
1835 Py_TYPE(self)->tp_free(self);
1836}
1837
1838static int
1839encoder_traverse(PyObject *self, visitproc visit, void *arg)
1840{
1841 PyEncoderObject *s;
1842 assert(PyEncoder_Check(self));
1843 s = (PyEncoderObject *)self;
1844 Py_VISIT(s->markers);
1845 Py_VISIT(s->defaultfn);
1846 Py_VISIT(s->encoder);
1847 Py_VISIT(s->indent);
1848 Py_VISIT(s->key_separator);
1849 Py_VISIT(s->item_separator);
1850 Py_VISIT(s->sort_keys);
1851 Py_VISIT(s->skipkeys);
1852 return 0;
1853}
1854
1855static int
1856encoder_clear(PyObject *self)
1857{
1858 /* Deallocate Encoder */
1859 PyEncoderObject *s;
1860 assert(PyEncoder_Check(self));
1861 s = (PyEncoderObject *)self;
1862 Py_CLEAR(s->markers);
1863 Py_CLEAR(s->defaultfn);
1864 Py_CLEAR(s->encoder);
1865 Py_CLEAR(s->indent);
1866 Py_CLEAR(s->key_separator);
1867 Py_CLEAR(s->item_separator);
1868 Py_CLEAR(s->sort_keys);
1869 Py_CLEAR(s->skipkeys);
1870 return 0;
1871}
1872
1873PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1874
1875static
1876PyTypeObject PyEncoderType = {
1877 PyVarObject_HEAD_INIT(NULL, 0)
1878 "_json.Encoder", /* tp_name */
1879 sizeof(PyEncoderObject), /* tp_basicsize */
1880 0, /* tp_itemsize */
1881 encoder_dealloc, /* tp_dealloc */
1882 0, /* tp_print */
1883 0, /* tp_getattr */
1884 0, /* tp_setattr */
1885 0, /* tp_compare */
1886 0, /* tp_repr */
1887 0, /* tp_as_number */
1888 0, /* tp_as_sequence */
1889 0, /* tp_as_mapping */
1890 0, /* tp_hash */
1891 encoder_call, /* tp_call */
1892 0, /* tp_str */
1893 0, /* tp_getattro */
1894 0, /* tp_setattro */
1895 0, /* tp_as_buffer */
1896 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1897 encoder_doc, /* tp_doc */
1898 encoder_traverse, /* tp_traverse */
1899 encoder_clear, /* tp_clear */
1900 0, /* tp_richcompare */
1901 0, /* tp_weaklistoffset */
1902 0, /* tp_iter */
1903 0, /* tp_iternext */
1904 0, /* tp_methods */
1905 encoder_members, /* tp_members */
1906 0, /* tp_getset */
1907 0, /* tp_base */
1908 0, /* tp_dict */
1909 0, /* tp_descr_get */
1910 0, /* tp_descr_set */
1911 0, /* tp_dictoffset */
1912 encoder_init, /* tp_init */
1913 0, /* tp_alloc */
1914 encoder_new, /* tp_new */
1915 0, /* tp_free */
1916};
1917
1918static PyMethodDef speedups_methods[] = {
1919 {"encode_basestring_ascii",
1920 (PyCFunction)py_encode_basestring_ascii,
1921 METH_O,
1922 pydoc_encode_basestring_ascii},
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001923 {"encode_basestring",
1924 (PyCFunction)py_encode_basestring,
1925 METH_O,
1926 pydoc_encode_basestring},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001927 {"scanstring",
1928 (PyCFunction)py_scanstring,
1929 METH_VARARGS,
1930 pydoc_scanstring},
Christian Heimes90540002008-05-08 14:29:10 +00001931 {NULL, NULL, 0, NULL}
1932};
1933
1934PyDoc_STRVAR(module_doc,
1935"json speedups\n");
1936
Martin v. Löwis1a214512008-06-11 05:26:20 +00001937static struct PyModuleDef jsonmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001938 PyModuleDef_HEAD_INIT,
1939 "_json",
1940 module_doc,
1941 -1,
1942 speedups_methods,
1943 NULL,
1944 NULL,
1945 NULL,
1946 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001947};
1948
Victor Stinnerf024d262015-03-17 17:48:27 +01001949PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001950PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00001951{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001952 PyObject *m = PyModule_Create(&jsonmodule);
1953 if (!m)
1954 return NULL;
1955 PyScannerType.tp_new = PyType_GenericNew;
1956 if (PyType_Ready(&PyScannerType) < 0)
1957 goto fail;
1958 PyEncoderType.tp_new = PyType_GenericNew;
1959 if (PyType_Ready(&PyEncoderType) < 0)
1960 goto fail;
1961 Py_INCREF((PyObject*)&PyScannerType);
1962 if (PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType) < 0) {
1963 Py_DECREF((PyObject*)&PyScannerType);
1964 goto fail;
1965 }
1966 Py_INCREF((PyObject*)&PyEncoderType);
1967 if (PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType) < 0) {
1968 Py_DECREF((PyObject*)&PyEncoderType);
1969 goto fail;
1970 }
1971 return m;
1972 fail:
1973 Py_DECREF(m);
1974 return NULL;
Christian Heimes90540002008-05-08 14:29:10 +00001975}