blob: faa213491b63873dabb843393fed13689f34a2fd [file] [log] [blame]
Christian Heimes90540002008-05-08 14:29:10 +00001#include "Python.h"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00002#include "structmember.h"
Antoine Pitroud0acb412012-03-22 14:42:18 +01003#include "accu.h"
4
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00005#ifdef __GNUC__
6#define UNUSED __attribute__((__unused__))
7#else
8#define UNUSED
9#endif
10
11#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
12#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
13#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
14#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
15
16static PyTypeObject PyScannerType;
17static PyTypeObject PyEncoderType;
18
19typedef struct _PyScannerObject {
20 PyObject_HEAD
21 PyObject *strict;
22 PyObject *object_hook;
23 PyObject *object_pairs_hook;
24 PyObject *parse_float;
25 PyObject *parse_int;
26 PyObject *parse_constant;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +000027 PyObject *memo;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000028} PyScannerObject;
29
30static PyMemberDef scanner_members[] = {
31 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
32 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
33 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
34 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
35 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
36 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
37 {NULL}
38};
39
40typedef struct _PyEncoderObject {
41 PyObject_HEAD
42 PyObject *markers;
43 PyObject *defaultfn;
44 PyObject *encoder;
45 PyObject *indent;
46 PyObject *key_separator;
47 PyObject *item_separator;
48 PyObject *sort_keys;
49 PyObject *skipkeys;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +010050 PyCFunction fast_encode;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000051 int allow_nan;
52} PyEncoderObject;
53
54static PyMemberDef encoder_members[] = {
55 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
56 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
57 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
58 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
59 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
60 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
61 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
62 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
63 {NULL}
64};
65
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020066static PyObject *
67join_list_unicode(PyObject *lst)
68{
69 /* return u''.join(lst) */
70 static PyObject *sep = NULL;
71 if (sep == NULL) {
72 sep = PyUnicode_FromStringAndSize("", 0);
73 if (sep == NULL)
74 return NULL;
75 }
76 return PyUnicode_Join(sep, lst);
77}
78
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020079/* Forward decls */
80
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000081static PyObject *
82ascii_escape_unicode(PyObject *pystr);
83static PyObject *
84py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
85void init_json(void);
86static PyObject *
87scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
88static PyObject *
89_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
90static PyObject *
91scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
92static int
93scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
94static void
95scanner_dealloc(PyObject *self);
96static int
97scanner_clear(PyObject *self);
98static PyObject *
99encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
100static int
101encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
102static void
103encoder_dealloc(PyObject *self);
104static int
105encoder_clear(PyObject *self);
106static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200107encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000108static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200109encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000110static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200111encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000112static PyObject *
Hirokazu Yamamotofecf5d12009-05-02 15:55:19 +0000113_encoded_const(PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000114static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200115raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000116static PyObject *
117encoder_encode_string(PyEncoderObject *s, PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000118static PyObject *
119encoder_encode_float(PyEncoderObject *s, PyObject *obj);
120
/* S_CHAR(c): true when c can be copied into a JSON string unescaped, i.e.
   it lies in the printable ASCII range ' '..'~' and is neither a backslash
   nor a double quote. The argument is parenthesized at every use so that an
   expression argument (e.g. a | b) is compared as a whole; it is still
   evaluated more than once, so it must have no side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')

/* IS_WHITESPACE(c): true for the four characters skipped as JSON
   whitespace: space, tab, line feed and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
Christian Heimes90540002008-05-08 14:29:10 +0000123
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000124static Py_ssize_t
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200125ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000126{
127 /* Escape unicode code point c to ASCII escape sequences
128 in char *output. output must have at least 12 bytes unused to
129 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000130 output[chars++] = '\\';
131 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000132 case '\\': output[chars++] = c; break;
133 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000134 case '\b': output[chars++] = 'b'; break;
135 case '\f': output[chars++] = 'f'; break;
136 case '\n': output[chars++] = 'n'; break;
137 case '\r': output[chars++] = 'r'; break;
138 case '\t': output[chars++] = 't'; break;
139 default:
Christian Heimes90540002008-05-08 14:29:10 +0000140 if (c >= 0x10000) {
141 /* UTF-16 surrogate pair */
Victor Stinner76df43d2012-10-30 01:42:39 +0100142 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000143 output[chars++] = 'u';
Victor Stinner76df43d2012-10-30 01:42:39 +0100144 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
145 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
146 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
147 output[chars++] = Py_hexdigits[(v ) & 0xf];
148 c = Py_UNICODE_LOW_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000149 output[chars++] = '\\';
150 }
Christian Heimes90540002008-05-08 14:29:10 +0000151 output[chars++] = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200152 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
153 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
154 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
155 output[chars++] = Py_hexdigits[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000156 }
157 return chars;
158}
159
160static PyObject *
161ascii_escape_unicode(PyObject *pystr)
162{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000163 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000164 Py_ssize_t i;
165 Py_ssize_t input_chars;
166 Py_ssize_t output_size;
167 Py_ssize_t chars;
168 PyObject *rval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200169 void *input;
170 unsigned char *output;
171 int kind;
Christian Heimes90540002008-05-08 14:29:10 +0000172
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200173 if (PyUnicode_READY(pystr) == -1)
174 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000175
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200176 input_chars = PyUnicode_GET_LENGTH(pystr);
177 input = PyUnicode_DATA(pystr);
178 kind = PyUnicode_KIND(pystr);
179
180 /* Compute the output size */
181 for (i = 0, output_size = 2; i < input_chars; i++) {
182 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500183 Py_ssize_t d;
184 if (S_CHAR(c)) {
185 d = 1;
186 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200187 else {
188 switch(c) {
Victor Stinnerd9c06312011-10-11 21:56:19 +0200189 case '\\': case '"': case '\b': case '\f':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200190 case '\n': case '\r': case '\t':
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500191 d = 2; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200192 default:
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500193 d = c >= 0x10000 ? 12 : 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200194 }
195 }
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500196 if (output_size > PY_SSIZE_T_MAX - d) {
197 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
198 return NULL;
199 }
200 output_size += d;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200201 }
202
203 rval = PyUnicode_New(output_size, 127);
Christian Heimes90540002008-05-08 14:29:10 +0000204 if (rval == NULL) {
205 return NULL;
206 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200207 output = PyUnicode_1BYTE_DATA(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000208 chars = 0;
209 output[chars++] = '"';
210 for (i = 0; i < input_chars; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200211 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Christian Heimes90540002008-05-08 14:29:10 +0000212 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000213 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000214 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000215 else {
216 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000217 }
Christian Heimes90540002008-05-08 14:29:10 +0000218 }
219 output[chars++] = '"';
Christian Heimesf402e922013-01-03 09:21:55 +0100220#ifdef Py_DEBUG
Victor Stinner8f825062012-04-27 13:55:39 +0200221 assert(_PyUnicode_CheckConsistency(rval, 1));
Christian Heimesf402e922013-01-03 09:21:55 +0100222#endif
Christian Heimes90540002008-05-08 14:29:10 +0000223 return rval;
224}
225
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100226static PyObject *
227escape_unicode(PyObject *pystr)
228{
229 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
230 Py_ssize_t i;
231 Py_ssize_t input_chars;
232 Py_ssize_t output_size;
233 Py_ssize_t chars;
234 PyObject *rval;
235 void *input;
236 int kind;
237 Py_UCS4 maxchar;
238
239 if (PyUnicode_READY(pystr) == -1)
240 return NULL;
241
242 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
243 input_chars = PyUnicode_GET_LENGTH(pystr);
244 input = PyUnicode_DATA(pystr);
245 kind = PyUnicode_KIND(pystr);
246
247 /* Compute the output size */
248 for (i = 0, output_size = 2; i < input_chars; i++) {
249 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500250 Py_ssize_t d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100251 switch (c) {
252 case '\\': case '"': case '\b': case '\f':
253 case '\n': case '\r': case '\t':
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500254 d = 2;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100255 break;
256 default:
257 if (c <= 0x1f)
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500258 d = 6;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100259 else
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500260 d = 1;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100261 }
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500262 if (output_size > PY_SSIZE_T_MAX - d) {
263 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
264 return NULL;
265 }
266 output_size += d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100267 }
268
269 rval = PyUnicode_New(output_size, maxchar);
270 if (rval == NULL)
271 return NULL;
272
273 kind = PyUnicode_KIND(rval);
274
275#define ENCODE_OUTPUT do { \
276 chars = 0; \
277 output[chars++] = '"'; \
278 for (i = 0; i < input_chars; i++) { \
279 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
280 switch (c) { \
281 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
282 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
283 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
284 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
285 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
286 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
287 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
288 default: \
289 if (c <= 0x1f) { \
290 output[chars++] = '\\'; \
291 output[chars++] = 'u'; \
292 output[chars++] = '0'; \
293 output[chars++] = '0'; \
294 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
295 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
296 } else { \
297 output[chars++] = c; \
298 } \
299 } \
300 } \
301 output[chars++] = '"'; \
302 } while (0)
303
304 if (kind == PyUnicode_1BYTE_KIND) {
305 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
306 ENCODE_OUTPUT;
307 } else if (kind == PyUnicode_2BYTE_KIND) {
308 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
309 ENCODE_OUTPUT;
310 } else {
311 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
312 assert(kind == PyUnicode_4BYTE_KIND);
313 ENCODE_OUTPUT;
314 }
315#undef ENCODE_OUTPUT
316
317#ifdef Py_DEBUG
318 assert(_PyUnicode_CheckConsistency(rval, 1));
319#endif
320 return rval;
321}
322
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000323static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200324raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
Christian Heimes90540002008-05-08 14:29:10 +0000325{
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200326 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
327 static PyObject *JSONDecodeError = NULL;
328 PyObject *exc;
329 if (JSONDecodeError == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000330 PyObject *decoder = PyImport_ImportModule("json.decoder");
331 if (decoder == NULL)
332 return;
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200333 JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000334 Py_DECREF(decoder);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200335 if (JSONDecodeError == NULL)
Christian Heimes90540002008-05-08 14:29:10 +0000336 return;
Christian Heimes90540002008-05-08 14:29:10 +0000337 }
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200338 exc = PyObject_CallFunction(JSONDecodeError, "(zOn)", msg, s, end);
339 if (exc) {
340 PyErr_SetObject(JSONDecodeError, exc);
341 Py_DECREF(exc);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000342 }
Christian Heimes90540002008-05-08 14:29:10 +0000343}
344
Ezio Melotti37623ab2013-01-03 08:44:15 +0200345static void
346raise_stop_iteration(Py_ssize_t idx)
347{
348 PyObject *value = PyLong_FromSsize_t(idx);
349 if (value != NULL) {
350 PyErr_SetObject(PyExc_StopIteration, value);
351 Py_DECREF(value);
352 }
353}
354
Christian Heimes90540002008-05-08 14:29:10 +0000355static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000356_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
357 /* return (rval, idx) tuple, stealing reference to rval */
358 PyObject *tpl;
359 PyObject *pyidx;
360 /*
361 steal a reference to rval, returns (rval, idx)
362 */
363 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000364 return NULL;
365 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000366 pyidx = PyLong_FromSsize_t(idx);
367 if (pyidx == NULL) {
368 Py_DECREF(rval);
369 return NULL;
370 }
371 tpl = PyTuple_New(2);
372 if (tpl == NULL) {
373 Py_DECREF(pyidx);
374 Py_DECREF(rval);
375 return NULL;
376 }
377 PyTuple_SET_ITEM(tpl, 0, rval);
378 PyTuple_SET_ITEM(tpl, 1, pyidx);
379 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000380}
381
/* APPEND_OLD_CHUNK: if a pending chunk exists, move it onto the chunks list
   (creating the list on first need) and reset chunk to NULL. Jumps to the
   enclosing function's bail label on any failure. Expects local variables
   named chunk and chunks. Written as a bare if-block, so call sites use it
   without a trailing semicolon. */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        int append_status_; \
        if (chunks == NULL) { \
            chunks = PyList_New(0); \
            if (chunks == NULL) { \
                goto bail; \
            } \
        } \
        append_status_ = PyList_Append(chunks, chunk); \
        Py_CLEAR(chunk); \
        if (append_status_) { \
            goto bail; \
        } \
    }
396
Christian Heimes90540002008-05-08 14:29:10 +0000397static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000398scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000399{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000400 /* Read the JSON string from PyUnicode pystr.
401 end is the index of the first character after the quote.
402 if strict is zero then literal control characters are allowed
403 *next_end_ptr is a return-by-reference index of the character
404 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000405
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000406 Return value is a new PyUnicode
407 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000408 PyObject *rval = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200409 Py_ssize_t len;
Christian Heimes90540002008-05-08 14:29:10 +0000410 Py_ssize_t begin = end - 1;
Brett Cannonb94767f2011-02-22 20:15:44 +0000411 Py_ssize_t next /* = begin */;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200412 const void *buf;
413 int kind;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000414 PyObject *chunks = NULL;
415 PyObject *chunk = NULL;
416
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200417 if (PyUnicode_READY(pystr) == -1)
418 return 0;
419
420 len = PyUnicode_GET_LENGTH(pystr);
421 buf = PyUnicode_DATA(pystr);
422 kind = PyUnicode_KIND(pystr);
423
Ezio Melotti37623ab2013-01-03 08:44:15 +0200424 if (end < 0 || len < end) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000425 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
426 goto bail;
427 }
Christian Heimes90540002008-05-08 14:29:10 +0000428 while (1) {
429 /* Find the end of the string or the next escape */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200430 Py_UCS4 c = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000431 for (next = end; next < len; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200432 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000433 if (c == '"' || c == '\\') {
434 break;
435 }
436 else if (strict && c <= 0x1f) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000437 raise_errmsg("Invalid control character at", pystr, next);
Christian Heimes90540002008-05-08 14:29:10 +0000438 goto bail;
439 }
440 }
441 if (!(c == '"' || c == '\\')) {
442 raise_errmsg("Unterminated string starting at", pystr, begin);
443 goto bail;
444 }
445 /* Pick up this chunk if it's not zero length */
446 if (next != end) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000447 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200448 chunk = PyUnicode_FromKindAndData(
449 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200450 (char*)buf + kind * end,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200451 next - end);
Christian Heimes90540002008-05-08 14:29:10 +0000452 if (chunk == NULL) {
453 goto bail;
454 }
Christian Heimes90540002008-05-08 14:29:10 +0000455 }
456 next++;
457 if (c == '"') {
458 end = next;
459 break;
460 }
461 if (next == len) {
462 raise_errmsg("Unterminated string starting at", pystr, begin);
463 goto bail;
464 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200465 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000466 if (c != 'u') {
467 /* Non-unicode backslash escapes */
468 end = next + 1;
469 switch (c) {
470 case '"': break;
471 case '\\': break;
472 case '/': break;
473 case 'b': c = '\b'; break;
474 case 'f': c = '\f'; break;
475 case 'n': c = '\n'; break;
476 case 'r': c = '\r'; break;
477 case 't': c = '\t'; break;
478 default: c = 0;
479 }
480 if (c == 0) {
481 raise_errmsg("Invalid \\escape", pystr, end - 2);
482 goto bail;
483 }
484 }
485 else {
486 c = 0;
487 next++;
488 end = next + 4;
489 if (end >= len) {
490 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
491 goto bail;
492 }
493 /* Decode 4 hex digits */
494 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200495 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000496 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000497 switch (digit) {
498 case '0': case '1': case '2': case '3': case '4':
499 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000500 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000501 case 'a': case 'b': case 'c': case 'd': case 'e':
502 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000503 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000504 case 'A': case 'B': case 'C': case 'D': case 'E':
505 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000506 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000507 default:
508 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
509 goto bail;
510 }
511 }
Christian Heimes90540002008-05-08 14:29:10 +0000512 /* Surrogate pair */
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200513 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
514 PyUnicode_READ(kind, buf, next++) == '\\' &&
515 PyUnicode_READ(kind, buf, next++) == 'u') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200516 Py_UCS4 c2 = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000517 end += 6;
518 /* Decode 4 hex digits */
519 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200520 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000521 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000522 switch (digit) {
523 case '0': case '1': case '2': case '3': case '4':
524 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000525 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000526 case 'a': case 'b': case 'c': case 'd': case 'e':
527 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000528 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000529 case 'A': case 'B': case 'C': case 'D': case 'E':
530 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000531 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000532 default:
533 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
534 goto bail;
535 }
536 }
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200537 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
538 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
539 else
540 end -= 6;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000541 }
Christian Heimes90540002008-05-08 14:29:10 +0000542 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000543 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200544 chunk = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &c, 1);
Christian Heimes90540002008-05-08 14:29:10 +0000545 if (chunk == NULL) {
546 goto bail;
547 }
Christian Heimes90540002008-05-08 14:29:10 +0000548 }
549
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000550 if (chunks == NULL) {
551 if (chunk != NULL)
552 rval = chunk;
553 else
554 rval = PyUnicode_FromStringAndSize("", 0);
Christian Heimes90540002008-05-08 14:29:10 +0000555 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000556 else {
557 APPEND_OLD_CHUNK
558 rval = join_list_unicode(chunks);
559 if (rval == NULL) {
560 goto bail;
561 }
562 Py_CLEAR(chunks);
563 }
564
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000565 *next_end_ptr = end;
566 return rval;
Christian Heimes90540002008-05-08 14:29:10 +0000567bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000568 *next_end_ptr = -1;
Christian Heimes90540002008-05-08 14:29:10 +0000569 Py_XDECREF(chunks);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000570 Py_XDECREF(chunk);
Christian Heimes90540002008-05-08 14:29:10 +0000571 return NULL;
572}
573
574PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000575 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000576 "\n"
577 "Scan the string s for a JSON string. End is the index of the\n"
578 "character in s after the quote that started the JSON string.\n"
579 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
580 "on attempt to decode an invalid string. If strict is False then literal\n"
581 "control characters are allowed in the string.\n"
582 "\n"
583 "Returns a tuple of the decoded string and the index of the character in s\n"
584 "after the end quote."
585);
Christian Heimes90540002008-05-08 14:29:10 +0000586
587static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000588py_scanstring(PyObject* self UNUSED, PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000589{
590 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000591 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000592 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000593 Py_ssize_t next_end = -1;
594 int strict = 1;
Antoine Pitroucbb02842012-12-01 19:34:16 +0100595 if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000596 return NULL;
597 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000598 if (PyUnicode_Check(pystr)) {
599 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000600 }
601 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000602 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000603 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000604 Py_TYPE(pystr)->tp_name);
605 return NULL;
606 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000607 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000608}
609
610PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000611 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000612 "\n"
613 "Return an ASCII-only JSON representation of a Python string"
614);
Christian Heimes90540002008-05-08 14:29:10 +0000615
616static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000617py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000618{
619 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000620 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000621 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000622 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000623 rval = ascii_escape_unicode(pystr);
624 }
625 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000626 PyErr_Format(PyExc_TypeError,
627 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000628 Py_TYPE(pystr)->tp_name);
629 return NULL;
630 }
Christian Heimes90540002008-05-08 14:29:10 +0000631 return rval;
632}
633
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100634
635PyDoc_STRVAR(pydoc_encode_basestring,
636 "encode_basestring(string) -> string\n"
637 "\n"
638 "Return a JSON representation of a Python string"
639);
640
641static PyObject *
642py_encode_basestring(PyObject* self UNUSED, PyObject *pystr)
643{
644 PyObject *rval;
645 /* Return a JSON representation of a Python string */
646 /* METH_O */
647 if (PyUnicode_Check(pystr)) {
648 rval = escape_unicode(pystr);
649 }
650 else {
651 PyErr_Format(PyExc_TypeError,
652 "first argument must be a string, not %.80s",
653 Py_TYPE(pystr)->tp_name);
654 return NULL;
655 }
656 return rval;
657}
658
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000659static void
660scanner_dealloc(PyObject *self)
661{
662 /* Deallocate scanner object */
663 scanner_clear(self);
664 Py_TYPE(self)->tp_free(self);
665}
666
667static int
668scanner_traverse(PyObject *self, visitproc visit, void *arg)
669{
670 PyScannerObject *s;
671 assert(PyScanner_Check(self));
672 s = (PyScannerObject *)self;
673 Py_VISIT(s->strict);
674 Py_VISIT(s->object_hook);
675 Py_VISIT(s->object_pairs_hook);
676 Py_VISIT(s->parse_float);
677 Py_VISIT(s->parse_int);
678 Py_VISIT(s->parse_constant);
679 return 0;
680}
681
682static int
683scanner_clear(PyObject *self)
684{
685 PyScannerObject *s;
686 assert(PyScanner_Check(self));
687 s = (PyScannerObject *)self;
688 Py_CLEAR(s->strict);
689 Py_CLEAR(s->object_hook);
690 Py_CLEAR(s->object_pairs_hook);
691 Py_CLEAR(s->parse_float);
692 Py_CLEAR(s->parse_int);
693 Py_CLEAR(s->parse_constant);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000694 Py_CLEAR(s->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000695 return 0;
696}
697
698static PyObject *
699_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
700 /* Read a JSON object from PyUnicode pystr.
701 idx is the index of the first character after the opening curly brace.
702 *next_idx_ptr is a return-by-reference index to the first character after
703 the closing curly brace.
704
705 Returns a new PyObject (usually a dict, but object_hook can change that)
706 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200707 void *str;
708 int kind;
709 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000710 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000711 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000712 PyObject *key = NULL;
713 int strict = PyObject_IsTrue(s->strict);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000714 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000715 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000716
Serhiy Storchakafa494fd2015-05-30 17:45:22 +0300717 if (strict < 0)
718 return NULL;
719
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200720 if (PyUnicode_READY(pystr) == -1)
721 return NULL;
722
723 str = PyUnicode_DATA(pystr);
724 kind = PyUnicode_KIND(pystr);
725 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
726
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000727 if (has_pairs_hook)
728 rval = PyList_New(0);
729 else
730 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000731 if (rval == NULL)
732 return NULL;
733
734 /* skip whitespace after { */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200735 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000736
737 /* only loop if the object is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200738 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
739 while (1) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000740 PyObject *memokey;
741
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000742 /* read key */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200743 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200744 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000745 goto bail;
746 }
747 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
748 if (key == NULL)
749 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000750 memokey = PyDict_GetItem(s->memo, key);
751 if (memokey != NULL) {
752 Py_INCREF(memokey);
753 Py_DECREF(key);
754 key = memokey;
755 }
756 else {
757 if (PyDict_SetItem(s->memo, key, key) < 0)
758 goto bail;
759 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000760 idx = next_idx;
761
762 /* skip whitespace between key and : delimiter, read :, skip whitespace */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200763 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
764 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200765 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000766 goto bail;
767 }
768 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200769 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000770
771 /* read any JSON term */
772 val = scan_once_unicode(s, pystr, idx, &next_idx);
773 if (val == NULL)
774 goto bail;
775
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000776 if (has_pairs_hook) {
777 PyObject *item = PyTuple_Pack(2, key, val);
778 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000779 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000780 Py_CLEAR(key);
781 Py_CLEAR(val);
782 if (PyList_Append(rval, item) == -1) {
783 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000784 goto bail;
785 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000786 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000787 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000788 else {
789 if (PyDict_SetItem(rval, key, val) < 0)
790 goto bail;
791 Py_CLEAR(key);
792 Py_CLEAR(val);
793 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000794 idx = next_idx;
795
796 /* skip whitespace before } or , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200797 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000798
799 /* bail if the object is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200800 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000801 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200802 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200803 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000804 goto bail;
805 }
806 idx++;
807
808 /* skip whitespace after , delimiter */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200809 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000810 }
811 }
812
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000813 *next_idx_ptr = idx + 1;
814
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000815 if (has_pairs_hook) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000816 val = PyObject_CallFunctionObjArgs(s->object_pairs_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000817 Py_DECREF(rval);
818 return val;
819 }
820
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000821 /* if object_hook is not None: rval = object_hook(rval) */
822 if (s->object_hook != Py_None) {
823 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000824 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000825 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000826 }
827 return rval;
828bail:
829 Py_XDECREF(key);
830 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000831 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000832 return NULL;
833}
834
835static PyObject *
836_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200837 /* Read a JSON array from PyUnicode pystr.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000838 idx is the index of the first character after the opening brace.
839 *next_idx_ptr is a return-by-reference index to the first character after
840 the closing brace.
841
842 Returns a new PyList
843 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200844 void *str;
845 int kind;
846 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000847 PyObject *val = NULL;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200848 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000849 Py_ssize_t next_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000850
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200851 if (PyUnicode_READY(pystr) == -1)
852 return NULL;
853
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200854 rval = PyList_New(0);
855 if (rval == NULL)
856 return NULL;
857
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200858 str = PyUnicode_DATA(pystr);
859 kind = PyUnicode_KIND(pystr);
860 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
861
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000862 /* skip whitespace after [ */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200863 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000864
865 /* only loop if the array is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200866 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
867 while (1) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000868
869 /* read any JSON term */
870 val = scan_once_unicode(s, pystr, idx, &next_idx);
871 if (val == NULL)
872 goto bail;
873
874 if (PyList_Append(rval, val) == -1)
875 goto bail;
876
877 Py_CLEAR(val);
878 idx = next_idx;
879
880 /* skip whitespace between term and , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200881 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000882
883 /* bail if the array is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200884 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000885 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200886 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200887 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000888 goto bail;
889 }
890 idx++;
891
892 /* skip whitespace after , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200893 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000894 }
895 }
896
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200897 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
898 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200899 raise_errmsg("Expecting value", pystr, end_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000900 goto bail;
901 }
902 *next_idx_ptr = idx + 1;
903 return rval;
904bail:
905 Py_XDECREF(val);
906 Py_DECREF(rval);
907 return NULL;
908}
909
910static PyObject *
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200911_parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
912 /* Read a JSON constant.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000913 constant is the constant string that was found
914 ("NaN", "Infinity", "-Infinity").
915 idx is the index of the first character of the constant
916 *next_idx_ptr is a return-by-reference index to the first character after
917 the constant.
918
919 Returns the result of parse_constant
920 */
921 PyObject *cstr;
922 PyObject *rval;
923 /* constant is "NaN", "Infinity", or "-Infinity" */
924 cstr = PyUnicode_InternFromString(constant);
925 if (cstr == NULL)
926 return NULL;
927
928 /* rval = parse_constant(constant) */
929 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200930 idx += PyUnicode_GET_LENGTH(cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000931 Py_DECREF(cstr);
932 *next_idx_ptr = idx;
933 return rval;
934}
935
936static PyObject *
937_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
938 /* Read a JSON number from PyUnicode pystr.
939 idx is the index of the first character of the number
940 *next_idx_ptr is a return-by-reference index to the first character after
941 the number.
942
943 Returns a new PyObject representation of that number:
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200944 PyLong, or PyFloat.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000945 May return other types if parse_int or parse_float are set
946 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200947 void *str;
948 int kind;
949 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000950 Py_ssize_t idx = start;
951 int is_float = 0;
952 PyObject *rval;
Antoine Pitrouf6454512011-04-25 19:16:06 +0200953 PyObject *numstr = NULL;
954 PyObject *custom_func;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000955
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200956 if (PyUnicode_READY(pystr) == -1)
957 return NULL;
958
959 str = PyUnicode_DATA(pystr);
960 kind = PyUnicode_KIND(pystr);
961 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
962
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000963 /* read a sign if it's there, make sure it's not the end of the string */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200964 if (PyUnicode_READ(kind, str, idx) == '-') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000965 idx++;
966 if (idx > end_idx) {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200967 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000968 return NULL;
969 }
970 }
971
972 /* read as many integer digits as we find as long as it doesn't start with 0 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200973 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000974 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200975 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000976 }
977 /* if it starts with 0 we only expect one integer digit */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200978 else if (PyUnicode_READ(kind, str, idx) == '0') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000979 idx++;
980 }
981 /* no integer digits, error */
982 else {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200983 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000984 return NULL;
985 }
986
987 /* if the next char is '.' followed by a digit then read all float digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200988 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000989 is_float = 1;
990 idx += 2;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200991 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000992 }
993
994 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200995 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000996 Py_ssize_t e_start = idx;
997 idx++;
998
999 /* read an exponent sign if present */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001000 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001001
1002 /* read all digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001003 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001004
1005 /* if we got a digit, then parse as float. if not, backtrack */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001006 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001007 is_float = 1;
1008 }
1009 else {
1010 idx = e_start;
1011 }
1012 }
1013
Antoine Pitrouf6454512011-04-25 19:16:06 +02001014 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
1015 custom_func = s->parse_float;
1016 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
1017 custom_func = s->parse_int;
1018 else
1019 custom_func = NULL;
1020
1021 if (custom_func) {
1022 /* copy the section we determined to be a number */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001023 numstr = PyUnicode_FromKindAndData(kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001024 (char*)str + kind * start,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001025 idx - start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001026 if (numstr == NULL)
1027 return NULL;
1028 rval = PyObject_CallFunctionObjArgs(custom_func, numstr, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001029 }
1030 else {
Antoine Pitrouf6454512011-04-25 19:16:06 +02001031 Py_ssize_t i, n;
1032 char *buf;
1033 /* Straight conversion to ASCII, to avoid costly conversion of
1034 decimal unicode digits (which cannot appear here) */
1035 n = idx - start;
1036 numstr = PyBytes_FromStringAndSize(NULL, n);
1037 if (numstr == NULL)
1038 return NULL;
1039 buf = PyBytes_AS_STRING(numstr);
1040 for (i = 0; i < n; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001041 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001042 }
1043 if (is_float)
1044 rval = PyFloat_FromString(numstr);
1045 else
1046 rval = PyLong_FromString(buf, NULL, 10);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001047 }
1048 Py_DECREF(numstr);
1049 *next_idx_ptr = idx;
1050 return rval;
1051}
1052
1053static PyObject *
1054scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1055{
1056 /* Read one JSON term (of any kind) from PyUnicode pystr.
1057 idx is the index of the first character of the term
1058 *next_idx_ptr is a return-by-reference index to the first character after
1059 the number.
1060
1061 Returns a new PyObject representation of the term.
1062 */
Ezio Melotti362b9512011-05-07 17:58:09 +03001063 PyObject *res;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001064 void *str;
1065 int kind;
1066 Py_ssize_t length;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001067 int strict;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001068
1069 if (PyUnicode_READY(pystr) == -1)
1070 return NULL;
1071
1072 str = PyUnicode_DATA(pystr);
1073 kind = PyUnicode_KIND(pystr);
1074 length = PyUnicode_GET_LENGTH(pystr);
1075
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001076 if (idx < 0) {
Benjamin Peterson9beee042014-04-14 11:46:51 -04001077 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001078 return NULL;
1079 }
1080 if (idx >= length) {
Ezio Melotti37623ab2013-01-03 08:44:15 +02001081 raise_stop_iteration(idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001082 return NULL;
1083 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001084
1085 switch (PyUnicode_READ(kind, str, idx)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001086 case '"':
1087 /* string */
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001088 strict = PyObject_IsTrue(s->strict);
1089 if (strict < 0)
1090 return NULL;
1091 return scanstring_unicode(pystr, idx + 1, strict, next_idx_ptr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001092 case '{':
1093 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +03001094 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1095 "from a unicode string"))
1096 return NULL;
1097 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1098 Py_LeaveRecursiveCall();
1099 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001100 case '[':
1101 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +03001102 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1103 "from a unicode string"))
1104 return NULL;
1105 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1106 Py_LeaveRecursiveCall();
1107 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001108 case 'n':
1109 /* null */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001110 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001111 Py_INCREF(Py_None);
1112 *next_idx_ptr = idx + 4;
1113 return Py_None;
1114 }
1115 break;
1116 case 't':
1117 /* true */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001118 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001119 Py_INCREF(Py_True);
1120 *next_idx_ptr = idx + 4;
1121 return Py_True;
1122 }
1123 break;
1124 case 'f':
1125 /* false */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001126 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1127 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1128 PyUnicode_READ(kind, str, idx + 3) == 's' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001129 PyUnicode_READ(kind, str, idx + 4) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001130 Py_INCREF(Py_False);
1131 *next_idx_ptr = idx + 5;
1132 return Py_False;
1133 }
1134 break;
1135 case 'N':
1136 /* NaN */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001137 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001138 PyUnicode_READ(kind, str, idx + 2) == 'N') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001139 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1140 }
1141 break;
1142 case 'I':
1143 /* Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001144 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1145 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1146 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001147 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001148 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1149 PyUnicode_READ(kind, str, idx + 6) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001150 PyUnicode_READ(kind, str, idx + 7) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001151 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1152 }
1153 break;
1154 case '-':
1155 /* -Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001156 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001157 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1158 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001159 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001160 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001161 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1162 PyUnicode_READ(kind, str, idx + 7) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001163 PyUnicode_READ(kind, str, idx + 8) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001164 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1165 }
1166 break;
1167 }
1168 /* Didn't find a string, object, array, or named constant. Look for a number. */
1169 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1170}
1171
1172static PyObject *
1173scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1174{
1175 /* Python callable interface to scan_once_{str,unicode} */
1176 PyObject *pystr;
1177 PyObject *rval;
1178 Py_ssize_t idx;
1179 Py_ssize_t next_idx = -1;
1180 static char *kwlist[] = {"string", "idx", NULL};
1181 PyScannerObject *s;
1182 assert(PyScanner_Check(self));
1183 s = (PyScannerObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001184 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001185 return NULL;
1186
1187 if (PyUnicode_Check(pystr)) {
1188 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1189 }
1190 else {
1191 PyErr_Format(PyExc_TypeError,
1192 "first argument must be a string, not %.80s",
1193 Py_TYPE(pystr)->tp_name);
1194 return NULL;
1195 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001196 PyDict_Clear(s->memo);
1197 if (rval == NULL)
1198 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001199 return _build_rval_index_tuple(rval, next_idx);
1200}
1201
1202static PyObject *
1203scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1204{
1205 PyScannerObject *s;
1206 s = (PyScannerObject *)type->tp_alloc(type, 0);
1207 if (s != NULL) {
1208 s->strict = NULL;
1209 s->object_hook = NULL;
1210 s->object_pairs_hook = NULL;
1211 s->parse_float = NULL;
1212 s->parse_int = NULL;
1213 s->parse_constant = NULL;
1214 }
1215 return (PyObject *)s;
1216}
1217
1218static int
1219scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1220{
1221 /* Initialize Scanner object */
1222 PyObject *ctx;
1223 static char *kwlist[] = {"context", NULL};
1224 PyScannerObject *s;
1225
1226 assert(PyScanner_Check(self));
1227 s = (PyScannerObject *)self;
1228
1229 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1230 return -1;
1231
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001232 if (s->memo == NULL) {
1233 s->memo = PyDict_New();
1234 if (s->memo == NULL)
1235 goto bail;
1236 }
1237
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001238 /* All of these will fail "gracefully" so we don't need to verify them */
1239 s->strict = PyObject_GetAttrString(ctx, "strict");
1240 if (s->strict == NULL)
1241 goto bail;
1242 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1243 if (s->object_hook == NULL)
1244 goto bail;
1245 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1246 if (s->object_pairs_hook == NULL)
1247 goto bail;
1248 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1249 if (s->parse_float == NULL)
1250 goto bail;
1251 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1252 if (s->parse_int == NULL)
1253 goto bail;
1254 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1255 if (s->parse_constant == NULL)
1256 goto bail;
1257
1258 return 0;
1259
1260bail:
1261 Py_CLEAR(s->strict);
1262 Py_CLEAR(s->object_hook);
1263 Py_CLEAR(s->object_pairs_hook);
1264 Py_CLEAR(s->parse_float);
1265 Py_CLEAR(s->parse_int);
1266 Py_CLEAR(s->parse_constant);
1267 return -1;
1268}
1269
1270PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1271
1272static
1273PyTypeObject PyScannerType = {
1274 PyVarObject_HEAD_INIT(NULL, 0)
1275 "_json.Scanner", /* tp_name */
1276 sizeof(PyScannerObject), /* tp_basicsize */
1277 0, /* tp_itemsize */
1278 scanner_dealloc, /* tp_dealloc */
1279 0, /* tp_print */
1280 0, /* tp_getattr */
1281 0, /* tp_setattr */
1282 0, /* tp_compare */
1283 0, /* tp_repr */
1284 0, /* tp_as_number */
1285 0, /* tp_as_sequence */
1286 0, /* tp_as_mapping */
1287 0, /* tp_hash */
1288 scanner_call, /* tp_call */
1289 0, /* tp_str */
1290 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1291 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1292 0, /* tp_as_buffer */
1293 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1294 scanner_doc, /* tp_doc */
1295 scanner_traverse, /* tp_traverse */
1296 scanner_clear, /* tp_clear */
1297 0, /* tp_richcompare */
1298 0, /* tp_weaklistoffset */
1299 0, /* tp_iter */
1300 0, /* tp_iternext */
1301 0, /* tp_methods */
1302 scanner_members, /* tp_members */
1303 0, /* tp_getset */
1304 0, /* tp_base */
1305 0, /* tp_dict */
1306 0, /* tp_descr_get */
1307 0, /* tp_descr_set */
1308 0, /* tp_dictoffset */
1309 scanner_init, /* tp_init */
1310 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1311 scanner_new, /* tp_new */
1312 0,/* PyObject_GC_Del, */ /* tp_free */
1313};
1314
1315static PyObject *
1316encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1317{
1318 PyEncoderObject *s;
1319 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1320 if (s != NULL) {
1321 s->markers = NULL;
1322 s->defaultfn = NULL;
1323 s->encoder = NULL;
1324 s->indent = NULL;
1325 s->key_separator = NULL;
1326 s->item_separator = NULL;
1327 s->sort_keys = NULL;
1328 s->skipkeys = NULL;
1329 }
1330 return (PyObject *)s;
1331}
1332
1333static int
1334encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1335{
1336 /* initialize Encoder object */
1337 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1338
1339 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001340 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001341 PyObject *item_separator, *sort_keys, *skipkeys;
1342 int allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001343
1344 assert(PyEncoder_Check(self));
1345 s = (PyEncoderObject *)self;
1346
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001347 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUOOp:make_encoder", kwlist,
1348 &markers, &defaultfn, &encoder, &indent,
1349 &key_separator, &item_separator,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001350 &sort_keys, &skipkeys, &allow_nan))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001351 return -1;
1352
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001353 if (markers != Py_None && !PyDict_Check(markers)) {
1354 PyErr_Format(PyExc_TypeError,
1355 "make_encoder() argument 1 must be dict or None, "
1356 "not %.200s", Py_TYPE(markers)->tp_name);
1357 return -1;
1358 }
1359
Antoine Pitrou781eba72009-12-08 15:57:31 +00001360 s->markers = markers;
1361 s->defaultfn = defaultfn;
1362 s->encoder = encoder;
1363 s->indent = indent;
1364 s->key_separator = key_separator;
1365 s->item_separator = item_separator;
1366 s->sort_keys = sort_keys;
1367 s->skipkeys = skipkeys;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001368 s->fast_encode = NULL;
1369 if (PyCFunction_Check(s->encoder)) {
1370 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1371 if (f == (PyCFunction)py_encode_basestring_ascii ||
1372 f == (PyCFunction)py_encode_basestring) {
1373 s->fast_encode = f;
1374 }
1375 }
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001376 s->allow_nan = allow_nan;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001377
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001378 Py_INCREF(s->markers);
1379 Py_INCREF(s->defaultfn);
1380 Py_INCREF(s->encoder);
1381 Py_INCREF(s->indent);
1382 Py_INCREF(s->key_separator);
1383 Py_INCREF(s->item_separator);
1384 Py_INCREF(s->sort_keys);
1385 Py_INCREF(s->skipkeys);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001386 return 0;
1387}
1388
1389static PyObject *
1390encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1391{
1392 /* Python callable interface to encode_listencode_obj */
1393 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1394 PyObject *obj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001395 Py_ssize_t indent_level;
1396 PyEncoderObject *s;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001397 _PyAccu acc;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001398
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001399 assert(PyEncoder_Check(self));
1400 s = (PyEncoderObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001401 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1402 &obj, &indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001403 return NULL;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001404 if (_PyAccu_Init(&acc))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001405 return NULL;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001406 if (encoder_listencode_obj(s, &acc, obj, indent_level)) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001407 _PyAccu_Destroy(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001408 return NULL;
1409 }
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001410 return _PyAccu_FinishAsList(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001411}
1412
1413static PyObject *
1414_encoded_const(PyObject *obj)
1415{
1416 /* Return the JSON string representation of None, True, False */
1417 if (obj == Py_None) {
1418 static PyObject *s_null = NULL;
1419 if (s_null == NULL) {
1420 s_null = PyUnicode_InternFromString("null");
1421 }
1422 Py_INCREF(s_null);
1423 return s_null;
1424 }
1425 else if (obj == Py_True) {
1426 static PyObject *s_true = NULL;
1427 if (s_true == NULL) {
1428 s_true = PyUnicode_InternFromString("true");
1429 }
1430 Py_INCREF(s_true);
1431 return s_true;
1432 }
1433 else if (obj == Py_False) {
1434 static PyObject *s_false = NULL;
1435 if (s_false == NULL) {
1436 s_false = PyUnicode_InternFromString("false");
1437 }
1438 Py_INCREF(s_false);
1439 return s_false;
1440 }
1441 else {
1442 PyErr_SetString(PyExc_ValueError, "not a const");
1443 return NULL;
1444 }
1445}
1446
1447static PyObject *
1448encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1449{
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001450 /* Return the JSON representation of a PyFloat. */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001451 double i = PyFloat_AS_DOUBLE(obj);
1452 if (!Py_IS_FINITE(i)) {
1453 if (!s->allow_nan) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001454 PyErr_SetString(
1455 PyExc_ValueError,
1456 "Out of range float values are not JSON compliant"
1457 );
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001458 return NULL;
1459 }
1460 if (i > 0) {
1461 return PyUnicode_FromString("Infinity");
1462 }
1463 else if (i < 0) {
1464 return PyUnicode_FromString("-Infinity");
1465 }
1466 else {
1467 return PyUnicode_FromString("NaN");
1468 }
1469 }
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001470 return PyFloat_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001471}
1472
1473static PyObject *
1474encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1475{
1476 /* Return the JSON representation of a string */
1477 if (s->fast_encode)
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001478 return s->fast_encode(NULL, obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001479 else
1480 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1481}
1482
1483static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001484_steal_accumulate(_PyAccu *acc, PyObject *stolen)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001485{
1486 /* Append stolen and then decrement its reference count */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001487 int rval = _PyAccu_Accumulate(acc, stolen);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001488 Py_DECREF(stolen);
1489 return rval;
1490}
1491
1492static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001493encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001494 PyObject *obj, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001495{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001496 /* Encode Python object obj to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001497 PyObject *newobj;
1498 int rv;
1499
1500 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1501 PyObject *cstr = _encoded_const(obj);
1502 if (cstr == NULL)
1503 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001504 return _steal_accumulate(acc, cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001505 }
1506 else if (PyUnicode_Check(obj))
1507 {
1508 PyObject *encoded = encoder_encode_string(s, obj);
1509 if (encoded == NULL)
1510 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001511 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001512 }
1513 else if (PyLong_Check(obj)) {
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001514 PyObject *encoded = PyLong_Type.tp_str(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001515 if (encoded == NULL)
1516 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001517 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001518 }
1519 else if (PyFloat_Check(obj)) {
1520 PyObject *encoded = encoder_encode_float(s, obj);
1521 if (encoded == NULL)
1522 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001523 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001524 }
1525 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001526 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1527 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001528 rv = encoder_listencode_list(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001529 Py_LeaveRecursiveCall();
1530 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001531 }
1532 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001533 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1534 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001535 rv = encoder_listencode_dict(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001536 Py_LeaveRecursiveCall();
1537 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001538 }
1539 else {
1540 PyObject *ident = NULL;
1541 if (s->markers != Py_None) {
1542 int has_key;
1543 ident = PyLong_FromVoidPtr(obj);
1544 if (ident == NULL)
1545 return -1;
1546 has_key = PyDict_Contains(s->markers, ident);
1547 if (has_key) {
1548 if (has_key != -1)
1549 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1550 Py_DECREF(ident);
1551 return -1;
1552 }
1553 if (PyDict_SetItem(s->markers, ident, obj)) {
1554 Py_DECREF(ident);
1555 return -1;
1556 }
1557 }
1558 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
1559 if (newobj == NULL) {
1560 Py_XDECREF(ident);
1561 return -1;
1562 }
Ezio Melotti13672652011-05-11 01:02:56 +03001563
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001564 if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
1565 Py_DECREF(newobj);
1566 Py_XDECREF(ident);
Ezio Melotti13672652011-05-11 01:02:56 +03001567 return -1;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001568 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001569 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001570 Py_LeaveRecursiveCall();
1571
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001572 Py_DECREF(newobj);
1573 if (rv) {
1574 Py_XDECREF(ident);
1575 return -1;
1576 }
1577 if (ident != NULL) {
1578 if (PyDict_DelItem(s->markers, ident)) {
1579 Py_XDECREF(ident);
1580 return -1;
1581 }
1582 Py_XDECREF(ident);
1583 }
1584 return rv;
1585 }
1586}
1587
1588static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001589encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001590 PyObject *dct, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001591{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001592 /* Encode Python dict dct a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001593 static PyObject *open_dict = NULL;
1594 static PyObject *close_dict = NULL;
1595 static PyObject *empty_dict = NULL;
1596 PyObject *kstr = NULL;
1597 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001598 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001599 PyObject *items;
1600 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001601 int skipkeys;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001602 int sortkeys;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001603 Py_ssize_t idx;
1604
1605 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1606 open_dict = PyUnicode_InternFromString("{");
1607 close_dict = PyUnicode_InternFromString("}");
1608 empty_dict = PyUnicode_InternFromString("{}");
1609 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1610 return -1;
1611 }
Serhiy Storchaka3023ebb2017-01-13 08:34:34 +02001612 if (PyDict_Size(dct) == 0) /* Fast path */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001613 return _PyAccu_Accumulate(acc, empty_dict);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001614
1615 if (s->markers != Py_None) {
1616 int has_key;
1617 ident = PyLong_FromVoidPtr(dct);
1618 if (ident == NULL)
1619 goto bail;
1620 has_key = PyDict_Contains(s->markers, ident);
1621 if (has_key) {
1622 if (has_key != -1)
1623 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1624 goto bail;
1625 }
1626 if (PyDict_SetItem(s->markers, ident, dct)) {
1627 goto bail;
1628 }
1629 }
1630
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001631 if (_PyAccu_Accumulate(acc, open_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001632 goto bail;
1633
1634 if (s->indent != Py_None) {
1635 /* TODO: DOES NOT RUN */
1636 indent_level += 1;
1637 /*
1638 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1639 separator = _item_separator + newline_indent
1640 buf += newline_indent
1641 */
1642 }
1643
Benjamin Peterson501182a2015-05-02 22:28:04 -04001644 items = PyMapping_Items(dct);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001645 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001646 goto bail;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001647 sortkeys = PyObject_IsTrue(s->sort_keys);
1648 if (sortkeys < 0 || (sortkeys && PyList_Sort(items) < 0))
Benjamin Peterson501182a2015-05-02 22:28:04 -04001649 goto bail;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001650 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001651 Py_DECREF(items);
1652 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001653 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001654 skipkeys = PyObject_IsTrue(s->skipkeys);
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001655 if (skipkeys < 0)
1656 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001657 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001658 while ((item = PyIter_Next(it)) != NULL) {
1659 PyObject *encoded, *key, *value;
1660 if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
1661 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1662 goto bail;
1663 }
1664 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001665 if (PyUnicode_Check(key)) {
1666 Py_INCREF(key);
1667 kstr = key;
1668 }
1669 else if (PyFloat_Check(key)) {
1670 kstr = encoder_encode_float(s, key);
1671 if (kstr == NULL)
1672 goto bail;
1673 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001674 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001675 /* This must come before the PyLong_Check because
1676 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001677 kstr = _encoded_const(key);
1678 if (kstr == NULL)
1679 goto bail;
1680 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001681 else if (PyLong_Check(key)) {
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001682 kstr = PyLong_Type.tp_str(key);
Ethan Furmana4998a72013-08-10 13:01:45 -07001683 if (kstr == NULL) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001684 goto bail;
Ethan Furmana4998a72013-08-10 13:01:45 -07001685 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001686 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001687 else if (skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001688 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001689 continue;
1690 }
1691 else {
1692 /* TODO: include repr of key */
Doug Hellmann1c524752010-07-21 12:29:04 +00001693 PyErr_SetString(PyExc_TypeError, "keys must be a string");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001694 goto bail;
1695 }
1696
1697 if (idx) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001698 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001699 goto bail;
1700 }
1701
1702 encoded = encoder_encode_string(s, kstr);
1703 Py_CLEAR(kstr);
1704 if (encoded == NULL)
1705 goto bail;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001706 if (_PyAccu_Accumulate(acc, encoded)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001707 Py_DECREF(encoded);
1708 goto bail;
1709 }
1710 Py_DECREF(encoded);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001711 if (_PyAccu_Accumulate(acc, s->key_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001712 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001713
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001714 value = PyTuple_GET_ITEM(item, 1);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001715 if (encoder_listencode_obj(s, acc, value, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001716 goto bail;
1717 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001718 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001719 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001720 if (PyErr_Occurred())
1721 goto bail;
1722 Py_CLEAR(it);
1723
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001724 if (ident != NULL) {
1725 if (PyDict_DelItem(s->markers, ident))
1726 goto bail;
1727 Py_CLEAR(ident);
1728 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001729 /* TODO DOES NOT RUN; dead code
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001730 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001731 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001732
1733 yield '\n' + (' ' * (_indent * _current_indent_level))
1734 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001735 if (_PyAccu_Accumulate(acc, close_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001736 goto bail;
1737 return 0;
1738
1739bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001740 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001741 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001742 Py_XDECREF(kstr);
1743 Py_XDECREF(ident);
1744 return -1;
1745}
1746
1747
1748static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001749encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001750 PyObject *seq, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001751{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001752 /* Encode Python list seq to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001753 static PyObject *open_array = NULL;
1754 static PyObject *close_array = NULL;
1755 static PyObject *empty_array = NULL;
1756 PyObject *ident = NULL;
1757 PyObject *s_fast = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001758 Py_ssize_t i;
1759
1760 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1761 open_array = PyUnicode_InternFromString("[");
1762 close_array = PyUnicode_InternFromString("]");
1763 empty_array = PyUnicode_InternFromString("[]");
1764 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1765 return -1;
1766 }
1767 ident = NULL;
1768 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1769 if (s_fast == NULL)
1770 return -1;
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001771 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001772 Py_DECREF(s_fast);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001773 return _PyAccu_Accumulate(acc, empty_array);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001774 }
1775
1776 if (s->markers != Py_None) {
1777 int has_key;
1778 ident = PyLong_FromVoidPtr(seq);
1779 if (ident == NULL)
1780 goto bail;
1781 has_key = PyDict_Contains(s->markers, ident);
1782 if (has_key) {
1783 if (has_key != -1)
1784 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1785 goto bail;
1786 }
1787 if (PyDict_SetItem(s->markers, ident, seq)) {
1788 goto bail;
1789 }
1790 }
1791
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001792 if (_PyAccu_Accumulate(acc, open_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001793 goto bail;
1794 if (s->indent != Py_None) {
1795 /* TODO: DOES NOT RUN */
1796 indent_level += 1;
1797 /*
1798 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1799 separator = _item_separator + newline_indent
1800 buf += newline_indent
1801 */
1802 }
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001803 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1804 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001805 if (i) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001806 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001807 goto bail;
1808 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001809 if (encoder_listencode_obj(s, acc, obj, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001810 goto bail;
1811 }
1812 if (ident != NULL) {
1813 if (PyDict_DelItem(s->markers, ident))
1814 goto bail;
1815 Py_CLEAR(ident);
1816 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001817
1818 /* TODO: DOES NOT RUN
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001819 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001820 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001821
1822 yield '\n' + (' ' * (_indent * _current_indent_level))
1823 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001824 if (_PyAccu_Accumulate(acc, close_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001825 goto bail;
1826 Py_DECREF(s_fast);
1827 return 0;
1828
1829bail:
1830 Py_XDECREF(ident);
1831 Py_DECREF(s_fast);
1832 return -1;
1833}
1834
1835static void
1836encoder_dealloc(PyObject *self)
1837{
1838 /* Deallocate Encoder */
1839 encoder_clear(self);
1840 Py_TYPE(self)->tp_free(self);
1841}
1842
1843static int
1844encoder_traverse(PyObject *self, visitproc visit, void *arg)
1845{
1846 PyEncoderObject *s;
1847 assert(PyEncoder_Check(self));
1848 s = (PyEncoderObject *)self;
1849 Py_VISIT(s->markers);
1850 Py_VISIT(s->defaultfn);
1851 Py_VISIT(s->encoder);
1852 Py_VISIT(s->indent);
1853 Py_VISIT(s->key_separator);
1854 Py_VISIT(s->item_separator);
1855 Py_VISIT(s->sort_keys);
1856 Py_VISIT(s->skipkeys);
1857 return 0;
1858}
1859
1860static int
1861encoder_clear(PyObject *self)
1862{
1863 /* Deallocate Encoder */
1864 PyEncoderObject *s;
1865 assert(PyEncoder_Check(self));
1866 s = (PyEncoderObject *)self;
1867 Py_CLEAR(s->markers);
1868 Py_CLEAR(s->defaultfn);
1869 Py_CLEAR(s->encoder);
1870 Py_CLEAR(s->indent);
1871 Py_CLEAR(s->key_separator);
1872 Py_CLEAR(s->item_separator);
1873 Py_CLEAR(s->sort_keys);
1874 Py_CLEAR(s->skipkeys);
1875 return 0;
1876}
1877
1878PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1879
1880static
1881PyTypeObject PyEncoderType = {
1882 PyVarObject_HEAD_INIT(NULL, 0)
1883 "_json.Encoder", /* tp_name */
1884 sizeof(PyEncoderObject), /* tp_basicsize */
1885 0, /* tp_itemsize */
1886 encoder_dealloc, /* tp_dealloc */
1887 0, /* tp_print */
1888 0, /* tp_getattr */
1889 0, /* tp_setattr */
1890 0, /* tp_compare */
1891 0, /* tp_repr */
1892 0, /* tp_as_number */
1893 0, /* tp_as_sequence */
1894 0, /* tp_as_mapping */
1895 0, /* tp_hash */
1896 encoder_call, /* tp_call */
1897 0, /* tp_str */
1898 0, /* tp_getattro */
1899 0, /* tp_setattro */
1900 0, /* tp_as_buffer */
1901 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1902 encoder_doc, /* tp_doc */
1903 encoder_traverse, /* tp_traverse */
1904 encoder_clear, /* tp_clear */
1905 0, /* tp_richcompare */
1906 0, /* tp_weaklistoffset */
1907 0, /* tp_iter */
1908 0, /* tp_iternext */
1909 0, /* tp_methods */
1910 encoder_members, /* tp_members */
1911 0, /* tp_getset */
1912 0, /* tp_base */
1913 0, /* tp_dict */
1914 0, /* tp_descr_get */
1915 0, /* tp_descr_set */
1916 0, /* tp_dictoffset */
1917 encoder_init, /* tp_init */
1918 0, /* tp_alloc */
1919 encoder_new, /* tp_new */
1920 0, /* tp_free */
1921};
1922
1923static PyMethodDef speedups_methods[] = {
1924 {"encode_basestring_ascii",
1925 (PyCFunction)py_encode_basestring_ascii,
1926 METH_O,
1927 pydoc_encode_basestring_ascii},
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001928 {"encode_basestring",
1929 (PyCFunction)py_encode_basestring,
1930 METH_O,
1931 pydoc_encode_basestring},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001932 {"scanstring",
1933 (PyCFunction)py_scanstring,
1934 METH_VARARGS,
1935 pydoc_scanstring},
Christian Heimes90540002008-05-08 14:29:10 +00001936 {NULL, NULL, 0, NULL}
1937};
1938
1939PyDoc_STRVAR(module_doc,
1940"json speedups\n");
1941
Martin v. Löwis1a214512008-06-11 05:26:20 +00001942static struct PyModuleDef jsonmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001943 PyModuleDef_HEAD_INIT,
1944 "_json",
1945 module_doc,
1946 -1,
1947 speedups_methods,
1948 NULL,
1949 NULL,
1950 NULL,
1951 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001952};
1953
Victor Stinnerf024d262015-03-17 17:48:27 +01001954PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001955PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00001956{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001957 PyObject *m = PyModule_Create(&jsonmodule);
1958 if (!m)
1959 return NULL;
1960 PyScannerType.tp_new = PyType_GenericNew;
1961 if (PyType_Ready(&PyScannerType) < 0)
1962 goto fail;
1963 PyEncoderType.tp_new = PyType_GenericNew;
1964 if (PyType_Ready(&PyEncoderType) < 0)
1965 goto fail;
1966 Py_INCREF((PyObject*)&PyScannerType);
1967 if (PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType) < 0) {
1968 Py_DECREF((PyObject*)&PyScannerType);
1969 goto fail;
1970 }
1971 Py_INCREF((PyObject*)&PyEncoderType);
1972 if (PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType) < 0) {
1973 Py_DECREF((PyObject*)&PyEncoderType);
1974 goto fail;
1975 }
1976 return m;
1977 fail:
1978 Py_DECREF(m);
1979 return NULL;
Christian Heimes90540002008-05-08 14:29:10 +00001980}