blob: a84b08510907792aebb91dcfed56389305f0c139 [file] [log] [blame]
Christian Heimes90540002008-05-08 14:29:10 +00001#include "Python.h"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00002#include "structmember.h"
Antoine Pitroud0acb412012-03-22 14:42:18 +01003#include "accu.h"
4
/* UNUSED marks a declaration (typically a parameter) as intentionally
   unused.  It expands to the GNU "unused" attribute when __GNUC__ is
   defined and to nothing otherwise. */
#if defined(__GNUC__)
#  define UNUSED __attribute__((__unused__))
#else
#  define UNUSED
#endif
10
11#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
12#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
13#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
14#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
15
16static PyTypeObject PyScannerType;
17static PyTypeObject PyEncoderType;
18
19typedef struct _PyScannerObject {
20 PyObject_HEAD
21 PyObject *strict;
22 PyObject *object_hook;
23 PyObject *object_pairs_hook;
24 PyObject *parse_float;
25 PyObject *parse_int;
26 PyObject *parse_constant;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +000027 PyObject *memo;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000028} PyScannerObject;
29
30static PyMemberDef scanner_members[] = {
31 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
32 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
33 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
34 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
35 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
36 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
37 {NULL}
38};
39
40typedef struct _PyEncoderObject {
41 PyObject_HEAD
42 PyObject *markers;
43 PyObject *defaultfn;
44 PyObject *encoder;
45 PyObject *indent;
46 PyObject *key_separator;
47 PyObject *item_separator;
48 PyObject *sort_keys;
49 PyObject *skipkeys;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +010050 PyCFunction fast_encode;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000051 int allow_nan;
52} PyEncoderObject;
53
54static PyMemberDef encoder_members[] = {
55 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
56 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
57 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
58 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
59 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
60 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
61 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
62 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
63 {NULL}
64};
65
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020066static PyObject *
67join_list_unicode(PyObject *lst)
68{
69 /* return u''.join(lst) */
70 static PyObject *sep = NULL;
71 if (sep == NULL) {
72 sep = PyUnicode_FromStringAndSize("", 0);
73 if (sep == NULL)
74 return NULL;
75 }
76 return PyUnicode_Join(sep, lst);
77}
78
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020079/* Forward decls */
80
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000081static PyObject *
82ascii_escape_unicode(PyObject *pystr);
83static PyObject *
84py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
85void init_json(void);
86static PyObject *
87scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
88static PyObject *
89_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
90static PyObject *
91scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000092static void
93scanner_dealloc(PyObject *self);
94static int
95scanner_clear(PyObject *self);
96static PyObject *
97encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000098static void
99encoder_dealloc(PyObject *self);
100static int
101encoder_clear(PyObject *self);
102static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200103encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000104static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200105encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000106static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200107encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000108static PyObject *
Hirokazu Yamamotofecf5d12009-05-02 15:55:19 +0000109_encoded_const(PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000110static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200111raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000112static PyObject *
113encoder_encode_string(PyEncoderObject *s, PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000114static PyObject *
115encoder_encode_float(PyEncoderObject *s, PyObject *obj);
116
/* S_CHAR(c): true when c is in the range ' '..'~' and is neither a
   backslash nor a double quote, i.e. it can be copied into an escaped
   string unchanged.  The argument is parenthesized at every use so that
   compound expressions (e.g. S_CHAR(a | b)) are grouped as written; note
   that it is still evaluated more than once. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* IS_WHITESPACE(c): true for space, tab, line feed and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
Christian Heimes90540002008-05-08 14:29:10 +0000119
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000120static Py_ssize_t
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200121ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000122{
123 /* Escape unicode code point c to ASCII escape sequences
124 in char *output. output must have at least 12 bytes unused to
125 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000126 output[chars++] = '\\';
127 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000128 case '\\': output[chars++] = c; break;
129 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000130 case '\b': output[chars++] = 'b'; break;
131 case '\f': output[chars++] = 'f'; break;
132 case '\n': output[chars++] = 'n'; break;
133 case '\r': output[chars++] = 'r'; break;
134 case '\t': output[chars++] = 't'; break;
135 default:
Christian Heimes90540002008-05-08 14:29:10 +0000136 if (c >= 0x10000) {
137 /* UTF-16 surrogate pair */
Victor Stinner76df43d2012-10-30 01:42:39 +0100138 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000139 output[chars++] = 'u';
Victor Stinner76df43d2012-10-30 01:42:39 +0100140 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
141 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
142 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
143 output[chars++] = Py_hexdigits[(v ) & 0xf];
144 c = Py_UNICODE_LOW_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000145 output[chars++] = '\\';
146 }
Christian Heimes90540002008-05-08 14:29:10 +0000147 output[chars++] = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200148 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
149 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
150 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
151 output[chars++] = Py_hexdigits[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000152 }
153 return chars;
154}
155
156static PyObject *
157ascii_escape_unicode(PyObject *pystr)
158{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000159 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000160 Py_ssize_t i;
161 Py_ssize_t input_chars;
162 Py_ssize_t output_size;
163 Py_ssize_t chars;
164 PyObject *rval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200165 void *input;
166 unsigned char *output;
167 int kind;
Christian Heimes90540002008-05-08 14:29:10 +0000168
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200169 if (PyUnicode_READY(pystr) == -1)
170 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000171
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200172 input_chars = PyUnicode_GET_LENGTH(pystr);
173 input = PyUnicode_DATA(pystr);
174 kind = PyUnicode_KIND(pystr);
175
176 /* Compute the output size */
177 for (i = 0, output_size = 2; i < input_chars; i++) {
178 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500179 Py_ssize_t d;
180 if (S_CHAR(c)) {
181 d = 1;
182 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200183 else {
184 switch(c) {
Victor Stinnerd9c06312011-10-11 21:56:19 +0200185 case '\\': case '"': case '\b': case '\f':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200186 case '\n': case '\r': case '\t':
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500187 d = 2; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200188 default:
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500189 d = c >= 0x10000 ? 12 : 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200190 }
191 }
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500192 if (output_size > PY_SSIZE_T_MAX - d) {
193 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
194 return NULL;
195 }
196 output_size += d;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200197 }
198
199 rval = PyUnicode_New(output_size, 127);
Christian Heimes90540002008-05-08 14:29:10 +0000200 if (rval == NULL) {
201 return NULL;
202 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200203 output = PyUnicode_1BYTE_DATA(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000204 chars = 0;
205 output[chars++] = '"';
206 for (i = 0; i < input_chars; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200207 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Christian Heimes90540002008-05-08 14:29:10 +0000208 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000209 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000210 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000211 else {
212 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000213 }
Christian Heimes90540002008-05-08 14:29:10 +0000214 }
215 output[chars++] = '"';
Christian Heimesf402e922013-01-03 09:21:55 +0100216#ifdef Py_DEBUG
Victor Stinner8f825062012-04-27 13:55:39 +0200217 assert(_PyUnicode_CheckConsistency(rval, 1));
Christian Heimesf402e922013-01-03 09:21:55 +0100218#endif
Christian Heimes90540002008-05-08 14:29:10 +0000219 return rval;
220}
221
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100222static PyObject *
223escape_unicode(PyObject *pystr)
224{
225 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
226 Py_ssize_t i;
227 Py_ssize_t input_chars;
228 Py_ssize_t output_size;
229 Py_ssize_t chars;
230 PyObject *rval;
231 void *input;
232 int kind;
233 Py_UCS4 maxchar;
234
235 if (PyUnicode_READY(pystr) == -1)
236 return NULL;
237
238 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
239 input_chars = PyUnicode_GET_LENGTH(pystr);
240 input = PyUnicode_DATA(pystr);
241 kind = PyUnicode_KIND(pystr);
242
243 /* Compute the output size */
244 for (i = 0, output_size = 2; i < input_chars; i++) {
245 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500246 Py_ssize_t d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100247 switch (c) {
248 case '\\': case '"': case '\b': case '\f':
249 case '\n': case '\r': case '\t':
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500250 d = 2;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100251 break;
252 default:
253 if (c <= 0x1f)
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500254 d = 6;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100255 else
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500256 d = 1;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100257 }
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500258 if (output_size > PY_SSIZE_T_MAX - d) {
259 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
260 return NULL;
261 }
262 output_size += d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100263 }
264
265 rval = PyUnicode_New(output_size, maxchar);
266 if (rval == NULL)
267 return NULL;
268
269 kind = PyUnicode_KIND(rval);
270
271#define ENCODE_OUTPUT do { \
272 chars = 0; \
273 output[chars++] = '"'; \
274 for (i = 0; i < input_chars; i++) { \
275 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
276 switch (c) { \
277 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
278 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
279 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
280 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
281 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
282 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
283 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
284 default: \
285 if (c <= 0x1f) { \
286 output[chars++] = '\\'; \
287 output[chars++] = 'u'; \
288 output[chars++] = '0'; \
289 output[chars++] = '0'; \
290 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
291 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
292 } else { \
293 output[chars++] = c; \
294 } \
295 } \
296 } \
297 output[chars++] = '"'; \
298 } while (0)
299
300 if (kind == PyUnicode_1BYTE_KIND) {
301 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
302 ENCODE_OUTPUT;
303 } else if (kind == PyUnicode_2BYTE_KIND) {
304 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
305 ENCODE_OUTPUT;
306 } else {
307 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
308 assert(kind == PyUnicode_4BYTE_KIND);
309 ENCODE_OUTPUT;
310 }
311#undef ENCODE_OUTPUT
312
313#ifdef Py_DEBUG
314 assert(_PyUnicode_CheckConsistency(rval, 1));
315#endif
316 return rval;
317}
318
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000319static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200320raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
Christian Heimes90540002008-05-08 14:29:10 +0000321{
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200322 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
323 static PyObject *JSONDecodeError = NULL;
324 PyObject *exc;
325 if (JSONDecodeError == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000326 PyObject *decoder = PyImport_ImportModule("json.decoder");
327 if (decoder == NULL)
328 return;
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200329 JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000330 Py_DECREF(decoder);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200331 if (JSONDecodeError == NULL)
Christian Heimes90540002008-05-08 14:29:10 +0000332 return;
Christian Heimes90540002008-05-08 14:29:10 +0000333 }
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200334 exc = PyObject_CallFunction(JSONDecodeError, "(zOn)", msg, s, end);
335 if (exc) {
336 PyErr_SetObject(JSONDecodeError, exc);
337 Py_DECREF(exc);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000338 }
Christian Heimes90540002008-05-08 14:29:10 +0000339}
340
Ezio Melotti37623ab2013-01-03 08:44:15 +0200341static void
342raise_stop_iteration(Py_ssize_t idx)
343{
344 PyObject *value = PyLong_FromSsize_t(idx);
345 if (value != NULL) {
346 PyErr_SetObject(PyExc_StopIteration, value);
347 Py_DECREF(value);
348 }
349}
350
Christian Heimes90540002008-05-08 14:29:10 +0000351static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000352_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
353 /* return (rval, idx) tuple, stealing reference to rval */
354 PyObject *tpl;
355 PyObject *pyidx;
356 /*
357 steal a reference to rval, returns (rval, idx)
358 */
359 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000360 return NULL;
361 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000362 pyidx = PyLong_FromSsize_t(idx);
363 if (pyidx == NULL) {
364 Py_DECREF(rval);
365 return NULL;
366 }
367 tpl = PyTuple_New(2);
368 if (tpl == NULL) {
369 Py_DECREF(pyidx);
370 Py_DECREF(rval);
371 return NULL;
372 }
373 PyTuple_SET_ITEM(tpl, 0, rval);
374 PyTuple_SET_ITEM(tpl, 1, pyidx);
375 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000376}
377
/* Flush the pending `chunk` (if any) onto the `chunks` list, creating the
   list on first use, and reset `chunk` to NULL.  Relies on the enclosing
   function providing the variables `chunk` and `chunks` and a `bail`
   label, which is jumped to on failure.  The macro expands to a bare
   if-block and is written at its call sites without a trailing
   semicolon. */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        int append_status; \
        if (chunks == NULL) { \
            chunks = PyList_New(0); \
            if (chunks == NULL) { \
                goto bail; \
            } \
        } \
        append_status = PyList_Append(chunks, chunk); \
        Py_CLEAR(chunk); \
        if (append_status) { \
            goto bail; \
        } \
    }
392
Christian Heimes90540002008-05-08 14:29:10 +0000393static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000394scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000395{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000396 /* Read the JSON string from PyUnicode pystr.
397 end is the index of the first character after the quote.
398 if strict is zero then literal control characters are allowed
399 *next_end_ptr is a return-by-reference index of the character
400 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000401
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000402 Return value is a new PyUnicode
403 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000404 PyObject *rval = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200405 Py_ssize_t len;
Christian Heimes90540002008-05-08 14:29:10 +0000406 Py_ssize_t begin = end - 1;
Brett Cannonb94767f2011-02-22 20:15:44 +0000407 Py_ssize_t next /* = begin */;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200408 const void *buf;
409 int kind;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000410 PyObject *chunks = NULL;
411 PyObject *chunk = NULL;
412
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200413 if (PyUnicode_READY(pystr) == -1)
414 return 0;
415
416 len = PyUnicode_GET_LENGTH(pystr);
417 buf = PyUnicode_DATA(pystr);
418 kind = PyUnicode_KIND(pystr);
419
Ezio Melotti37623ab2013-01-03 08:44:15 +0200420 if (end < 0 || len < end) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000421 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
422 goto bail;
423 }
Christian Heimes90540002008-05-08 14:29:10 +0000424 while (1) {
425 /* Find the end of the string or the next escape */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200426 Py_UCS4 c = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000427 for (next = end; next < len; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200428 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000429 if (c == '"' || c == '\\') {
430 break;
431 }
432 else if (strict && c <= 0x1f) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000433 raise_errmsg("Invalid control character at", pystr, next);
Christian Heimes90540002008-05-08 14:29:10 +0000434 goto bail;
435 }
436 }
437 if (!(c == '"' || c == '\\')) {
438 raise_errmsg("Unterminated string starting at", pystr, begin);
439 goto bail;
440 }
441 /* Pick up this chunk if it's not zero length */
442 if (next != end) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000443 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200444 chunk = PyUnicode_FromKindAndData(
445 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200446 (char*)buf + kind * end,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200447 next - end);
Christian Heimes90540002008-05-08 14:29:10 +0000448 if (chunk == NULL) {
449 goto bail;
450 }
Christian Heimes90540002008-05-08 14:29:10 +0000451 }
452 next++;
453 if (c == '"') {
454 end = next;
455 break;
456 }
457 if (next == len) {
458 raise_errmsg("Unterminated string starting at", pystr, begin);
459 goto bail;
460 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200461 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000462 if (c != 'u') {
463 /* Non-unicode backslash escapes */
464 end = next + 1;
465 switch (c) {
466 case '"': break;
467 case '\\': break;
468 case '/': break;
469 case 'b': c = '\b'; break;
470 case 'f': c = '\f'; break;
471 case 'n': c = '\n'; break;
472 case 'r': c = '\r'; break;
473 case 't': c = '\t'; break;
474 default: c = 0;
475 }
476 if (c == 0) {
477 raise_errmsg("Invalid \\escape", pystr, end - 2);
478 goto bail;
479 }
480 }
481 else {
482 c = 0;
483 next++;
484 end = next + 4;
485 if (end >= len) {
486 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
487 goto bail;
488 }
489 /* Decode 4 hex digits */
490 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200491 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000492 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000493 switch (digit) {
494 case '0': case '1': case '2': case '3': case '4':
495 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000496 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000497 case 'a': case 'b': case 'c': case 'd': case 'e':
498 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000499 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000500 case 'A': case 'B': case 'C': case 'D': case 'E':
501 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000502 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000503 default:
504 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
505 goto bail;
506 }
507 }
Christian Heimes90540002008-05-08 14:29:10 +0000508 /* Surrogate pair */
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200509 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
510 PyUnicode_READ(kind, buf, next++) == '\\' &&
511 PyUnicode_READ(kind, buf, next++) == 'u') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200512 Py_UCS4 c2 = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000513 end += 6;
514 /* Decode 4 hex digits */
515 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200516 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000517 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000518 switch (digit) {
519 case '0': case '1': case '2': case '3': case '4':
520 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000521 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000522 case 'a': case 'b': case 'c': case 'd': case 'e':
523 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000524 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000525 case 'A': case 'B': case 'C': case 'D': case 'E':
526 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000527 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000528 default:
529 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
530 goto bail;
531 }
532 }
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200533 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
534 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
535 else
536 end -= 6;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000537 }
Christian Heimes90540002008-05-08 14:29:10 +0000538 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000539 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200540 chunk = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &c, 1);
Christian Heimes90540002008-05-08 14:29:10 +0000541 if (chunk == NULL) {
542 goto bail;
543 }
Christian Heimes90540002008-05-08 14:29:10 +0000544 }
545
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000546 if (chunks == NULL) {
547 if (chunk != NULL)
548 rval = chunk;
549 else
550 rval = PyUnicode_FromStringAndSize("", 0);
Christian Heimes90540002008-05-08 14:29:10 +0000551 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000552 else {
553 APPEND_OLD_CHUNK
554 rval = join_list_unicode(chunks);
555 if (rval == NULL) {
556 goto bail;
557 }
558 Py_CLEAR(chunks);
559 }
560
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000561 *next_end_ptr = end;
562 return rval;
Christian Heimes90540002008-05-08 14:29:10 +0000563bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000564 *next_end_ptr = -1;
Christian Heimes90540002008-05-08 14:29:10 +0000565 Py_XDECREF(chunks);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000566 Py_XDECREF(chunk);
Christian Heimes90540002008-05-08 14:29:10 +0000567 return NULL;
568}
569
570PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000571 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000572 "\n"
573 "Scan the string s for a JSON string. End is the index of the\n"
574 "character in s after the quote that started the JSON string.\n"
575 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
576 "on attempt to decode an invalid string. If strict is False then literal\n"
577 "control characters are allowed in the string.\n"
578 "\n"
579 "Returns a tuple of the decoded string and the index of the character in s\n"
580 "after the end quote."
581);
Christian Heimes90540002008-05-08 14:29:10 +0000582
583static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000584py_scanstring(PyObject* self UNUSED, PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000585{
586 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000587 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000588 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000589 Py_ssize_t next_end = -1;
590 int strict = 1;
Antoine Pitroucbb02842012-12-01 19:34:16 +0100591 if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000592 return NULL;
593 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000594 if (PyUnicode_Check(pystr)) {
595 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000596 }
597 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000598 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000599 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000600 Py_TYPE(pystr)->tp_name);
601 return NULL;
602 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000603 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000604}
605
606PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000607 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000608 "\n"
609 "Return an ASCII-only JSON representation of a Python string"
610);
Christian Heimes90540002008-05-08 14:29:10 +0000611
612static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000613py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000614{
615 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000616 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000617 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000618 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000619 rval = ascii_escape_unicode(pystr);
620 }
621 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000622 PyErr_Format(PyExc_TypeError,
623 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000624 Py_TYPE(pystr)->tp_name);
625 return NULL;
626 }
Christian Heimes90540002008-05-08 14:29:10 +0000627 return rval;
628}
629
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100630
631PyDoc_STRVAR(pydoc_encode_basestring,
632 "encode_basestring(string) -> string\n"
633 "\n"
634 "Return a JSON representation of a Python string"
635);
636
637static PyObject *
638py_encode_basestring(PyObject* self UNUSED, PyObject *pystr)
639{
640 PyObject *rval;
641 /* Return a JSON representation of a Python string */
642 /* METH_O */
643 if (PyUnicode_Check(pystr)) {
644 rval = escape_unicode(pystr);
645 }
646 else {
647 PyErr_Format(PyExc_TypeError,
648 "first argument must be a string, not %.80s",
649 Py_TYPE(pystr)->tp_name);
650 return NULL;
651 }
652 return rval;
653}
654
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000655static void
656scanner_dealloc(PyObject *self)
657{
658 /* Deallocate scanner object */
659 scanner_clear(self);
660 Py_TYPE(self)->tp_free(self);
661}
662
663static int
664scanner_traverse(PyObject *self, visitproc visit, void *arg)
665{
666 PyScannerObject *s;
667 assert(PyScanner_Check(self));
668 s = (PyScannerObject *)self;
669 Py_VISIT(s->strict);
670 Py_VISIT(s->object_hook);
671 Py_VISIT(s->object_pairs_hook);
672 Py_VISIT(s->parse_float);
673 Py_VISIT(s->parse_int);
674 Py_VISIT(s->parse_constant);
675 return 0;
676}
677
678static int
679scanner_clear(PyObject *self)
680{
681 PyScannerObject *s;
682 assert(PyScanner_Check(self));
683 s = (PyScannerObject *)self;
684 Py_CLEAR(s->strict);
685 Py_CLEAR(s->object_hook);
686 Py_CLEAR(s->object_pairs_hook);
687 Py_CLEAR(s->parse_float);
688 Py_CLEAR(s->parse_int);
689 Py_CLEAR(s->parse_constant);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000690 Py_CLEAR(s->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000691 return 0;
692}
693
694static PyObject *
695_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
696 /* Read a JSON object from PyUnicode pystr.
697 idx is the index of the first character after the opening curly brace.
698 *next_idx_ptr is a return-by-reference index to the first character after
699 the closing curly brace.
700
701 Returns a new PyObject (usually a dict, but object_hook can change that)
702 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200703 void *str;
704 int kind;
705 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000706 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000707 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000708 PyObject *key = NULL;
709 int strict = PyObject_IsTrue(s->strict);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000710 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000711 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000712
Serhiy Storchakafa494fd2015-05-30 17:45:22 +0300713 if (strict < 0)
714 return NULL;
715
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200716 if (PyUnicode_READY(pystr) == -1)
717 return NULL;
718
719 str = PyUnicode_DATA(pystr);
720 kind = PyUnicode_KIND(pystr);
721 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
722
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000723 if (has_pairs_hook)
724 rval = PyList_New(0);
725 else
726 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000727 if (rval == NULL)
728 return NULL;
729
730 /* skip whitespace after { */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200731 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000732
733 /* only loop if the object is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200734 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
735 while (1) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000736 PyObject *memokey;
737
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000738 /* read key */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200739 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200740 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000741 goto bail;
742 }
743 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
744 if (key == NULL)
745 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000746 memokey = PyDict_GetItem(s->memo, key);
747 if (memokey != NULL) {
748 Py_INCREF(memokey);
749 Py_DECREF(key);
750 key = memokey;
751 }
752 else {
753 if (PyDict_SetItem(s->memo, key, key) < 0)
754 goto bail;
755 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000756 idx = next_idx;
757
758 /* skip whitespace between key and : delimiter, read :, skip whitespace */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200759 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
760 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200761 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000762 goto bail;
763 }
764 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200765 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000766
767 /* read any JSON term */
768 val = scan_once_unicode(s, pystr, idx, &next_idx);
769 if (val == NULL)
770 goto bail;
771
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000772 if (has_pairs_hook) {
773 PyObject *item = PyTuple_Pack(2, key, val);
774 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000775 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000776 Py_CLEAR(key);
777 Py_CLEAR(val);
778 if (PyList_Append(rval, item) == -1) {
779 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000780 goto bail;
781 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000782 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000783 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000784 else {
785 if (PyDict_SetItem(rval, key, val) < 0)
786 goto bail;
787 Py_CLEAR(key);
788 Py_CLEAR(val);
789 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000790 idx = next_idx;
791
792 /* skip whitespace before } or , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200793 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000794
795 /* bail if the object is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200796 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000797 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200798 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200799 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000800 goto bail;
801 }
802 idx++;
803
804 /* skip whitespace after , delimiter */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200805 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000806 }
807 }
808
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000809 *next_idx_ptr = idx + 1;
810
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000811 if (has_pairs_hook) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000812 val = PyObject_CallFunctionObjArgs(s->object_pairs_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000813 Py_DECREF(rval);
814 return val;
815 }
816
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000817 /* if object_hook is not None: rval = object_hook(rval) */
818 if (s->object_hook != Py_None) {
819 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000820 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000821 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000822 }
823 return rval;
824bail:
825 Py_XDECREF(key);
826 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000827 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000828 return NULL;
829}
830
831static PyObject *
832_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200833 /* Read a JSON array from PyUnicode pystr.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000834 idx is the index of the first character after the opening brace.
835 *next_idx_ptr is a return-by-reference index to the first character after
836 the closing brace.
837
838 Returns a new PyList
839 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200840 void *str;
841 int kind;
842 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000843 PyObject *val = NULL;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200844 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000845 Py_ssize_t next_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000846
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200847 if (PyUnicode_READY(pystr) == -1)
848 return NULL;
849
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200850 rval = PyList_New(0);
851 if (rval == NULL)
852 return NULL;
853
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200854 str = PyUnicode_DATA(pystr);
855 kind = PyUnicode_KIND(pystr);
856 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
857
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000858 /* skip whitespace after [ */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200859 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000860
861 /* only loop if the array is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200862 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
863 while (1) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000864
865 /* read any JSON term */
866 val = scan_once_unicode(s, pystr, idx, &next_idx);
867 if (val == NULL)
868 goto bail;
869
870 if (PyList_Append(rval, val) == -1)
871 goto bail;
872
873 Py_CLEAR(val);
874 idx = next_idx;
875
876 /* skip whitespace between term and , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200877 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000878
879 /* bail if the array is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200880 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000881 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200882 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200883 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000884 goto bail;
885 }
886 idx++;
887
888 /* skip whitespace after , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200889 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000890 }
891 }
892
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200893 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
894 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200895 raise_errmsg("Expecting value", pystr, end_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000896 goto bail;
897 }
898 *next_idx_ptr = idx + 1;
899 return rval;
900bail:
901 Py_XDECREF(val);
902 Py_DECREF(rval);
903 return NULL;
904}
905
906static PyObject *
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200907_parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
908 /* Read a JSON constant.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000909 constant is the constant string that was found
910 ("NaN", "Infinity", "-Infinity").
911 idx is the index of the first character of the constant
912 *next_idx_ptr is a return-by-reference index to the first character after
913 the constant.
914
915 Returns the result of parse_constant
916 */
917 PyObject *cstr;
918 PyObject *rval;
919 /* constant is "NaN", "Infinity", or "-Infinity" */
920 cstr = PyUnicode_InternFromString(constant);
921 if (cstr == NULL)
922 return NULL;
923
924 /* rval = parse_constant(constant) */
925 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200926 idx += PyUnicode_GET_LENGTH(cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000927 Py_DECREF(cstr);
928 *next_idx_ptr = idx;
929 return rval;
930}
931
932static PyObject *
933_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
934 /* Read a JSON number from PyUnicode pystr.
935 idx is the index of the first character of the number
936 *next_idx_ptr is a return-by-reference index to the first character after
937 the number.
938
939 Returns a new PyObject representation of that number:
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200940 PyLong, or PyFloat.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000941 May return other types if parse_int or parse_float are set
942 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200943 void *str;
944 int kind;
945 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000946 Py_ssize_t idx = start;
947 int is_float = 0;
948 PyObject *rval;
Antoine Pitrouf6454512011-04-25 19:16:06 +0200949 PyObject *numstr = NULL;
950 PyObject *custom_func;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000951
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200952 if (PyUnicode_READY(pystr) == -1)
953 return NULL;
954
955 str = PyUnicode_DATA(pystr);
956 kind = PyUnicode_KIND(pystr);
957 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
958
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000959 /* read a sign if it's there, make sure it's not the end of the string */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200960 if (PyUnicode_READ(kind, str, idx) == '-') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000961 idx++;
962 if (idx > end_idx) {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200963 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000964 return NULL;
965 }
966 }
967
968 /* read as many integer digits as we find as long as it doesn't start with 0 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200969 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000970 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200971 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000972 }
973 /* if it starts with 0 we only expect one integer digit */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200974 else if (PyUnicode_READ(kind, str, idx) == '0') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000975 idx++;
976 }
977 /* no integer digits, error */
978 else {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200979 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000980 return NULL;
981 }
982
983 /* if the next char is '.' followed by a digit then read all float digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200984 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000985 is_float = 1;
986 idx += 2;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200987 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000988 }
989
990 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200991 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000992 Py_ssize_t e_start = idx;
993 idx++;
994
995 /* read an exponent sign if present */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200996 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000997
998 /* read all digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200999 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001000
1001 /* if we got a digit, then parse as float. if not, backtrack */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001002 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001003 is_float = 1;
1004 }
1005 else {
1006 idx = e_start;
1007 }
1008 }
1009
Antoine Pitrouf6454512011-04-25 19:16:06 +02001010 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
1011 custom_func = s->parse_float;
1012 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
1013 custom_func = s->parse_int;
1014 else
1015 custom_func = NULL;
1016
1017 if (custom_func) {
1018 /* copy the section we determined to be a number */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001019 numstr = PyUnicode_FromKindAndData(kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001020 (char*)str + kind * start,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001021 idx - start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001022 if (numstr == NULL)
1023 return NULL;
1024 rval = PyObject_CallFunctionObjArgs(custom_func, numstr, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001025 }
1026 else {
Antoine Pitrouf6454512011-04-25 19:16:06 +02001027 Py_ssize_t i, n;
1028 char *buf;
1029 /* Straight conversion to ASCII, to avoid costly conversion of
1030 decimal unicode digits (which cannot appear here) */
1031 n = idx - start;
1032 numstr = PyBytes_FromStringAndSize(NULL, n);
1033 if (numstr == NULL)
1034 return NULL;
1035 buf = PyBytes_AS_STRING(numstr);
1036 for (i = 0; i < n; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001037 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001038 }
1039 if (is_float)
1040 rval = PyFloat_FromString(numstr);
1041 else
1042 rval = PyLong_FromString(buf, NULL, 10);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001043 }
1044 Py_DECREF(numstr);
1045 *next_idx_ptr = idx;
1046 return rval;
1047}
1048
1049static PyObject *
1050scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1051{
1052 /* Read one JSON term (of any kind) from PyUnicode pystr.
1053 idx is the index of the first character of the term
1054 *next_idx_ptr is a return-by-reference index to the first character after
1055 the number.
1056
1057 Returns a new PyObject representation of the term.
1058 */
Ezio Melotti362b9512011-05-07 17:58:09 +03001059 PyObject *res;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001060 void *str;
1061 int kind;
1062 Py_ssize_t length;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001063 int strict;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001064
1065 if (PyUnicode_READY(pystr) == -1)
1066 return NULL;
1067
1068 str = PyUnicode_DATA(pystr);
1069 kind = PyUnicode_KIND(pystr);
1070 length = PyUnicode_GET_LENGTH(pystr);
1071
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001072 if (idx < 0) {
Benjamin Peterson9beee042014-04-14 11:46:51 -04001073 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001074 return NULL;
1075 }
1076 if (idx >= length) {
Ezio Melotti37623ab2013-01-03 08:44:15 +02001077 raise_stop_iteration(idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001078 return NULL;
1079 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001080
1081 switch (PyUnicode_READ(kind, str, idx)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001082 case '"':
1083 /* string */
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001084 strict = PyObject_IsTrue(s->strict);
1085 if (strict < 0)
1086 return NULL;
1087 return scanstring_unicode(pystr, idx + 1, strict, next_idx_ptr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001088 case '{':
1089 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +03001090 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1091 "from a unicode string"))
1092 return NULL;
1093 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1094 Py_LeaveRecursiveCall();
1095 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001096 case '[':
1097 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +03001098 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1099 "from a unicode string"))
1100 return NULL;
1101 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1102 Py_LeaveRecursiveCall();
1103 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001104 case 'n':
1105 /* null */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001106 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001107 Py_INCREF(Py_None);
1108 *next_idx_ptr = idx + 4;
1109 return Py_None;
1110 }
1111 break;
1112 case 't':
1113 /* true */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001114 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001115 Py_INCREF(Py_True);
1116 *next_idx_ptr = idx + 4;
1117 return Py_True;
1118 }
1119 break;
1120 case 'f':
1121 /* false */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001122 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1123 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1124 PyUnicode_READ(kind, str, idx + 3) == 's' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001125 PyUnicode_READ(kind, str, idx + 4) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001126 Py_INCREF(Py_False);
1127 *next_idx_ptr = idx + 5;
1128 return Py_False;
1129 }
1130 break;
1131 case 'N':
1132 /* NaN */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001133 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001134 PyUnicode_READ(kind, str, idx + 2) == 'N') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001135 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1136 }
1137 break;
1138 case 'I':
1139 /* Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001140 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1141 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1142 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001143 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001144 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1145 PyUnicode_READ(kind, str, idx + 6) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001146 PyUnicode_READ(kind, str, idx + 7) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001147 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1148 }
1149 break;
1150 case '-':
1151 /* -Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001152 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001153 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1154 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001155 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001156 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001157 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1158 PyUnicode_READ(kind, str, idx + 7) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001159 PyUnicode_READ(kind, str, idx + 8) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001160 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1161 }
1162 break;
1163 }
1164 /* Didn't find a string, object, array, or named constant. Look for a number. */
1165 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1166}
1167
1168static PyObject *
1169scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1170{
1171 /* Python callable interface to scan_once_{str,unicode} */
1172 PyObject *pystr;
1173 PyObject *rval;
1174 Py_ssize_t idx;
1175 Py_ssize_t next_idx = -1;
1176 static char *kwlist[] = {"string", "idx", NULL};
1177 PyScannerObject *s;
1178 assert(PyScanner_Check(self));
1179 s = (PyScannerObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001180 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001181 return NULL;
1182
1183 if (PyUnicode_Check(pystr)) {
1184 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1185 }
1186 else {
1187 PyErr_Format(PyExc_TypeError,
1188 "first argument must be a string, not %.80s",
1189 Py_TYPE(pystr)->tp_name);
1190 return NULL;
1191 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001192 PyDict_Clear(s->memo);
1193 if (rval == NULL)
1194 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001195 return _build_rval_index_tuple(rval, next_idx);
1196}
1197
1198static PyObject *
1199scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1200{
1201 PyScannerObject *s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001202 PyObject *ctx;
1203 static char *kwlist[] = {"context", NULL};
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001204
1205 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
Serhiy Storchaka39b73dd2017-05-05 10:40:30 +03001206 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001207
Serhiy Storchaka39b73dd2017-05-05 10:40:30 +03001208 s = (PyScannerObject *)type->tp_alloc(type, 0);
1209 if (s == NULL) {
1210 return NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001211 }
1212
Serhiy Storchaka39b73dd2017-05-05 10:40:30 +03001213 s->memo = PyDict_New();
1214 if (s->memo == NULL)
1215 goto bail;
1216
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001217 /* All of these will fail "gracefully" so we don't need to verify them */
1218 s->strict = PyObject_GetAttrString(ctx, "strict");
1219 if (s->strict == NULL)
1220 goto bail;
1221 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1222 if (s->object_hook == NULL)
1223 goto bail;
1224 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1225 if (s->object_pairs_hook == NULL)
1226 goto bail;
1227 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1228 if (s->parse_float == NULL)
1229 goto bail;
1230 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1231 if (s->parse_int == NULL)
1232 goto bail;
1233 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1234 if (s->parse_constant == NULL)
1235 goto bail;
1236
Serhiy Storchaka39b73dd2017-05-05 10:40:30 +03001237 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001238
1239bail:
Serhiy Storchaka39b73dd2017-05-05 10:40:30 +03001240 Py_DECREF(s);
1241 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001242}
1243
1244PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1245
1246static
1247PyTypeObject PyScannerType = {
1248 PyVarObject_HEAD_INIT(NULL, 0)
1249 "_json.Scanner", /* tp_name */
1250 sizeof(PyScannerObject), /* tp_basicsize */
1251 0, /* tp_itemsize */
1252 scanner_dealloc, /* tp_dealloc */
1253 0, /* tp_print */
1254 0, /* tp_getattr */
1255 0, /* tp_setattr */
1256 0, /* tp_compare */
1257 0, /* tp_repr */
1258 0, /* tp_as_number */
1259 0, /* tp_as_sequence */
1260 0, /* tp_as_mapping */
1261 0, /* tp_hash */
1262 scanner_call, /* tp_call */
1263 0, /* tp_str */
1264 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1265 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1266 0, /* tp_as_buffer */
1267 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1268 scanner_doc, /* tp_doc */
1269 scanner_traverse, /* tp_traverse */
1270 scanner_clear, /* tp_clear */
1271 0, /* tp_richcompare */
1272 0, /* tp_weaklistoffset */
1273 0, /* tp_iter */
1274 0, /* tp_iternext */
1275 0, /* tp_methods */
1276 scanner_members, /* tp_members */
1277 0, /* tp_getset */
1278 0, /* tp_base */
1279 0, /* tp_dict */
1280 0, /* tp_descr_get */
1281 0, /* tp_descr_set */
1282 0, /* tp_dictoffset */
Serhiy Storchaka39b73dd2017-05-05 10:40:30 +03001283 0, /* tp_init */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001284 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1285 scanner_new, /* tp_new */
1286 0,/* PyObject_GC_Del, */ /* tp_free */
1287};
1288
1289static PyObject *
1290encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1291{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001292 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1293
1294 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001295 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001296 PyObject *item_separator, *sort_keys, *skipkeys;
1297 int allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001298
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001299 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUOOp:make_encoder", kwlist,
1300 &markers, &defaultfn, &encoder, &indent,
1301 &key_separator, &item_separator,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001302 &sort_keys, &skipkeys, &allow_nan))
Serhiy Storchaka39b73dd2017-05-05 10:40:30 +03001303 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001304
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001305 if (markers != Py_None && !PyDict_Check(markers)) {
1306 PyErr_Format(PyExc_TypeError,
1307 "make_encoder() argument 1 must be dict or None, "
1308 "not %.200s", Py_TYPE(markers)->tp_name);
Serhiy Storchaka39b73dd2017-05-05 10:40:30 +03001309 return NULL;
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001310 }
1311
Serhiy Storchaka39b73dd2017-05-05 10:40:30 +03001312 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1313 if (s == NULL)
1314 return NULL;
1315
Antoine Pitrou781eba72009-12-08 15:57:31 +00001316 s->markers = markers;
1317 s->defaultfn = defaultfn;
1318 s->encoder = encoder;
1319 s->indent = indent;
1320 s->key_separator = key_separator;
1321 s->item_separator = item_separator;
1322 s->sort_keys = sort_keys;
1323 s->skipkeys = skipkeys;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001324 s->fast_encode = NULL;
1325 if (PyCFunction_Check(s->encoder)) {
1326 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1327 if (f == (PyCFunction)py_encode_basestring_ascii ||
1328 f == (PyCFunction)py_encode_basestring) {
1329 s->fast_encode = f;
1330 }
1331 }
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001332 s->allow_nan = allow_nan;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001333
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001334 Py_INCREF(s->markers);
1335 Py_INCREF(s->defaultfn);
1336 Py_INCREF(s->encoder);
1337 Py_INCREF(s->indent);
1338 Py_INCREF(s->key_separator);
1339 Py_INCREF(s->item_separator);
1340 Py_INCREF(s->sort_keys);
1341 Py_INCREF(s->skipkeys);
Serhiy Storchaka39b73dd2017-05-05 10:40:30 +03001342 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001343}
1344
1345static PyObject *
1346encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1347{
1348 /* Python callable interface to encode_listencode_obj */
1349 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1350 PyObject *obj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001351 Py_ssize_t indent_level;
1352 PyEncoderObject *s;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001353 _PyAccu acc;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001354
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001355 assert(PyEncoder_Check(self));
1356 s = (PyEncoderObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001357 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1358 &obj, &indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001359 return NULL;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001360 if (_PyAccu_Init(&acc))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001361 return NULL;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001362 if (encoder_listencode_obj(s, &acc, obj, indent_level)) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001363 _PyAccu_Destroy(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001364 return NULL;
1365 }
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001366 return _PyAccu_FinishAsList(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001367}
1368
1369static PyObject *
1370_encoded_const(PyObject *obj)
1371{
1372 /* Return the JSON string representation of None, True, False */
1373 if (obj == Py_None) {
1374 static PyObject *s_null = NULL;
1375 if (s_null == NULL) {
1376 s_null = PyUnicode_InternFromString("null");
1377 }
1378 Py_INCREF(s_null);
1379 return s_null;
1380 }
1381 else if (obj == Py_True) {
1382 static PyObject *s_true = NULL;
1383 if (s_true == NULL) {
1384 s_true = PyUnicode_InternFromString("true");
1385 }
1386 Py_INCREF(s_true);
1387 return s_true;
1388 }
1389 else if (obj == Py_False) {
1390 static PyObject *s_false = NULL;
1391 if (s_false == NULL) {
1392 s_false = PyUnicode_InternFromString("false");
1393 }
1394 Py_INCREF(s_false);
1395 return s_false;
1396 }
1397 else {
1398 PyErr_SetString(PyExc_ValueError, "not a const");
1399 return NULL;
1400 }
1401}
1402
1403static PyObject *
1404encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1405{
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001406 /* Return the JSON representation of a PyFloat. */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001407 double i = PyFloat_AS_DOUBLE(obj);
1408 if (!Py_IS_FINITE(i)) {
1409 if (!s->allow_nan) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001410 PyErr_SetString(
1411 PyExc_ValueError,
1412 "Out of range float values are not JSON compliant"
1413 );
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001414 return NULL;
1415 }
1416 if (i > 0) {
1417 return PyUnicode_FromString("Infinity");
1418 }
1419 else if (i < 0) {
1420 return PyUnicode_FromString("-Infinity");
1421 }
1422 else {
1423 return PyUnicode_FromString("NaN");
1424 }
1425 }
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001426 return PyFloat_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001427}
1428
1429static PyObject *
1430encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1431{
1432 /* Return the JSON representation of a string */
1433 if (s->fast_encode)
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001434 return s->fast_encode(NULL, obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001435 else
1436 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1437}
1438
1439static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001440_steal_accumulate(_PyAccu *acc, PyObject *stolen)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001441{
1442 /* Append stolen and then decrement its reference count */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001443 int rval = _PyAccu_Accumulate(acc, stolen);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001444 Py_DECREF(stolen);
1445 return rval;
1446}
1447
1448static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001449encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001450 PyObject *obj, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001451{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001452 /* Encode Python object obj to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001453 PyObject *newobj;
1454 int rv;
1455
1456 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1457 PyObject *cstr = _encoded_const(obj);
1458 if (cstr == NULL)
1459 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001460 return _steal_accumulate(acc, cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001461 }
1462 else if (PyUnicode_Check(obj))
1463 {
1464 PyObject *encoded = encoder_encode_string(s, obj);
1465 if (encoded == NULL)
1466 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001467 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001468 }
1469 else if (PyLong_Check(obj)) {
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001470 PyObject *encoded = PyLong_Type.tp_str(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001471 if (encoded == NULL)
1472 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001473 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001474 }
1475 else if (PyFloat_Check(obj)) {
1476 PyObject *encoded = encoder_encode_float(s, obj);
1477 if (encoded == NULL)
1478 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001479 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001480 }
1481 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001482 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1483 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001484 rv = encoder_listencode_list(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001485 Py_LeaveRecursiveCall();
1486 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001487 }
1488 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001489 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1490 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001491 rv = encoder_listencode_dict(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001492 Py_LeaveRecursiveCall();
1493 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001494 }
1495 else {
1496 PyObject *ident = NULL;
1497 if (s->markers != Py_None) {
1498 int has_key;
1499 ident = PyLong_FromVoidPtr(obj);
1500 if (ident == NULL)
1501 return -1;
1502 has_key = PyDict_Contains(s->markers, ident);
1503 if (has_key) {
1504 if (has_key != -1)
1505 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1506 Py_DECREF(ident);
1507 return -1;
1508 }
1509 if (PyDict_SetItem(s->markers, ident, obj)) {
1510 Py_DECREF(ident);
1511 return -1;
1512 }
1513 }
1514 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
1515 if (newobj == NULL) {
1516 Py_XDECREF(ident);
1517 return -1;
1518 }
Ezio Melotti13672652011-05-11 01:02:56 +03001519
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001520 if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
1521 Py_DECREF(newobj);
1522 Py_XDECREF(ident);
Ezio Melotti13672652011-05-11 01:02:56 +03001523 return -1;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001524 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001525 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001526 Py_LeaveRecursiveCall();
1527
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001528 Py_DECREF(newobj);
1529 if (rv) {
1530 Py_XDECREF(ident);
1531 return -1;
1532 }
1533 if (ident != NULL) {
1534 if (PyDict_DelItem(s->markers, ident)) {
1535 Py_XDECREF(ident);
1536 return -1;
1537 }
1538 Py_XDECREF(ident);
1539 }
1540 return rv;
1541 }
1542}
1543
1544static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001545encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001546 PyObject *dct, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001547{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001548 /* Encode Python dict dct a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001549 static PyObject *open_dict = NULL;
1550 static PyObject *close_dict = NULL;
1551 static PyObject *empty_dict = NULL;
1552 PyObject *kstr = NULL;
1553 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001554 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001555 PyObject *items;
1556 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001557 int skipkeys;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001558 int sortkeys;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001559 Py_ssize_t idx;
1560
1561 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1562 open_dict = PyUnicode_InternFromString("{");
1563 close_dict = PyUnicode_InternFromString("}");
1564 empty_dict = PyUnicode_InternFromString("{}");
1565 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1566 return -1;
1567 }
Serhiy Storchaka3023ebb2017-01-13 08:34:34 +02001568 if (PyDict_Size(dct) == 0) /* Fast path */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001569 return _PyAccu_Accumulate(acc, empty_dict);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001570
1571 if (s->markers != Py_None) {
1572 int has_key;
1573 ident = PyLong_FromVoidPtr(dct);
1574 if (ident == NULL)
1575 goto bail;
1576 has_key = PyDict_Contains(s->markers, ident);
1577 if (has_key) {
1578 if (has_key != -1)
1579 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1580 goto bail;
1581 }
1582 if (PyDict_SetItem(s->markers, ident, dct)) {
1583 goto bail;
1584 }
1585 }
1586
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001587 if (_PyAccu_Accumulate(acc, open_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001588 goto bail;
1589
1590 if (s->indent != Py_None) {
1591 /* TODO: DOES NOT RUN */
1592 indent_level += 1;
1593 /*
1594 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1595 separator = _item_separator + newline_indent
1596 buf += newline_indent
1597 */
1598 }
1599
Benjamin Peterson501182a2015-05-02 22:28:04 -04001600 items = PyMapping_Items(dct);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001601 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001602 goto bail;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001603 sortkeys = PyObject_IsTrue(s->sort_keys);
1604 if (sortkeys < 0 || (sortkeys && PyList_Sort(items) < 0))
Benjamin Peterson501182a2015-05-02 22:28:04 -04001605 goto bail;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001606 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001607 Py_DECREF(items);
1608 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001609 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001610 skipkeys = PyObject_IsTrue(s->skipkeys);
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001611 if (skipkeys < 0)
1612 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001613 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001614 while ((item = PyIter_Next(it)) != NULL) {
1615 PyObject *encoded, *key, *value;
1616 if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
1617 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1618 goto bail;
1619 }
1620 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001621 if (PyUnicode_Check(key)) {
1622 Py_INCREF(key);
1623 kstr = key;
1624 }
1625 else if (PyFloat_Check(key)) {
1626 kstr = encoder_encode_float(s, key);
1627 if (kstr == NULL)
1628 goto bail;
1629 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001630 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001631 /* This must come before the PyLong_Check because
1632 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001633 kstr = _encoded_const(key);
1634 if (kstr == NULL)
1635 goto bail;
1636 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001637 else if (PyLong_Check(key)) {
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001638 kstr = PyLong_Type.tp_str(key);
Ethan Furmana4998a72013-08-10 13:01:45 -07001639 if (kstr == NULL) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001640 goto bail;
Ethan Furmana4998a72013-08-10 13:01:45 -07001641 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001642 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001643 else if (skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001644 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001645 continue;
1646 }
1647 else {
1648 /* TODO: include repr of key */
Doug Hellmann1c524752010-07-21 12:29:04 +00001649 PyErr_SetString(PyExc_TypeError, "keys must be a string");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001650 goto bail;
1651 }
1652
1653 if (idx) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001654 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001655 goto bail;
1656 }
1657
1658 encoded = encoder_encode_string(s, kstr);
1659 Py_CLEAR(kstr);
1660 if (encoded == NULL)
1661 goto bail;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001662 if (_PyAccu_Accumulate(acc, encoded)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001663 Py_DECREF(encoded);
1664 goto bail;
1665 }
1666 Py_DECREF(encoded);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001667 if (_PyAccu_Accumulate(acc, s->key_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001668 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001669
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001670 value = PyTuple_GET_ITEM(item, 1);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001671 if (encoder_listencode_obj(s, acc, value, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001672 goto bail;
1673 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001674 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001675 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001676 if (PyErr_Occurred())
1677 goto bail;
1678 Py_CLEAR(it);
1679
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001680 if (ident != NULL) {
1681 if (PyDict_DelItem(s->markers, ident))
1682 goto bail;
1683 Py_CLEAR(ident);
1684 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001685 /* TODO DOES NOT RUN; dead code
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001686 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001687 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001688
1689 yield '\n' + (' ' * (_indent * _current_indent_level))
1690 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001691 if (_PyAccu_Accumulate(acc, close_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001692 goto bail;
1693 return 0;
1694
1695bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001696 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001697 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001698 Py_XDECREF(kstr);
1699 Py_XDECREF(ident);
1700 return -1;
1701}
1702
1703
1704static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001705encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001706 PyObject *seq, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001707{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001708 /* Encode Python list seq to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001709 static PyObject *open_array = NULL;
1710 static PyObject *close_array = NULL;
1711 static PyObject *empty_array = NULL;
1712 PyObject *ident = NULL;
1713 PyObject *s_fast = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001714 Py_ssize_t i;
1715
1716 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1717 open_array = PyUnicode_InternFromString("[");
1718 close_array = PyUnicode_InternFromString("]");
1719 empty_array = PyUnicode_InternFromString("[]");
1720 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1721 return -1;
1722 }
1723 ident = NULL;
1724 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1725 if (s_fast == NULL)
1726 return -1;
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001727 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001728 Py_DECREF(s_fast);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001729 return _PyAccu_Accumulate(acc, empty_array);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001730 }
1731
1732 if (s->markers != Py_None) {
1733 int has_key;
1734 ident = PyLong_FromVoidPtr(seq);
1735 if (ident == NULL)
1736 goto bail;
1737 has_key = PyDict_Contains(s->markers, ident);
1738 if (has_key) {
1739 if (has_key != -1)
1740 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1741 goto bail;
1742 }
1743 if (PyDict_SetItem(s->markers, ident, seq)) {
1744 goto bail;
1745 }
1746 }
1747
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001748 if (_PyAccu_Accumulate(acc, open_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001749 goto bail;
1750 if (s->indent != Py_None) {
1751 /* TODO: DOES NOT RUN */
1752 indent_level += 1;
1753 /*
1754 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1755 separator = _item_separator + newline_indent
1756 buf += newline_indent
1757 */
1758 }
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001759 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1760 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001761 if (i) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001762 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001763 goto bail;
1764 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001765 if (encoder_listencode_obj(s, acc, obj, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001766 goto bail;
1767 }
1768 if (ident != NULL) {
1769 if (PyDict_DelItem(s->markers, ident))
1770 goto bail;
1771 Py_CLEAR(ident);
1772 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001773
1774 /* TODO: DOES NOT RUN
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001775 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001776 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001777
1778 yield '\n' + (' ' * (_indent * _current_indent_level))
1779 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001780 if (_PyAccu_Accumulate(acc, close_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001781 goto bail;
1782 Py_DECREF(s_fast);
1783 return 0;
1784
1785bail:
1786 Py_XDECREF(ident);
1787 Py_DECREF(s_fast);
1788 return -1;
1789}
1790
1791static void
1792encoder_dealloc(PyObject *self)
1793{
1794 /* Deallocate Encoder */
1795 encoder_clear(self);
1796 Py_TYPE(self)->tp_free(self);
1797}
1798
1799static int
1800encoder_traverse(PyObject *self, visitproc visit, void *arg)
1801{
1802 PyEncoderObject *s;
1803 assert(PyEncoder_Check(self));
1804 s = (PyEncoderObject *)self;
1805 Py_VISIT(s->markers);
1806 Py_VISIT(s->defaultfn);
1807 Py_VISIT(s->encoder);
1808 Py_VISIT(s->indent);
1809 Py_VISIT(s->key_separator);
1810 Py_VISIT(s->item_separator);
1811 Py_VISIT(s->sort_keys);
1812 Py_VISIT(s->skipkeys);
1813 return 0;
1814}
1815
1816static int
1817encoder_clear(PyObject *self)
1818{
1819 /* Deallocate Encoder */
1820 PyEncoderObject *s;
1821 assert(PyEncoder_Check(self));
1822 s = (PyEncoderObject *)self;
1823 Py_CLEAR(s->markers);
1824 Py_CLEAR(s->defaultfn);
1825 Py_CLEAR(s->encoder);
1826 Py_CLEAR(s->indent);
1827 Py_CLEAR(s->key_separator);
1828 Py_CLEAR(s->item_separator);
1829 Py_CLEAR(s->sort_keys);
1830 Py_CLEAR(s->skipkeys);
1831 return 0;
1832}
1833
1834PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1835
1836static
1837PyTypeObject PyEncoderType = {
1838 PyVarObject_HEAD_INIT(NULL, 0)
1839 "_json.Encoder", /* tp_name */
1840 sizeof(PyEncoderObject), /* tp_basicsize */
1841 0, /* tp_itemsize */
1842 encoder_dealloc, /* tp_dealloc */
1843 0, /* tp_print */
1844 0, /* tp_getattr */
1845 0, /* tp_setattr */
1846 0, /* tp_compare */
1847 0, /* tp_repr */
1848 0, /* tp_as_number */
1849 0, /* tp_as_sequence */
1850 0, /* tp_as_mapping */
1851 0, /* tp_hash */
1852 encoder_call, /* tp_call */
1853 0, /* tp_str */
1854 0, /* tp_getattro */
1855 0, /* tp_setattro */
1856 0, /* tp_as_buffer */
1857 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1858 encoder_doc, /* tp_doc */
1859 encoder_traverse, /* tp_traverse */
1860 encoder_clear, /* tp_clear */
1861 0, /* tp_richcompare */
1862 0, /* tp_weaklistoffset */
1863 0, /* tp_iter */
1864 0, /* tp_iternext */
1865 0, /* tp_methods */
1866 encoder_members, /* tp_members */
1867 0, /* tp_getset */
1868 0, /* tp_base */
1869 0, /* tp_dict */
1870 0, /* tp_descr_get */
1871 0, /* tp_descr_set */
1872 0, /* tp_dictoffset */
Serhiy Storchaka39b73dd2017-05-05 10:40:30 +03001873 0, /* tp_init */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001874 0, /* tp_alloc */
1875 encoder_new, /* tp_new */
1876 0, /* tp_free */
1877};
1878
1879static PyMethodDef speedups_methods[] = {
1880 {"encode_basestring_ascii",
1881 (PyCFunction)py_encode_basestring_ascii,
1882 METH_O,
1883 pydoc_encode_basestring_ascii},
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001884 {"encode_basestring",
1885 (PyCFunction)py_encode_basestring,
1886 METH_O,
1887 pydoc_encode_basestring},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001888 {"scanstring",
1889 (PyCFunction)py_scanstring,
1890 METH_VARARGS,
1891 pydoc_scanstring},
Christian Heimes90540002008-05-08 14:29:10 +00001892 {NULL, NULL, 0, NULL}
1893};
1894
1895PyDoc_STRVAR(module_doc,
1896"json speedups\n");
1897
Martin v. Löwis1a214512008-06-11 05:26:20 +00001898static struct PyModuleDef jsonmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001899 PyModuleDef_HEAD_INIT,
1900 "_json",
1901 module_doc,
1902 -1,
1903 speedups_methods,
1904 NULL,
1905 NULL,
1906 NULL,
1907 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001908};
1909
Victor Stinnerf024d262015-03-17 17:48:27 +01001910PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001911PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00001912{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001913 PyObject *m = PyModule_Create(&jsonmodule);
1914 if (!m)
1915 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001916 if (PyType_Ready(&PyScannerType) < 0)
1917 goto fail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001918 if (PyType_Ready(&PyEncoderType) < 0)
1919 goto fail;
1920 Py_INCREF((PyObject*)&PyScannerType);
1921 if (PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType) < 0) {
1922 Py_DECREF((PyObject*)&PyScannerType);
1923 goto fail;
1924 }
1925 Py_INCREF((PyObject*)&PyEncoderType);
1926 if (PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType) < 0) {
1927 Py_DECREF((PyObject*)&PyEncoderType);
1928 goto fail;
1929 }
1930 return m;
1931 fail:
1932 Py_DECREF(m);
1933 return NULL;
Christian Heimes90540002008-05-08 14:29:10 +00001934}