blob: e78e3711121c2baeec0fad34800be78cb75ef697 [file] [log] [blame]
Christian Heimes90540002008-05-08 14:29:10 +00001#include "Python.h"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00002#include "structmember.h"
Antoine Pitroud0acb412012-03-22 14:42:18 +01003#include "accu.h"
4
/* UNUSED annotates a parameter that is intentionally never referenced.
   It expands to the GCC "unused" attribute when __GNUC__ is defined and
   to nothing for every other compiler. */
#if defined(__GNUC__)
#  define UNUSED __attribute__((__unused__))
#else
#  define UNUSED
#endif
10
11#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
12#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
13#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
14#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
15
16static PyTypeObject PyScannerType;
17static PyTypeObject PyEncoderType;
18
19typedef struct _PyScannerObject {
20 PyObject_HEAD
21 PyObject *strict;
22 PyObject *object_hook;
23 PyObject *object_pairs_hook;
24 PyObject *parse_float;
25 PyObject *parse_int;
26 PyObject *parse_constant;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +000027 PyObject *memo;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000028} PyScannerObject;
29
30static PyMemberDef scanner_members[] = {
31 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
32 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
33 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
34 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
35 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
36 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
37 {NULL}
38};
39
40typedef struct _PyEncoderObject {
41 PyObject_HEAD
42 PyObject *markers;
43 PyObject *defaultfn;
44 PyObject *encoder;
45 PyObject *indent;
46 PyObject *key_separator;
47 PyObject *item_separator;
48 PyObject *sort_keys;
49 PyObject *skipkeys;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +010050 PyCFunction fast_encode;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000051 int allow_nan;
52} PyEncoderObject;
53
54static PyMemberDef encoder_members[] = {
55 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
56 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
57 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
58 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
59 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
60 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
61 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
62 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
63 {NULL}
64};
65
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020066static PyObject *
67join_list_unicode(PyObject *lst)
68{
69 /* return u''.join(lst) */
70 static PyObject *sep = NULL;
71 if (sep == NULL) {
72 sep = PyUnicode_FromStringAndSize("", 0);
73 if (sep == NULL)
74 return NULL;
75 }
76 return PyUnicode_Join(sep, lst);
77}
78
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020079/* Forward decls */
80
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000081static PyObject *
82ascii_escape_unicode(PyObject *pystr);
83static PyObject *
84py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
85void init_json(void);
86static PyObject *
87scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
88static PyObject *
89_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
90static PyObject *
91scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000092static void
93scanner_dealloc(PyObject *self);
94static int
95scanner_clear(PyObject *self);
96static PyObject *
97encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000098static void
99encoder_dealloc(PyObject *self);
100static int
101encoder_clear(PyObject *self);
102static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200103encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000104static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200105encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000106static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200107encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000108static PyObject *
Hirokazu Yamamotofecf5d12009-05-02 15:55:19 +0000109_encoded_const(PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000110static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200111raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000112static PyObject *
113encoder_encode_string(PyEncoderObject *s, PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000114static PyObject *
115encoder_encode_float(PyEncoderObject *s, PyObject *obj);
116
/* S_CHAR(c): true when c is a printable ASCII character (' ' .. '~') that can
   be copied into a JSON string literal unchanged, i.e. anything in that range
   except the backslash and the double quote.
   The argument is parenthesised so that expression arguments (e.g. a | b)
   are compared as a whole; it is evaluated more than once, so it must not
   have side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* IS_WHITESPACE(c): true for space, tab, line feed and carriage return.
   The argument is evaluated more than once. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
Christian Heimes90540002008-05-08 14:29:10 +0000119
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000120static Py_ssize_t
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200121ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000122{
123 /* Escape unicode code point c to ASCII escape sequences
124 in char *output. output must have at least 12 bytes unused to
125 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000126 output[chars++] = '\\';
127 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000128 case '\\': output[chars++] = c; break;
129 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000130 case '\b': output[chars++] = 'b'; break;
131 case '\f': output[chars++] = 'f'; break;
132 case '\n': output[chars++] = 'n'; break;
133 case '\r': output[chars++] = 'r'; break;
134 case '\t': output[chars++] = 't'; break;
135 default:
Christian Heimes90540002008-05-08 14:29:10 +0000136 if (c >= 0x10000) {
137 /* UTF-16 surrogate pair */
Victor Stinner76df43d2012-10-30 01:42:39 +0100138 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000139 output[chars++] = 'u';
Victor Stinner76df43d2012-10-30 01:42:39 +0100140 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
141 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
142 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
143 output[chars++] = Py_hexdigits[(v ) & 0xf];
144 c = Py_UNICODE_LOW_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000145 output[chars++] = '\\';
146 }
Christian Heimes90540002008-05-08 14:29:10 +0000147 output[chars++] = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200148 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
149 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
150 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
151 output[chars++] = Py_hexdigits[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000152 }
153 return chars;
154}
155
156static PyObject *
157ascii_escape_unicode(PyObject *pystr)
158{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000159 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000160 Py_ssize_t i;
161 Py_ssize_t input_chars;
162 Py_ssize_t output_size;
163 Py_ssize_t chars;
164 PyObject *rval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200165 void *input;
166 unsigned char *output;
167 int kind;
Christian Heimes90540002008-05-08 14:29:10 +0000168
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200169 if (PyUnicode_READY(pystr) == -1)
170 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000171
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200172 input_chars = PyUnicode_GET_LENGTH(pystr);
173 input = PyUnicode_DATA(pystr);
174 kind = PyUnicode_KIND(pystr);
175
176 /* Compute the output size */
177 for (i = 0, output_size = 2; i < input_chars; i++) {
178 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500179 Py_ssize_t d;
180 if (S_CHAR(c)) {
181 d = 1;
182 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200183 else {
184 switch(c) {
Victor Stinnerd9c06312011-10-11 21:56:19 +0200185 case '\\': case '"': case '\b': case '\f':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200186 case '\n': case '\r': case '\t':
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500187 d = 2; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200188 default:
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500189 d = c >= 0x10000 ? 12 : 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200190 }
191 }
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500192 if (output_size > PY_SSIZE_T_MAX - d) {
193 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
194 return NULL;
195 }
196 output_size += d;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200197 }
198
199 rval = PyUnicode_New(output_size, 127);
Christian Heimes90540002008-05-08 14:29:10 +0000200 if (rval == NULL) {
201 return NULL;
202 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200203 output = PyUnicode_1BYTE_DATA(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000204 chars = 0;
205 output[chars++] = '"';
206 for (i = 0; i < input_chars; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200207 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Christian Heimes90540002008-05-08 14:29:10 +0000208 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000209 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000210 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000211 else {
212 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000213 }
Christian Heimes90540002008-05-08 14:29:10 +0000214 }
215 output[chars++] = '"';
Christian Heimesf402e922013-01-03 09:21:55 +0100216#ifdef Py_DEBUG
Victor Stinner8f825062012-04-27 13:55:39 +0200217 assert(_PyUnicode_CheckConsistency(rval, 1));
Christian Heimesf402e922013-01-03 09:21:55 +0100218#endif
Christian Heimes90540002008-05-08 14:29:10 +0000219 return rval;
220}
221
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100222static PyObject *
223escape_unicode(PyObject *pystr)
224{
225 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
226 Py_ssize_t i;
227 Py_ssize_t input_chars;
228 Py_ssize_t output_size;
229 Py_ssize_t chars;
230 PyObject *rval;
231 void *input;
232 int kind;
233 Py_UCS4 maxchar;
234
235 if (PyUnicode_READY(pystr) == -1)
236 return NULL;
237
238 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
239 input_chars = PyUnicode_GET_LENGTH(pystr);
240 input = PyUnicode_DATA(pystr);
241 kind = PyUnicode_KIND(pystr);
242
243 /* Compute the output size */
244 for (i = 0, output_size = 2; i < input_chars; i++) {
245 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500246 Py_ssize_t d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100247 switch (c) {
248 case '\\': case '"': case '\b': case '\f':
249 case '\n': case '\r': case '\t':
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500250 d = 2;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100251 break;
252 default:
253 if (c <= 0x1f)
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500254 d = 6;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100255 else
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500256 d = 1;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100257 }
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500258 if (output_size > PY_SSIZE_T_MAX - d) {
259 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
260 return NULL;
261 }
262 output_size += d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100263 }
264
265 rval = PyUnicode_New(output_size, maxchar);
266 if (rval == NULL)
267 return NULL;
268
269 kind = PyUnicode_KIND(rval);
270
271#define ENCODE_OUTPUT do { \
272 chars = 0; \
273 output[chars++] = '"'; \
274 for (i = 0; i < input_chars; i++) { \
275 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
276 switch (c) { \
277 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
278 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
279 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
280 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
281 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
282 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
283 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
284 default: \
285 if (c <= 0x1f) { \
286 output[chars++] = '\\'; \
287 output[chars++] = 'u'; \
288 output[chars++] = '0'; \
289 output[chars++] = '0'; \
290 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
291 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
292 } else { \
293 output[chars++] = c; \
294 } \
295 } \
296 } \
297 output[chars++] = '"'; \
298 } while (0)
299
300 if (kind == PyUnicode_1BYTE_KIND) {
301 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
302 ENCODE_OUTPUT;
303 } else if (kind == PyUnicode_2BYTE_KIND) {
304 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
305 ENCODE_OUTPUT;
306 } else {
307 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
308 assert(kind == PyUnicode_4BYTE_KIND);
309 ENCODE_OUTPUT;
310 }
311#undef ENCODE_OUTPUT
312
313#ifdef Py_DEBUG
314 assert(_PyUnicode_CheckConsistency(rval, 1));
315#endif
316 return rval;
317}
318
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000319static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200320raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
Christian Heimes90540002008-05-08 14:29:10 +0000321{
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200322 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
323 static PyObject *JSONDecodeError = NULL;
324 PyObject *exc;
325 if (JSONDecodeError == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000326 PyObject *decoder = PyImport_ImportModule("json.decoder");
327 if (decoder == NULL)
328 return;
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200329 JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000330 Py_DECREF(decoder);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200331 if (JSONDecodeError == NULL)
Christian Heimes90540002008-05-08 14:29:10 +0000332 return;
Christian Heimes90540002008-05-08 14:29:10 +0000333 }
Victor Stinner4c381542016-12-09 00:33:39 +0100334 exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200335 if (exc) {
336 PyErr_SetObject(JSONDecodeError, exc);
337 Py_DECREF(exc);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000338 }
Christian Heimes90540002008-05-08 14:29:10 +0000339}
340
Ezio Melotti37623ab2013-01-03 08:44:15 +0200341static void
342raise_stop_iteration(Py_ssize_t idx)
343{
344 PyObject *value = PyLong_FromSsize_t(idx);
345 if (value != NULL) {
346 PyErr_SetObject(PyExc_StopIteration, value);
347 Py_DECREF(value);
348 }
349}
350
Christian Heimes90540002008-05-08 14:29:10 +0000351static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000352_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
353 /* return (rval, idx) tuple, stealing reference to rval */
354 PyObject *tpl;
355 PyObject *pyidx;
356 /*
357 steal a reference to rval, returns (rval, idx)
358 */
359 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000360 return NULL;
361 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000362 pyidx = PyLong_FromSsize_t(idx);
363 if (pyidx == NULL) {
364 Py_DECREF(rval);
365 return NULL;
366 }
367 tpl = PyTuple_New(2);
368 if (tpl == NULL) {
369 Py_DECREF(pyidx);
370 Py_DECREF(rval);
371 return NULL;
372 }
373 PyTuple_SET_ITEM(tpl, 0, rval);
374 PyTuple_SET_ITEM(tpl, 1, pyidx);
375 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000376}
377
/* APPEND_OLD_CHUNK: if a pending chunk exists, append it to the chunks list
   (creating the list on first use) and drop the local reference.  Expects
   the enclosing function to have PyObject *chunk and *chunks variables and a
   bail label; jumps to bail on failure with chunk already cleared if the
   append itself failed.  Used as a bare statement, without a trailing
   semicolon. */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        int append_failed_; \
        if (chunks == NULL) { \
            chunks = PyList_New(0); \
            if (chunks == NULL) { \
                goto bail; \
            } \
        } \
        append_failed_ = PyList_Append(chunks, chunk); \
        Py_CLEAR(chunk); \
        if (append_failed_) { \
            goto bail; \
        } \
    }
392
Christian Heimes90540002008-05-08 14:29:10 +0000393static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000394scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000395{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000396 /* Read the JSON string from PyUnicode pystr.
397 end is the index of the first character after the quote.
398 if strict is zero then literal control characters are allowed
399 *next_end_ptr is a return-by-reference index of the character
400 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000401
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000402 Return value is a new PyUnicode
403 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000404 PyObject *rval = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200405 Py_ssize_t len;
Christian Heimes90540002008-05-08 14:29:10 +0000406 Py_ssize_t begin = end - 1;
Brett Cannonb94767f2011-02-22 20:15:44 +0000407 Py_ssize_t next /* = begin */;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200408 const void *buf;
409 int kind;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000410 PyObject *chunks = NULL;
411 PyObject *chunk = NULL;
412
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200413 if (PyUnicode_READY(pystr) == -1)
414 return 0;
415
416 len = PyUnicode_GET_LENGTH(pystr);
417 buf = PyUnicode_DATA(pystr);
418 kind = PyUnicode_KIND(pystr);
419
Ezio Melotti37623ab2013-01-03 08:44:15 +0200420 if (end < 0 || len < end) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000421 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
422 goto bail;
423 }
Christian Heimes90540002008-05-08 14:29:10 +0000424 while (1) {
425 /* Find the end of the string or the next escape */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200426 Py_UCS4 c = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000427 for (next = end; next < len; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200428 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000429 if (c == '"' || c == '\\') {
430 break;
431 }
432 else if (strict && c <= 0x1f) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000433 raise_errmsg("Invalid control character at", pystr, next);
Christian Heimes90540002008-05-08 14:29:10 +0000434 goto bail;
435 }
436 }
437 if (!(c == '"' || c == '\\')) {
438 raise_errmsg("Unterminated string starting at", pystr, begin);
439 goto bail;
440 }
441 /* Pick up this chunk if it's not zero length */
442 if (next != end) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000443 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200444 chunk = PyUnicode_FromKindAndData(
445 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200446 (char*)buf + kind * end,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200447 next - end);
Christian Heimes90540002008-05-08 14:29:10 +0000448 if (chunk == NULL) {
449 goto bail;
450 }
Christian Heimes90540002008-05-08 14:29:10 +0000451 }
452 next++;
453 if (c == '"') {
454 end = next;
455 break;
456 }
457 if (next == len) {
458 raise_errmsg("Unterminated string starting at", pystr, begin);
459 goto bail;
460 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200461 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000462 if (c != 'u') {
463 /* Non-unicode backslash escapes */
464 end = next + 1;
465 switch (c) {
466 case '"': break;
467 case '\\': break;
468 case '/': break;
469 case 'b': c = '\b'; break;
470 case 'f': c = '\f'; break;
471 case 'n': c = '\n'; break;
472 case 'r': c = '\r'; break;
473 case 't': c = '\t'; break;
474 default: c = 0;
475 }
476 if (c == 0) {
477 raise_errmsg("Invalid \\escape", pystr, end - 2);
478 goto bail;
479 }
480 }
481 else {
482 c = 0;
483 next++;
484 end = next + 4;
485 if (end >= len) {
486 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
487 goto bail;
488 }
489 /* Decode 4 hex digits */
490 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200491 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000492 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000493 switch (digit) {
494 case '0': case '1': case '2': case '3': case '4':
495 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000496 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000497 case 'a': case 'b': case 'c': case 'd': case 'e':
498 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000499 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000500 case 'A': case 'B': case 'C': case 'D': case 'E':
501 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000502 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000503 default:
504 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
505 goto bail;
506 }
507 }
Christian Heimes90540002008-05-08 14:29:10 +0000508 /* Surrogate pair */
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200509 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
510 PyUnicode_READ(kind, buf, next++) == '\\' &&
511 PyUnicode_READ(kind, buf, next++) == 'u') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200512 Py_UCS4 c2 = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000513 end += 6;
514 /* Decode 4 hex digits */
515 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200516 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000517 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000518 switch (digit) {
519 case '0': case '1': case '2': case '3': case '4':
520 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000521 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000522 case 'a': case 'b': case 'c': case 'd': case 'e':
523 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000524 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000525 case 'A': case 'B': case 'C': case 'D': case 'E':
526 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000527 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000528 default:
529 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
530 goto bail;
531 }
532 }
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200533 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
534 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
535 else
536 end -= 6;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000537 }
Christian Heimes90540002008-05-08 14:29:10 +0000538 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000539 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200540 chunk = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &c, 1);
Christian Heimes90540002008-05-08 14:29:10 +0000541 if (chunk == NULL) {
542 goto bail;
543 }
Christian Heimes90540002008-05-08 14:29:10 +0000544 }
545
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000546 if (chunks == NULL) {
547 if (chunk != NULL)
548 rval = chunk;
549 else
550 rval = PyUnicode_FromStringAndSize("", 0);
Christian Heimes90540002008-05-08 14:29:10 +0000551 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000552 else {
553 APPEND_OLD_CHUNK
554 rval = join_list_unicode(chunks);
555 if (rval == NULL) {
556 goto bail;
557 }
558 Py_CLEAR(chunks);
559 }
560
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000561 *next_end_ptr = end;
562 return rval;
Christian Heimes90540002008-05-08 14:29:10 +0000563bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000564 *next_end_ptr = -1;
Christian Heimes90540002008-05-08 14:29:10 +0000565 Py_XDECREF(chunks);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000566 Py_XDECREF(chunk);
Christian Heimes90540002008-05-08 14:29:10 +0000567 return NULL;
568}
569
570PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000571 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000572 "\n"
573 "Scan the string s for a JSON string. End is the index of the\n"
574 "character in s after the quote that started the JSON string.\n"
575 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
576 "on attempt to decode an invalid string. If strict is False then literal\n"
577 "control characters are allowed in the string.\n"
578 "\n"
579 "Returns a tuple of the decoded string and the index of the character in s\n"
580 "after the end quote."
581);
Christian Heimes90540002008-05-08 14:29:10 +0000582
583static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000584py_scanstring(PyObject* self UNUSED, PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000585{
586 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000587 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000588 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000589 Py_ssize_t next_end = -1;
590 int strict = 1;
Antoine Pitroucbb02842012-12-01 19:34:16 +0100591 if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000592 return NULL;
593 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000594 if (PyUnicode_Check(pystr)) {
595 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000596 }
597 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000598 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000599 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000600 Py_TYPE(pystr)->tp_name);
601 return NULL;
602 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000603 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000604}
605
606PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000607 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000608 "\n"
609 "Return an ASCII-only JSON representation of a Python string"
610);
Christian Heimes90540002008-05-08 14:29:10 +0000611
612static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000613py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000614{
615 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000616 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000617 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000618 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000619 rval = ascii_escape_unicode(pystr);
620 }
621 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000622 PyErr_Format(PyExc_TypeError,
623 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000624 Py_TYPE(pystr)->tp_name);
625 return NULL;
626 }
Christian Heimes90540002008-05-08 14:29:10 +0000627 return rval;
628}
629
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100630
631PyDoc_STRVAR(pydoc_encode_basestring,
632 "encode_basestring(string) -> string\n"
633 "\n"
634 "Return a JSON representation of a Python string"
635);
636
637static PyObject *
638py_encode_basestring(PyObject* self UNUSED, PyObject *pystr)
639{
640 PyObject *rval;
641 /* Return a JSON representation of a Python string */
642 /* METH_O */
643 if (PyUnicode_Check(pystr)) {
644 rval = escape_unicode(pystr);
645 }
646 else {
647 PyErr_Format(PyExc_TypeError,
648 "first argument must be a string, not %.80s",
649 Py_TYPE(pystr)->tp_name);
650 return NULL;
651 }
652 return rval;
653}
654
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000655static void
656scanner_dealloc(PyObject *self)
657{
658 /* Deallocate scanner object */
659 scanner_clear(self);
660 Py_TYPE(self)->tp_free(self);
661}
662
663static int
664scanner_traverse(PyObject *self, visitproc visit, void *arg)
665{
666 PyScannerObject *s;
667 assert(PyScanner_Check(self));
668 s = (PyScannerObject *)self;
669 Py_VISIT(s->strict);
670 Py_VISIT(s->object_hook);
671 Py_VISIT(s->object_pairs_hook);
672 Py_VISIT(s->parse_float);
673 Py_VISIT(s->parse_int);
674 Py_VISIT(s->parse_constant);
675 return 0;
676}
677
678static int
679scanner_clear(PyObject *self)
680{
681 PyScannerObject *s;
682 assert(PyScanner_Check(self));
683 s = (PyScannerObject *)self;
684 Py_CLEAR(s->strict);
685 Py_CLEAR(s->object_hook);
686 Py_CLEAR(s->object_pairs_hook);
687 Py_CLEAR(s->parse_float);
688 Py_CLEAR(s->parse_int);
689 Py_CLEAR(s->parse_constant);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000690 Py_CLEAR(s->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000691 return 0;
692}
693
694static PyObject *
695_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
696 /* Read a JSON object from PyUnicode pystr.
697 idx is the index of the first character after the opening curly brace.
698 *next_idx_ptr is a return-by-reference index to the first character after
699 the closing curly brace.
700
701 Returns a new PyObject (usually a dict, but object_hook can change that)
702 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200703 void *str;
704 int kind;
705 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000706 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000707 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000708 PyObject *key = NULL;
709 int strict = PyObject_IsTrue(s->strict);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000710 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000711 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000712
Serhiy Storchakafa494fd2015-05-30 17:45:22 +0300713 if (strict < 0)
714 return NULL;
715
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200716 if (PyUnicode_READY(pystr) == -1)
717 return NULL;
718
719 str = PyUnicode_DATA(pystr);
720 kind = PyUnicode_KIND(pystr);
721 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
722
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000723 if (has_pairs_hook)
724 rval = PyList_New(0);
725 else
726 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000727 if (rval == NULL)
728 return NULL;
729
730 /* skip whitespace after { */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200731 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000732
733 /* only loop if the object is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200734 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
735 while (1) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000736 PyObject *memokey;
737
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000738 /* read key */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200739 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200740 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000741 goto bail;
742 }
743 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
744 if (key == NULL)
745 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000746 memokey = PyDict_GetItem(s->memo, key);
747 if (memokey != NULL) {
748 Py_INCREF(memokey);
749 Py_DECREF(key);
750 key = memokey;
751 }
752 else {
753 if (PyDict_SetItem(s->memo, key, key) < 0)
754 goto bail;
755 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000756 idx = next_idx;
757
758 /* skip whitespace between key and : delimiter, read :, skip whitespace */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200759 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
760 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200761 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000762 goto bail;
763 }
764 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200765 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000766
767 /* read any JSON term */
768 val = scan_once_unicode(s, pystr, idx, &next_idx);
769 if (val == NULL)
770 goto bail;
771
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000772 if (has_pairs_hook) {
773 PyObject *item = PyTuple_Pack(2, key, val);
774 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000775 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000776 Py_CLEAR(key);
777 Py_CLEAR(val);
778 if (PyList_Append(rval, item) == -1) {
779 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000780 goto bail;
781 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000782 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000783 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000784 else {
785 if (PyDict_SetItem(rval, key, val) < 0)
786 goto bail;
787 Py_CLEAR(key);
788 Py_CLEAR(val);
789 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000790 idx = next_idx;
791
792 /* skip whitespace before } or , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200793 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000794
795 /* bail if the object is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200796 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000797 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200798 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200799 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000800 goto bail;
801 }
802 idx++;
803
804 /* skip whitespace after , delimiter */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200805 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000806 }
807 }
808
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000809 *next_idx_ptr = idx + 1;
810
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000811 if (has_pairs_hook) {
Victor Stinnerde4ae3d2016-12-04 22:59:09 +0100812 val = PyObject_CallFunctionObjArgs(s->object_pairs_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000813 Py_DECREF(rval);
814 return val;
815 }
816
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000817 /* if object_hook is not None: rval = object_hook(rval) */
818 if (s->object_hook != Py_None) {
Victor Stinnerde4ae3d2016-12-04 22:59:09 +0100819 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000820 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000821 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000822 }
823 return rval;
824bail:
825 Py_XDECREF(key);
826 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000827 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000828 return NULL;
829}
830
831static PyObject *
832_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200833 /* Read a JSON array from PyUnicode pystr.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000834 idx is the index of the first character after the opening brace.
835 *next_idx_ptr is a return-by-reference index to the first character after
836 the closing brace.
837
838 Returns a new PyList
839 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200840 void *str;
841 int kind;
842 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000843 PyObject *val = NULL;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200844 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000845 Py_ssize_t next_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000846
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200847 if (PyUnicode_READY(pystr) == -1)
848 return NULL;
849
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200850 rval = PyList_New(0);
851 if (rval == NULL)
852 return NULL;
853
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200854 str = PyUnicode_DATA(pystr);
855 kind = PyUnicode_KIND(pystr);
856 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
857
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000858 /* skip whitespace after [ */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200859 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000860
861 /* only loop if the array is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200862 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
863 while (1) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000864
865 /* read any JSON term */
866 val = scan_once_unicode(s, pystr, idx, &next_idx);
867 if (val == NULL)
868 goto bail;
869
870 if (PyList_Append(rval, val) == -1)
871 goto bail;
872
873 Py_CLEAR(val);
874 idx = next_idx;
875
876 /* skip whitespace between term and , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200877 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000878
879 /* bail if the array is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200880 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000881 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200882 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200883 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000884 goto bail;
885 }
886 idx++;
887
888 /* skip whitespace after , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200889 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000890 }
891 }
892
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200893 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
894 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200895 raise_errmsg("Expecting value", pystr, end_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000896 goto bail;
897 }
898 *next_idx_ptr = idx + 1;
899 return rval;
900bail:
901 Py_XDECREF(val);
902 Py_DECREF(rval);
903 return NULL;
904}
905
906static PyObject *
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200907_parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
908 /* Read a JSON constant.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000909 constant is the constant string that was found
910 ("NaN", "Infinity", "-Infinity").
911 idx is the index of the first character of the constant
912 *next_idx_ptr is a return-by-reference index to the first character after
913 the constant.
914
915 Returns the result of parse_constant
916 */
917 PyObject *cstr;
918 PyObject *rval;
919 /* constant is "NaN", "Infinity", or "-Infinity" */
920 cstr = PyUnicode_InternFromString(constant);
921 if (cstr == NULL)
922 return NULL;
923
924 /* rval = parse_constant(constant) */
Victor Stinnerde4ae3d2016-12-04 22:59:09 +0100925 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200926 idx += PyUnicode_GET_LENGTH(cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000927 Py_DECREF(cstr);
928 *next_idx_ptr = idx;
929 return rval;
930}
931
932static PyObject *
933_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
934 /* Read a JSON number from PyUnicode pystr.
935 idx is the index of the first character of the number
936 *next_idx_ptr is a return-by-reference index to the first character after
937 the number.
938
939 Returns a new PyObject representation of that number:
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200940 PyLong, or PyFloat.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000941 May return other types if parse_int or parse_float are set
942 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200943 void *str;
944 int kind;
945 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000946 Py_ssize_t idx = start;
947 int is_float = 0;
948 PyObject *rval;
Antoine Pitrouf6454512011-04-25 19:16:06 +0200949 PyObject *numstr = NULL;
950 PyObject *custom_func;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000951
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200952 if (PyUnicode_READY(pystr) == -1)
953 return NULL;
954
955 str = PyUnicode_DATA(pystr);
956 kind = PyUnicode_KIND(pystr);
957 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
958
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000959 /* read a sign if it's there, make sure it's not the end of the string */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200960 if (PyUnicode_READ(kind, str, idx) == '-') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000961 idx++;
962 if (idx > end_idx) {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200963 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000964 return NULL;
965 }
966 }
967
968 /* read as many integer digits as we find as long as it doesn't start with 0 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200969 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000970 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200971 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000972 }
973 /* if it starts with 0 we only expect one integer digit */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200974 else if (PyUnicode_READ(kind, str, idx) == '0') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000975 idx++;
976 }
977 /* no integer digits, error */
978 else {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200979 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000980 return NULL;
981 }
982
983 /* if the next char is '.' followed by a digit then read all float digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200984 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000985 is_float = 1;
986 idx += 2;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200987 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000988 }
989
990 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200991 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000992 Py_ssize_t e_start = idx;
993 idx++;
994
995 /* read an exponent sign if present */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200996 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000997
998 /* read all digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200999 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001000
1001 /* if we got a digit, then parse as float. if not, backtrack */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001002 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001003 is_float = 1;
1004 }
1005 else {
1006 idx = e_start;
1007 }
1008 }
1009
Antoine Pitrouf6454512011-04-25 19:16:06 +02001010 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
1011 custom_func = s->parse_float;
1012 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
1013 custom_func = s->parse_int;
1014 else
1015 custom_func = NULL;
1016
1017 if (custom_func) {
1018 /* copy the section we determined to be a number */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001019 numstr = PyUnicode_FromKindAndData(kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001020 (char*)str + kind * start,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001021 idx - start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001022 if (numstr == NULL)
1023 return NULL;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001024 rval = PyObject_CallFunctionObjArgs(custom_func, numstr, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001025 }
1026 else {
Antoine Pitrouf6454512011-04-25 19:16:06 +02001027 Py_ssize_t i, n;
1028 char *buf;
1029 /* Straight conversion to ASCII, to avoid costly conversion of
1030 decimal unicode digits (which cannot appear here) */
1031 n = idx - start;
1032 numstr = PyBytes_FromStringAndSize(NULL, n);
1033 if (numstr == NULL)
1034 return NULL;
1035 buf = PyBytes_AS_STRING(numstr);
1036 for (i = 0; i < n; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001037 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001038 }
1039 if (is_float)
1040 rval = PyFloat_FromString(numstr);
1041 else
1042 rval = PyLong_FromString(buf, NULL, 10);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001043 }
1044 Py_DECREF(numstr);
1045 *next_idx_ptr = idx;
1046 return rval;
1047}
1048
1049static PyObject *
1050scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1051{
1052 /* Read one JSON term (of any kind) from PyUnicode pystr.
1053 idx is the index of the first character of the term
1054 *next_idx_ptr is a return-by-reference index to the first character after
1055 the number.
1056
1057 Returns a new PyObject representation of the term.
1058 */
Ezio Melotti362b9512011-05-07 17:58:09 +03001059 PyObject *res;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001060 void *str;
1061 int kind;
1062 Py_ssize_t length;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001063 int strict;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001064
1065 if (PyUnicode_READY(pystr) == -1)
1066 return NULL;
1067
1068 str = PyUnicode_DATA(pystr);
1069 kind = PyUnicode_KIND(pystr);
1070 length = PyUnicode_GET_LENGTH(pystr);
1071
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001072 if (idx < 0) {
Benjamin Peterson9beee042014-04-14 11:46:51 -04001073 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001074 return NULL;
1075 }
1076 if (idx >= length) {
Ezio Melotti37623ab2013-01-03 08:44:15 +02001077 raise_stop_iteration(idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001078 return NULL;
1079 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001080
1081 switch (PyUnicode_READ(kind, str, idx)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001082 case '"':
1083 /* string */
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001084 strict = PyObject_IsTrue(s->strict);
1085 if (strict < 0)
1086 return NULL;
1087 return scanstring_unicode(pystr, idx + 1, strict, next_idx_ptr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001088 case '{':
1089 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +03001090 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1091 "from a unicode string"))
1092 return NULL;
1093 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1094 Py_LeaveRecursiveCall();
1095 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001096 case '[':
1097 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +03001098 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1099 "from a unicode string"))
1100 return NULL;
1101 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1102 Py_LeaveRecursiveCall();
1103 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001104 case 'n':
1105 /* null */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001106 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001107 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001108 Py_RETURN_NONE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001109 }
1110 break;
1111 case 't':
1112 /* true */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001113 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001114 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001115 Py_RETURN_TRUE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001116 }
1117 break;
1118 case 'f':
1119 /* false */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001120 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1121 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1122 PyUnicode_READ(kind, str, idx + 3) == 's' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001123 PyUnicode_READ(kind, str, idx + 4) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001124 *next_idx_ptr = idx + 5;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001125 Py_RETURN_FALSE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001126 }
1127 break;
1128 case 'N':
1129 /* NaN */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001130 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001131 PyUnicode_READ(kind, str, idx + 2) == 'N') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001132 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1133 }
1134 break;
1135 case 'I':
1136 /* Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001137 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1138 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1139 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001140 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001141 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1142 PyUnicode_READ(kind, str, idx + 6) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001143 PyUnicode_READ(kind, str, idx + 7) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001144 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1145 }
1146 break;
1147 case '-':
1148 /* -Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001149 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001150 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1151 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001152 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001153 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001154 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1155 PyUnicode_READ(kind, str, idx + 7) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001156 PyUnicode_READ(kind, str, idx + 8) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001157 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1158 }
1159 break;
1160 }
1161 /* Didn't find a string, object, array, or named constant. Look for a number. */
1162 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1163}
1164
1165static PyObject *
1166scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1167{
1168 /* Python callable interface to scan_once_{str,unicode} */
1169 PyObject *pystr;
1170 PyObject *rval;
1171 Py_ssize_t idx;
1172 Py_ssize_t next_idx = -1;
1173 static char *kwlist[] = {"string", "idx", NULL};
1174 PyScannerObject *s;
1175 assert(PyScanner_Check(self));
1176 s = (PyScannerObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001177 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001178 return NULL;
1179
1180 if (PyUnicode_Check(pystr)) {
1181 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1182 }
1183 else {
1184 PyErr_Format(PyExc_TypeError,
1185 "first argument must be a string, not %.80s",
1186 Py_TYPE(pystr)->tp_name);
1187 return NULL;
1188 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001189 PyDict_Clear(s->memo);
1190 if (rval == NULL)
1191 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001192 return _build_rval_index_tuple(rval, next_idx);
1193}
1194
1195static PyObject *
1196scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1197{
1198 PyScannerObject *s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001199 PyObject *ctx;
1200 static char *kwlist[] = {"context", NULL};
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001201
1202 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001203 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001204
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001205 s = (PyScannerObject *)type->tp_alloc(type, 0);
1206 if (s == NULL) {
1207 return NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001208 }
1209
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001210 s->memo = PyDict_New();
1211 if (s->memo == NULL)
1212 goto bail;
1213
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001214 /* All of these will fail "gracefully" so we don't need to verify them */
1215 s->strict = PyObject_GetAttrString(ctx, "strict");
1216 if (s->strict == NULL)
1217 goto bail;
1218 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1219 if (s->object_hook == NULL)
1220 goto bail;
1221 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1222 if (s->object_pairs_hook == NULL)
1223 goto bail;
1224 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1225 if (s->parse_float == NULL)
1226 goto bail;
1227 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1228 if (s->parse_int == NULL)
1229 goto bail;
1230 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1231 if (s->parse_constant == NULL)
1232 goto bail;
1233
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001234 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001235
1236bail:
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001237 Py_DECREF(s);
1238 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001239}
1240
PyDoc_STRVAR(scanner_doc, "JSON scanner object");

/* Type object for _json.Scanner.
   Instances are created by scanner_new (tp_new), are callable through
   scanner_call (tp_call), take part in cyclic garbage collection
   (Py_TPFLAGS_HAVE_GC with scanner_traverse / scanner_clear) and expose
   the read-only attributes listed in scanner_members.
   Slots set to 0 next to a commented-out function name are presumably
   filled in elsewhere before the type is used -- TODO confirm. */
static
PyTypeObject PyScannerType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_json.Scanner",       /* tp_name */
    sizeof(PyScannerObject), /* tp_basicsize */
    0,                    /* tp_itemsize */
    scanner_dealloc, /* tp_dealloc */
    0,                    /* tp_print */
    0,                    /* tp_getattr */
    0,                    /* tp_setattr */
    0,                    /* tp_compare */
    0,                    /* tp_repr */
    0,                    /* tp_as_number */
    0,                    /* tp_as_sequence */
    0,                    /* tp_as_mapping */
    0,                    /* tp_hash */
    scanner_call,         /* tp_call */
    0,                    /* tp_str */
    0,/* PyObject_GenericGetAttr, */                    /* tp_getattro */
    0,/* PyObject_GenericSetAttr, */                    /* tp_setattro */
    0,                    /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
    scanner_doc,          /* tp_doc */
    scanner_traverse,                    /* tp_traverse */
    scanner_clear,                    /* tp_clear */
    0,                    /* tp_richcompare */
    0,                    /* tp_weaklistoffset */
    0,                    /* tp_iter */
    0,                    /* tp_iternext */
    0,                    /* tp_methods */
    scanner_members,                    /* tp_members */
    0,                    /* tp_getset */
    0,                    /* tp_base */
    0,                    /* tp_dict */
    0,                    /* tp_descr_get */
    0,                    /* tp_descr_set */
    0,                    /* tp_dictoffset */
    0,                    /* tp_init */
    0,/* PyType_GenericAlloc, */        /* tp_alloc */
    scanner_new,          /* tp_new */
    0,/* PyObject_GC_Del, */              /* tp_free */
};
1285
1286static PyObject *
1287encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1288{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001289 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1290
1291 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001292 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001293 PyObject *item_separator, *sort_keys, *skipkeys;
1294 int allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001295
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001296 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUOOp:make_encoder", kwlist,
1297 &markers, &defaultfn, &encoder, &indent,
1298 &key_separator, &item_separator,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001299 &sort_keys, &skipkeys, &allow_nan))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001300 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001301
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001302 if (markers != Py_None && !PyDict_Check(markers)) {
1303 PyErr_Format(PyExc_TypeError,
1304 "make_encoder() argument 1 must be dict or None, "
1305 "not %.200s", Py_TYPE(markers)->tp_name);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001306 return NULL;
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001307 }
1308
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001309 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1310 if (s == NULL)
1311 return NULL;
1312
Antoine Pitrou781eba72009-12-08 15:57:31 +00001313 s->markers = markers;
1314 s->defaultfn = defaultfn;
1315 s->encoder = encoder;
1316 s->indent = indent;
1317 s->key_separator = key_separator;
1318 s->item_separator = item_separator;
1319 s->sort_keys = sort_keys;
1320 s->skipkeys = skipkeys;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001321 s->fast_encode = NULL;
1322 if (PyCFunction_Check(s->encoder)) {
1323 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1324 if (f == (PyCFunction)py_encode_basestring_ascii ||
1325 f == (PyCFunction)py_encode_basestring) {
1326 s->fast_encode = f;
1327 }
1328 }
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001329 s->allow_nan = allow_nan;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001330
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001331 Py_INCREF(s->markers);
1332 Py_INCREF(s->defaultfn);
1333 Py_INCREF(s->encoder);
1334 Py_INCREF(s->indent);
1335 Py_INCREF(s->key_separator);
1336 Py_INCREF(s->item_separator);
1337 Py_INCREF(s->sort_keys);
1338 Py_INCREF(s->skipkeys);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001339 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001340}
1341
1342static PyObject *
1343encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1344{
1345 /* Python callable interface to encode_listencode_obj */
1346 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1347 PyObject *obj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001348 Py_ssize_t indent_level;
1349 PyEncoderObject *s;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001350 _PyAccu acc;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001351
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001352 assert(PyEncoder_Check(self));
1353 s = (PyEncoderObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001354 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1355 &obj, &indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001356 return NULL;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001357 if (_PyAccu_Init(&acc))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001358 return NULL;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001359 if (encoder_listencode_obj(s, &acc, obj, indent_level)) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001360 _PyAccu_Destroy(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001361 return NULL;
1362 }
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001363 return _PyAccu_FinishAsList(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001364}
1365
1366static PyObject *
1367_encoded_const(PyObject *obj)
1368{
1369 /* Return the JSON string representation of None, True, False */
1370 if (obj == Py_None) {
1371 static PyObject *s_null = NULL;
1372 if (s_null == NULL) {
1373 s_null = PyUnicode_InternFromString("null");
1374 }
1375 Py_INCREF(s_null);
1376 return s_null;
1377 }
1378 else if (obj == Py_True) {
1379 static PyObject *s_true = NULL;
1380 if (s_true == NULL) {
1381 s_true = PyUnicode_InternFromString("true");
1382 }
1383 Py_INCREF(s_true);
1384 return s_true;
1385 }
1386 else if (obj == Py_False) {
1387 static PyObject *s_false = NULL;
1388 if (s_false == NULL) {
1389 s_false = PyUnicode_InternFromString("false");
1390 }
1391 Py_INCREF(s_false);
1392 return s_false;
1393 }
1394 else {
1395 PyErr_SetString(PyExc_ValueError, "not a const");
1396 return NULL;
1397 }
1398}
1399
1400static PyObject *
1401encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1402{
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001403 /* Return the JSON representation of a PyFloat. */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001404 double i = PyFloat_AS_DOUBLE(obj);
1405 if (!Py_IS_FINITE(i)) {
1406 if (!s->allow_nan) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001407 PyErr_SetString(
1408 PyExc_ValueError,
1409 "Out of range float values are not JSON compliant"
1410 );
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001411 return NULL;
1412 }
1413 if (i > 0) {
1414 return PyUnicode_FromString("Infinity");
1415 }
1416 else if (i < 0) {
1417 return PyUnicode_FromString("-Infinity");
1418 }
1419 else {
1420 return PyUnicode_FromString("NaN");
1421 }
1422 }
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001423 return PyFloat_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001424}
1425
1426static PyObject *
1427encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1428{
1429 /* Return the JSON representation of a string */
1430 if (s->fast_encode)
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001431 return s->fast_encode(NULL, obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001432 else
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001433 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001434}
1435
1436static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001437_steal_accumulate(_PyAccu *acc, PyObject *stolen)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001438{
1439 /* Append stolen and then decrement its reference count */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001440 int rval = _PyAccu_Accumulate(acc, stolen);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001441 Py_DECREF(stolen);
1442 return rval;
1443}
1444
1445static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001446encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001447 PyObject *obj, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001448{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001449 /* Encode Python object obj to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001450 PyObject *newobj;
1451 int rv;
1452
1453 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1454 PyObject *cstr = _encoded_const(obj);
1455 if (cstr == NULL)
1456 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001457 return _steal_accumulate(acc, cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001458 }
1459 else if (PyUnicode_Check(obj))
1460 {
1461 PyObject *encoded = encoder_encode_string(s, obj);
1462 if (encoded == NULL)
1463 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001464 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001465 }
1466 else if (PyLong_Check(obj)) {
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001467 PyObject *encoded = PyLong_Type.tp_str(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001468 if (encoded == NULL)
1469 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001470 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001471 }
1472 else if (PyFloat_Check(obj)) {
1473 PyObject *encoded = encoder_encode_float(s, obj);
1474 if (encoded == NULL)
1475 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001476 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001477 }
1478 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001479 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1480 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001481 rv = encoder_listencode_list(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001482 Py_LeaveRecursiveCall();
1483 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001484 }
1485 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001486 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1487 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001488 rv = encoder_listencode_dict(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001489 Py_LeaveRecursiveCall();
1490 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001491 }
1492 else {
1493 PyObject *ident = NULL;
1494 if (s->markers != Py_None) {
1495 int has_key;
1496 ident = PyLong_FromVoidPtr(obj);
1497 if (ident == NULL)
1498 return -1;
1499 has_key = PyDict_Contains(s->markers, ident);
1500 if (has_key) {
1501 if (has_key != -1)
1502 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1503 Py_DECREF(ident);
1504 return -1;
1505 }
1506 if (PyDict_SetItem(s->markers, ident, obj)) {
1507 Py_DECREF(ident);
1508 return -1;
1509 }
1510 }
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001511 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001512 if (newobj == NULL) {
1513 Py_XDECREF(ident);
1514 return -1;
1515 }
Ezio Melotti13672652011-05-11 01:02:56 +03001516
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001517 if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
1518 Py_DECREF(newobj);
1519 Py_XDECREF(ident);
Ezio Melotti13672652011-05-11 01:02:56 +03001520 return -1;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001521 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001522 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001523 Py_LeaveRecursiveCall();
1524
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001525 Py_DECREF(newobj);
1526 if (rv) {
1527 Py_XDECREF(ident);
1528 return -1;
1529 }
1530 if (ident != NULL) {
1531 if (PyDict_DelItem(s->markers, ident)) {
1532 Py_XDECREF(ident);
1533 return -1;
1534 }
1535 Py_XDECREF(ident);
1536 }
1537 return rv;
1538 }
1539}
1540
1541static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001542encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001543 PyObject *dct, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001544{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001545 /* Encode Python dict dct a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001546 static PyObject *open_dict = NULL;
1547 static PyObject *close_dict = NULL;
1548 static PyObject *empty_dict = NULL;
1549 PyObject *kstr = NULL;
1550 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001551 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001552 PyObject *items;
1553 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001554 int skipkeys;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001555 int sortkeys;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001556 Py_ssize_t idx;
1557
1558 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1559 open_dict = PyUnicode_InternFromString("{");
1560 close_dict = PyUnicode_InternFromString("}");
1561 empty_dict = PyUnicode_InternFromString("{}");
1562 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1563 return -1;
1564 }
Serhiy Storchaka8cbc51a2017-01-13 08:38:15 +02001565 if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001566 return _PyAccu_Accumulate(acc, empty_dict);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001567
1568 if (s->markers != Py_None) {
1569 int has_key;
1570 ident = PyLong_FromVoidPtr(dct);
1571 if (ident == NULL)
1572 goto bail;
1573 has_key = PyDict_Contains(s->markers, ident);
1574 if (has_key) {
1575 if (has_key != -1)
1576 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1577 goto bail;
1578 }
1579 if (PyDict_SetItem(s->markers, ident, dct)) {
1580 goto bail;
1581 }
1582 }
1583
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001584 if (_PyAccu_Accumulate(acc, open_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001585 goto bail;
1586
1587 if (s->indent != Py_None) {
1588 /* TODO: DOES NOT RUN */
1589 indent_level += 1;
1590 /*
1591 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1592 separator = _item_separator + newline_indent
1593 buf += newline_indent
1594 */
1595 }
1596
Benjamin Peterson501182a2015-05-02 22:28:04 -04001597 items = PyMapping_Items(dct);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001598 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001599 goto bail;
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001600 sortkeys = PyObject_IsTrue(s->sort_keys);
1601 if (sortkeys < 0 || (sortkeys && PyList_Sort(items) < 0))
Benjamin Peterson501182a2015-05-02 22:28:04 -04001602 goto bail;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001603 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001604 Py_DECREF(items);
1605 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001606 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001607 skipkeys = PyObject_IsTrue(s->skipkeys);
Serhiy Storchakafa494fd2015-05-30 17:45:22 +03001608 if (skipkeys < 0)
1609 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001610 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001611 while ((item = PyIter_Next(it)) != NULL) {
1612 PyObject *encoded, *key, *value;
Serhiy Storchakafff9a312017-03-21 08:53:25 +02001613 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001614 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1615 goto bail;
1616 }
1617 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001618 if (PyUnicode_Check(key)) {
1619 Py_INCREF(key);
1620 kstr = key;
1621 }
1622 else if (PyFloat_Check(key)) {
1623 kstr = encoder_encode_float(s, key);
1624 if (kstr == NULL)
1625 goto bail;
1626 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001627 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001628 /* This must come before the PyLong_Check because
1629 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001630 kstr = _encoded_const(key);
1631 if (kstr == NULL)
1632 goto bail;
1633 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001634 else if (PyLong_Check(key)) {
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001635 kstr = PyLong_Type.tp_str(key);
Ethan Furmana4998a72013-08-10 13:01:45 -07001636 if (kstr == NULL) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001637 goto bail;
Ethan Furmana4998a72013-08-10 13:01:45 -07001638 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001639 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001640 else if (skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001641 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001642 continue;
1643 }
1644 else {
1645 /* TODO: include repr of key */
Doug Hellmann1c524752010-07-21 12:29:04 +00001646 PyErr_SetString(PyExc_TypeError, "keys must be a string");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001647 goto bail;
1648 }
1649
1650 if (idx) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001651 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001652 goto bail;
1653 }
1654
1655 encoded = encoder_encode_string(s, kstr);
1656 Py_CLEAR(kstr);
1657 if (encoded == NULL)
1658 goto bail;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001659 if (_PyAccu_Accumulate(acc, encoded)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001660 Py_DECREF(encoded);
1661 goto bail;
1662 }
1663 Py_DECREF(encoded);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001664 if (_PyAccu_Accumulate(acc, s->key_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001665 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001666
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001667 value = PyTuple_GET_ITEM(item, 1);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001668 if (encoder_listencode_obj(s, acc, value, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001669 goto bail;
1670 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001671 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001672 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001673 if (PyErr_Occurred())
1674 goto bail;
1675 Py_CLEAR(it);
1676
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001677 if (ident != NULL) {
1678 if (PyDict_DelItem(s->markers, ident))
1679 goto bail;
1680 Py_CLEAR(ident);
1681 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001682 /* TODO DOES NOT RUN; dead code
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001683 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001684 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001685
1686 yield '\n' + (' ' * (_indent * _current_indent_level))
1687 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001688 if (_PyAccu_Accumulate(acc, close_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001689 goto bail;
1690 return 0;
1691
1692bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001693 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001694 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001695 Py_XDECREF(kstr);
1696 Py_XDECREF(ident);
1697 return -1;
1698}
1699
1700
1701static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001702encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001703 PyObject *seq, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001704{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001705 /* Encode Python list seq to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001706 static PyObject *open_array = NULL;
1707 static PyObject *close_array = NULL;
1708 static PyObject *empty_array = NULL;
1709 PyObject *ident = NULL;
1710 PyObject *s_fast = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001711 Py_ssize_t i;
1712
1713 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1714 open_array = PyUnicode_InternFromString("[");
1715 close_array = PyUnicode_InternFromString("]");
1716 empty_array = PyUnicode_InternFromString("[]");
1717 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1718 return -1;
1719 }
1720 ident = NULL;
1721 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1722 if (s_fast == NULL)
1723 return -1;
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001724 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001725 Py_DECREF(s_fast);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001726 return _PyAccu_Accumulate(acc, empty_array);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001727 }
1728
1729 if (s->markers != Py_None) {
1730 int has_key;
1731 ident = PyLong_FromVoidPtr(seq);
1732 if (ident == NULL)
1733 goto bail;
1734 has_key = PyDict_Contains(s->markers, ident);
1735 if (has_key) {
1736 if (has_key != -1)
1737 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1738 goto bail;
1739 }
1740 if (PyDict_SetItem(s->markers, ident, seq)) {
1741 goto bail;
1742 }
1743 }
1744
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001745 if (_PyAccu_Accumulate(acc, open_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001746 goto bail;
1747 if (s->indent != Py_None) {
1748 /* TODO: DOES NOT RUN */
1749 indent_level += 1;
1750 /*
1751 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1752 separator = _item_separator + newline_indent
1753 buf += newline_indent
1754 */
1755 }
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001756 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1757 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001758 if (i) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001759 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001760 goto bail;
1761 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001762 if (encoder_listencode_obj(s, acc, obj, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001763 goto bail;
1764 }
1765 if (ident != NULL) {
1766 if (PyDict_DelItem(s->markers, ident))
1767 goto bail;
1768 Py_CLEAR(ident);
1769 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001770
1771 /* TODO: DOES NOT RUN
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001772 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001773 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001774
1775 yield '\n' + (' ' * (_indent * _current_indent_level))
1776 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001777 if (_PyAccu_Accumulate(acc, close_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001778 goto bail;
1779 Py_DECREF(s_fast);
1780 return 0;
1781
1782bail:
1783 Py_XDECREF(ident);
1784 Py_DECREF(s_fast);
1785 return -1;
1786}
1787
1788static void
1789encoder_dealloc(PyObject *self)
1790{
1791 /* Deallocate Encoder */
1792 encoder_clear(self);
1793 Py_TYPE(self)->tp_free(self);
1794}
1795
1796static int
1797encoder_traverse(PyObject *self, visitproc visit, void *arg)
1798{
1799 PyEncoderObject *s;
1800 assert(PyEncoder_Check(self));
1801 s = (PyEncoderObject *)self;
1802 Py_VISIT(s->markers);
1803 Py_VISIT(s->defaultfn);
1804 Py_VISIT(s->encoder);
1805 Py_VISIT(s->indent);
1806 Py_VISIT(s->key_separator);
1807 Py_VISIT(s->item_separator);
1808 Py_VISIT(s->sort_keys);
1809 Py_VISIT(s->skipkeys);
1810 return 0;
1811}
1812
1813static int
1814encoder_clear(PyObject *self)
1815{
1816 /* Deallocate Encoder */
1817 PyEncoderObject *s;
1818 assert(PyEncoder_Check(self));
1819 s = (PyEncoderObject *)self;
1820 Py_CLEAR(s->markers);
1821 Py_CLEAR(s->defaultfn);
1822 Py_CLEAR(s->encoder);
1823 Py_CLEAR(s->indent);
1824 Py_CLEAR(s->key_separator);
1825 Py_CLEAR(s->item_separator);
1826 Py_CLEAR(s->sort_keys);
1827 Py_CLEAR(s->skipkeys);
1828 return 0;
1829}
1830
1831PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1832
1833static
1834PyTypeObject PyEncoderType = {
1835 PyVarObject_HEAD_INIT(NULL, 0)
1836 "_json.Encoder", /* tp_name */
1837 sizeof(PyEncoderObject), /* tp_basicsize */
1838 0, /* tp_itemsize */
1839 encoder_dealloc, /* tp_dealloc */
1840 0, /* tp_print */
1841 0, /* tp_getattr */
1842 0, /* tp_setattr */
1843 0, /* tp_compare */
1844 0, /* tp_repr */
1845 0, /* tp_as_number */
1846 0, /* tp_as_sequence */
1847 0, /* tp_as_mapping */
1848 0, /* tp_hash */
1849 encoder_call, /* tp_call */
1850 0, /* tp_str */
1851 0, /* tp_getattro */
1852 0, /* tp_setattro */
1853 0, /* tp_as_buffer */
1854 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1855 encoder_doc, /* tp_doc */
1856 encoder_traverse, /* tp_traverse */
1857 encoder_clear, /* tp_clear */
1858 0, /* tp_richcompare */
1859 0, /* tp_weaklistoffset */
1860 0, /* tp_iter */
1861 0, /* tp_iternext */
1862 0, /* tp_methods */
1863 encoder_members, /* tp_members */
1864 0, /* tp_getset */
1865 0, /* tp_base */
1866 0, /* tp_dict */
1867 0, /* tp_descr_get */
1868 0, /* tp_descr_set */
1869 0, /* tp_dictoffset */
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001870 0, /* tp_init */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001871 0, /* tp_alloc */
1872 encoder_new, /* tp_new */
1873 0, /* tp_free */
1874};
1875
1876static PyMethodDef speedups_methods[] = {
1877 {"encode_basestring_ascii",
1878 (PyCFunction)py_encode_basestring_ascii,
1879 METH_O,
1880 pydoc_encode_basestring_ascii},
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001881 {"encode_basestring",
1882 (PyCFunction)py_encode_basestring,
1883 METH_O,
1884 pydoc_encode_basestring},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001885 {"scanstring",
1886 (PyCFunction)py_scanstring,
1887 METH_VARARGS,
1888 pydoc_scanstring},
Christian Heimes90540002008-05-08 14:29:10 +00001889 {NULL, NULL, 0, NULL}
1890};
1891
1892PyDoc_STRVAR(module_doc,
1893"json speedups\n");
1894
Martin v. Löwis1a214512008-06-11 05:26:20 +00001895static struct PyModuleDef jsonmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001896 PyModuleDef_HEAD_INIT,
1897 "_json",
1898 module_doc,
1899 -1,
1900 speedups_methods,
1901 NULL,
1902 NULL,
1903 NULL,
1904 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001905};
1906
Victor Stinnerf024d262015-03-17 17:48:27 +01001907PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001908PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00001909{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001910 PyObject *m = PyModule_Create(&jsonmodule);
1911 if (!m)
1912 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001913 if (PyType_Ready(&PyScannerType) < 0)
1914 goto fail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001915 if (PyType_Ready(&PyEncoderType) < 0)
1916 goto fail;
1917 Py_INCREF((PyObject*)&PyScannerType);
1918 if (PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType) < 0) {
1919 Py_DECREF((PyObject*)&PyScannerType);
1920 goto fail;
1921 }
1922 Py_INCREF((PyObject*)&PyEncoderType);
1923 if (PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType) < 0) {
1924 Py_DECREF((PyObject*)&PyEncoderType);
1925 goto fail;
1926 }
1927 return m;
1928 fail:
1929 Py_DECREF(m);
1930 return NULL;
Christian Heimes90540002008-05-08 14:29:10 +00001931}