blob: 94a7c0d2bf09e4a0e080b3a34607668702388c25 [file] [log] [blame]
Eric Snow2ebc5ce2017-09-07 23:51:28 -06001
2/* Core extension modules are built-in on some platforms (e.g. Windows). */
3#ifdef Py_BUILD_CORE
Eric Snowfc1bf872017-09-11 18:30:43 -07004#define Py_BUILD_CORE_BUILTIN
Eric Snow2ebc5ce2017-09-07 23:51:28 -06005#undef Py_BUILD_CORE
6#endif
7
Christian Heimes90540002008-05-08 14:29:10 +00008#include "Python.h"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00009#include "structmember.h"
Victor Stinnere281f7d2018-11-01 02:30:36 +010010#include "pycore_accu.h"
Antoine Pitroud0acb412012-03-22 14:42:18 +010011
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000012#ifdef __GNUC__
13#define UNUSED __attribute__((__unused__))
14#else
15#define UNUSED
16#endif
17
18#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
19#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
20#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
21#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
22
23static PyTypeObject PyScannerType;
24static PyTypeObject PyEncoderType;
25
26typedef struct _PyScannerObject {
27 PyObject_HEAD
Segev Finer541bd282017-07-13 08:52:08 +030028 signed char strict;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000029 PyObject *object_hook;
30 PyObject *object_pairs_hook;
31 PyObject *parse_float;
32 PyObject *parse_int;
33 PyObject *parse_constant;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +000034 PyObject *memo;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000035} PyScannerObject;
36
37static PyMemberDef scanner_members[] = {
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030038 {"strict", T_BOOL, offsetof(PyScannerObject, strict), READONLY, "strict"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000039 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
40 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
41 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
42 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
43 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
44 {NULL}
45};
46
47typedef struct _PyEncoderObject {
48 PyObject_HEAD
49 PyObject *markers;
50 PyObject *defaultfn;
51 PyObject *encoder;
52 PyObject *indent;
53 PyObject *key_separator;
54 PyObject *item_separator;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030055 char sort_keys;
56 char skipkeys;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000057 int allow_nan;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030058 PyCFunction fast_encode;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000059} PyEncoderObject;
60
61static PyMemberDef encoder_members[] = {
62 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
63 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
64 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
65 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
66 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
67 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030068 {"sort_keys", T_BOOL, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
69 {"skipkeys", T_BOOL, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000070 {NULL}
71};
72
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020073static PyObject *
74join_list_unicode(PyObject *lst)
75{
76 /* return u''.join(lst) */
77 static PyObject *sep = NULL;
78 if (sep == NULL) {
79 sep = PyUnicode_FromStringAndSize("", 0);
80 if (sep == NULL)
81 return NULL;
82 }
83 return PyUnicode_Join(sep, lst);
84}
85
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020086/* Forward decls */
87
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000088static PyObject *
89ascii_escape_unicode(PyObject *pystr);
90static PyObject *
91py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
92void init_json(void);
93static PyObject *
94scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
95static PyObject *
96_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
97static PyObject *
98scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000099static void
100scanner_dealloc(PyObject *self);
101static int
102scanner_clear(PyObject *self);
103static PyObject *
104encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000105static void
106encoder_dealloc(PyObject *self);
107static int
108encoder_clear(PyObject *self);
109static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200110encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000111static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200112encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000113static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200114encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000115static PyObject *
Hirokazu Yamamotofecf5d12009-05-02 15:55:19 +0000116_encoded_const(PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000117static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200118raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000119static PyObject *
120encoder_encode_string(PyEncoderObject *s, PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000121static PyObject *
122encoder_encode_float(PyEncoderObject *s, PyObject *obj);
123
/* True for printable ASCII (' ' through '~') other than backslash and
   double quote, i.e. characters that can be copied into a JSON string
   unchanged. The argument is fully parenthesised so that expression
   arguments keep their meaning; it is still evaluated more than once, so it
   must not have side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* True for the four characters treated as JSON whitespace here: space, tab,
   line feed and carriage return. The argument is evaluated more than once. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
Christian Heimes90540002008-05-08 14:29:10 +0000126
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000127static Py_ssize_t
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200128ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000129{
130 /* Escape unicode code point c to ASCII escape sequences
131 in char *output. output must have at least 12 bytes unused to
132 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000133 output[chars++] = '\\';
134 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000135 case '\\': output[chars++] = c; break;
136 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000137 case '\b': output[chars++] = 'b'; break;
138 case '\f': output[chars++] = 'f'; break;
139 case '\n': output[chars++] = 'n'; break;
140 case '\r': output[chars++] = 'r'; break;
141 case '\t': output[chars++] = 't'; break;
142 default:
Christian Heimes90540002008-05-08 14:29:10 +0000143 if (c >= 0x10000) {
144 /* UTF-16 surrogate pair */
Victor Stinner76df43d2012-10-30 01:42:39 +0100145 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000146 output[chars++] = 'u';
Victor Stinner76df43d2012-10-30 01:42:39 +0100147 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
148 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
149 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
150 output[chars++] = Py_hexdigits[(v ) & 0xf];
151 c = Py_UNICODE_LOW_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000152 output[chars++] = '\\';
153 }
Christian Heimes90540002008-05-08 14:29:10 +0000154 output[chars++] = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200155 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
156 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
157 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
158 output[chars++] = Py_hexdigits[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000159 }
160 return chars;
161}
162
163static PyObject *
164ascii_escape_unicode(PyObject *pystr)
165{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000166 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000167 Py_ssize_t i;
168 Py_ssize_t input_chars;
169 Py_ssize_t output_size;
170 Py_ssize_t chars;
171 PyObject *rval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200172 void *input;
173 unsigned char *output;
174 int kind;
Christian Heimes90540002008-05-08 14:29:10 +0000175
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200176 if (PyUnicode_READY(pystr) == -1)
177 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000178
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200179 input_chars = PyUnicode_GET_LENGTH(pystr);
180 input = PyUnicode_DATA(pystr);
181 kind = PyUnicode_KIND(pystr);
182
183 /* Compute the output size */
184 for (i = 0, output_size = 2; i < input_chars; i++) {
185 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500186 Py_ssize_t d;
187 if (S_CHAR(c)) {
188 d = 1;
189 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200190 else {
191 switch(c) {
Victor Stinnerd9c06312011-10-11 21:56:19 +0200192 case '\\': case '"': case '\b': case '\f':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200193 case '\n': case '\r': case '\t':
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500194 d = 2; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200195 default:
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500196 d = c >= 0x10000 ? 12 : 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200197 }
198 }
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500199 if (output_size > PY_SSIZE_T_MAX - d) {
200 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
201 return NULL;
202 }
203 output_size += d;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200204 }
205
206 rval = PyUnicode_New(output_size, 127);
Christian Heimes90540002008-05-08 14:29:10 +0000207 if (rval == NULL) {
208 return NULL;
209 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200210 output = PyUnicode_1BYTE_DATA(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000211 chars = 0;
212 output[chars++] = '"';
213 for (i = 0; i < input_chars; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200214 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Christian Heimes90540002008-05-08 14:29:10 +0000215 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000216 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000217 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000218 else {
219 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000220 }
Christian Heimes90540002008-05-08 14:29:10 +0000221 }
222 output[chars++] = '"';
Christian Heimesf402e922013-01-03 09:21:55 +0100223#ifdef Py_DEBUG
Victor Stinner8f825062012-04-27 13:55:39 +0200224 assert(_PyUnicode_CheckConsistency(rval, 1));
Christian Heimesf402e922013-01-03 09:21:55 +0100225#endif
Christian Heimes90540002008-05-08 14:29:10 +0000226 return rval;
227}
228
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100229static PyObject *
230escape_unicode(PyObject *pystr)
231{
232 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
233 Py_ssize_t i;
234 Py_ssize_t input_chars;
235 Py_ssize_t output_size;
236 Py_ssize_t chars;
237 PyObject *rval;
238 void *input;
239 int kind;
240 Py_UCS4 maxchar;
241
242 if (PyUnicode_READY(pystr) == -1)
243 return NULL;
244
245 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
246 input_chars = PyUnicode_GET_LENGTH(pystr);
247 input = PyUnicode_DATA(pystr);
248 kind = PyUnicode_KIND(pystr);
249
250 /* Compute the output size */
251 for (i = 0, output_size = 2; i < input_chars; i++) {
252 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500253 Py_ssize_t d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100254 switch (c) {
255 case '\\': case '"': case '\b': case '\f':
256 case '\n': case '\r': case '\t':
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500257 d = 2;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100258 break;
259 default:
260 if (c <= 0x1f)
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500261 d = 6;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100262 else
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500263 d = 1;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100264 }
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500265 if (output_size > PY_SSIZE_T_MAX - d) {
266 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
267 return NULL;
268 }
269 output_size += d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100270 }
271
272 rval = PyUnicode_New(output_size, maxchar);
273 if (rval == NULL)
274 return NULL;
275
276 kind = PyUnicode_KIND(rval);
277
278#define ENCODE_OUTPUT do { \
279 chars = 0; \
280 output[chars++] = '"'; \
281 for (i = 0; i < input_chars; i++) { \
282 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
283 switch (c) { \
284 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
285 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
286 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
287 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
288 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
289 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
290 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
291 default: \
292 if (c <= 0x1f) { \
293 output[chars++] = '\\'; \
294 output[chars++] = 'u'; \
295 output[chars++] = '0'; \
296 output[chars++] = '0'; \
297 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
298 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
299 } else { \
300 output[chars++] = c; \
301 } \
302 } \
303 } \
304 output[chars++] = '"'; \
305 } while (0)
306
307 if (kind == PyUnicode_1BYTE_KIND) {
308 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
309 ENCODE_OUTPUT;
310 } else if (kind == PyUnicode_2BYTE_KIND) {
311 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
312 ENCODE_OUTPUT;
313 } else {
314 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
315 assert(kind == PyUnicode_4BYTE_KIND);
316 ENCODE_OUTPUT;
317 }
318#undef ENCODE_OUTPUT
319
320#ifdef Py_DEBUG
321 assert(_PyUnicode_CheckConsistency(rval, 1));
322#endif
323 return rval;
324}
325
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000326static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200327raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
Christian Heimes90540002008-05-08 14:29:10 +0000328{
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200329 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
330 static PyObject *JSONDecodeError = NULL;
331 PyObject *exc;
332 if (JSONDecodeError == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000333 PyObject *decoder = PyImport_ImportModule("json.decoder");
334 if (decoder == NULL)
335 return;
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200336 JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000337 Py_DECREF(decoder);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200338 if (JSONDecodeError == NULL)
Christian Heimes90540002008-05-08 14:29:10 +0000339 return;
Christian Heimes90540002008-05-08 14:29:10 +0000340 }
Victor Stinner4c381542016-12-09 00:33:39 +0100341 exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200342 if (exc) {
343 PyErr_SetObject(JSONDecodeError, exc);
344 Py_DECREF(exc);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000345 }
Christian Heimes90540002008-05-08 14:29:10 +0000346}
347
Ezio Melotti37623ab2013-01-03 08:44:15 +0200348static void
349raise_stop_iteration(Py_ssize_t idx)
350{
351 PyObject *value = PyLong_FromSsize_t(idx);
352 if (value != NULL) {
353 PyErr_SetObject(PyExc_StopIteration, value);
354 Py_DECREF(value);
355 }
356}
357
Christian Heimes90540002008-05-08 14:29:10 +0000358static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000359_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
360 /* return (rval, idx) tuple, stealing reference to rval */
361 PyObject *tpl;
362 PyObject *pyidx;
363 /*
364 steal a reference to rval, returns (rval, idx)
365 */
366 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000367 return NULL;
368 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000369 pyidx = PyLong_FromSsize_t(idx);
370 if (pyidx == NULL) {
371 Py_DECREF(rval);
372 return NULL;
373 }
374 tpl = PyTuple_New(2);
375 if (tpl == NULL) {
376 Py_DECREF(pyidx);
377 Py_DECREF(rval);
378 return NULL;
379 }
380 PyTuple_SET_ITEM(tpl, 0, rval);
381 PyTuple_SET_ITEM(tpl, 1, pyidx);
382 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000383}
384
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000385#define APPEND_OLD_CHUNK \
386 if (chunk != NULL) { \
387 if (chunks == NULL) { \
388 chunks = PyList_New(0); \
389 if (chunks == NULL) { \
390 goto bail; \
391 } \
392 } \
393 if (PyList_Append(chunks, chunk)) { \
Victor Stinner31a3ec32014-09-10 23:31:42 +0200394 Py_CLEAR(chunk); \
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000395 goto bail; \
396 } \
397 Py_CLEAR(chunk); \
398 }
399
Christian Heimes90540002008-05-08 14:29:10 +0000400static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000401scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000402{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000403 /* Read the JSON string from PyUnicode pystr.
404 end is the index of the first character after the quote.
405 if strict is zero then literal control characters are allowed
406 *next_end_ptr is a return-by-reference index of the character
407 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000408
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000409 Return value is a new PyUnicode
410 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000411 PyObject *rval = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200412 Py_ssize_t len;
Christian Heimes90540002008-05-08 14:29:10 +0000413 Py_ssize_t begin = end - 1;
Brett Cannonb94767f2011-02-22 20:15:44 +0000414 Py_ssize_t next /* = begin */;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200415 const void *buf;
416 int kind;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000417 PyObject *chunks = NULL;
418 PyObject *chunk = NULL;
419
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200420 if (PyUnicode_READY(pystr) == -1)
421 return 0;
422
423 len = PyUnicode_GET_LENGTH(pystr);
424 buf = PyUnicode_DATA(pystr);
425 kind = PyUnicode_KIND(pystr);
426
Ezio Melotti37623ab2013-01-03 08:44:15 +0200427 if (end < 0 || len < end) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000428 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
429 goto bail;
430 }
Christian Heimes90540002008-05-08 14:29:10 +0000431 while (1) {
432 /* Find the end of the string or the next escape */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200433 Py_UCS4 c = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000434 for (next = end; next < len; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200435 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000436 if (c == '"' || c == '\\') {
437 break;
438 }
439 else if (strict && c <= 0x1f) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000440 raise_errmsg("Invalid control character at", pystr, next);
Christian Heimes90540002008-05-08 14:29:10 +0000441 goto bail;
442 }
443 }
444 if (!(c == '"' || c == '\\')) {
445 raise_errmsg("Unterminated string starting at", pystr, begin);
446 goto bail;
447 }
448 /* Pick up this chunk if it's not zero length */
449 if (next != end) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000450 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200451 chunk = PyUnicode_FromKindAndData(
452 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200453 (char*)buf + kind * end,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200454 next - end);
Christian Heimes90540002008-05-08 14:29:10 +0000455 if (chunk == NULL) {
456 goto bail;
457 }
Christian Heimes90540002008-05-08 14:29:10 +0000458 }
459 next++;
460 if (c == '"') {
461 end = next;
462 break;
463 }
464 if (next == len) {
465 raise_errmsg("Unterminated string starting at", pystr, begin);
466 goto bail;
467 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200468 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000469 if (c != 'u') {
470 /* Non-unicode backslash escapes */
471 end = next + 1;
472 switch (c) {
473 case '"': break;
474 case '\\': break;
475 case '/': break;
476 case 'b': c = '\b'; break;
477 case 'f': c = '\f'; break;
478 case 'n': c = '\n'; break;
479 case 'r': c = '\r'; break;
480 case 't': c = '\t'; break;
481 default: c = 0;
482 }
483 if (c == 0) {
484 raise_errmsg("Invalid \\escape", pystr, end - 2);
485 goto bail;
486 }
487 }
488 else {
489 c = 0;
490 next++;
491 end = next + 4;
492 if (end >= len) {
493 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
494 goto bail;
495 }
496 /* Decode 4 hex digits */
497 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200498 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000499 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000500 switch (digit) {
501 case '0': case '1': case '2': case '3': case '4':
502 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000503 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000504 case 'a': case 'b': case 'c': case 'd': case 'e':
505 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000506 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000507 case 'A': case 'B': case 'C': case 'D': case 'E':
508 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000509 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000510 default:
511 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
512 goto bail;
513 }
514 }
Christian Heimes90540002008-05-08 14:29:10 +0000515 /* Surrogate pair */
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200516 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
517 PyUnicode_READ(kind, buf, next++) == '\\' &&
518 PyUnicode_READ(kind, buf, next++) == 'u') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200519 Py_UCS4 c2 = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000520 end += 6;
521 /* Decode 4 hex digits */
522 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200523 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000524 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000525 switch (digit) {
526 case '0': case '1': case '2': case '3': case '4':
527 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000528 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000529 case 'a': case 'b': case 'c': case 'd': case 'e':
530 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000531 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000532 case 'A': case 'B': case 'C': case 'D': case 'E':
533 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000534 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000535 default:
536 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
537 goto bail;
538 }
539 }
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200540 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
541 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
542 else
543 end -= 6;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000544 }
Christian Heimes90540002008-05-08 14:29:10 +0000545 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000546 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200547 chunk = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &c, 1);
Christian Heimes90540002008-05-08 14:29:10 +0000548 if (chunk == NULL) {
549 goto bail;
550 }
Christian Heimes90540002008-05-08 14:29:10 +0000551 }
552
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000553 if (chunks == NULL) {
554 if (chunk != NULL)
555 rval = chunk;
556 else
557 rval = PyUnicode_FromStringAndSize("", 0);
Christian Heimes90540002008-05-08 14:29:10 +0000558 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000559 else {
560 APPEND_OLD_CHUNK
561 rval = join_list_unicode(chunks);
562 if (rval == NULL) {
563 goto bail;
564 }
565 Py_CLEAR(chunks);
566 }
567
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000568 *next_end_ptr = end;
569 return rval;
Christian Heimes90540002008-05-08 14:29:10 +0000570bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000571 *next_end_ptr = -1;
Christian Heimes90540002008-05-08 14:29:10 +0000572 Py_XDECREF(chunks);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000573 Py_XDECREF(chunk);
Christian Heimes90540002008-05-08 14:29:10 +0000574 return NULL;
575}
576
577PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000578 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000579 "\n"
580 "Scan the string s for a JSON string. End is the index of the\n"
581 "character in s after the quote that started the JSON string.\n"
582 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
583 "on attempt to decode an invalid string. If strict is False then literal\n"
584 "control characters are allowed in the string.\n"
585 "\n"
586 "Returns a tuple of the decoded string and the index of the character in s\n"
587 "after the end quote."
588);
Christian Heimes90540002008-05-08 14:29:10 +0000589
590static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000591py_scanstring(PyObject* self UNUSED, PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000592{
593 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000594 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000595 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000596 Py_ssize_t next_end = -1;
597 int strict = 1;
Antoine Pitroucbb02842012-12-01 19:34:16 +0100598 if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000599 return NULL;
600 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000601 if (PyUnicode_Check(pystr)) {
602 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000603 }
604 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000605 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000606 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000607 Py_TYPE(pystr)->tp_name);
608 return NULL;
609 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000610 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000611}
612
613PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000614 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000615 "\n"
616 "Return an ASCII-only JSON representation of a Python string"
617);
Christian Heimes90540002008-05-08 14:29:10 +0000618
619static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000620py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000621{
622 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000623 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000624 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000625 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000626 rval = ascii_escape_unicode(pystr);
627 }
628 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000629 PyErr_Format(PyExc_TypeError,
630 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000631 Py_TYPE(pystr)->tp_name);
632 return NULL;
633 }
Christian Heimes90540002008-05-08 14:29:10 +0000634 return rval;
635}
636
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100637
638PyDoc_STRVAR(pydoc_encode_basestring,
639 "encode_basestring(string) -> string\n"
640 "\n"
641 "Return a JSON representation of a Python string"
642);
643
644static PyObject *
645py_encode_basestring(PyObject* self UNUSED, PyObject *pystr)
646{
647 PyObject *rval;
648 /* Return a JSON representation of a Python string */
649 /* METH_O */
650 if (PyUnicode_Check(pystr)) {
651 rval = escape_unicode(pystr);
652 }
653 else {
654 PyErr_Format(PyExc_TypeError,
655 "first argument must be a string, not %.80s",
656 Py_TYPE(pystr)->tp_name);
657 return NULL;
658 }
659 return rval;
660}
661
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000662static void
663scanner_dealloc(PyObject *self)
664{
INADA Naokia6296d32017-08-24 14:55:17 +0900665 /* bpo-31095: UnTrack is needed before calling any callbacks */
666 PyObject_GC_UnTrack(self);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000667 scanner_clear(self);
668 Py_TYPE(self)->tp_free(self);
669}
670
671static int
672scanner_traverse(PyObject *self, visitproc visit, void *arg)
673{
674 PyScannerObject *s;
675 assert(PyScanner_Check(self));
676 s = (PyScannerObject *)self;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000677 Py_VISIT(s->object_hook);
678 Py_VISIT(s->object_pairs_hook);
679 Py_VISIT(s->parse_float);
680 Py_VISIT(s->parse_int);
681 Py_VISIT(s->parse_constant);
682 return 0;
683}
684
685static int
686scanner_clear(PyObject *self)
687{
688 PyScannerObject *s;
689 assert(PyScanner_Check(self));
690 s = (PyScannerObject *)self;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000691 Py_CLEAR(s->object_hook);
692 Py_CLEAR(s->object_pairs_hook);
693 Py_CLEAR(s->parse_float);
694 Py_CLEAR(s->parse_int);
695 Py_CLEAR(s->parse_constant);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000696 Py_CLEAR(s->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000697 return 0;
698}
699
700static PyObject *
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300701_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
702{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000703 /* Read a JSON object from PyUnicode pystr.
704 idx is the index of the first character after the opening curly brace.
705 *next_idx_ptr is a return-by-reference index to the first character after
706 the closing curly brace.
707
708 Returns a new PyObject (usually a dict, but object_hook can change that)
709 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200710 void *str;
711 int kind;
712 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000713 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000714 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000715 PyObject *key = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000716 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000717 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000718
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200719 if (PyUnicode_READY(pystr) == -1)
720 return NULL;
721
722 str = PyUnicode_DATA(pystr);
723 kind = PyUnicode_KIND(pystr);
724 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
725
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000726 if (has_pairs_hook)
727 rval = PyList_New(0);
728 else
729 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000730 if (rval == NULL)
731 return NULL;
732
733 /* skip whitespace after { */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200734 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000735
736 /* only loop if the object is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200737 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
738 while (1) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000739 PyObject *memokey;
740
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000741 /* read key */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200742 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200743 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000744 goto bail;
745 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300746 key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000747 if (key == NULL)
748 goto bail;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200749 memokey = PyDict_GetItemWithError(s->memo, key);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000750 if (memokey != NULL) {
751 Py_INCREF(memokey);
752 Py_DECREF(key);
753 key = memokey;
754 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200755 else if (PyErr_Occurred()) {
756 goto bail;
757 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000758 else {
759 if (PyDict_SetItem(s->memo, key, key) < 0)
760 goto bail;
761 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000762 idx = next_idx;
763
764 /* skip whitespace between key and : delimiter, read :, skip whitespace */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200765 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
766 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200767 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000768 goto bail;
769 }
770 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200771 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000772
773 /* read any JSON term */
774 val = scan_once_unicode(s, pystr, idx, &next_idx);
775 if (val == NULL)
776 goto bail;
777
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000778 if (has_pairs_hook) {
779 PyObject *item = PyTuple_Pack(2, key, val);
780 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000781 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000782 Py_CLEAR(key);
783 Py_CLEAR(val);
784 if (PyList_Append(rval, item) == -1) {
785 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000786 goto bail;
787 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000788 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000789 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000790 else {
791 if (PyDict_SetItem(rval, key, val) < 0)
792 goto bail;
793 Py_CLEAR(key);
794 Py_CLEAR(val);
795 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000796 idx = next_idx;
797
798 /* skip whitespace before } or , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200799 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000800
801 /* bail if the object is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200802 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000803 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200804 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200805 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000806 goto bail;
807 }
808 idx++;
809
810 /* skip whitespace after , delimiter */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200811 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000812 }
813 }
814
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000815 *next_idx_ptr = idx + 1;
816
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000817 if (has_pairs_hook) {
Victor Stinnerde4ae3d2016-12-04 22:59:09 +0100818 val = PyObject_CallFunctionObjArgs(s->object_pairs_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000819 Py_DECREF(rval);
820 return val;
821 }
822
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000823 /* if object_hook is not None: rval = object_hook(rval) */
824 if (s->object_hook != Py_None) {
Victor Stinnerde4ae3d2016-12-04 22:59:09 +0100825 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000826 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000827 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000828 }
829 return rval;
830bail:
831 Py_XDECREF(key);
832 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000833 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000834 return NULL;
835}
836
837static PyObject *
838_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200839 /* Read a JSON array from PyUnicode pystr.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000840 idx is the index of the first character after the opening brace.
841 *next_idx_ptr is a return-by-reference index to the first character after
842 the closing brace.
843
844 Returns a new PyList
845 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200846 void *str;
847 int kind;
848 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000849 PyObject *val = NULL;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200850 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000851 Py_ssize_t next_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000852
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200853 if (PyUnicode_READY(pystr) == -1)
854 return NULL;
855
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200856 rval = PyList_New(0);
857 if (rval == NULL)
858 return NULL;
859
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200860 str = PyUnicode_DATA(pystr);
861 kind = PyUnicode_KIND(pystr);
862 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
863
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000864 /* skip whitespace after [ */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200865 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000866
867 /* only loop if the array is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200868 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
869 while (1) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000870
871 /* read any JSON term */
872 val = scan_once_unicode(s, pystr, idx, &next_idx);
873 if (val == NULL)
874 goto bail;
875
876 if (PyList_Append(rval, val) == -1)
877 goto bail;
878
879 Py_CLEAR(val);
880 idx = next_idx;
881
882 /* skip whitespace between term and , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200883 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000884
885 /* bail if the array is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200886 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000887 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200888 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200889 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000890 goto bail;
891 }
892 idx++;
893
894 /* skip whitespace after , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200895 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000896 }
897 }
898
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200899 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
900 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200901 raise_errmsg("Expecting value", pystr, end_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000902 goto bail;
903 }
904 *next_idx_ptr = idx + 1;
905 return rval;
906bail:
907 Py_XDECREF(val);
908 Py_DECREF(rval);
909 return NULL;
910}
911
912static PyObject *
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200913_parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
914 /* Read a JSON constant.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000915 constant is the constant string that was found
916 ("NaN", "Infinity", "-Infinity").
917 idx is the index of the first character of the constant
918 *next_idx_ptr is a return-by-reference index to the first character after
919 the constant.
920
921 Returns the result of parse_constant
922 */
923 PyObject *cstr;
924 PyObject *rval;
925 /* constant is "NaN", "Infinity", or "-Infinity" */
926 cstr = PyUnicode_InternFromString(constant);
927 if (cstr == NULL)
928 return NULL;
929
930 /* rval = parse_constant(constant) */
Victor Stinnerde4ae3d2016-12-04 22:59:09 +0100931 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200932 idx += PyUnicode_GET_LENGTH(cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000933 Py_DECREF(cstr);
934 *next_idx_ptr = idx;
935 return rval;
936}
937
938static PyObject *
939_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
940 /* Read a JSON number from PyUnicode pystr.
941 idx is the index of the first character of the number
942 *next_idx_ptr is a return-by-reference index to the first character after
943 the number.
944
945 Returns a new PyObject representation of that number:
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200946 PyLong, or PyFloat.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000947 May return other types if parse_int or parse_float are set
948 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200949 void *str;
950 int kind;
951 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000952 Py_ssize_t idx = start;
953 int is_float = 0;
954 PyObject *rval;
Antoine Pitrouf6454512011-04-25 19:16:06 +0200955 PyObject *numstr = NULL;
956 PyObject *custom_func;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000957
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200958 if (PyUnicode_READY(pystr) == -1)
959 return NULL;
960
961 str = PyUnicode_DATA(pystr);
962 kind = PyUnicode_KIND(pystr);
963 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
964
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000965 /* read a sign if it's there, make sure it's not the end of the string */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200966 if (PyUnicode_READ(kind, str, idx) == '-') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000967 idx++;
968 if (idx > end_idx) {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200969 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000970 return NULL;
971 }
972 }
973
974 /* read as many integer digits as we find as long as it doesn't start with 0 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200975 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000976 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200977 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000978 }
979 /* if it starts with 0 we only expect one integer digit */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200980 else if (PyUnicode_READ(kind, str, idx) == '0') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000981 idx++;
982 }
983 /* no integer digits, error */
984 else {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200985 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000986 return NULL;
987 }
988
989 /* if the next char is '.' followed by a digit then read all float digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200990 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000991 is_float = 1;
992 idx += 2;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200993 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000994 }
995
996 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200997 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000998 Py_ssize_t e_start = idx;
999 idx++;
1000
1001 /* read an exponent sign if present */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001002 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001003
1004 /* read all digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001005 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001006
1007 /* if we got a digit, then parse as float. if not, backtrack */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001008 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001009 is_float = 1;
1010 }
1011 else {
1012 idx = e_start;
1013 }
1014 }
1015
Antoine Pitrouf6454512011-04-25 19:16:06 +02001016 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
1017 custom_func = s->parse_float;
1018 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
1019 custom_func = s->parse_int;
1020 else
1021 custom_func = NULL;
1022
1023 if (custom_func) {
1024 /* copy the section we determined to be a number */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001025 numstr = PyUnicode_FromKindAndData(kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001026 (char*)str + kind * start,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001027 idx - start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001028 if (numstr == NULL)
1029 return NULL;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001030 rval = PyObject_CallFunctionObjArgs(custom_func, numstr, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001031 }
1032 else {
Antoine Pitrouf6454512011-04-25 19:16:06 +02001033 Py_ssize_t i, n;
1034 char *buf;
1035 /* Straight conversion to ASCII, to avoid costly conversion of
1036 decimal unicode digits (which cannot appear here) */
1037 n = idx - start;
1038 numstr = PyBytes_FromStringAndSize(NULL, n);
1039 if (numstr == NULL)
1040 return NULL;
1041 buf = PyBytes_AS_STRING(numstr);
1042 for (i = 0; i < n; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001043 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001044 }
1045 if (is_float)
1046 rval = PyFloat_FromString(numstr);
1047 else
1048 rval = PyLong_FromString(buf, NULL, 10);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001049 }
1050 Py_DECREF(numstr);
1051 *next_idx_ptr = idx;
1052 return rval;
1053}
1054
1055static PyObject *
1056scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1057{
1058 /* Read one JSON term (of any kind) from PyUnicode pystr.
1059 idx is the index of the first character of the term
1060 *next_idx_ptr is a return-by-reference index to the first character after
1061 the number.
1062
1063 Returns a new PyObject representation of the term.
1064 */
Ezio Melotti362b9512011-05-07 17:58:09 +03001065 PyObject *res;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001066 void *str;
1067 int kind;
1068 Py_ssize_t length;
1069
1070 if (PyUnicode_READY(pystr) == -1)
1071 return NULL;
1072
1073 str = PyUnicode_DATA(pystr);
1074 kind = PyUnicode_KIND(pystr);
1075 length = PyUnicode_GET_LENGTH(pystr);
1076
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001077 if (idx < 0) {
Benjamin Peterson9beee042014-04-14 11:46:51 -04001078 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001079 return NULL;
1080 }
1081 if (idx >= length) {
Ezio Melotti37623ab2013-01-03 08:44:15 +02001082 raise_stop_iteration(idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001083 return NULL;
1084 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001085
1086 switch (PyUnicode_READ(kind, str, idx)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001087 case '"':
1088 /* string */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001089 return scanstring_unicode(pystr, idx + 1, s->strict, next_idx_ptr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001090 case '{':
1091 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +03001092 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1093 "from a unicode string"))
1094 return NULL;
1095 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1096 Py_LeaveRecursiveCall();
1097 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001098 case '[':
1099 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +03001100 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1101 "from a unicode string"))
1102 return NULL;
1103 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1104 Py_LeaveRecursiveCall();
1105 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001106 case 'n':
1107 /* null */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001108 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001109 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001110 Py_RETURN_NONE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001111 }
1112 break;
1113 case 't':
1114 /* true */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001115 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001116 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001117 Py_RETURN_TRUE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001118 }
1119 break;
1120 case 'f':
1121 /* false */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001122 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1123 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1124 PyUnicode_READ(kind, str, idx + 3) == 's' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001125 PyUnicode_READ(kind, str, idx + 4) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001126 *next_idx_ptr = idx + 5;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001127 Py_RETURN_FALSE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001128 }
1129 break;
1130 case 'N':
1131 /* NaN */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001132 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001133 PyUnicode_READ(kind, str, idx + 2) == 'N') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001134 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1135 }
1136 break;
1137 case 'I':
1138 /* Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001139 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1140 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1141 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001142 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001143 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1144 PyUnicode_READ(kind, str, idx + 6) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001145 PyUnicode_READ(kind, str, idx + 7) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001146 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1147 }
1148 break;
1149 case '-':
1150 /* -Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001151 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001152 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1153 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001154 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001155 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001156 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1157 PyUnicode_READ(kind, str, idx + 7) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001158 PyUnicode_READ(kind, str, idx + 8) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001159 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1160 }
1161 break;
1162 }
1163 /* Didn't find a string, object, array, or named constant. Look for a number. */
1164 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1165}
1166
1167static PyObject *
1168scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1169{
1170 /* Python callable interface to scan_once_{str,unicode} */
1171 PyObject *pystr;
1172 PyObject *rval;
1173 Py_ssize_t idx;
1174 Py_ssize_t next_idx = -1;
1175 static char *kwlist[] = {"string", "idx", NULL};
1176 PyScannerObject *s;
1177 assert(PyScanner_Check(self));
1178 s = (PyScannerObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001179 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001180 return NULL;
1181
1182 if (PyUnicode_Check(pystr)) {
1183 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1184 }
1185 else {
1186 PyErr_Format(PyExc_TypeError,
1187 "first argument must be a string, not %.80s",
1188 Py_TYPE(pystr)->tp_name);
1189 return NULL;
1190 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001191 PyDict_Clear(s->memo);
1192 if (rval == NULL)
1193 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001194 return _build_rval_index_tuple(rval, next_idx);
1195}
1196
1197static PyObject *
1198scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1199{
1200 PyScannerObject *s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001201 PyObject *ctx;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001202 PyObject *strict;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001203 static char *kwlist[] = {"context", NULL};
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001204
1205 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001206 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001207
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001208 s = (PyScannerObject *)type->tp_alloc(type, 0);
1209 if (s == NULL) {
1210 return NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001211 }
1212
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001213 s->memo = PyDict_New();
1214 if (s->memo == NULL)
1215 goto bail;
1216
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001217 /* All of these will fail "gracefully" so we don't need to verify them */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001218 strict = PyObject_GetAttrString(ctx, "strict");
1219 if (strict == NULL)
1220 goto bail;
1221 s->strict = PyObject_IsTrue(strict);
1222 Py_DECREF(strict);
1223 if (s->strict < 0)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001224 goto bail;
1225 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1226 if (s->object_hook == NULL)
1227 goto bail;
1228 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1229 if (s->object_pairs_hook == NULL)
1230 goto bail;
1231 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1232 if (s->parse_float == NULL)
1233 goto bail;
1234 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1235 if (s->parse_int == NULL)
1236 goto bail;
1237 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1238 if (s->parse_constant == NULL)
1239 goto bail;
1240
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001241 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001242
1243bail:
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001244 Py_DECREF(s);
1245 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001246}
1247
1248PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1249
1250static
1251PyTypeObject PyScannerType = {
1252 PyVarObject_HEAD_INIT(NULL, 0)
1253 "_json.Scanner", /* tp_name */
1254 sizeof(PyScannerObject), /* tp_basicsize */
1255 0, /* tp_itemsize */
1256 scanner_dealloc, /* tp_dealloc */
1257 0, /* tp_print */
1258 0, /* tp_getattr */
1259 0, /* tp_setattr */
1260 0, /* tp_compare */
1261 0, /* tp_repr */
1262 0, /* tp_as_number */
1263 0, /* tp_as_sequence */
1264 0, /* tp_as_mapping */
1265 0, /* tp_hash */
1266 scanner_call, /* tp_call */
1267 0, /* tp_str */
1268 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1269 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1270 0, /* tp_as_buffer */
1271 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1272 scanner_doc, /* tp_doc */
1273 scanner_traverse, /* tp_traverse */
1274 scanner_clear, /* tp_clear */
1275 0, /* tp_richcompare */
1276 0, /* tp_weaklistoffset */
1277 0, /* tp_iter */
1278 0, /* tp_iternext */
1279 0, /* tp_methods */
1280 scanner_members, /* tp_members */
1281 0, /* tp_getset */
1282 0, /* tp_base */
1283 0, /* tp_dict */
1284 0, /* tp_descr_get */
1285 0, /* tp_descr_set */
1286 0, /* tp_dictoffset */
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001287 0, /* tp_init */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001288 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1289 scanner_new, /* tp_new */
1290 0,/* PyObject_GC_Del, */ /* tp_free */
1291};
1292
1293static PyObject *
1294encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1295{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001296 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1297
1298 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001299 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001300 PyObject *item_separator;
1301 int sort_keys, skipkeys, allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001302
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001303 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001304 &markers, &defaultfn, &encoder, &indent,
1305 &key_separator, &item_separator,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001306 &sort_keys, &skipkeys, &allow_nan))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001307 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001308
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001309 if (markers != Py_None && !PyDict_Check(markers)) {
1310 PyErr_Format(PyExc_TypeError,
1311 "make_encoder() argument 1 must be dict or None, "
1312 "not %.200s", Py_TYPE(markers)->tp_name);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001313 return NULL;
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001314 }
1315
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001316 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1317 if (s == NULL)
1318 return NULL;
1319
Antoine Pitrou781eba72009-12-08 15:57:31 +00001320 s->markers = markers;
1321 s->defaultfn = defaultfn;
1322 s->encoder = encoder;
1323 s->indent = indent;
1324 s->key_separator = key_separator;
1325 s->item_separator = item_separator;
1326 s->sort_keys = sort_keys;
1327 s->skipkeys = skipkeys;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001328 s->allow_nan = allow_nan;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001329 s->fast_encode = NULL;
1330 if (PyCFunction_Check(s->encoder)) {
1331 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1332 if (f == (PyCFunction)py_encode_basestring_ascii ||
1333 f == (PyCFunction)py_encode_basestring) {
1334 s->fast_encode = f;
1335 }
1336 }
Antoine Pitrou781eba72009-12-08 15:57:31 +00001337
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001338 Py_INCREF(s->markers);
1339 Py_INCREF(s->defaultfn);
1340 Py_INCREF(s->encoder);
1341 Py_INCREF(s->indent);
1342 Py_INCREF(s->key_separator);
1343 Py_INCREF(s->item_separator);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001344 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001345}
1346
1347static PyObject *
1348encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1349{
1350 /* Python callable interface to encode_listencode_obj */
1351 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1352 PyObject *obj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001353 Py_ssize_t indent_level;
1354 PyEncoderObject *s;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001355 _PyAccu acc;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001356
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001357 assert(PyEncoder_Check(self));
1358 s = (PyEncoderObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001359 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1360 &obj, &indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001361 return NULL;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001362 if (_PyAccu_Init(&acc))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001363 return NULL;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001364 if (encoder_listencode_obj(s, &acc, obj, indent_level)) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001365 _PyAccu_Destroy(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001366 return NULL;
1367 }
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001368 return _PyAccu_FinishAsList(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001369}
1370
1371static PyObject *
1372_encoded_const(PyObject *obj)
1373{
1374 /* Return the JSON string representation of None, True, False */
1375 if (obj == Py_None) {
1376 static PyObject *s_null = NULL;
1377 if (s_null == NULL) {
1378 s_null = PyUnicode_InternFromString("null");
1379 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001380 Py_XINCREF(s_null);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001381 return s_null;
1382 }
1383 else if (obj == Py_True) {
1384 static PyObject *s_true = NULL;
1385 if (s_true == NULL) {
1386 s_true = PyUnicode_InternFromString("true");
1387 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001388 Py_XINCREF(s_true);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001389 return s_true;
1390 }
1391 else if (obj == Py_False) {
1392 static PyObject *s_false = NULL;
1393 if (s_false == NULL) {
1394 s_false = PyUnicode_InternFromString("false");
1395 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001396 Py_XINCREF(s_false);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001397 return s_false;
1398 }
1399 else {
1400 PyErr_SetString(PyExc_ValueError, "not a const");
1401 return NULL;
1402 }
1403}
1404
1405static PyObject *
1406encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1407{
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001408 /* Return the JSON representation of a PyFloat. */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001409 double i = PyFloat_AS_DOUBLE(obj);
1410 if (!Py_IS_FINITE(i)) {
1411 if (!s->allow_nan) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001412 PyErr_SetString(
1413 PyExc_ValueError,
1414 "Out of range float values are not JSON compliant"
1415 );
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001416 return NULL;
1417 }
1418 if (i > 0) {
1419 return PyUnicode_FromString("Infinity");
1420 }
1421 else if (i < 0) {
1422 return PyUnicode_FromString("-Infinity");
1423 }
1424 else {
1425 return PyUnicode_FromString("NaN");
1426 }
1427 }
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001428 return PyFloat_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001429}
1430
1431static PyObject *
1432encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1433{
1434 /* Return the JSON representation of a string */
Oren Milman2b382dd2017-09-24 12:07:12 +03001435 PyObject *encoded;
1436
1437 if (s->fast_encode) {
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001438 return s->fast_encode(NULL, obj);
Oren Milman2b382dd2017-09-24 12:07:12 +03001439 }
1440 encoded = PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1441 if (encoded != NULL && !PyUnicode_Check(encoded)) {
1442 PyErr_Format(PyExc_TypeError,
1443 "encoder() must return a string, not %.80s",
1444 Py_TYPE(encoded)->tp_name);
1445 Py_DECREF(encoded);
1446 return NULL;
1447 }
1448 return encoded;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001449}
1450
1451static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001452_steal_accumulate(_PyAccu *acc, PyObject *stolen)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001453{
1454 /* Append stolen and then decrement its reference count */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001455 int rval = _PyAccu_Accumulate(acc, stolen);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001456 Py_DECREF(stolen);
1457 return rval;
1458}
1459
1460static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001461encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001462 PyObject *obj, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001463{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001464 /* Encode Python object obj to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001465 PyObject *newobj;
1466 int rv;
1467
1468 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1469 PyObject *cstr = _encoded_const(obj);
1470 if (cstr == NULL)
1471 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001472 return _steal_accumulate(acc, cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001473 }
1474 else if (PyUnicode_Check(obj))
1475 {
1476 PyObject *encoded = encoder_encode_string(s, obj);
1477 if (encoded == NULL)
1478 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001479 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001480 }
1481 else if (PyLong_Check(obj)) {
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001482 PyObject *encoded = PyLong_Type.tp_str(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001483 if (encoded == NULL)
1484 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001485 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001486 }
1487 else if (PyFloat_Check(obj)) {
1488 PyObject *encoded = encoder_encode_float(s, obj);
1489 if (encoded == NULL)
1490 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001491 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001492 }
1493 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001494 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1495 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001496 rv = encoder_listencode_list(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001497 Py_LeaveRecursiveCall();
1498 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001499 }
1500 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001501 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1502 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001503 rv = encoder_listencode_dict(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001504 Py_LeaveRecursiveCall();
1505 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001506 }
1507 else {
1508 PyObject *ident = NULL;
1509 if (s->markers != Py_None) {
1510 int has_key;
1511 ident = PyLong_FromVoidPtr(obj);
1512 if (ident == NULL)
1513 return -1;
1514 has_key = PyDict_Contains(s->markers, ident);
1515 if (has_key) {
1516 if (has_key != -1)
1517 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1518 Py_DECREF(ident);
1519 return -1;
1520 }
1521 if (PyDict_SetItem(s->markers, ident, obj)) {
1522 Py_DECREF(ident);
1523 return -1;
1524 }
1525 }
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001526 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001527 if (newobj == NULL) {
1528 Py_XDECREF(ident);
1529 return -1;
1530 }
Ezio Melotti13672652011-05-11 01:02:56 +03001531
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001532 if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
1533 Py_DECREF(newobj);
1534 Py_XDECREF(ident);
Ezio Melotti13672652011-05-11 01:02:56 +03001535 return -1;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001536 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001537 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001538 Py_LeaveRecursiveCall();
1539
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001540 Py_DECREF(newobj);
1541 if (rv) {
1542 Py_XDECREF(ident);
1543 return -1;
1544 }
1545 if (ident != NULL) {
1546 if (PyDict_DelItem(s->markers, ident)) {
1547 Py_XDECREF(ident);
1548 return -1;
1549 }
1550 Py_XDECREF(ident);
1551 }
1552 return rv;
1553 }
1554}
1555
1556static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001557encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001558 PyObject *dct, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001559{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001560 /* Encode Python dict dct a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001561 static PyObject *open_dict = NULL;
1562 static PyObject *close_dict = NULL;
1563 static PyObject *empty_dict = NULL;
1564 PyObject *kstr = NULL;
1565 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001566 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001567 PyObject *items;
1568 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001569 Py_ssize_t idx;
1570
1571 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1572 open_dict = PyUnicode_InternFromString("{");
1573 close_dict = PyUnicode_InternFromString("}");
1574 empty_dict = PyUnicode_InternFromString("{}");
1575 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1576 return -1;
1577 }
Serhiy Storchaka8cbc51a2017-01-13 08:38:15 +02001578 if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001579 return _PyAccu_Accumulate(acc, empty_dict);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001580
1581 if (s->markers != Py_None) {
1582 int has_key;
1583 ident = PyLong_FromVoidPtr(dct);
1584 if (ident == NULL)
1585 goto bail;
1586 has_key = PyDict_Contains(s->markers, ident);
1587 if (has_key) {
1588 if (has_key != -1)
1589 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1590 goto bail;
1591 }
1592 if (PyDict_SetItem(s->markers, ident, dct)) {
1593 goto bail;
1594 }
1595 }
1596
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001597 if (_PyAccu_Accumulate(acc, open_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001598 goto bail;
1599
1600 if (s->indent != Py_None) {
1601 /* TODO: DOES NOT RUN */
1602 indent_level += 1;
1603 /*
1604 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1605 separator = _item_separator + newline_indent
1606 buf += newline_indent
1607 */
1608 }
1609
Benjamin Peterson501182a2015-05-02 22:28:04 -04001610 items = PyMapping_Items(dct);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001611 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001612 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001613 if (s->sort_keys && PyList_Sort(items) < 0) {
1614 Py_DECREF(items);
Benjamin Peterson501182a2015-05-02 22:28:04 -04001615 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001616 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001617 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001618 Py_DECREF(items);
1619 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001620 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001621 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001622 while ((item = PyIter_Next(it)) != NULL) {
1623 PyObject *encoded, *key, *value;
Serhiy Storchakafff9a312017-03-21 08:53:25 +02001624 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001625 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1626 goto bail;
1627 }
1628 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001629 if (PyUnicode_Check(key)) {
1630 Py_INCREF(key);
1631 kstr = key;
1632 }
1633 else if (PyFloat_Check(key)) {
1634 kstr = encoder_encode_float(s, key);
1635 if (kstr == NULL)
1636 goto bail;
1637 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001638 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001639 /* This must come before the PyLong_Check because
1640 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001641 kstr = _encoded_const(key);
1642 if (kstr == NULL)
1643 goto bail;
1644 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001645 else if (PyLong_Check(key)) {
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001646 kstr = PyLong_Type.tp_str(key);
Ethan Furmana4998a72013-08-10 13:01:45 -07001647 if (kstr == NULL) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001648 goto bail;
Ethan Furmana4998a72013-08-10 13:01:45 -07001649 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001650 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001651 else if (s->skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001652 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001653 continue;
1654 }
1655 else {
Serhiy Storchakacfa797c2017-11-25 17:38:20 +02001656 PyErr_Format(PyExc_TypeError,
1657 "keys must be str, int, float, bool or None, "
1658 "not %.100s", key->ob_type->tp_name);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001659 goto bail;
1660 }
1661
1662 if (idx) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001663 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001664 goto bail;
1665 }
1666
1667 encoded = encoder_encode_string(s, kstr);
1668 Py_CLEAR(kstr);
1669 if (encoded == NULL)
1670 goto bail;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001671 if (_PyAccu_Accumulate(acc, encoded)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001672 Py_DECREF(encoded);
1673 goto bail;
1674 }
1675 Py_DECREF(encoded);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001676 if (_PyAccu_Accumulate(acc, s->key_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001677 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001678
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001679 value = PyTuple_GET_ITEM(item, 1);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001680 if (encoder_listencode_obj(s, acc, value, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001681 goto bail;
1682 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001683 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001684 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001685 if (PyErr_Occurred())
1686 goto bail;
1687 Py_CLEAR(it);
1688
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001689 if (ident != NULL) {
1690 if (PyDict_DelItem(s->markers, ident))
1691 goto bail;
1692 Py_CLEAR(ident);
1693 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001694 /* TODO DOES NOT RUN; dead code
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001695 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001696 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001697
1698 yield '\n' + (' ' * (_indent * _current_indent_level))
1699 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001700 if (_PyAccu_Accumulate(acc, close_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001701 goto bail;
1702 return 0;
1703
1704bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001705 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001706 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001707 Py_XDECREF(kstr);
1708 Py_XDECREF(ident);
1709 return -1;
1710}
1711
1712
1713static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001714encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001715 PyObject *seq, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001716{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001717 /* Encode Python list seq to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001718 static PyObject *open_array = NULL;
1719 static PyObject *close_array = NULL;
1720 static PyObject *empty_array = NULL;
1721 PyObject *ident = NULL;
1722 PyObject *s_fast = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001723 Py_ssize_t i;
1724
1725 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1726 open_array = PyUnicode_InternFromString("[");
1727 close_array = PyUnicode_InternFromString("]");
1728 empty_array = PyUnicode_InternFromString("[]");
1729 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1730 return -1;
1731 }
1732 ident = NULL;
1733 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1734 if (s_fast == NULL)
1735 return -1;
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001736 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001737 Py_DECREF(s_fast);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001738 return _PyAccu_Accumulate(acc, empty_array);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001739 }
1740
1741 if (s->markers != Py_None) {
1742 int has_key;
1743 ident = PyLong_FromVoidPtr(seq);
1744 if (ident == NULL)
1745 goto bail;
1746 has_key = PyDict_Contains(s->markers, ident);
1747 if (has_key) {
1748 if (has_key != -1)
1749 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1750 goto bail;
1751 }
1752 if (PyDict_SetItem(s->markers, ident, seq)) {
1753 goto bail;
1754 }
1755 }
1756
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001757 if (_PyAccu_Accumulate(acc, open_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001758 goto bail;
1759 if (s->indent != Py_None) {
1760 /* TODO: DOES NOT RUN */
1761 indent_level += 1;
1762 /*
1763 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1764 separator = _item_separator + newline_indent
1765 buf += newline_indent
1766 */
1767 }
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001768 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1769 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001770 if (i) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001771 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001772 goto bail;
1773 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001774 if (encoder_listencode_obj(s, acc, obj, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001775 goto bail;
1776 }
1777 if (ident != NULL) {
1778 if (PyDict_DelItem(s->markers, ident))
1779 goto bail;
1780 Py_CLEAR(ident);
1781 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001782
1783 /* TODO: DOES NOT RUN
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001784 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001785 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001786
1787 yield '\n' + (' ' * (_indent * _current_indent_level))
1788 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001789 if (_PyAccu_Accumulate(acc, close_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001790 goto bail;
1791 Py_DECREF(s_fast);
1792 return 0;
1793
1794bail:
1795 Py_XDECREF(ident);
1796 Py_DECREF(s_fast);
1797 return -1;
1798}
1799
1800static void
1801encoder_dealloc(PyObject *self)
1802{
INADA Naokia6296d32017-08-24 14:55:17 +09001803 /* bpo-31095: UnTrack is needed before calling any callbacks */
1804 PyObject_GC_UnTrack(self);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001805 encoder_clear(self);
1806 Py_TYPE(self)->tp_free(self);
1807}
1808
1809static int
1810encoder_traverse(PyObject *self, visitproc visit, void *arg)
1811{
1812 PyEncoderObject *s;
1813 assert(PyEncoder_Check(self));
1814 s = (PyEncoderObject *)self;
1815 Py_VISIT(s->markers);
1816 Py_VISIT(s->defaultfn);
1817 Py_VISIT(s->encoder);
1818 Py_VISIT(s->indent);
1819 Py_VISIT(s->key_separator);
1820 Py_VISIT(s->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001821 return 0;
1822}
1823
1824static int
1825encoder_clear(PyObject *self)
1826{
1827 /* Deallocate Encoder */
1828 PyEncoderObject *s;
1829 assert(PyEncoder_Check(self));
1830 s = (PyEncoderObject *)self;
1831 Py_CLEAR(s->markers);
1832 Py_CLEAR(s->defaultfn);
1833 Py_CLEAR(s->encoder);
1834 Py_CLEAR(s->indent);
1835 Py_CLEAR(s->key_separator);
1836 Py_CLEAR(s->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001837 return 0;
1838}
1839
1840PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1841
1842static
1843PyTypeObject PyEncoderType = {
1844 PyVarObject_HEAD_INIT(NULL, 0)
1845 "_json.Encoder", /* tp_name */
1846 sizeof(PyEncoderObject), /* tp_basicsize */
1847 0, /* tp_itemsize */
1848 encoder_dealloc, /* tp_dealloc */
1849 0, /* tp_print */
1850 0, /* tp_getattr */
1851 0, /* tp_setattr */
1852 0, /* tp_compare */
1853 0, /* tp_repr */
1854 0, /* tp_as_number */
1855 0, /* tp_as_sequence */
1856 0, /* tp_as_mapping */
1857 0, /* tp_hash */
1858 encoder_call, /* tp_call */
1859 0, /* tp_str */
1860 0, /* tp_getattro */
1861 0, /* tp_setattro */
1862 0, /* tp_as_buffer */
1863 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1864 encoder_doc, /* tp_doc */
1865 encoder_traverse, /* tp_traverse */
1866 encoder_clear, /* tp_clear */
1867 0, /* tp_richcompare */
1868 0, /* tp_weaklistoffset */
1869 0, /* tp_iter */
1870 0, /* tp_iternext */
1871 0, /* tp_methods */
1872 encoder_members, /* tp_members */
1873 0, /* tp_getset */
1874 0, /* tp_base */
1875 0, /* tp_dict */
1876 0, /* tp_descr_get */
1877 0, /* tp_descr_set */
1878 0, /* tp_dictoffset */
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001879 0, /* tp_init */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001880 0, /* tp_alloc */
1881 encoder_new, /* tp_new */
1882 0, /* tp_free */
1883};
1884
1885static PyMethodDef speedups_methods[] = {
1886 {"encode_basestring_ascii",
1887 (PyCFunction)py_encode_basestring_ascii,
1888 METH_O,
1889 pydoc_encode_basestring_ascii},
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001890 {"encode_basestring",
1891 (PyCFunction)py_encode_basestring,
1892 METH_O,
1893 pydoc_encode_basestring},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001894 {"scanstring",
1895 (PyCFunction)py_scanstring,
1896 METH_VARARGS,
1897 pydoc_scanstring},
Christian Heimes90540002008-05-08 14:29:10 +00001898 {NULL, NULL, 0, NULL}
1899};
1900
1901PyDoc_STRVAR(module_doc,
1902"json speedups\n");
1903
Martin v. Löwis1a214512008-06-11 05:26:20 +00001904static struct PyModuleDef jsonmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001905 PyModuleDef_HEAD_INIT,
1906 "_json",
1907 module_doc,
1908 -1,
1909 speedups_methods,
1910 NULL,
1911 NULL,
1912 NULL,
1913 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001914};
1915
Victor Stinnerf024d262015-03-17 17:48:27 +01001916PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001917PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00001918{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001919 PyObject *m = PyModule_Create(&jsonmodule);
1920 if (!m)
1921 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001922 if (PyType_Ready(&PyScannerType) < 0)
1923 goto fail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001924 if (PyType_Ready(&PyEncoderType) < 0)
1925 goto fail;
1926 Py_INCREF((PyObject*)&PyScannerType);
1927 if (PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType) < 0) {
1928 Py_DECREF((PyObject*)&PyScannerType);
1929 goto fail;
1930 }
1931 Py_INCREF((PyObject*)&PyEncoderType);
1932 if (PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType) < 0) {
1933 Py_DECREF((PyObject*)&PyEncoderType);
1934 goto fail;
1935 }
1936 return m;
1937 fail:
1938 Py_DECREF(m);
1939 return NULL;
Christian Heimes90540002008-05-08 14:29:10 +00001940}