blob: 112903ea577a2632c96dc001330b3367e4dad41c [file] [log] [blame]
/* JSON accelerator C extension: _json module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
 * platforms. */
Eric Snow2ebc5ce2017-09-07 23:51:28 -06006
Victor Stinner5c75f372019-04-17 23:02:26 +02007#if !defined(Py_BUILD_CORE_BUILTIN) && !defined(Py_BUILD_CORE_MODULE)
8# error "Py_BUILD_CORE_BUILTIN or Py_BUILD_CORE_MODULE must be defined"
Eric Snow2ebc5ce2017-09-07 23:51:28 -06009#endif
10
Christian Heimes90540002008-05-08 14:29:10 +000011#include "Python.h"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000012#include "structmember.h"
Victor Stinnere281f7d2018-11-01 02:30:36 +010013#include "pycore_accu.h"
Antoine Pitroud0acb412012-03-22 14:42:18 +010014
/* UNUSED annotates a declaration that is deliberately left unreferenced.
   On GNU-compatible compilers it expands to the `unused` attribute; on
   every other compiler it expands to nothing. */
#if defined(__GNUC__)
#  define UNUSED __attribute__((__unused__))
#else
#  define UNUSED
#endif
20
21#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
22#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
23#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
24#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
25
26static PyTypeObject PyScannerType;
27static PyTypeObject PyEncoderType;
28
29typedef struct _PyScannerObject {
30 PyObject_HEAD
Segev Finer541bd282017-07-13 08:52:08 +030031 signed char strict;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000032 PyObject *object_hook;
33 PyObject *object_pairs_hook;
34 PyObject *parse_float;
35 PyObject *parse_int;
36 PyObject *parse_constant;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +000037 PyObject *memo;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000038} PyScannerObject;
39
40static PyMemberDef scanner_members[] = {
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030041 {"strict", T_BOOL, offsetof(PyScannerObject, strict), READONLY, "strict"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000042 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
43 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
44 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
45 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
46 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
47 {NULL}
48};
49
50typedef struct _PyEncoderObject {
51 PyObject_HEAD
52 PyObject *markers;
53 PyObject *defaultfn;
54 PyObject *encoder;
55 PyObject *indent;
56 PyObject *key_separator;
57 PyObject *item_separator;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030058 char sort_keys;
59 char skipkeys;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000060 int allow_nan;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030061 PyCFunction fast_encode;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000062} PyEncoderObject;
63
64static PyMemberDef encoder_members[] = {
65 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
66 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
67 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
68 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
69 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
70 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030071 {"sort_keys", T_BOOL, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
72 {"skipkeys", T_BOOL, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000073 {NULL}
74};
75
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020076static PyObject *
77join_list_unicode(PyObject *lst)
78{
79 /* return u''.join(lst) */
80 static PyObject *sep = NULL;
81 if (sep == NULL) {
82 sep = PyUnicode_FromStringAndSize("", 0);
83 if (sep == NULL)
84 return NULL;
85 }
86 return PyUnicode_Join(sep, lst);
87}
88
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020089/* Forward decls */
90
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000091static PyObject *
92ascii_escape_unicode(PyObject *pystr);
93static PyObject *
94py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
95void init_json(void);
96static PyObject *
97scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
98static PyObject *
99_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
100static PyObject *
101scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000102static void
103scanner_dealloc(PyObject *self);
104static int
105scanner_clear(PyObject *self);
106static PyObject *
107encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000108static void
109encoder_dealloc(PyObject *self);
110static int
111encoder_clear(PyObject *self);
112static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200113encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000114static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200115encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000116static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200117encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000118static PyObject *
Hirokazu Yamamotofecf5d12009-05-02 15:55:19 +0000119_encoded_const(PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000120static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200121raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000122static PyObject *
123encoder_encode_string(PyEncoderObject *s, PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000124static PyObject *
125encoder_encode_float(PyEncoderObject *s, PyObject *obj);
126
/* S_CHAR(c): true when c is a printable ASCII character (' ' through '~')
   that can be copied into a JSON string verbatim, i.e. it is neither a
   backslash nor a double quote.
   IS_WHITESPACE(c): true for space, tab, newline and carriage return.

   Every use of the argument is parenthesized so that an expression argument
   (e.g. a conditional expression) is evaluated as a unit.  Both macros still
   evaluate their argument more than once, so do not pass expressions with
   side effects. */
#define S_CHAR(c) (((c) >= ' ') && ((c) <= '~') && ((c) != '\\') && ((c) != '"'))
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
Christian Heimes90540002008-05-08 14:29:10 +0000129
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000130static Py_ssize_t
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200131ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000132{
133 /* Escape unicode code point c to ASCII escape sequences
134 in char *output. output must have at least 12 bytes unused to
135 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000136 output[chars++] = '\\';
137 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000138 case '\\': output[chars++] = c; break;
139 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000140 case '\b': output[chars++] = 'b'; break;
141 case '\f': output[chars++] = 'f'; break;
142 case '\n': output[chars++] = 'n'; break;
143 case '\r': output[chars++] = 'r'; break;
144 case '\t': output[chars++] = 't'; break;
145 default:
Christian Heimes90540002008-05-08 14:29:10 +0000146 if (c >= 0x10000) {
147 /* UTF-16 surrogate pair */
Victor Stinner76df43d2012-10-30 01:42:39 +0100148 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000149 output[chars++] = 'u';
Victor Stinner76df43d2012-10-30 01:42:39 +0100150 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
151 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
152 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
153 output[chars++] = Py_hexdigits[(v ) & 0xf];
154 c = Py_UNICODE_LOW_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000155 output[chars++] = '\\';
156 }
Christian Heimes90540002008-05-08 14:29:10 +0000157 output[chars++] = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200158 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
159 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
160 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
161 output[chars++] = Py_hexdigits[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000162 }
163 return chars;
164}
165
166static PyObject *
167ascii_escape_unicode(PyObject *pystr)
168{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000169 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000170 Py_ssize_t i;
171 Py_ssize_t input_chars;
172 Py_ssize_t output_size;
173 Py_ssize_t chars;
174 PyObject *rval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200175 void *input;
176 unsigned char *output;
177 int kind;
Christian Heimes90540002008-05-08 14:29:10 +0000178
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200179 if (PyUnicode_READY(pystr) == -1)
180 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000181
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200182 input_chars = PyUnicode_GET_LENGTH(pystr);
183 input = PyUnicode_DATA(pystr);
184 kind = PyUnicode_KIND(pystr);
185
186 /* Compute the output size */
187 for (i = 0, output_size = 2; i < input_chars; i++) {
188 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500189 Py_ssize_t d;
190 if (S_CHAR(c)) {
191 d = 1;
192 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200193 else {
194 switch(c) {
Victor Stinnerd9c06312011-10-11 21:56:19 +0200195 case '\\': case '"': case '\b': case '\f':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200196 case '\n': case '\r': case '\t':
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500197 d = 2; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200198 default:
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500199 d = c >= 0x10000 ? 12 : 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200200 }
201 }
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500202 if (output_size > PY_SSIZE_T_MAX - d) {
203 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
204 return NULL;
205 }
206 output_size += d;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200207 }
208
209 rval = PyUnicode_New(output_size, 127);
Christian Heimes90540002008-05-08 14:29:10 +0000210 if (rval == NULL) {
211 return NULL;
212 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200213 output = PyUnicode_1BYTE_DATA(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000214 chars = 0;
215 output[chars++] = '"';
216 for (i = 0; i < input_chars; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200217 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Christian Heimes90540002008-05-08 14:29:10 +0000218 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000219 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000220 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000221 else {
222 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000223 }
Christian Heimes90540002008-05-08 14:29:10 +0000224 }
225 output[chars++] = '"';
Christian Heimesf402e922013-01-03 09:21:55 +0100226#ifdef Py_DEBUG
Victor Stinner8f825062012-04-27 13:55:39 +0200227 assert(_PyUnicode_CheckConsistency(rval, 1));
Christian Heimesf402e922013-01-03 09:21:55 +0100228#endif
Christian Heimes90540002008-05-08 14:29:10 +0000229 return rval;
230}
231
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100232static PyObject *
233escape_unicode(PyObject *pystr)
234{
235 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
236 Py_ssize_t i;
237 Py_ssize_t input_chars;
238 Py_ssize_t output_size;
239 Py_ssize_t chars;
240 PyObject *rval;
241 void *input;
242 int kind;
243 Py_UCS4 maxchar;
244
245 if (PyUnicode_READY(pystr) == -1)
246 return NULL;
247
248 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
249 input_chars = PyUnicode_GET_LENGTH(pystr);
250 input = PyUnicode_DATA(pystr);
251 kind = PyUnicode_KIND(pystr);
252
253 /* Compute the output size */
254 for (i = 0, output_size = 2; i < input_chars; i++) {
255 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500256 Py_ssize_t d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100257 switch (c) {
258 case '\\': case '"': case '\b': case '\f':
259 case '\n': case '\r': case '\t':
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500260 d = 2;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100261 break;
262 default:
263 if (c <= 0x1f)
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500264 d = 6;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100265 else
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500266 d = 1;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100267 }
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500268 if (output_size > PY_SSIZE_T_MAX - d) {
269 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
270 return NULL;
271 }
272 output_size += d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100273 }
274
275 rval = PyUnicode_New(output_size, maxchar);
276 if (rval == NULL)
277 return NULL;
278
279 kind = PyUnicode_KIND(rval);
280
281#define ENCODE_OUTPUT do { \
282 chars = 0; \
283 output[chars++] = '"'; \
284 for (i = 0; i < input_chars; i++) { \
285 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
286 switch (c) { \
287 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
288 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
289 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
290 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
291 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
292 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
293 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
294 default: \
295 if (c <= 0x1f) { \
296 output[chars++] = '\\'; \
297 output[chars++] = 'u'; \
298 output[chars++] = '0'; \
299 output[chars++] = '0'; \
300 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
301 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
302 } else { \
303 output[chars++] = c; \
304 } \
305 } \
306 } \
307 output[chars++] = '"'; \
308 } while (0)
309
310 if (kind == PyUnicode_1BYTE_KIND) {
311 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
312 ENCODE_OUTPUT;
313 } else if (kind == PyUnicode_2BYTE_KIND) {
314 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
315 ENCODE_OUTPUT;
316 } else {
317 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
318 assert(kind == PyUnicode_4BYTE_KIND);
319 ENCODE_OUTPUT;
320 }
321#undef ENCODE_OUTPUT
322
323#ifdef Py_DEBUG
324 assert(_PyUnicode_CheckConsistency(rval, 1));
325#endif
326 return rval;
327}
328
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000329static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200330raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
Christian Heimes90540002008-05-08 14:29:10 +0000331{
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200332 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
333 static PyObject *JSONDecodeError = NULL;
334 PyObject *exc;
335 if (JSONDecodeError == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000336 PyObject *decoder = PyImport_ImportModule("json.decoder");
337 if (decoder == NULL)
338 return;
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200339 JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000340 Py_DECREF(decoder);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200341 if (JSONDecodeError == NULL)
Christian Heimes90540002008-05-08 14:29:10 +0000342 return;
Christian Heimes90540002008-05-08 14:29:10 +0000343 }
Victor Stinner4c381542016-12-09 00:33:39 +0100344 exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200345 if (exc) {
346 PyErr_SetObject(JSONDecodeError, exc);
347 Py_DECREF(exc);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000348 }
Christian Heimes90540002008-05-08 14:29:10 +0000349}
350
Ezio Melotti37623ab2013-01-03 08:44:15 +0200351static void
352raise_stop_iteration(Py_ssize_t idx)
353{
354 PyObject *value = PyLong_FromSsize_t(idx);
355 if (value != NULL) {
356 PyErr_SetObject(PyExc_StopIteration, value);
357 Py_DECREF(value);
358 }
359}
360
Christian Heimes90540002008-05-08 14:29:10 +0000361static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000362_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
363 /* return (rval, idx) tuple, stealing reference to rval */
364 PyObject *tpl;
365 PyObject *pyidx;
366 /*
367 steal a reference to rval, returns (rval, idx)
368 */
369 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000370 return NULL;
371 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000372 pyidx = PyLong_FromSsize_t(idx);
373 if (pyidx == NULL) {
374 Py_DECREF(rval);
375 return NULL;
376 }
377 tpl = PyTuple_New(2);
378 if (tpl == NULL) {
379 Py_DECREF(pyidx);
380 Py_DECREF(rval);
381 return NULL;
382 }
383 PyTuple_SET_ITEM(tpl, 0, rval);
384 PyTuple_SET_ITEM(tpl, 1, pyidx);
385 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000386}
387
/* Flush the pending `chunk` (if any) into the `chunks` list, creating the
   list on first use, then reset `chunk` to NULL.  Expects the locals
   `chunk` and `chunks` and a `bail` label in the enclosing function; jumps
   to `bail` on failure.  If creating the list fails, `chunk` is left set
   for `bail` to release.  The macro is a bare `if` statement and is written
   without a trailing semicolon at its call sites. */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        if (chunks == NULL && (chunks = PyList_New(0)) == NULL) { \
            goto bail; \
        } \
        if (PyList_Append(chunks, chunk) != 0) { \
            Py_CLEAR(chunk); \
            goto bail; \
        } \
        Py_CLEAR(chunk); \
    }

402
Christian Heimes90540002008-05-08 14:29:10 +0000403static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000404scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000405{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000406 /* Read the JSON string from PyUnicode pystr.
407 end is the index of the first character after the quote.
408 if strict is zero then literal control characters are allowed
409 *next_end_ptr is a return-by-reference index of the character
410 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000411
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000412 Return value is a new PyUnicode
413 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000414 PyObject *rval = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200415 Py_ssize_t len;
Christian Heimes90540002008-05-08 14:29:10 +0000416 Py_ssize_t begin = end - 1;
Brett Cannonb94767f2011-02-22 20:15:44 +0000417 Py_ssize_t next /* = begin */;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200418 const void *buf;
419 int kind;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000420 PyObject *chunks = NULL;
421 PyObject *chunk = NULL;
422
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200423 if (PyUnicode_READY(pystr) == -1)
424 return 0;
425
426 len = PyUnicode_GET_LENGTH(pystr);
427 buf = PyUnicode_DATA(pystr);
428 kind = PyUnicode_KIND(pystr);
429
Ezio Melotti37623ab2013-01-03 08:44:15 +0200430 if (end < 0 || len < end) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000431 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
432 goto bail;
433 }
Christian Heimes90540002008-05-08 14:29:10 +0000434 while (1) {
435 /* Find the end of the string or the next escape */
Inada Naoki2a570af2019-08-08 17:57:10 +0900436 Py_UCS4 c;
437 {
438 // Use tight scope variable to help register allocation.
439 Py_UCS4 d = 0;
440 for (next = end; next < len; next++) {
441 d = PyUnicode_READ(kind, buf, next);
442 if (d == '"' || d == '\\') {
443 break;
444 }
445 if (d <= 0x1f && strict) {
446 raise_errmsg("Invalid control character at", pystr, next);
447 goto bail;
448 }
Christian Heimes90540002008-05-08 14:29:10 +0000449 }
Inada Naoki2a570af2019-08-08 17:57:10 +0900450 c = d;
Christian Heimes90540002008-05-08 14:29:10 +0000451 }
452 if (!(c == '"' || c == '\\')) {
453 raise_errmsg("Unterminated string starting at", pystr, begin);
454 goto bail;
455 }
456 /* Pick up this chunk if it's not zero length */
457 if (next != end) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000458 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200459 chunk = PyUnicode_FromKindAndData(
460 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200461 (char*)buf + kind * end,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200462 next - end);
Christian Heimes90540002008-05-08 14:29:10 +0000463 if (chunk == NULL) {
464 goto bail;
465 }
Christian Heimes90540002008-05-08 14:29:10 +0000466 }
467 next++;
468 if (c == '"') {
469 end = next;
470 break;
471 }
472 if (next == len) {
473 raise_errmsg("Unterminated string starting at", pystr, begin);
474 goto bail;
475 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200476 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000477 if (c != 'u') {
478 /* Non-unicode backslash escapes */
479 end = next + 1;
480 switch (c) {
481 case '"': break;
482 case '\\': break;
483 case '/': break;
484 case 'b': c = '\b'; break;
485 case 'f': c = '\f'; break;
486 case 'n': c = '\n'; break;
487 case 'r': c = '\r'; break;
488 case 't': c = '\t'; break;
489 default: c = 0;
490 }
491 if (c == 0) {
492 raise_errmsg("Invalid \\escape", pystr, end - 2);
493 goto bail;
494 }
495 }
496 else {
497 c = 0;
498 next++;
499 end = next + 4;
500 if (end >= len) {
501 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
502 goto bail;
503 }
504 /* Decode 4 hex digits */
505 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200506 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000507 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000508 switch (digit) {
509 case '0': case '1': case '2': case '3': case '4':
510 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000511 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000512 case 'a': case 'b': case 'c': case 'd': case 'e':
513 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000514 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000515 case 'A': case 'B': case 'C': case 'D': case 'E':
516 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000517 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000518 default:
519 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
520 goto bail;
521 }
522 }
Christian Heimes90540002008-05-08 14:29:10 +0000523 /* Surrogate pair */
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200524 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
525 PyUnicode_READ(kind, buf, next++) == '\\' &&
526 PyUnicode_READ(kind, buf, next++) == 'u') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200527 Py_UCS4 c2 = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000528 end += 6;
529 /* Decode 4 hex digits */
530 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200531 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000532 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000533 switch (digit) {
534 case '0': case '1': case '2': case '3': case '4':
535 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000536 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000537 case 'a': case 'b': case 'c': case 'd': case 'e':
538 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000539 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000540 case 'A': case 'B': case 'C': case 'D': case 'E':
541 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000542 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000543 default:
544 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
545 goto bail;
546 }
547 }
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200548 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
549 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
550 else
551 end -= 6;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000552 }
Christian Heimes90540002008-05-08 14:29:10 +0000553 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000554 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200555 chunk = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &c, 1);
Christian Heimes90540002008-05-08 14:29:10 +0000556 if (chunk == NULL) {
557 goto bail;
558 }
Christian Heimes90540002008-05-08 14:29:10 +0000559 }
560
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000561 if (chunks == NULL) {
562 if (chunk != NULL)
563 rval = chunk;
564 else
565 rval = PyUnicode_FromStringAndSize("", 0);
Christian Heimes90540002008-05-08 14:29:10 +0000566 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000567 else {
568 APPEND_OLD_CHUNK
569 rval = join_list_unicode(chunks);
570 if (rval == NULL) {
571 goto bail;
572 }
573 Py_CLEAR(chunks);
574 }
575
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000576 *next_end_ptr = end;
577 return rval;
Christian Heimes90540002008-05-08 14:29:10 +0000578bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000579 *next_end_ptr = -1;
Christian Heimes90540002008-05-08 14:29:10 +0000580 Py_XDECREF(chunks);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000581 Py_XDECREF(chunk);
Christian Heimes90540002008-05-08 14:29:10 +0000582 return NULL;
583}
584
585PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000586 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000587 "\n"
588 "Scan the string s for a JSON string. End is the index of the\n"
589 "character in s after the quote that started the JSON string.\n"
590 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
591 "on attempt to decode an invalid string. If strict is False then literal\n"
592 "control characters are allowed in the string.\n"
593 "\n"
594 "Returns a tuple of the decoded string and the index of the character in s\n"
595 "after the end quote."
596);
Christian Heimes90540002008-05-08 14:29:10 +0000597
598static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000599py_scanstring(PyObject* self UNUSED, PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000600{
601 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000602 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000603 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000604 Py_ssize_t next_end = -1;
605 int strict = 1;
Antoine Pitroucbb02842012-12-01 19:34:16 +0100606 if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000607 return NULL;
608 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000609 if (PyUnicode_Check(pystr)) {
610 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000611 }
612 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000613 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000614 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000615 Py_TYPE(pystr)->tp_name);
616 return NULL;
617 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000618 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000619}
620
621PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000622 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000623 "\n"
624 "Return an ASCII-only JSON representation of a Python string"
625);
Christian Heimes90540002008-05-08 14:29:10 +0000626
627static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000628py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000629{
630 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000631 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000632 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000633 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000634 rval = ascii_escape_unicode(pystr);
635 }
636 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000637 PyErr_Format(PyExc_TypeError,
638 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000639 Py_TYPE(pystr)->tp_name);
640 return NULL;
641 }
Christian Heimes90540002008-05-08 14:29:10 +0000642 return rval;
643}
644
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100645
646PyDoc_STRVAR(pydoc_encode_basestring,
647 "encode_basestring(string) -> string\n"
648 "\n"
649 "Return a JSON representation of a Python string"
650);
651
652static PyObject *
653py_encode_basestring(PyObject* self UNUSED, PyObject *pystr)
654{
655 PyObject *rval;
656 /* Return a JSON representation of a Python string */
657 /* METH_O */
658 if (PyUnicode_Check(pystr)) {
659 rval = escape_unicode(pystr);
660 }
661 else {
662 PyErr_Format(PyExc_TypeError,
663 "first argument must be a string, not %.80s",
664 Py_TYPE(pystr)->tp_name);
665 return NULL;
666 }
667 return rval;
668}
669
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000670static void
671scanner_dealloc(PyObject *self)
672{
INADA Naokia6296d32017-08-24 14:55:17 +0900673 /* bpo-31095: UnTrack is needed before calling any callbacks */
674 PyObject_GC_UnTrack(self);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000675 scanner_clear(self);
676 Py_TYPE(self)->tp_free(self);
677}
678
679static int
680scanner_traverse(PyObject *self, visitproc visit, void *arg)
681{
682 PyScannerObject *s;
683 assert(PyScanner_Check(self));
684 s = (PyScannerObject *)self;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000685 Py_VISIT(s->object_hook);
686 Py_VISIT(s->object_pairs_hook);
687 Py_VISIT(s->parse_float);
688 Py_VISIT(s->parse_int);
689 Py_VISIT(s->parse_constant);
690 return 0;
691}
692
693static int
694scanner_clear(PyObject *self)
695{
696 PyScannerObject *s;
697 assert(PyScanner_Check(self));
698 s = (PyScannerObject *)self;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000699 Py_CLEAR(s->object_hook);
700 Py_CLEAR(s->object_pairs_hook);
701 Py_CLEAR(s->parse_float);
702 Py_CLEAR(s->parse_int);
703 Py_CLEAR(s->parse_constant);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000704 Py_CLEAR(s->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000705 return 0;
706}
707
708static PyObject *
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300709_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
710{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000711 /* Read a JSON object from PyUnicode pystr.
712 idx is the index of the first character after the opening curly brace.
713 *next_idx_ptr is a return-by-reference index to the first character after
714 the closing curly brace.
715
716 Returns a new PyObject (usually a dict, but object_hook can change that)
717 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200718 void *str;
719 int kind;
720 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000721 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000722 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000723 PyObject *key = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000724 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000725 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000726
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200727 if (PyUnicode_READY(pystr) == -1)
728 return NULL;
729
730 str = PyUnicode_DATA(pystr);
731 kind = PyUnicode_KIND(pystr);
732 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
733
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000734 if (has_pairs_hook)
735 rval = PyList_New(0);
736 else
737 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000738 if (rval == NULL)
739 return NULL;
740
741 /* skip whitespace after { */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200742 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000743
744 /* only loop if the object is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200745 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
746 while (1) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000747 PyObject *memokey;
748
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000749 /* read key */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200750 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200751 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000752 goto bail;
753 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300754 key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000755 if (key == NULL)
756 goto bail;
Inada Naoki2a570af2019-08-08 17:57:10 +0900757 memokey = PyDict_SetDefault(s->memo, key, key);
758 if (memokey == NULL) {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200759 goto bail;
760 }
Inada Naoki2a570af2019-08-08 17:57:10 +0900761 Py_INCREF(memokey);
762 Py_DECREF(key);
763 key = memokey;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000764 idx = next_idx;
765
766 /* skip whitespace between key and : delimiter, read :, skip whitespace */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200767 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
768 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200769 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000770 goto bail;
771 }
772 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200773 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000774
775 /* read any JSON term */
776 val = scan_once_unicode(s, pystr, idx, &next_idx);
777 if (val == NULL)
778 goto bail;
779
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000780 if (has_pairs_hook) {
781 PyObject *item = PyTuple_Pack(2, key, val);
782 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000783 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000784 Py_CLEAR(key);
785 Py_CLEAR(val);
786 if (PyList_Append(rval, item) == -1) {
787 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000788 goto bail;
789 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000790 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000791 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000792 else {
793 if (PyDict_SetItem(rval, key, val) < 0)
794 goto bail;
795 Py_CLEAR(key);
796 Py_CLEAR(val);
797 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000798 idx = next_idx;
799
800 /* skip whitespace before } or , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200801 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000802
803 /* bail if the object is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200804 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000805 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200806 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200807 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000808 goto bail;
809 }
810 idx++;
811
812 /* skip whitespace after , delimiter */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200813 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000814 }
815 }
816
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000817 *next_idx_ptr = idx + 1;
818
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000819 if (has_pairs_hook) {
Jeroen Demeyer196a5302019-07-04 12:31:34 +0200820 val = _PyObject_CallOneArg(s->object_pairs_hook, rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000821 Py_DECREF(rval);
822 return val;
823 }
824
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000825 /* if object_hook is not None: rval = object_hook(rval) */
826 if (s->object_hook != Py_None) {
Jeroen Demeyer196a5302019-07-04 12:31:34 +0200827 val = _PyObject_CallOneArg(s->object_hook, rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000828 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000829 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000830 }
831 return rval;
832bail:
833 Py_XDECREF(key);
834 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000835 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000836 return NULL;
837}
838
839static PyObject *
840_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200841 /* Read a JSON array from PyUnicode pystr.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000842 idx is the index of the first character after the opening brace.
843 *next_idx_ptr is a return-by-reference index to the first character after
844 the closing brace.
845
846 Returns a new PyList
847 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200848 void *str;
849 int kind;
850 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000851 PyObject *val = NULL;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200852 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000853 Py_ssize_t next_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000854
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200855 if (PyUnicode_READY(pystr) == -1)
856 return NULL;
857
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200858 rval = PyList_New(0);
859 if (rval == NULL)
860 return NULL;
861
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200862 str = PyUnicode_DATA(pystr);
863 kind = PyUnicode_KIND(pystr);
864 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
865
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000866 /* skip whitespace after [ */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200867 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000868
869 /* only loop if the array is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200870 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
871 while (1) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000872
873 /* read any JSON term */
874 val = scan_once_unicode(s, pystr, idx, &next_idx);
875 if (val == NULL)
876 goto bail;
877
878 if (PyList_Append(rval, val) == -1)
879 goto bail;
880
881 Py_CLEAR(val);
882 idx = next_idx;
883
884 /* skip whitespace between term and , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200885 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000886
887 /* bail if the array is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200888 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000889 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200890 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200891 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000892 goto bail;
893 }
894 idx++;
895
896 /* skip whitespace after , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200897 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000898 }
899 }
900
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200901 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
902 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200903 raise_errmsg("Expecting value", pystr, end_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000904 goto bail;
905 }
906 *next_idx_ptr = idx + 1;
907 return rval;
908bail:
909 Py_XDECREF(val);
910 Py_DECREF(rval);
911 return NULL;
912}
913
914static PyObject *
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200915_parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
916 /* Read a JSON constant.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000917 constant is the constant string that was found
918 ("NaN", "Infinity", "-Infinity").
919 idx is the index of the first character of the constant
920 *next_idx_ptr is a return-by-reference index to the first character after
921 the constant.
922
923 Returns the result of parse_constant
924 */
925 PyObject *cstr;
926 PyObject *rval;
927 /* constant is "NaN", "Infinity", or "-Infinity" */
928 cstr = PyUnicode_InternFromString(constant);
929 if (cstr == NULL)
930 return NULL;
931
932 /* rval = parse_constant(constant) */
Jeroen Demeyer196a5302019-07-04 12:31:34 +0200933 rval = _PyObject_CallOneArg(s->parse_constant, cstr);
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200934 idx += PyUnicode_GET_LENGTH(cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000935 Py_DECREF(cstr);
936 *next_idx_ptr = idx;
937 return rval;
938}
939
940static PyObject *
941_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
942 /* Read a JSON number from PyUnicode pystr.
943 idx is the index of the first character of the number
944 *next_idx_ptr is a return-by-reference index to the first character after
945 the number.
946
947 Returns a new PyObject representation of that number:
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200948 PyLong, or PyFloat.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000949 May return other types if parse_int or parse_float are set
950 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200951 void *str;
952 int kind;
953 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000954 Py_ssize_t idx = start;
955 int is_float = 0;
956 PyObject *rval;
Antoine Pitrouf6454512011-04-25 19:16:06 +0200957 PyObject *numstr = NULL;
958 PyObject *custom_func;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000959
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200960 if (PyUnicode_READY(pystr) == -1)
961 return NULL;
962
963 str = PyUnicode_DATA(pystr);
964 kind = PyUnicode_KIND(pystr);
965 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
966
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000967 /* read a sign if it's there, make sure it's not the end of the string */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200968 if (PyUnicode_READ(kind, str, idx) == '-') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000969 idx++;
970 if (idx > end_idx) {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200971 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000972 return NULL;
973 }
974 }
975
976 /* read as many integer digits as we find as long as it doesn't start with 0 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200977 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000978 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200979 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000980 }
981 /* if it starts with 0 we only expect one integer digit */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200982 else if (PyUnicode_READ(kind, str, idx) == '0') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000983 idx++;
984 }
985 /* no integer digits, error */
986 else {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200987 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000988 return NULL;
989 }
990
991 /* if the next char is '.' followed by a digit then read all float digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200992 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000993 is_float = 1;
994 idx += 2;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200995 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000996 }
997
998 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200999 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001000 Py_ssize_t e_start = idx;
1001 idx++;
1002
1003 /* read an exponent sign if present */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001004 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001005
1006 /* read all digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001007 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001008
1009 /* if we got a digit, then parse as float. if not, backtrack */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001010 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001011 is_float = 1;
1012 }
1013 else {
1014 idx = e_start;
1015 }
1016 }
1017
Antoine Pitrouf6454512011-04-25 19:16:06 +02001018 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
1019 custom_func = s->parse_float;
1020 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
1021 custom_func = s->parse_int;
1022 else
1023 custom_func = NULL;
1024
1025 if (custom_func) {
1026 /* copy the section we determined to be a number */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001027 numstr = PyUnicode_FromKindAndData(kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001028 (char*)str + kind * start,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001029 idx - start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001030 if (numstr == NULL)
1031 return NULL;
Jeroen Demeyer196a5302019-07-04 12:31:34 +02001032 rval = _PyObject_CallOneArg(custom_func, numstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001033 }
1034 else {
Antoine Pitrouf6454512011-04-25 19:16:06 +02001035 Py_ssize_t i, n;
1036 char *buf;
1037 /* Straight conversion to ASCII, to avoid costly conversion of
1038 decimal unicode digits (which cannot appear here) */
1039 n = idx - start;
1040 numstr = PyBytes_FromStringAndSize(NULL, n);
1041 if (numstr == NULL)
1042 return NULL;
1043 buf = PyBytes_AS_STRING(numstr);
1044 for (i = 0; i < n; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001045 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001046 }
1047 if (is_float)
1048 rval = PyFloat_FromString(numstr);
1049 else
1050 rval = PyLong_FromString(buf, NULL, 10);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001051 }
1052 Py_DECREF(numstr);
1053 *next_idx_ptr = idx;
1054 return rval;
1055}
1056
1057static PyObject *
1058scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1059{
1060 /* Read one JSON term (of any kind) from PyUnicode pystr.
1061 idx is the index of the first character of the term
1062 *next_idx_ptr is a return-by-reference index to the first character after
1063 the number.
1064
1065 Returns a new PyObject representation of the term.
1066 */
Ezio Melotti362b9512011-05-07 17:58:09 +03001067 PyObject *res;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001068 void *str;
1069 int kind;
1070 Py_ssize_t length;
1071
1072 if (PyUnicode_READY(pystr) == -1)
1073 return NULL;
1074
1075 str = PyUnicode_DATA(pystr);
1076 kind = PyUnicode_KIND(pystr);
1077 length = PyUnicode_GET_LENGTH(pystr);
1078
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001079 if (idx < 0) {
Benjamin Peterson9beee042014-04-14 11:46:51 -04001080 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001081 return NULL;
1082 }
1083 if (idx >= length) {
Ezio Melotti37623ab2013-01-03 08:44:15 +02001084 raise_stop_iteration(idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001085 return NULL;
1086 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001087
1088 switch (PyUnicode_READ(kind, str, idx)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001089 case '"':
1090 /* string */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001091 return scanstring_unicode(pystr, idx + 1, s->strict, next_idx_ptr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001092 case '{':
1093 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +03001094 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1095 "from a unicode string"))
1096 return NULL;
1097 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1098 Py_LeaveRecursiveCall();
1099 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001100 case '[':
1101 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +03001102 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1103 "from a unicode string"))
1104 return NULL;
1105 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1106 Py_LeaveRecursiveCall();
1107 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001108 case 'n':
1109 /* null */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001110 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001111 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001112 Py_RETURN_NONE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001113 }
1114 break;
1115 case 't':
1116 /* true */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001117 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001118 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001119 Py_RETURN_TRUE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001120 }
1121 break;
1122 case 'f':
1123 /* false */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001124 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1125 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1126 PyUnicode_READ(kind, str, idx + 3) == 's' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001127 PyUnicode_READ(kind, str, idx + 4) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001128 *next_idx_ptr = idx + 5;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001129 Py_RETURN_FALSE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001130 }
1131 break;
1132 case 'N':
1133 /* NaN */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001134 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001135 PyUnicode_READ(kind, str, idx + 2) == 'N') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001136 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1137 }
1138 break;
1139 case 'I':
1140 /* Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001141 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1142 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1143 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001144 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001145 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1146 PyUnicode_READ(kind, str, idx + 6) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001147 PyUnicode_READ(kind, str, idx + 7) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001148 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1149 }
1150 break;
1151 case '-':
1152 /* -Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001153 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001154 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1155 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001156 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001157 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001158 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1159 PyUnicode_READ(kind, str, idx + 7) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001160 PyUnicode_READ(kind, str, idx + 8) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001161 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1162 }
1163 break;
1164 }
1165 /* Didn't find a string, object, array, or named constant. Look for a number. */
1166 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1167}
1168
1169static PyObject *
1170scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1171{
1172 /* Python callable interface to scan_once_{str,unicode} */
1173 PyObject *pystr;
1174 PyObject *rval;
1175 Py_ssize_t idx;
1176 Py_ssize_t next_idx = -1;
1177 static char *kwlist[] = {"string", "idx", NULL};
1178 PyScannerObject *s;
1179 assert(PyScanner_Check(self));
1180 s = (PyScannerObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001181 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001182 return NULL;
1183
1184 if (PyUnicode_Check(pystr)) {
1185 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1186 }
1187 else {
1188 PyErr_Format(PyExc_TypeError,
1189 "first argument must be a string, not %.80s",
1190 Py_TYPE(pystr)->tp_name);
1191 return NULL;
1192 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001193 PyDict_Clear(s->memo);
1194 if (rval == NULL)
1195 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001196 return _build_rval_index_tuple(rval, next_idx);
1197}
1198
1199static PyObject *
1200scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1201{
1202 PyScannerObject *s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001203 PyObject *ctx;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001204 PyObject *strict;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001205 static char *kwlist[] = {"context", NULL};
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001206
1207 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001208 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001209
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001210 s = (PyScannerObject *)type->tp_alloc(type, 0);
1211 if (s == NULL) {
1212 return NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001213 }
1214
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001215 s->memo = PyDict_New();
1216 if (s->memo == NULL)
1217 goto bail;
1218
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001219 /* All of these will fail "gracefully" so we don't need to verify them */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001220 strict = PyObject_GetAttrString(ctx, "strict");
1221 if (strict == NULL)
1222 goto bail;
1223 s->strict = PyObject_IsTrue(strict);
1224 Py_DECREF(strict);
1225 if (s->strict < 0)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001226 goto bail;
1227 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1228 if (s->object_hook == NULL)
1229 goto bail;
1230 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1231 if (s->object_pairs_hook == NULL)
1232 goto bail;
1233 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1234 if (s->parse_float == NULL)
1235 goto bail;
1236 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1237 if (s->parse_int == NULL)
1238 goto bail;
1239 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1240 if (s->parse_constant == NULL)
1241 goto bail;
1242
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001243 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001244
1245bail:
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001246 Py_DECREF(s);
1247 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001248}
1249
1250PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1251
1252static
1253PyTypeObject PyScannerType = {
1254 PyVarObject_HEAD_INIT(NULL, 0)
1255 "_json.Scanner", /* tp_name */
1256 sizeof(PyScannerObject), /* tp_basicsize */
1257 0, /* tp_itemsize */
1258 scanner_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001259 0, /* tp_vectorcall_offset */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001260 0, /* tp_getattr */
1261 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001262 0, /* tp_as_async */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001263 0, /* tp_repr */
1264 0, /* tp_as_number */
1265 0, /* tp_as_sequence */
1266 0, /* tp_as_mapping */
1267 0, /* tp_hash */
1268 scanner_call, /* tp_call */
1269 0, /* tp_str */
1270 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1271 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1272 0, /* tp_as_buffer */
1273 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1274 scanner_doc, /* tp_doc */
1275 scanner_traverse, /* tp_traverse */
1276 scanner_clear, /* tp_clear */
1277 0, /* tp_richcompare */
1278 0, /* tp_weaklistoffset */
1279 0, /* tp_iter */
1280 0, /* tp_iternext */
1281 0, /* tp_methods */
1282 scanner_members, /* tp_members */
1283 0, /* tp_getset */
1284 0, /* tp_base */
1285 0, /* tp_dict */
1286 0, /* tp_descr_get */
1287 0, /* tp_descr_set */
1288 0, /* tp_dictoffset */
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001289 0, /* tp_init */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001290 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1291 scanner_new, /* tp_new */
1292 0,/* PyObject_GC_Del, */ /* tp_free */
1293};
1294
1295static PyObject *
1296encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1297{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001298 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1299
1300 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001301 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001302 PyObject *item_separator;
1303 int sort_keys, skipkeys, allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001304
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001305 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001306 &markers, &defaultfn, &encoder, &indent,
1307 &key_separator, &item_separator,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001308 &sort_keys, &skipkeys, &allow_nan))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001309 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001310
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001311 if (markers != Py_None && !PyDict_Check(markers)) {
1312 PyErr_Format(PyExc_TypeError,
1313 "make_encoder() argument 1 must be dict or None, "
1314 "not %.200s", Py_TYPE(markers)->tp_name);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001315 return NULL;
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001316 }
1317
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001318 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1319 if (s == NULL)
1320 return NULL;
1321
Antoine Pitrou781eba72009-12-08 15:57:31 +00001322 s->markers = markers;
1323 s->defaultfn = defaultfn;
1324 s->encoder = encoder;
1325 s->indent = indent;
1326 s->key_separator = key_separator;
1327 s->item_separator = item_separator;
1328 s->sort_keys = sort_keys;
1329 s->skipkeys = skipkeys;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001330 s->allow_nan = allow_nan;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001331 s->fast_encode = NULL;
1332 if (PyCFunction_Check(s->encoder)) {
1333 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1334 if (f == (PyCFunction)py_encode_basestring_ascii ||
1335 f == (PyCFunction)py_encode_basestring) {
1336 s->fast_encode = f;
1337 }
1338 }
Antoine Pitrou781eba72009-12-08 15:57:31 +00001339
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001340 Py_INCREF(s->markers);
1341 Py_INCREF(s->defaultfn);
1342 Py_INCREF(s->encoder);
1343 Py_INCREF(s->indent);
1344 Py_INCREF(s->key_separator);
1345 Py_INCREF(s->item_separator);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001346 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001347}
1348
1349static PyObject *
1350encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1351{
1352 /* Python callable interface to encode_listencode_obj */
1353 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1354 PyObject *obj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001355 Py_ssize_t indent_level;
1356 PyEncoderObject *s;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001357 _PyAccu acc;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001358
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001359 assert(PyEncoder_Check(self));
1360 s = (PyEncoderObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001361 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1362 &obj, &indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001363 return NULL;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001364 if (_PyAccu_Init(&acc))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001365 return NULL;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001366 if (encoder_listencode_obj(s, &acc, obj, indent_level)) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001367 _PyAccu_Destroy(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001368 return NULL;
1369 }
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001370 return _PyAccu_FinishAsList(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001371}
1372
1373static PyObject *
1374_encoded_const(PyObject *obj)
1375{
1376 /* Return the JSON string representation of None, True, False */
1377 if (obj == Py_None) {
1378 static PyObject *s_null = NULL;
1379 if (s_null == NULL) {
1380 s_null = PyUnicode_InternFromString("null");
1381 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001382 Py_XINCREF(s_null);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001383 return s_null;
1384 }
1385 else if (obj == Py_True) {
1386 static PyObject *s_true = NULL;
1387 if (s_true == NULL) {
1388 s_true = PyUnicode_InternFromString("true");
1389 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001390 Py_XINCREF(s_true);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001391 return s_true;
1392 }
1393 else if (obj == Py_False) {
1394 static PyObject *s_false = NULL;
1395 if (s_false == NULL) {
1396 s_false = PyUnicode_InternFromString("false");
1397 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001398 Py_XINCREF(s_false);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001399 return s_false;
1400 }
1401 else {
1402 PyErr_SetString(PyExc_ValueError, "not a const");
1403 return NULL;
1404 }
1405}
1406
1407static PyObject *
1408encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1409{
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001410 /* Return the JSON representation of a PyFloat. */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001411 double i = PyFloat_AS_DOUBLE(obj);
1412 if (!Py_IS_FINITE(i)) {
1413 if (!s->allow_nan) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001414 PyErr_SetString(
1415 PyExc_ValueError,
1416 "Out of range float values are not JSON compliant"
1417 );
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001418 return NULL;
1419 }
1420 if (i > 0) {
1421 return PyUnicode_FromString("Infinity");
1422 }
1423 else if (i < 0) {
1424 return PyUnicode_FromString("-Infinity");
1425 }
1426 else {
1427 return PyUnicode_FromString("NaN");
1428 }
1429 }
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001430 return PyFloat_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001431}
1432
1433static PyObject *
1434encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1435{
1436 /* Return the JSON representation of a string */
Oren Milman2b382dd2017-09-24 12:07:12 +03001437 PyObject *encoded;
1438
1439 if (s->fast_encode) {
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001440 return s->fast_encode(NULL, obj);
Oren Milman2b382dd2017-09-24 12:07:12 +03001441 }
Jeroen Demeyer196a5302019-07-04 12:31:34 +02001442 encoded = _PyObject_CallOneArg(s->encoder, obj);
Oren Milman2b382dd2017-09-24 12:07:12 +03001443 if (encoded != NULL && !PyUnicode_Check(encoded)) {
1444 PyErr_Format(PyExc_TypeError,
1445 "encoder() must return a string, not %.80s",
1446 Py_TYPE(encoded)->tp_name);
1447 Py_DECREF(encoded);
1448 return NULL;
1449 }
1450 return encoded;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001451}
1452
1453static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001454_steal_accumulate(_PyAccu *acc, PyObject *stolen)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001455{
1456 /* Append stolen and then decrement its reference count */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001457 int rval = _PyAccu_Accumulate(acc, stolen);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001458 Py_DECREF(stolen);
1459 return rval;
1460}
1461
1462static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001463encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001464 PyObject *obj, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001465{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001466 /* Encode Python object obj to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001467 PyObject *newobj;
1468 int rv;
1469
1470 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1471 PyObject *cstr = _encoded_const(obj);
1472 if (cstr == NULL)
1473 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001474 return _steal_accumulate(acc, cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001475 }
1476 else if (PyUnicode_Check(obj))
1477 {
1478 PyObject *encoded = encoder_encode_string(s, obj);
1479 if (encoded == NULL)
1480 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001481 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001482 }
1483 else if (PyLong_Check(obj)) {
Serhiy Storchaka96aeaec2019-05-06 22:29:40 +03001484 PyObject *encoded = PyLong_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001485 if (encoded == NULL)
1486 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001487 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001488 }
1489 else if (PyFloat_Check(obj)) {
1490 PyObject *encoded = encoder_encode_float(s, obj);
1491 if (encoded == NULL)
1492 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001493 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001494 }
1495 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001496 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1497 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001498 rv = encoder_listencode_list(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001499 Py_LeaveRecursiveCall();
1500 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001501 }
1502 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001503 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1504 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001505 rv = encoder_listencode_dict(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001506 Py_LeaveRecursiveCall();
1507 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001508 }
1509 else {
1510 PyObject *ident = NULL;
1511 if (s->markers != Py_None) {
1512 int has_key;
1513 ident = PyLong_FromVoidPtr(obj);
1514 if (ident == NULL)
1515 return -1;
1516 has_key = PyDict_Contains(s->markers, ident);
1517 if (has_key) {
1518 if (has_key != -1)
1519 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1520 Py_DECREF(ident);
1521 return -1;
1522 }
1523 if (PyDict_SetItem(s->markers, ident, obj)) {
1524 Py_DECREF(ident);
1525 return -1;
1526 }
1527 }
Jeroen Demeyer196a5302019-07-04 12:31:34 +02001528 newobj = _PyObject_CallOneArg(s->defaultfn, obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001529 if (newobj == NULL) {
1530 Py_XDECREF(ident);
1531 return -1;
1532 }
Ezio Melotti13672652011-05-11 01:02:56 +03001533
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001534 if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
1535 Py_DECREF(newobj);
1536 Py_XDECREF(ident);
Ezio Melotti13672652011-05-11 01:02:56 +03001537 return -1;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001538 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001539 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001540 Py_LeaveRecursiveCall();
1541
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001542 Py_DECREF(newobj);
1543 if (rv) {
1544 Py_XDECREF(ident);
1545 return -1;
1546 }
1547 if (ident != NULL) {
1548 if (PyDict_DelItem(s->markers, ident)) {
1549 Py_XDECREF(ident);
1550 return -1;
1551 }
1552 Py_XDECREF(ident);
1553 }
1554 return rv;
1555 }
1556}
1557
1558static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001559encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001560 PyObject *dct, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001561{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001562 /* Encode Python dict dct a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001563 static PyObject *open_dict = NULL;
1564 static PyObject *close_dict = NULL;
1565 static PyObject *empty_dict = NULL;
1566 PyObject *kstr = NULL;
1567 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001568 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001569 PyObject *items;
1570 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001571 Py_ssize_t idx;
1572
1573 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1574 open_dict = PyUnicode_InternFromString("{");
1575 close_dict = PyUnicode_InternFromString("}");
1576 empty_dict = PyUnicode_InternFromString("{}");
1577 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1578 return -1;
1579 }
Serhiy Storchaka8cbc51a2017-01-13 08:38:15 +02001580 if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001581 return _PyAccu_Accumulate(acc, empty_dict);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001582
1583 if (s->markers != Py_None) {
1584 int has_key;
1585 ident = PyLong_FromVoidPtr(dct);
1586 if (ident == NULL)
1587 goto bail;
1588 has_key = PyDict_Contains(s->markers, ident);
1589 if (has_key) {
1590 if (has_key != -1)
1591 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1592 goto bail;
1593 }
1594 if (PyDict_SetItem(s->markers, ident, dct)) {
1595 goto bail;
1596 }
1597 }
1598
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001599 if (_PyAccu_Accumulate(acc, open_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001600 goto bail;
1601
1602 if (s->indent != Py_None) {
1603 /* TODO: DOES NOT RUN */
1604 indent_level += 1;
1605 /*
1606 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1607 separator = _item_separator + newline_indent
1608 buf += newline_indent
1609 */
1610 }
1611
Benjamin Peterson501182a2015-05-02 22:28:04 -04001612 items = PyMapping_Items(dct);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001613 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001614 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001615 if (s->sort_keys && PyList_Sort(items) < 0) {
1616 Py_DECREF(items);
Benjamin Peterson501182a2015-05-02 22:28:04 -04001617 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001618 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001619 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001620 Py_DECREF(items);
1621 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001622 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001623 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001624 while ((item = PyIter_Next(it)) != NULL) {
1625 PyObject *encoded, *key, *value;
Serhiy Storchakafff9a312017-03-21 08:53:25 +02001626 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001627 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1628 goto bail;
1629 }
1630 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001631 if (PyUnicode_Check(key)) {
1632 Py_INCREF(key);
1633 kstr = key;
1634 }
1635 else if (PyFloat_Check(key)) {
1636 kstr = encoder_encode_float(s, key);
1637 if (kstr == NULL)
1638 goto bail;
1639 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001640 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001641 /* This must come before the PyLong_Check because
1642 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001643 kstr = _encoded_const(key);
1644 if (kstr == NULL)
1645 goto bail;
1646 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001647 else if (PyLong_Check(key)) {
Serhiy Storchaka96aeaec2019-05-06 22:29:40 +03001648 kstr = PyLong_Type.tp_repr(key);
Ethan Furmana4998a72013-08-10 13:01:45 -07001649 if (kstr == NULL) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001650 goto bail;
Ethan Furmana4998a72013-08-10 13:01:45 -07001651 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001652 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001653 else if (s->skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001654 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001655 continue;
1656 }
1657 else {
Serhiy Storchakacfa797c2017-11-25 17:38:20 +02001658 PyErr_Format(PyExc_TypeError,
1659 "keys must be str, int, float, bool or None, "
1660 "not %.100s", key->ob_type->tp_name);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001661 goto bail;
1662 }
1663
1664 if (idx) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001665 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001666 goto bail;
1667 }
1668
1669 encoded = encoder_encode_string(s, kstr);
1670 Py_CLEAR(kstr);
1671 if (encoded == NULL)
1672 goto bail;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001673 if (_PyAccu_Accumulate(acc, encoded)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001674 Py_DECREF(encoded);
1675 goto bail;
1676 }
1677 Py_DECREF(encoded);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001678 if (_PyAccu_Accumulate(acc, s->key_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001679 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001680
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001681 value = PyTuple_GET_ITEM(item, 1);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001682 if (encoder_listencode_obj(s, acc, value, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001683 goto bail;
1684 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001685 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001686 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001687 if (PyErr_Occurred())
1688 goto bail;
1689 Py_CLEAR(it);
1690
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001691 if (ident != NULL) {
1692 if (PyDict_DelItem(s->markers, ident))
1693 goto bail;
1694 Py_CLEAR(ident);
1695 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001696 /* TODO DOES NOT RUN; dead code
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001697 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001698 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001699
1700 yield '\n' + (' ' * (_indent * _current_indent_level))
1701 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001702 if (_PyAccu_Accumulate(acc, close_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001703 goto bail;
1704 return 0;
1705
1706bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001707 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001708 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001709 Py_XDECREF(kstr);
1710 Py_XDECREF(ident);
1711 return -1;
1712}
1713
1714
1715static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001716encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001717 PyObject *seq, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001718{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001719 /* Encode Python list seq to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001720 static PyObject *open_array = NULL;
1721 static PyObject *close_array = NULL;
1722 static PyObject *empty_array = NULL;
1723 PyObject *ident = NULL;
1724 PyObject *s_fast = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001725 Py_ssize_t i;
1726
1727 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1728 open_array = PyUnicode_InternFromString("[");
1729 close_array = PyUnicode_InternFromString("]");
1730 empty_array = PyUnicode_InternFromString("[]");
1731 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1732 return -1;
1733 }
1734 ident = NULL;
1735 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1736 if (s_fast == NULL)
1737 return -1;
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001738 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001739 Py_DECREF(s_fast);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001740 return _PyAccu_Accumulate(acc, empty_array);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001741 }
1742
1743 if (s->markers != Py_None) {
1744 int has_key;
1745 ident = PyLong_FromVoidPtr(seq);
1746 if (ident == NULL)
1747 goto bail;
1748 has_key = PyDict_Contains(s->markers, ident);
1749 if (has_key) {
1750 if (has_key != -1)
1751 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1752 goto bail;
1753 }
1754 if (PyDict_SetItem(s->markers, ident, seq)) {
1755 goto bail;
1756 }
1757 }
1758
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001759 if (_PyAccu_Accumulate(acc, open_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001760 goto bail;
1761 if (s->indent != Py_None) {
1762 /* TODO: DOES NOT RUN */
1763 indent_level += 1;
1764 /*
1765 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1766 separator = _item_separator + newline_indent
1767 buf += newline_indent
1768 */
1769 }
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001770 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1771 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001772 if (i) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001773 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001774 goto bail;
1775 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001776 if (encoder_listencode_obj(s, acc, obj, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001777 goto bail;
1778 }
1779 if (ident != NULL) {
1780 if (PyDict_DelItem(s->markers, ident))
1781 goto bail;
1782 Py_CLEAR(ident);
1783 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001784
1785 /* TODO: DOES NOT RUN
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001786 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001787 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001788
1789 yield '\n' + (' ' * (_indent * _current_indent_level))
1790 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001791 if (_PyAccu_Accumulate(acc, close_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001792 goto bail;
1793 Py_DECREF(s_fast);
1794 return 0;
1795
1796bail:
1797 Py_XDECREF(ident);
1798 Py_DECREF(s_fast);
1799 return -1;
1800}
1801
1802static void
1803encoder_dealloc(PyObject *self)
1804{
INADA Naokia6296d32017-08-24 14:55:17 +09001805 /* bpo-31095: UnTrack is needed before calling any callbacks */
1806 PyObject_GC_UnTrack(self);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001807 encoder_clear(self);
1808 Py_TYPE(self)->tp_free(self);
1809}
1810
1811static int
1812encoder_traverse(PyObject *self, visitproc visit, void *arg)
1813{
1814 PyEncoderObject *s;
1815 assert(PyEncoder_Check(self));
1816 s = (PyEncoderObject *)self;
1817 Py_VISIT(s->markers);
1818 Py_VISIT(s->defaultfn);
1819 Py_VISIT(s->encoder);
1820 Py_VISIT(s->indent);
1821 Py_VISIT(s->key_separator);
1822 Py_VISIT(s->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001823 return 0;
1824}
1825
1826static int
1827encoder_clear(PyObject *self)
1828{
1829 /* Deallocate Encoder */
1830 PyEncoderObject *s;
1831 assert(PyEncoder_Check(self));
1832 s = (PyEncoderObject *)self;
1833 Py_CLEAR(s->markers);
1834 Py_CLEAR(s->defaultfn);
1835 Py_CLEAR(s->encoder);
1836 Py_CLEAR(s->indent);
1837 Py_CLEAR(s->key_separator);
1838 Py_CLEAR(s->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001839 return 0;
1840}
1841
1842PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1843
1844static
1845PyTypeObject PyEncoderType = {
1846 PyVarObject_HEAD_INIT(NULL, 0)
1847 "_json.Encoder", /* tp_name */
1848 sizeof(PyEncoderObject), /* tp_basicsize */
1849 0, /* tp_itemsize */
1850 encoder_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001851 0, /* tp_vectorcall_offset */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001852 0, /* tp_getattr */
1853 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001854 0, /* tp_as_async */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001855 0, /* tp_repr */
1856 0, /* tp_as_number */
1857 0, /* tp_as_sequence */
1858 0, /* tp_as_mapping */
1859 0, /* tp_hash */
1860 encoder_call, /* tp_call */
1861 0, /* tp_str */
1862 0, /* tp_getattro */
1863 0, /* tp_setattro */
1864 0, /* tp_as_buffer */
1865 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1866 encoder_doc, /* tp_doc */
1867 encoder_traverse, /* tp_traverse */
1868 encoder_clear, /* tp_clear */
1869 0, /* tp_richcompare */
1870 0, /* tp_weaklistoffset */
1871 0, /* tp_iter */
1872 0, /* tp_iternext */
1873 0, /* tp_methods */
1874 encoder_members, /* tp_members */
1875 0, /* tp_getset */
1876 0, /* tp_base */
1877 0, /* tp_dict */
1878 0, /* tp_descr_get */
1879 0, /* tp_descr_set */
1880 0, /* tp_dictoffset */
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001881 0, /* tp_init */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001882 0, /* tp_alloc */
1883 encoder_new, /* tp_new */
1884 0, /* tp_free */
1885};
1886
1887static PyMethodDef speedups_methods[] = {
1888 {"encode_basestring_ascii",
1889 (PyCFunction)py_encode_basestring_ascii,
1890 METH_O,
1891 pydoc_encode_basestring_ascii},
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001892 {"encode_basestring",
1893 (PyCFunction)py_encode_basestring,
1894 METH_O,
1895 pydoc_encode_basestring},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001896 {"scanstring",
1897 (PyCFunction)py_scanstring,
1898 METH_VARARGS,
1899 pydoc_scanstring},
Christian Heimes90540002008-05-08 14:29:10 +00001900 {NULL, NULL, 0, NULL}
1901};
1902
1903PyDoc_STRVAR(module_doc,
1904"json speedups\n");
1905
Martin v. Löwis1a214512008-06-11 05:26:20 +00001906static struct PyModuleDef jsonmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001907 PyModuleDef_HEAD_INIT,
1908 "_json",
1909 module_doc,
1910 -1,
1911 speedups_methods,
1912 NULL,
1913 NULL,
1914 NULL,
1915 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001916};
1917
Victor Stinnerf024d262015-03-17 17:48:27 +01001918PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001919PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00001920{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001921 PyObject *m = PyModule_Create(&jsonmodule);
1922 if (!m)
1923 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001924 if (PyType_Ready(&PyScannerType) < 0)
1925 goto fail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001926 if (PyType_Ready(&PyEncoderType) < 0)
1927 goto fail;
1928 Py_INCREF((PyObject*)&PyScannerType);
1929 if (PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType) < 0) {
1930 Py_DECREF((PyObject*)&PyScannerType);
1931 goto fail;
1932 }
1933 Py_INCREF((PyObject*)&PyEncoderType);
1934 if (PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType) < 0) {
1935 Py_DECREF((PyObject*)&PyEncoderType);
1936 goto fail;
1937 }
1938 return m;
1939 fail:
1940 Py_DECREF(m);
1941 return NULL;
Christian Heimes90540002008-05-08 14:29:10 +00001942}