blob: faa3944eedd74c60f6f46f3eeb64baebd9eeb7f3 [file] [log] [blame]
/* JSON accelerator C extension: _json module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
 * platforms. */
Eric Snow2ebc5ce2017-09-07 23:51:28 -06006
Victor Stinner5c75f372019-04-17 23:02:26 +02007#if !defined(Py_BUILD_CORE_BUILTIN) && !defined(Py_BUILD_CORE_MODULE)
8# error "Py_BUILD_CORE_BUILTIN or Py_BUILD_CORE_MODULE must be defined"
Eric Snow2ebc5ce2017-09-07 23:51:28 -06009#endif
10
Christian Heimes90540002008-05-08 14:29:10 +000011#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020012#include "structmember.h" // PyMemberDef
Victor Stinnere281f7d2018-11-01 02:30:36 +010013#include "pycore_accu.h"
Antoine Pitroud0acb412012-03-22 14:42:18 +010014
Dong-hee Na33f15a12020-03-27 19:59:59 +090015typedef struct {
16 PyObject *PyScannerType;
17 PyObject *PyEncoderType;
18} _jsonmodulestate;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000019
Dong-hee Na33f15a12020-03-27 19:59:59 +090020static inline _jsonmodulestate*
21get_json_state(PyObject *module)
22{
23 void *state = PyModule_GetState(module);
24 assert(state != NULL);
25 return (_jsonmodulestate *)state;
26}
27
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000028
29typedef struct _PyScannerObject {
30 PyObject_HEAD
Segev Finer541bd282017-07-13 08:52:08 +030031 signed char strict;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000032 PyObject *object_hook;
33 PyObject *object_pairs_hook;
34 PyObject *parse_float;
35 PyObject *parse_int;
36 PyObject *parse_constant;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +000037 PyObject *memo;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000038} PyScannerObject;
39
40static PyMemberDef scanner_members[] = {
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030041 {"strict", T_BOOL, offsetof(PyScannerObject, strict), READONLY, "strict"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000042 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
43 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
44 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
45 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
46 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
47 {NULL}
48};
49
50typedef struct _PyEncoderObject {
51 PyObject_HEAD
52 PyObject *markers;
53 PyObject *defaultfn;
54 PyObject *encoder;
55 PyObject *indent;
56 PyObject *key_separator;
57 PyObject *item_separator;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030058 char sort_keys;
59 char skipkeys;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000060 int allow_nan;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030061 PyCFunction fast_encode;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000062} PyEncoderObject;
63
64static PyMemberDef encoder_members[] = {
65 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
66 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
67 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
68 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
69 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
70 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030071 {"sort_keys", T_BOOL, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
72 {"skipkeys", T_BOOL, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000073 {NULL}
74};
75
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020076/* Forward decls */
77
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000078static PyObject *
79ascii_escape_unicode(PyObject *pystr);
80static PyObject *
Victor Stinnerfbbfcce2019-11-05 11:44:28 +010081py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000082void init_json(void);
83static PyObject *
84scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
85static PyObject *
86_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
87static PyObject *
88scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000089static void
90scanner_dealloc(PyObject *self);
91static int
Dong-hee Na33f15a12020-03-27 19:59:59 +090092scanner_clear(PyScannerObject *self);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000093static PyObject *
94encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000095static void
96encoder_dealloc(PyObject *self);
97static int
Dong-hee Na33f15a12020-03-27 19:59:59 +090098encoder_clear(PyEncoderObject *self);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000099static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200100encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000101static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200102encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000103static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200104encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000105static PyObject *
Hirokazu Yamamotofecf5d12009-05-02 15:55:19 +0000106_encoded_const(PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000107static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200108raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000109static PyObject *
110encoder_encode_string(PyEncoderObject *s, PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000111static PyObject *
112encoder_encode_float(PyEncoderObject *s, PyObject *obj);
113
/* S_CHAR(c): true when c is printable ASCII (' ' through '~') other than
   backslash and double quote, i.e. a character the ASCII escaper copies
   through unchanged.  The argument is fully parenthesized so that an
   expression argument (e.g. a conditional expression) is tested as a whole;
   note that it is still evaluated more than once, so it must be free of
   side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* IS_WHITESPACE(c): true for space, tab, line feed and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
Christian Heimes90540002008-05-08 14:29:10 +0000116
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000117static Py_ssize_t
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200118ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000119{
120 /* Escape unicode code point c to ASCII escape sequences
121 in char *output. output must have at least 12 bytes unused to
122 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000123 output[chars++] = '\\';
124 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000125 case '\\': output[chars++] = c; break;
126 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000127 case '\b': output[chars++] = 'b'; break;
128 case '\f': output[chars++] = 'f'; break;
129 case '\n': output[chars++] = 'n'; break;
130 case '\r': output[chars++] = 'r'; break;
131 case '\t': output[chars++] = 't'; break;
132 default:
Christian Heimes90540002008-05-08 14:29:10 +0000133 if (c >= 0x10000) {
134 /* UTF-16 surrogate pair */
Victor Stinner76df43d2012-10-30 01:42:39 +0100135 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000136 output[chars++] = 'u';
Victor Stinner76df43d2012-10-30 01:42:39 +0100137 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
138 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
139 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
140 output[chars++] = Py_hexdigits[(v ) & 0xf];
141 c = Py_UNICODE_LOW_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000142 output[chars++] = '\\';
143 }
Christian Heimes90540002008-05-08 14:29:10 +0000144 output[chars++] = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200145 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
146 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
147 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
148 output[chars++] = Py_hexdigits[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000149 }
150 return chars;
151}
152
153static PyObject *
154ascii_escape_unicode(PyObject *pystr)
155{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000156 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000157 Py_ssize_t i;
158 Py_ssize_t input_chars;
159 Py_ssize_t output_size;
160 Py_ssize_t chars;
161 PyObject *rval;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300162 const void *input;
163 Py_UCS1 *output;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200164 int kind;
Christian Heimes90540002008-05-08 14:29:10 +0000165
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200166 if (PyUnicode_READY(pystr) == -1)
167 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000168
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200169 input_chars = PyUnicode_GET_LENGTH(pystr);
170 input = PyUnicode_DATA(pystr);
171 kind = PyUnicode_KIND(pystr);
172
173 /* Compute the output size */
174 for (i = 0, output_size = 2; i < input_chars; i++) {
175 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500176 Py_ssize_t d;
177 if (S_CHAR(c)) {
178 d = 1;
179 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200180 else {
181 switch(c) {
Victor Stinnerd9c06312011-10-11 21:56:19 +0200182 case '\\': case '"': case '\b': case '\f':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200183 case '\n': case '\r': case '\t':
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500184 d = 2; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200185 default:
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500186 d = c >= 0x10000 ? 12 : 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200187 }
188 }
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500189 if (output_size > PY_SSIZE_T_MAX - d) {
190 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
191 return NULL;
192 }
193 output_size += d;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200194 }
195
196 rval = PyUnicode_New(output_size, 127);
Christian Heimes90540002008-05-08 14:29:10 +0000197 if (rval == NULL) {
198 return NULL;
199 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200200 output = PyUnicode_1BYTE_DATA(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000201 chars = 0;
202 output[chars++] = '"';
203 for (i = 0; i < input_chars; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200204 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Christian Heimes90540002008-05-08 14:29:10 +0000205 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000206 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000207 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000208 else {
209 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000210 }
Christian Heimes90540002008-05-08 14:29:10 +0000211 }
212 output[chars++] = '"';
Christian Heimesf402e922013-01-03 09:21:55 +0100213#ifdef Py_DEBUG
Victor Stinner8f825062012-04-27 13:55:39 +0200214 assert(_PyUnicode_CheckConsistency(rval, 1));
Christian Heimesf402e922013-01-03 09:21:55 +0100215#endif
Christian Heimes90540002008-05-08 14:29:10 +0000216 return rval;
217}
218
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100219static PyObject *
220escape_unicode(PyObject *pystr)
221{
222 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
223 Py_ssize_t i;
224 Py_ssize_t input_chars;
225 Py_ssize_t output_size;
226 Py_ssize_t chars;
227 PyObject *rval;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300228 const void *input;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100229 int kind;
230 Py_UCS4 maxchar;
231
232 if (PyUnicode_READY(pystr) == -1)
233 return NULL;
234
235 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
236 input_chars = PyUnicode_GET_LENGTH(pystr);
237 input = PyUnicode_DATA(pystr);
238 kind = PyUnicode_KIND(pystr);
239
240 /* Compute the output size */
241 for (i = 0, output_size = 2; i < input_chars; i++) {
242 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500243 Py_ssize_t d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100244 switch (c) {
245 case '\\': case '"': case '\b': case '\f':
246 case '\n': case '\r': case '\t':
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500247 d = 2;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100248 break;
249 default:
250 if (c <= 0x1f)
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500251 d = 6;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100252 else
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500253 d = 1;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100254 }
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500255 if (output_size > PY_SSIZE_T_MAX - d) {
256 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
257 return NULL;
258 }
259 output_size += d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100260 }
261
262 rval = PyUnicode_New(output_size, maxchar);
263 if (rval == NULL)
264 return NULL;
265
266 kind = PyUnicode_KIND(rval);
267
268#define ENCODE_OUTPUT do { \
269 chars = 0; \
270 output[chars++] = '"'; \
271 for (i = 0; i < input_chars; i++) { \
272 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
273 switch (c) { \
274 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
275 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
276 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
277 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
278 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
279 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
280 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
281 default: \
282 if (c <= 0x1f) { \
283 output[chars++] = '\\'; \
284 output[chars++] = 'u'; \
285 output[chars++] = '0'; \
286 output[chars++] = '0'; \
287 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
288 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
289 } else { \
290 output[chars++] = c; \
291 } \
292 } \
293 } \
294 output[chars++] = '"'; \
295 } while (0)
296
297 if (kind == PyUnicode_1BYTE_KIND) {
298 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
299 ENCODE_OUTPUT;
300 } else if (kind == PyUnicode_2BYTE_KIND) {
301 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
302 ENCODE_OUTPUT;
303 } else {
304 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
305 assert(kind == PyUnicode_4BYTE_KIND);
306 ENCODE_OUTPUT;
307 }
308#undef ENCODE_OUTPUT
309
310#ifdef Py_DEBUG
311 assert(_PyUnicode_CheckConsistency(rval, 1));
312#endif
313 return rval;
314}
315
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000316static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200317raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
Christian Heimes90540002008-05-08 14:29:10 +0000318{
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200319 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
320 static PyObject *JSONDecodeError = NULL;
321 PyObject *exc;
322 if (JSONDecodeError == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000323 PyObject *decoder = PyImport_ImportModule("json.decoder");
324 if (decoder == NULL)
325 return;
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200326 JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000327 Py_DECREF(decoder);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200328 if (JSONDecodeError == NULL)
Christian Heimes90540002008-05-08 14:29:10 +0000329 return;
Christian Heimes90540002008-05-08 14:29:10 +0000330 }
Victor Stinner4c381542016-12-09 00:33:39 +0100331 exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200332 if (exc) {
333 PyErr_SetObject(JSONDecodeError, exc);
334 Py_DECREF(exc);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000335 }
Christian Heimes90540002008-05-08 14:29:10 +0000336}
337
Ezio Melotti37623ab2013-01-03 08:44:15 +0200338static void
339raise_stop_iteration(Py_ssize_t idx)
340{
341 PyObject *value = PyLong_FromSsize_t(idx);
342 if (value != NULL) {
343 PyErr_SetObject(PyExc_StopIteration, value);
344 Py_DECREF(value);
345 }
346}
347
Christian Heimes90540002008-05-08 14:29:10 +0000348static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000349_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
350 /* return (rval, idx) tuple, stealing reference to rval */
351 PyObject *tpl;
352 PyObject *pyidx;
353 /*
354 steal a reference to rval, returns (rval, idx)
355 */
356 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000357 return NULL;
358 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000359 pyidx = PyLong_FromSsize_t(idx);
360 if (pyidx == NULL) {
361 Py_DECREF(rval);
362 return NULL;
363 }
364 tpl = PyTuple_New(2);
365 if (tpl == NULL) {
366 Py_DECREF(pyidx);
367 Py_DECREF(rval);
368 return NULL;
369 }
370 PyTuple_SET_ITEM(tpl, 0, rval);
371 PyTuple_SET_ITEM(tpl, 1, pyidx);
372 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000373}
374
375static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000376scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000377{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000378 /* Read the JSON string from PyUnicode pystr.
379 end is the index of the first character after the quote.
380 if strict is zero then literal control characters are allowed
381 *next_end_ptr is a return-by-reference index of the character
382 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000383
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000384 Return value is a new PyUnicode
385 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000386 PyObject *rval = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200387 Py_ssize_t len;
Christian Heimes90540002008-05-08 14:29:10 +0000388 Py_ssize_t begin = end - 1;
Brett Cannonb94767f2011-02-22 20:15:44 +0000389 Py_ssize_t next /* = begin */;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200390 const void *buf;
391 int kind;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000392
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200393 if (PyUnicode_READY(pystr) == -1)
394 return 0;
395
Inada Naoki9c110292019-10-17 16:12:41 +0900396 _PyUnicodeWriter writer;
397 _PyUnicodeWriter_Init(&writer);
398 writer.overallocate = 1;
399
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200400 len = PyUnicode_GET_LENGTH(pystr);
401 buf = PyUnicode_DATA(pystr);
402 kind = PyUnicode_KIND(pystr);
403
Ezio Melotti37623ab2013-01-03 08:44:15 +0200404 if (end < 0 || len < end) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000405 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
406 goto bail;
407 }
Christian Heimes90540002008-05-08 14:29:10 +0000408 while (1) {
409 /* Find the end of the string or the next escape */
Inada Naoki2a570af2019-08-08 17:57:10 +0900410 Py_UCS4 c;
411 {
412 // Use tight scope variable to help register allocation.
413 Py_UCS4 d = 0;
414 for (next = end; next < len; next++) {
415 d = PyUnicode_READ(kind, buf, next);
416 if (d == '"' || d == '\\') {
417 break;
418 }
419 if (d <= 0x1f && strict) {
420 raise_errmsg("Invalid control character at", pystr, next);
421 goto bail;
422 }
Christian Heimes90540002008-05-08 14:29:10 +0000423 }
Inada Naoki2a570af2019-08-08 17:57:10 +0900424 c = d;
Christian Heimes90540002008-05-08 14:29:10 +0000425 }
Inada Naoki9c110292019-10-17 16:12:41 +0900426
427 if (c == '"') {
428 // Fast path for simple case.
429 if (writer.buffer == NULL) {
430 PyObject *ret = PyUnicode_Substring(pystr, end, next);
431 if (ret == NULL) {
432 goto bail;
433 }
434 *next_end_ptr = next + 1;;
435 return ret;
436 }
437 }
438 else if (c != '\\') {
Christian Heimes90540002008-05-08 14:29:10 +0000439 raise_errmsg("Unterminated string starting at", pystr, begin);
440 goto bail;
441 }
Inada Naoki9c110292019-10-17 16:12:41 +0900442
Christian Heimes90540002008-05-08 14:29:10 +0000443 /* Pick up this chunk if it's not zero length */
444 if (next != end) {
Inada Naoki9c110292019-10-17 16:12:41 +0900445 if (_PyUnicodeWriter_WriteSubstring(&writer, pystr, end, next) < 0) {
Christian Heimes90540002008-05-08 14:29:10 +0000446 goto bail;
447 }
Christian Heimes90540002008-05-08 14:29:10 +0000448 }
449 next++;
450 if (c == '"') {
451 end = next;
452 break;
453 }
454 if (next == len) {
455 raise_errmsg("Unterminated string starting at", pystr, begin);
456 goto bail;
457 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200458 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000459 if (c != 'u') {
460 /* Non-unicode backslash escapes */
461 end = next + 1;
462 switch (c) {
463 case '"': break;
464 case '\\': break;
465 case '/': break;
466 case 'b': c = '\b'; break;
467 case 'f': c = '\f'; break;
468 case 'n': c = '\n'; break;
469 case 'r': c = '\r'; break;
470 case 't': c = '\t'; break;
471 default: c = 0;
472 }
473 if (c == 0) {
474 raise_errmsg("Invalid \\escape", pystr, end - 2);
475 goto bail;
476 }
477 }
478 else {
479 c = 0;
480 next++;
481 end = next + 4;
482 if (end >= len) {
483 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
484 goto bail;
485 }
486 /* Decode 4 hex digits */
487 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200488 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000489 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000490 switch (digit) {
491 case '0': case '1': case '2': case '3': case '4':
492 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000493 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000494 case 'a': case 'b': case 'c': case 'd': case 'e':
495 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000496 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000497 case 'A': case 'B': case 'C': case 'D': case 'E':
498 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000499 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000500 default:
501 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
502 goto bail;
503 }
504 }
Christian Heimes90540002008-05-08 14:29:10 +0000505 /* Surrogate pair */
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200506 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
507 PyUnicode_READ(kind, buf, next++) == '\\' &&
508 PyUnicode_READ(kind, buf, next++) == 'u') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200509 Py_UCS4 c2 = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000510 end += 6;
511 /* Decode 4 hex digits */
512 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200513 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000514 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000515 switch (digit) {
516 case '0': case '1': case '2': case '3': case '4':
517 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000518 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000519 case 'a': case 'b': case 'c': case 'd': case 'e':
520 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000521 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000522 case 'A': case 'B': case 'C': case 'D': case 'E':
523 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000524 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000525 default:
526 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
527 goto bail;
528 }
529 }
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200530 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
531 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
532 else
533 end -= 6;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000534 }
Christian Heimes90540002008-05-08 14:29:10 +0000535 }
Inada Naoki9c110292019-10-17 16:12:41 +0900536 if (_PyUnicodeWriter_WriteChar(&writer, c) < 0) {
Christian Heimes90540002008-05-08 14:29:10 +0000537 goto bail;
538 }
Christian Heimes90540002008-05-08 14:29:10 +0000539 }
540
Inada Naoki9c110292019-10-17 16:12:41 +0900541 rval = _PyUnicodeWriter_Finish(&writer);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000542 *next_end_ptr = end;
543 return rval;
Inada Naoki9c110292019-10-17 16:12:41 +0900544
Christian Heimes90540002008-05-08 14:29:10 +0000545bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000546 *next_end_ptr = -1;
Inada Naoki9c110292019-10-17 16:12:41 +0900547 _PyUnicodeWriter_Dealloc(&writer);
Christian Heimes90540002008-05-08 14:29:10 +0000548 return NULL;
549}
550
551PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000552 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000553 "\n"
554 "Scan the string s for a JSON string. End is the index of the\n"
555 "character in s after the quote that started the JSON string.\n"
556 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
557 "on attempt to decode an invalid string. If strict is False then literal\n"
558 "control characters are allowed in the string.\n"
559 "\n"
560 "Returns a tuple of the decoded string and the index of the character in s\n"
561 "after the end quote."
562);
Christian Heimes90540002008-05-08 14:29:10 +0000563
564static PyObject *
Victor Stinnerfbbfcce2019-11-05 11:44:28 +0100565py_scanstring(PyObject* Py_UNUSED(self), PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000566{
567 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000568 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000569 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000570 Py_ssize_t next_end = -1;
571 int strict = 1;
Antoine Pitroucbb02842012-12-01 19:34:16 +0100572 if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000573 return NULL;
574 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000575 if (PyUnicode_Check(pystr)) {
576 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000577 }
578 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000579 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000580 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000581 Py_TYPE(pystr)->tp_name);
582 return NULL;
583 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000584 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000585}
586
587PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000588 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000589 "\n"
590 "Return an ASCII-only JSON representation of a Python string"
591);
Christian Heimes90540002008-05-08 14:29:10 +0000592
593static PyObject *
Victor Stinnerfbbfcce2019-11-05 11:44:28 +0100594py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000595{
596 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000597 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000598 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000599 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000600 rval = ascii_escape_unicode(pystr);
601 }
602 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000603 PyErr_Format(PyExc_TypeError,
604 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000605 Py_TYPE(pystr)->tp_name);
606 return NULL;
607 }
Christian Heimes90540002008-05-08 14:29:10 +0000608 return rval;
609}
610
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100611
612PyDoc_STRVAR(pydoc_encode_basestring,
613 "encode_basestring(string) -> string\n"
614 "\n"
615 "Return a JSON representation of a Python string"
616);
617
618static PyObject *
Victor Stinnerfbbfcce2019-11-05 11:44:28 +0100619py_encode_basestring(PyObject* Py_UNUSED(self), PyObject *pystr)
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100620{
621 PyObject *rval;
622 /* Return a JSON representation of a Python string */
623 /* METH_O */
624 if (PyUnicode_Check(pystr)) {
625 rval = escape_unicode(pystr);
626 }
627 else {
628 PyErr_Format(PyExc_TypeError,
629 "first argument must be a string, not %.80s",
630 Py_TYPE(pystr)->tp_name);
631 return NULL;
632 }
633 return rval;
634}
635
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000636static void
637scanner_dealloc(PyObject *self)
638{
Dong-hee Na33f15a12020-03-27 19:59:59 +0900639 PyTypeObject *tp = Py_TYPE(self);
INADA Naokia6296d32017-08-24 14:55:17 +0900640 /* bpo-31095: UnTrack is needed before calling any callbacks */
641 PyObject_GC_UnTrack(self);
Dong-hee Na33f15a12020-03-27 19:59:59 +0900642 scanner_clear((PyScannerObject *)self);
643 tp->tp_free(self);
644 Py_DECREF(tp);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000645}
646
647static int
Dong-hee Na33f15a12020-03-27 19:59:59 +0900648scanner_traverse(PyScannerObject *self, visitproc visit, void *arg)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000649{
Pablo Galindo1cf15af2020-05-27 10:03:38 +0100650 Py_VISIT(Py_TYPE(self));
Dong-hee Na33f15a12020-03-27 19:59:59 +0900651 Py_VISIT(self->object_hook);
652 Py_VISIT(self->object_pairs_hook);
653 Py_VISIT(self->parse_float);
654 Py_VISIT(self->parse_int);
655 Py_VISIT(self->parse_constant);
Hai Shib7093022020-04-05 03:24:16 +0800656 Py_VISIT(self->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000657 return 0;
658}
659
660static int
Dong-hee Na33f15a12020-03-27 19:59:59 +0900661scanner_clear(PyScannerObject *self)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000662{
Dong-hee Na33f15a12020-03-27 19:59:59 +0900663 Py_CLEAR(self->object_hook);
664 Py_CLEAR(self->object_pairs_hook);
665 Py_CLEAR(self->parse_float);
666 Py_CLEAR(self->parse_int);
667 Py_CLEAR(self->parse_constant);
668 Py_CLEAR(self->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000669 return 0;
670}
671
672static PyObject *
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300673_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
674{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000675 /* Read a JSON object from PyUnicode pystr.
676 idx is the index of the first character after the opening curly brace.
677 *next_idx_ptr is a return-by-reference index to the first character after
678 the closing curly brace.
679
680 Returns a new PyObject (usually a dict, but object_hook can change that)
681 */
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300682 const void *str;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200683 int kind;
684 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000685 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000686 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000687 PyObject *key = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000688 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000689 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000690
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200691 if (PyUnicode_READY(pystr) == -1)
692 return NULL;
693
694 str = PyUnicode_DATA(pystr);
695 kind = PyUnicode_KIND(pystr);
696 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
697
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000698 if (has_pairs_hook)
699 rval = PyList_New(0);
700 else
701 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000702 if (rval == NULL)
703 return NULL;
704
705 /* skip whitespace after { */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200706 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000707
708 /* only loop if the object is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200709 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
710 while (1) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000711 PyObject *memokey;
712
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000713 /* read key */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200714 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200715 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000716 goto bail;
717 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300718 key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000719 if (key == NULL)
720 goto bail;
Inada Naoki2a570af2019-08-08 17:57:10 +0900721 memokey = PyDict_SetDefault(s->memo, key, key);
722 if (memokey == NULL) {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200723 goto bail;
724 }
Inada Naoki2a570af2019-08-08 17:57:10 +0900725 Py_INCREF(memokey);
726 Py_DECREF(key);
727 key = memokey;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000728 idx = next_idx;
729
730 /* skip whitespace between key and : delimiter, read :, skip whitespace */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200731 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
732 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200733 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000734 goto bail;
735 }
736 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200737 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000738
739 /* read any JSON term */
740 val = scan_once_unicode(s, pystr, idx, &next_idx);
741 if (val == NULL)
742 goto bail;
743
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000744 if (has_pairs_hook) {
745 PyObject *item = PyTuple_Pack(2, key, val);
746 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000747 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000748 Py_CLEAR(key);
749 Py_CLEAR(val);
750 if (PyList_Append(rval, item) == -1) {
751 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000752 goto bail;
753 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000754 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000755 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000756 else {
757 if (PyDict_SetItem(rval, key, val) < 0)
758 goto bail;
759 Py_CLEAR(key);
760 Py_CLEAR(val);
761 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000762 idx = next_idx;
763
764 /* skip whitespace before } or , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200765 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000766
767 /* bail if the object is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200768 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000769 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200770 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200771 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000772 goto bail;
773 }
774 idx++;
775
776 /* skip whitespace after , delimiter */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200777 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000778 }
779 }
780
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000781 *next_idx_ptr = idx + 1;
782
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000783 if (has_pairs_hook) {
Petr Viktorinffd97532020-02-11 17:46:57 +0100784 val = PyObject_CallOneArg(s->object_pairs_hook, rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000785 Py_DECREF(rval);
786 return val;
787 }
788
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000789 /* if object_hook is not None: rval = object_hook(rval) */
790 if (s->object_hook != Py_None) {
Petr Viktorinffd97532020-02-11 17:46:57 +0100791 val = PyObject_CallOneArg(s->object_hook, rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000792 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000793 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000794 }
795 return rval;
796bail:
797 Py_XDECREF(key);
798 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000799 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000800 return NULL;
801}
802
803static PyObject *
804_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200805 /* Read a JSON array from PyUnicode pystr.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000806 idx is the index of the first character after the opening brace.
807 *next_idx_ptr is a return-by-reference index to the first character after
808 the closing brace.
809
810 Returns a new PyList
811 */
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300812 const void *str;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200813 int kind;
814 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000815 PyObject *val = NULL;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200816 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000817 Py_ssize_t next_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000818
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200819 if (PyUnicode_READY(pystr) == -1)
820 return NULL;
821
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200822 rval = PyList_New(0);
823 if (rval == NULL)
824 return NULL;
825
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200826 str = PyUnicode_DATA(pystr);
827 kind = PyUnicode_KIND(pystr);
828 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
829
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000830 /* skip whitespace after [ */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200831 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000832
833 /* only loop if the array is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200834 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
835 while (1) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000836
837 /* read any JSON term */
838 val = scan_once_unicode(s, pystr, idx, &next_idx);
839 if (val == NULL)
840 goto bail;
841
842 if (PyList_Append(rval, val) == -1)
843 goto bail;
844
845 Py_CLEAR(val);
846 idx = next_idx;
847
848 /* skip whitespace between term and , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200849 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000850
851 /* bail if the array is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200852 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000853 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200854 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200855 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000856 goto bail;
857 }
858 idx++;
859
860 /* skip whitespace after , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200861 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000862 }
863 }
864
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200865 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
866 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200867 raise_errmsg("Expecting value", pystr, end_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000868 goto bail;
869 }
870 *next_idx_ptr = idx + 1;
871 return rval;
872bail:
873 Py_XDECREF(val);
874 Py_DECREF(rval);
875 return NULL;
876}
877
878static PyObject *
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200879_parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
880 /* Read a JSON constant.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000881 constant is the constant string that was found
882 ("NaN", "Infinity", "-Infinity").
883 idx is the index of the first character of the constant
884 *next_idx_ptr is a return-by-reference index to the first character after
885 the constant.
886
887 Returns the result of parse_constant
888 */
889 PyObject *cstr;
890 PyObject *rval;
891 /* constant is "NaN", "Infinity", or "-Infinity" */
892 cstr = PyUnicode_InternFromString(constant);
893 if (cstr == NULL)
894 return NULL;
895
896 /* rval = parse_constant(constant) */
Petr Viktorinffd97532020-02-11 17:46:57 +0100897 rval = PyObject_CallOneArg(s->parse_constant, cstr);
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200898 idx += PyUnicode_GET_LENGTH(cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000899 Py_DECREF(cstr);
900 *next_idx_ptr = idx;
901 return rval;
902}
903
904static PyObject *
905_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
906 /* Read a JSON number from PyUnicode pystr.
907 idx is the index of the first character of the number
908 *next_idx_ptr is a return-by-reference index to the first character after
909 the number.
910
911 Returns a new PyObject representation of that number:
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200912 PyLong, or PyFloat.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000913 May return other types if parse_int or parse_float are set
914 */
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300915 const void *str;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200916 int kind;
917 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000918 Py_ssize_t idx = start;
919 int is_float = 0;
920 PyObject *rval;
Antoine Pitrouf6454512011-04-25 19:16:06 +0200921 PyObject *numstr = NULL;
922 PyObject *custom_func;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000923
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200924 if (PyUnicode_READY(pystr) == -1)
925 return NULL;
926
927 str = PyUnicode_DATA(pystr);
928 kind = PyUnicode_KIND(pystr);
929 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
930
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000931 /* read a sign if it's there, make sure it's not the end of the string */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200932 if (PyUnicode_READ(kind, str, idx) == '-') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000933 idx++;
934 if (idx > end_idx) {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200935 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000936 return NULL;
937 }
938 }
939
940 /* read as many integer digits as we find as long as it doesn't start with 0 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200941 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000942 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200943 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000944 }
945 /* if it starts with 0 we only expect one integer digit */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200946 else if (PyUnicode_READ(kind, str, idx) == '0') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000947 idx++;
948 }
949 /* no integer digits, error */
950 else {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200951 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000952 return NULL;
953 }
954
955 /* if the next char is '.' followed by a digit then read all float digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200956 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000957 is_float = 1;
958 idx += 2;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200959 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000960 }
961
962 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200963 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000964 Py_ssize_t e_start = idx;
965 idx++;
966
967 /* read an exponent sign if present */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200968 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000969
970 /* read all digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200971 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000972
973 /* if we got a digit, then parse as float. if not, backtrack */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200974 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000975 is_float = 1;
976 }
977 else {
978 idx = e_start;
979 }
980 }
981
Antoine Pitrouf6454512011-04-25 19:16:06 +0200982 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
983 custom_func = s->parse_float;
984 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
985 custom_func = s->parse_int;
986 else
987 custom_func = NULL;
988
989 if (custom_func) {
990 /* copy the section we determined to be a number */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200991 numstr = PyUnicode_FromKindAndData(kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200992 (char*)str + kind * start,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200993 idx - start);
Antoine Pitrouf6454512011-04-25 19:16:06 +0200994 if (numstr == NULL)
995 return NULL;
Petr Viktorinffd97532020-02-11 17:46:57 +0100996 rval = PyObject_CallOneArg(custom_func, numstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000997 }
998 else {
Antoine Pitrouf6454512011-04-25 19:16:06 +0200999 Py_ssize_t i, n;
1000 char *buf;
1001 /* Straight conversion to ASCII, to avoid costly conversion of
1002 decimal unicode digits (which cannot appear here) */
1003 n = idx - start;
1004 numstr = PyBytes_FromStringAndSize(NULL, n);
1005 if (numstr == NULL)
1006 return NULL;
1007 buf = PyBytes_AS_STRING(numstr);
1008 for (i = 0; i < n; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001009 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001010 }
1011 if (is_float)
1012 rval = PyFloat_FromString(numstr);
1013 else
1014 rval = PyLong_FromString(buf, NULL, 10);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001015 }
1016 Py_DECREF(numstr);
1017 *next_idx_ptr = idx;
1018 return rval;
1019}
1020
1021static PyObject *
1022scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1023{
1024 /* Read one JSON term (of any kind) from PyUnicode pystr.
1025 idx is the index of the first character of the term
1026 *next_idx_ptr is a return-by-reference index to the first character after
1027 the number.
1028
1029 Returns a new PyObject representation of the term.
1030 */
Ezio Melotti362b9512011-05-07 17:58:09 +03001031 PyObject *res;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001032 const void *str;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001033 int kind;
1034 Py_ssize_t length;
1035
1036 if (PyUnicode_READY(pystr) == -1)
1037 return NULL;
1038
1039 str = PyUnicode_DATA(pystr);
1040 kind = PyUnicode_KIND(pystr);
1041 length = PyUnicode_GET_LENGTH(pystr);
1042
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001043 if (idx < 0) {
Benjamin Peterson9beee042014-04-14 11:46:51 -04001044 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001045 return NULL;
1046 }
1047 if (idx >= length) {
Ezio Melotti37623ab2013-01-03 08:44:15 +02001048 raise_stop_iteration(idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001049 return NULL;
1050 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001051
1052 switch (PyUnicode_READ(kind, str, idx)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001053 case '"':
1054 /* string */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001055 return scanstring_unicode(pystr, idx + 1, s->strict, next_idx_ptr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001056 case '{':
1057 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +03001058 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1059 "from a unicode string"))
1060 return NULL;
1061 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1062 Py_LeaveRecursiveCall();
1063 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001064 case '[':
1065 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +03001066 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1067 "from a unicode string"))
1068 return NULL;
1069 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1070 Py_LeaveRecursiveCall();
1071 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001072 case 'n':
1073 /* null */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001074 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001075 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001076 Py_RETURN_NONE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001077 }
1078 break;
1079 case 't':
1080 /* true */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001081 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001082 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001083 Py_RETURN_TRUE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001084 }
1085 break;
1086 case 'f':
1087 /* false */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001088 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1089 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1090 PyUnicode_READ(kind, str, idx + 3) == 's' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001091 PyUnicode_READ(kind, str, idx + 4) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001092 *next_idx_ptr = idx + 5;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001093 Py_RETURN_FALSE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001094 }
1095 break;
1096 case 'N':
1097 /* NaN */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001098 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001099 PyUnicode_READ(kind, str, idx + 2) == 'N') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001100 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1101 }
1102 break;
1103 case 'I':
1104 /* Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001105 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1106 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1107 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001108 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001109 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1110 PyUnicode_READ(kind, str, idx + 6) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001111 PyUnicode_READ(kind, str, idx + 7) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001112 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1113 }
1114 break;
1115 case '-':
1116 /* -Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001117 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001118 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1119 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001120 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001121 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001122 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1123 PyUnicode_READ(kind, str, idx + 7) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001124 PyUnicode_READ(kind, str, idx + 8) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001125 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1126 }
1127 break;
1128 }
1129 /* Didn't find a string, object, array, or named constant. Look for a number. */
1130 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1131}
1132
1133static PyObject *
Dong-hee Na33f15a12020-03-27 19:59:59 +09001134scanner_call(PyScannerObject *self, PyObject *args, PyObject *kwds)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001135{
1136 /* Python callable interface to scan_once_{str,unicode} */
1137 PyObject *pystr;
1138 PyObject *rval;
1139 Py_ssize_t idx;
1140 Py_ssize_t next_idx = -1;
1141 static char *kwlist[] = {"string", "idx", NULL};
Antoine Pitroucbb02842012-12-01 19:34:16 +01001142 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001143 return NULL;
1144
1145 if (PyUnicode_Check(pystr)) {
Dong-hee Na33f15a12020-03-27 19:59:59 +09001146 rval = scan_once_unicode(self, pystr, idx, &next_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001147 }
1148 else {
1149 PyErr_Format(PyExc_TypeError,
1150 "first argument must be a string, not %.80s",
1151 Py_TYPE(pystr)->tp_name);
1152 return NULL;
1153 }
Dong-hee Na33f15a12020-03-27 19:59:59 +09001154 PyDict_Clear(self->memo);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001155 if (rval == NULL)
1156 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001157 return _build_rval_index_tuple(rval, next_idx);
1158}
1159
1160static PyObject *
1161scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1162{
1163 PyScannerObject *s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001164 PyObject *ctx;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001165 PyObject *strict;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001166 static char *kwlist[] = {"context", NULL};
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001167
1168 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001169 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001170
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001171 s = (PyScannerObject *)type->tp_alloc(type, 0);
1172 if (s == NULL) {
1173 return NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001174 }
1175
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001176 s->memo = PyDict_New();
1177 if (s->memo == NULL)
1178 goto bail;
1179
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001180 /* All of these will fail "gracefully" so we don't need to verify them */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001181 strict = PyObject_GetAttrString(ctx, "strict");
1182 if (strict == NULL)
1183 goto bail;
1184 s->strict = PyObject_IsTrue(strict);
1185 Py_DECREF(strict);
1186 if (s->strict < 0)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001187 goto bail;
1188 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1189 if (s->object_hook == NULL)
1190 goto bail;
1191 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1192 if (s->object_pairs_hook == NULL)
1193 goto bail;
1194 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1195 if (s->parse_float == NULL)
1196 goto bail;
1197 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1198 if (s->parse_int == NULL)
1199 goto bail;
1200 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1201 if (s->parse_constant == NULL)
1202 goto bail;
1203
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001204 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001205
1206bail:
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001207 Py_DECREF(s);
1208 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001209}
1210
1211PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1212
Dong-hee Na33f15a12020-03-27 19:59:59 +09001213static PyType_Slot PyScannerType_slots[] = {
1214 {Py_tp_doc, (void *)scanner_doc},
1215 {Py_tp_dealloc, scanner_dealloc},
1216 {Py_tp_call, scanner_call},
1217 {Py_tp_traverse, scanner_traverse},
1218 {Py_tp_clear, scanner_clear},
1219 {Py_tp_members, scanner_members},
1220 {Py_tp_new, scanner_new},
1221 {0, 0}
1222};
1223
1224static PyType_Spec PyScannerType_spec = {
1225 .name = "_json.Scanner",
1226 .basicsize = sizeof(PyScannerObject),
1227 .itemsize = 0,
1228 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
1229 .slots = PyScannerType_slots,
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001230};
1231
1232static PyObject *
1233encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1234{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001235 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1236
1237 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001238 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001239 PyObject *item_separator;
1240 int sort_keys, skipkeys, allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001241
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001242 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001243 &markers, &defaultfn, &encoder, &indent,
1244 &key_separator, &item_separator,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001245 &sort_keys, &skipkeys, &allow_nan))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001246 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001247
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001248 if (markers != Py_None && !PyDict_Check(markers)) {
1249 PyErr_Format(PyExc_TypeError,
1250 "make_encoder() argument 1 must be dict or None, "
1251 "not %.200s", Py_TYPE(markers)->tp_name);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001252 return NULL;
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001253 }
1254
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001255 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1256 if (s == NULL)
1257 return NULL;
1258
Antoine Pitrou781eba72009-12-08 15:57:31 +00001259 s->markers = markers;
1260 s->defaultfn = defaultfn;
1261 s->encoder = encoder;
1262 s->indent = indent;
1263 s->key_separator = key_separator;
1264 s->item_separator = item_separator;
1265 s->sort_keys = sort_keys;
1266 s->skipkeys = skipkeys;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001267 s->allow_nan = allow_nan;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001268 s->fast_encode = NULL;
1269 if (PyCFunction_Check(s->encoder)) {
1270 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1271 if (f == (PyCFunction)py_encode_basestring_ascii ||
1272 f == (PyCFunction)py_encode_basestring) {
1273 s->fast_encode = f;
1274 }
1275 }
Antoine Pitrou781eba72009-12-08 15:57:31 +00001276
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001277 Py_INCREF(s->markers);
1278 Py_INCREF(s->defaultfn);
1279 Py_INCREF(s->encoder);
1280 Py_INCREF(s->indent);
1281 Py_INCREF(s->key_separator);
1282 Py_INCREF(s->item_separator);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001283 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001284}
1285
1286static PyObject *
Dong-hee Na33f15a12020-03-27 19:59:59 +09001287encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001288{
1289 /* Python callable interface to encode_listencode_obj */
1290 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1291 PyObject *obj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001292 Py_ssize_t indent_level;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001293 _PyAccu acc;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001294 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1295 &obj, &indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001296 return NULL;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001297 if (_PyAccu_Init(&acc))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001298 return NULL;
Dong-hee Na33f15a12020-03-27 19:59:59 +09001299 if (encoder_listencode_obj(self, &acc, obj, indent_level)) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001300 _PyAccu_Destroy(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001301 return NULL;
1302 }
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001303 return _PyAccu_FinishAsList(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001304}
1305
1306static PyObject *
1307_encoded_const(PyObject *obj)
1308{
1309 /* Return the JSON string representation of None, True, False */
1310 if (obj == Py_None) {
1311 static PyObject *s_null = NULL;
1312 if (s_null == NULL) {
1313 s_null = PyUnicode_InternFromString("null");
1314 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001315 Py_XINCREF(s_null);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001316 return s_null;
1317 }
1318 else if (obj == Py_True) {
1319 static PyObject *s_true = NULL;
1320 if (s_true == NULL) {
1321 s_true = PyUnicode_InternFromString("true");
1322 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001323 Py_XINCREF(s_true);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001324 return s_true;
1325 }
1326 else if (obj == Py_False) {
1327 static PyObject *s_false = NULL;
1328 if (s_false == NULL) {
1329 s_false = PyUnicode_InternFromString("false");
1330 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001331 Py_XINCREF(s_false);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001332 return s_false;
1333 }
1334 else {
1335 PyErr_SetString(PyExc_ValueError, "not a const");
1336 return NULL;
1337 }
1338}
1339
1340static PyObject *
1341encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1342{
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001343 /* Return the JSON representation of a PyFloat. */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001344 double i = PyFloat_AS_DOUBLE(obj);
1345 if (!Py_IS_FINITE(i)) {
1346 if (!s->allow_nan) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001347 PyErr_SetString(
1348 PyExc_ValueError,
1349 "Out of range float values are not JSON compliant"
1350 );
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001351 return NULL;
1352 }
1353 if (i > 0) {
1354 return PyUnicode_FromString("Infinity");
1355 }
1356 else if (i < 0) {
1357 return PyUnicode_FromString("-Infinity");
1358 }
1359 else {
1360 return PyUnicode_FromString("NaN");
1361 }
1362 }
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001363 return PyFloat_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001364}
1365
1366static PyObject *
1367encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1368{
1369 /* Return the JSON representation of a string */
Oren Milman2b382dd2017-09-24 12:07:12 +03001370 PyObject *encoded;
1371
1372 if (s->fast_encode) {
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001373 return s->fast_encode(NULL, obj);
Oren Milman2b382dd2017-09-24 12:07:12 +03001374 }
Petr Viktorinffd97532020-02-11 17:46:57 +01001375 encoded = PyObject_CallOneArg(s->encoder, obj);
Oren Milman2b382dd2017-09-24 12:07:12 +03001376 if (encoded != NULL && !PyUnicode_Check(encoded)) {
1377 PyErr_Format(PyExc_TypeError,
1378 "encoder() must return a string, not %.80s",
1379 Py_TYPE(encoded)->tp_name);
1380 Py_DECREF(encoded);
1381 return NULL;
1382 }
1383 return encoded;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001384}
1385
1386static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001387_steal_accumulate(_PyAccu *acc, PyObject *stolen)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001388{
1389 /* Append stolen and then decrement its reference count */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001390 int rval = _PyAccu_Accumulate(acc, stolen);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001391 Py_DECREF(stolen);
1392 return rval;
1393}
1394
1395static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001396encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001397 PyObject *obj, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001398{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001399 /* Encode Python object obj to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001400 PyObject *newobj;
1401 int rv;
1402
1403 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1404 PyObject *cstr = _encoded_const(obj);
1405 if (cstr == NULL)
1406 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001407 return _steal_accumulate(acc, cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001408 }
1409 else if (PyUnicode_Check(obj))
1410 {
1411 PyObject *encoded = encoder_encode_string(s, obj);
1412 if (encoded == NULL)
1413 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001414 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001415 }
1416 else if (PyLong_Check(obj)) {
Serhiy Storchaka96aeaec2019-05-06 22:29:40 +03001417 PyObject *encoded = PyLong_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001418 if (encoded == NULL)
1419 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001420 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001421 }
1422 else if (PyFloat_Check(obj)) {
1423 PyObject *encoded = encoder_encode_float(s, obj);
1424 if (encoded == NULL)
1425 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001426 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001427 }
1428 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001429 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1430 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001431 rv = encoder_listencode_list(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001432 Py_LeaveRecursiveCall();
1433 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001434 }
1435 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001436 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1437 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001438 rv = encoder_listencode_dict(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001439 Py_LeaveRecursiveCall();
1440 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001441 }
1442 else {
1443 PyObject *ident = NULL;
1444 if (s->markers != Py_None) {
1445 int has_key;
1446 ident = PyLong_FromVoidPtr(obj);
1447 if (ident == NULL)
1448 return -1;
1449 has_key = PyDict_Contains(s->markers, ident);
1450 if (has_key) {
1451 if (has_key != -1)
1452 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1453 Py_DECREF(ident);
1454 return -1;
1455 }
1456 if (PyDict_SetItem(s->markers, ident, obj)) {
1457 Py_DECREF(ident);
1458 return -1;
1459 }
1460 }
Petr Viktorinffd97532020-02-11 17:46:57 +01001461 newobj = PyObject_CallOneArg(s->defaultfn, obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001462 if (newobj == NULL) {
1463 Py_XDECREF(ident);
1464 return -1;
1465 }
Ezio Melotti13672652011-05-11 01:02:56 +03001466
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001467 if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
1468 Py_DECREF(newobj);
1469 Py_XDECREF(ident);
Ezio Melotti13672652011-05-11 01:02:56 +03001470 return -1;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001471 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001472 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001473 Py_LeaveRecursiveCall();
1474
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001475 Py_DECREF(newobj);
1476 if (rv) {
1477 Py_XDECREF(ident);
1478 return -1;
1479 }
1480 if (ident != NULL) {
1481 if (PyDict_DelItem(s->markers, ident)) {
1482 Py_XDECREF(ident);
1483 return -1;
1484 }
1485 Py_XDECREF(ident);
1486 }
1487 return rv;
1488 }
1489}
1490
1491static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001492encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001493 PyObject *dct, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001494{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001495 /* Encode Python dict dct a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001496 static PyObject *open_dict = NULL;
1497 static PyObject *close_dict = NULL;
1498 static PyObject *empty_dict = NULL;
1499 PyObject *kstr = NULL;
1500 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001501 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001502 PyObject *items;
1503 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001504 Py_ssize_t idx;
1505
1506 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1507 open_dict = PyUnicode_InternFromString("{");
1508 close_dict = PyUnicode_InternFromString("}");
1509 empty_dict = PyUnicode_InternFromString("{}");
1510 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1511 return -1;
1512 }
Serhiy Storchaka8cbc51a2017-01-13 08:38:15 +02001513 if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001514 return _PyAccu_Accumulate(acc, empty_dict);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001515
1516 if (s->markers != Py_None) {
1517 int has_key;
1518 ident = PyLong_FromVoidPtr(dct);
1519 if (ident == NULL)
1520 goto bail;
1521 has_key = PyDict_Contains(s->markers, ident);
1522 if (has_key) {
1523 if (has_key != -1)
1524 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1525 goto bail;
1526 }
1527 if (PyDict_SetItem(s->markers, ident, dct)) {
1528 goto bail;
1529 }
1530 }
1531
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001532 if (_PyAccu_Accumulate(acc, open_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001533 goto bail;
1534
1535 if (s->indent != Py_None) {
1536 /* TODO: DOES NOT RUN */
1537 indent_level += 1;
1538 /*
1539 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1540 separator = _item_separator + newline_indent
1541 buf += newline_indent
1542 */
1543 }
1544
Benjamin Peterson501182a2015-05-02 22:28:04 -04001545 items = PyMapping_Items(dct);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001546 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001547 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001548 if (s->sort_keys && PyList_Sort(items) < 0) {
1549 Py_DECREF(items);
Benjamin Peterson501182a2015-05-02 22:28:04 -04001550 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001551 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001552 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001553 Py_DECREF(items);
1554 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001555 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001556 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001557 while ((item = PyIter_Next(it)) != NULL) {
1558 PyObject *encoded, *key, *value;
Serhiy Storchakafff9a312017-03-21 08:53:25 +02001559 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001560 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1561 goto bail;
1562 }
1563 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001564 if (PyUnicode_Check(key)) {
1565 Py_INCREF(key);
1566 kstr = key;
1567 }
1568 else if (PyFloat_Check(key)) {
1569 kstr = encoder_encode_float(s, key);
1570 if (kstr == NULL)
1571 goto bail;
1572 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001573 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001574 /* This must come before the PyLong_Check because
1575 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001576 kstr = _encoded_const(key);
1577 if (kstr == NULL)
1578 goto bail;
1579 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001580 else if (PyLong_Check(key)) {
Serhiy Storchaka96aeaec2019-05-06 22:29:40 +03001581 kstr = PyLong_Type.tp_repr(key);
Ethan Furmana4998a72013-08-10 13:01:45 -07001582 if (kstr == NULL) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001583 goto bail;
Ethan Furmana4998a72013-08-10 13:01:45 -07001584 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001585 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001586 else if (s->skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001587 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001588 continue;
1589 }
1590 else {
Serhiy Storchakacfa797c2017-11-25 17:38:20 +02001591 PyErr_Format(PyExc_TypeError,
1592 "keys must be str, int, float, bool or None, "
Victor Stinnerdaa97562020-02-07 03:37:06 +01001593 "not %.100s", Py_TYPE(key)->tp_name);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001594 goto bail;
1595 }
1596
1597 if (idx) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001598 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001599 goto bail;
1600 }
1601
1602 encoded = encoder_encode_string(s, kstr);
1603 Py_CLEAR(kstr);
1604 if (encoded == NULL)
1605 goto bail;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001606 if (_PyAccu_Accumulate(acc, encoded)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001607 Py_DECREF(encoded);
1608 goto bail;
1609 }
1610 Py_DECREF(encoded);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001611 if (_PyAccu_Accumulate(acc, s->key_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001612 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001613
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001614 value = PyTuple_GET_ITEM(item, 1);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001615 if (encoder_listencode_obj(s, acc, value, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001616 goto bail;
1617 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001618 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001619 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001620 if (PyErr_Occurred())
1621 goto bail;
1622 Py_CLEAR(it);
1623
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001624 if (ident != NULL) {
1625 if (PyDict_DelItem(s->markers, ident))
1626 goto bail;
1627 Py_CLEAR(ident);
1628 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001629 /* TODO DOES NOT RUN; dead code
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001630 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001631 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001632
1633 yield '\n' + (' ' * (_indent * _current_indent_level))
1634 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001635 if (_PyAccu_Accumulate(acc, close_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001636 goto bail;
1637 return 0;
1638
1639bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001640 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001641 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001642 Py_XDECREF(kstr);
1643 Py_XDECREF(ident);
1644 return -1;
1645}
1646
1647
1648static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001649encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001650 PyObject *seq, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001651{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001652 /* Encode Python list seq to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001653 static PyObject *open_array = NULL;
1654 static PyObject *close_array = NULL;
1655 static PyObject *empty_array = NULL;
1656 PyObject *ident = NULL;
1657 PyObject *s_fast = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001658 Py_ssize_t i;
1659
1660 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1661 open_array = PyUnicode_InternFromString("[");
1662 close_array = PyUnicode_InternFromString("]");
1663 empty_array = PyUnicode_InternFromString("[]");
1664 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1665 return -1;
1666 }
1667 ident = NULL;
1668 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1669 if (s_fast == NULL)
1670 return -1;
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001671 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001672 Py_DECREF(s_fast);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001673 return _PyAccu_Accumulate(acc, empty_array);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001674 }
1675
1676 if (s->markers != Py_None) {
1677 int has_key;
1678 ident = PyLong_FromVoidPtr(seq);
1679 if (ident == NULL)
1680 goto bail;
1681 has_key = PyDict_Contains(s->markers, ident);
1682 if (has_key) {
1683 if (has_key != -1)
1684 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1685 goto bail;
1686 }
1687 if (PyDict_SetItem(s->markers, ident, seq)) {
1688 goto bail;
1689 }
1690 }
1691
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001692 if (_PyAccu_Accumulate(acc, open_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001693 goto bail;
1694 if (s->indent != Py_None) {
1695 /* TODO: DOES NOT RUN */
1696 indent_level += 1;
1697 /*
1698 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1699 separator = _item_separator + newline_indent
1700 buf += newline_indent
1701 */
1702 }
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001703 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1704 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001705 if (i) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001706 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001707 goto bail;
1708 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001709 if (encoder_listencode_obj(s, acc, obj, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001710 goto bail;
1711 }
1712 if (ident != NULL) {
1713 if (PyDict_DelItem(s->markers, ident))
1714 goto bail;
1715 Py_CLEAR(ident);
1716 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001717
1718 /* TODO: DOES NOT RUN
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001719 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001720 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001721
1722 yield '\n' + (' ' * (_indent * _current_indent_level))
1723 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001724 if (_PyAccu_Accumulate(acc, close_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001725 goto bail;
1726 Py_DECREF(s_fast);
1727 return 0;
1728
1729bail:
1730 Py_XDECREF(ident);
1731 Py_DECREF(s_fast);
1732 return -1;
1733}
1734
1735static void
1736encoder_dealloc(PyObject *self)
1737{
Dong-hee Na33f15a12020-03-27 19:59:59 +09001738 PyTypeObject *tp = Py_TYPE(self);
INADA Naokia6296d32017-08-24 14:55:17 +09001739 /* bpo-31095: UnTrack is needed before calling any callbacks */
1740 PyObject_GC_UnTrack(self);
Dong-hee Na33f15a12020-03-27 19:59:59 +09001741 encoder_clear((PyEncoderObject *)self);
1742 tp->tp_free(self);
1743 Py_DECREF(tp);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001744}
1745
1746static int
Dong-hee Na33f15a12020-03-27 19:59:59 +09001747encoder_traverse(PyEncoderObject *self, visitproc visit, void *arg)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001748{
Pablo Galindo1cf15af2020-05-27 10:03:38 +01001749 Py_VISIT(Py_TYPE(self));
Dong-hee Na33f15a12020-03-27 19:59:59 +09001750 Py_VISIT(self->markers);
1751 Py_VISIT(self->defaultfn);
1752 Py_VISIT(self->encoder);
1753 Py_VISIT(self->indent);
1754 Py_VISIT(self->key_separator);
1755 Py_VISIT(self->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001756 return 0;
1757}
1758
1759static int
Dong-hee Na33f15a12020-03-27 19:59:59 +09001760encoder_clear(PyEncoderObject *self)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001761{
1762 /* Deallocate Encoder */
Dong-hee Na33f15a12020-03-27 19:59:59 +09001763 Py_CLEAR(self->markers);
1764 Py_CLEAR(self->defaultfn);
1765 Py_CLEAR(self->encoder);
1766 Py_CLEAR(self->indent);
1767 Py_CLEAR(self->key_separator);
1768 Py_CLEAR(self->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001769 return 0;
1770}
1771
1772PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1773
Dong-hee Na33f15a12020-03-27 19:59:59 +09001774static PyType_Slot PyEncoderType_slots[] = {
1775 {Py_tp_doc, (void *)encoder_doc},
1776 {Py_tp_dealloc, encoder_dealloc},
1777 {Py_tp_call, encoder_call},
1778 {Py_tp_traverse, encoder_traverse},
1779 {Py_tp_clear, encoder_clear},
1780 {Py_tp_members, encoder_members},
1781 {Py_tp_new, encoder_new},
1782 {0, 0}
1783};
1784
1785static PyType_Spec PyEncoderType_spec = {
1786 .name = "_json.Encoder",
1787 .basicsize = sizeof(PyEncoderObject),
1788 .itemsize = 0,
1789 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
1790 .slots = PyEncoderType_slots
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001791};
1792
1793static PyMethodDef speedups_methods[] = {
1794 {"encode_basestring_ascii",
1795 (PyCFunction)py_encode_basestring_ascii,
1796 METH_O,
1797 pydoc_encode_basestring_ascii},
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001798 {"encode_basestring",
1799 (PyCFunction)py_encode_basestring,
1800 METH_O,
1801 pydoc_encode_basestring},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001802 {"scanstring",
1803 (PyCFunction)py_scanstring,
1804 METH_VARARGS,
1805 pydoc_scanstring},
Christian Heimes90540002008-05-08 14:29:10 +00001806 {NULL, NULL, 0, NULL}
1807};
1808
1809PyDoc_STRVAR(module_doc,
1810"json speedups\n");
1811
Hai Shied154c32020-01-16 00:32:51 +08001812static int
1813_json_exec(PyObject *module)
1814{
Dong-hee Na33f15a12020-03-27 19:59:59 +09001815 _jsonmodulestate *state = get_json_state(module);
1816
1817 state->PyScannerType = PyType_FromSpec(&PyScannerType_spec);
1818 if (state->PyScannerType == NULL) {
Hai Shied154c32020-01-16 00:32:51 +08001819 return -1;
1820 }
Dong-hee Na33f15a12020-03-27 19:59:59 +09001821 Py_INCREF(state->PyScannerType);
1822 if (PyModule_AddObject(module, "make_scanner", state->PyScannerType) < 0) {
Hai Shidcb04d92020-04-09 23:10:29 +08001823 Py_DECREF(state->PyScannerType);
Hai Shied154c32020-01-16 00:32:51 +08001824 return -1;
1825 }
Dong-hee Na33f15a12020-03-27 19:59:59 +09001826
1827 state->PyEncoderType = PyType_FromSpec(&PyEncoderType_spec);
1828 if (state->PyEncoderType == NULL) {
Hai Shied154c32020-01-16 00:32:51 +08001829 return -1;
1830 }
Dong-hee Na33f15a12020-03-27 19:59:59 +09001831 Py_INCREF(state->PyEncoderType);
1832 if (PyModule_AddObject(module, "make_encoder", state->PyEncoderType) < 0) {
Hai Shidcb04d92020-04-09 23:10:29 +08001833 Py_DECREF(state->PyEncoderType);
Hai Shied154c32020-01-16 00:32:51 +08001834 return -1;
1835 }
Dong-hee Na33f15a12020-03-27 19:59:59 +09001836
Hai Shied154c32020-01-16 00:32:51 +08001837 return 0;
1838}
1839
Dong-hee Na33f15a12020-03-27 19:59:59 +09001840static int
1841_jsonmodule_traverse(PyObject *module, visitproc visit, void *arg)
1842{
1843 _jsonmodulestate *state = get_json_state(module);
1844 Py_VISIT(state->PyScannerType);
1845 Py_VISIT(state->PyEncoderType);
1846 return 0;
1847}
1848
1849static int
1850_jsonmodule_clear(PyObject *module)
1851{
1852 _jsonmodulestate *state = get_json_state(module);
1853 Py_CLEAR(state->PyScannerType);
1854 Py_CLEAR(state->PyEncoderType);
1855 return 0;
1856}
1857
1858static void
1859_jsonmodule_free(void *module)
1860{
1861 _jsonmodule_clear((PyObject *)module);
1862}
1863
Hai Shied154c32020-01-16 00:32:51 +08001864static PyModuleDef_Slot _json_slots[] = {
1865 {Py_mod_exec, _json_exec},
1866 {0, NULL}
1867};
1868
Martin v. Löwis1a214512008-06-11 05:26:20 +00001869static struct PyModuleDef jsonmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001870 PyModuleDef_HEAD_INIT,
1871 "_json",
1872 module_doc,
Dong-hee Na33f15a12020-03-27 19:59:59 +09001873 sizeof(_jsonmodulestate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001874 speedups_methods,
Hai Shied154c32020-01-16 00:32:51 +08001875 _json_slots,
Dong-hee Na33f15a12020-03-27 19:59:59 +09001876 _jsonmodule_traverse,
1877 _jsonmodule_clear,
1878 _jsonmodule_free,
Martin v. Löwis1a214512008-06-11 05:26:20 +00001879};
1880
Victor Stinnerf024d262015-03-17 17:48:27 +01001881PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001882PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00001883{
Hai Shied154c32020-01-16 00:32:51 +08001884 return PyModuleDef_Init(&jsonmodule);
Christian Heimes90540002008-05-08 14:29:10 +00001885}