blob: e10f83c96c565f409de6588c18d8b3b1c47b4646 [file] [log] [blame]
Victor Stinner5c75f372019-04-17 23:02:26 +02001/* JSON accelerator C extension: _json module.
2 *
3 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
4 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
5 * platforms. */
Eric Snow2ebc5ce2017-09-07 23:51:28 -06006
Victor Stinner5c75f372019-04-17 23:02:26 +02007#if !defined(Py_BUILD_CORE_BUILTIN) && !defined(Py_BUILD_CORE_MODULE)
8# error "Py_BUILD_CORE_BUILTIN or Py_BUILD_CORE_MODULE must be defined"
Eric Snow2ebc5ce2017-09-07 23:51:28 -06009#endif
10
Christian Heimes90540002008-05-08 14:29:10 +000011#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020012#include "structmember.h" // PyMemberDef
Victor Stinnere281f7d2018-11-01 02:30:36 +010013#include "pycore_accu.h"
Antoine Pitroud0acb412012-03-22 14:42:18 +010014
Dong-hee Na33f15a12020-03-27 19:59:59 +090015typedef struct {
16 PyObject *PyScannerType;
17 PyObject *PyEncoderType;
18} _jsonmodulestate;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000019
Dong-hee Na33f15a12020-03-27 19:59:59 +090020static inline _jsonmodulestate*
21get_json_state(PyObject *module)
22{
23 void *state = PyModule_GetState(module);
24 assert(state != NULL);
25 return (_jsonmodulestate *)state;
26}
27
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000028
29typedef struct _PyScannerObject {
30 PyObject_HEAD
Segev Finer541bd282017-07-13 08:52:08 +030031 signed char strict;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000032 PyObject *object_hook;
33 PyObject *object_pairs_hook;
34 PyObject *parse_float;
35 PyObject *parse_int;
36 PyObject *parse_constant;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +000037 PyObject *memo;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000038} PyScannerObject;
39
40static PyMemberDef scanner_members[] = {
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030041 {"strict", T_BOOL, offsetof(PyScannerObject, strict), READONLY, "strict"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000042 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
43 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
44 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
45 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
46 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
47 {NULL}
48};
49
50typedef struct _PyEncoderObject {
51 PyObject_HEAD
52 PyObject *markers;
53 PyObject *defaultfn;
54 PyObject *encoder;
55 PyObject *indent;
56 PyObject *key_separator;
57 PyObject *item_separator;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030058 char sort_keys;
59 char skipkeys;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000060 int allow_nan;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030061 PyCFunction fast_encode;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000062} PyEncoderObject;
63
64static PyMemberDef encoder_members[] = {
65 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
66 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
67 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
68 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
69 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
70 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030071 {"sort_keys", T_BOOL, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
72 {"skipkeys", T_BOOL, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000073 {NULL}
74};
75
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020076/* Forward decls */
77
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000078static PyObject *
79ascii_escape_unicode(PyObject *pystr);
80static PyObject *
Victor Stinnerfbbfcce2019-11-05 11:44:28 +010081py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000082static PyObject *
83scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
84static PyObject *
85_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
86static PyObject *
87scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000088static void
89scanner_dealloc(PyObject *self);
90static int
Dong-hee Na33f15a12020-03-27 19:59:59 +090091scanner_clear(PyScannerObject *self);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000092static PyObject *
93encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000094static void
95encoder_dealloc(PyObject *self);
96static int
Dong-hee Na33f15a12020-03-27 19:59:59 +090097encoder_clear(PyEncoderObject *self);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000098static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +020099encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000100static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200101encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000102static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200103encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000104static PyObject *
Hirokazu Yamamotofecf5d12009-05-02 15:55:19 +0000105_encoded_const(PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000106static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200107raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000108static PyObject *
109encoder_encode_string(PyEncoderObject *s, PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000110static PyObject *
111encoder_encode_float(PyEncoderObject *s, PyObject *obj);
112
/* True when c can be copied verbatim into an ASCII-only JSON string:
   printable ASCII (' ' .. '~') other than backslash and double quote.
   The argument is parenthesized so compound expressions expand correctly;
   it is still evaluated several times, so it must be free of side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* True for the four characters skipped as whitespace by the scanner:
   space, tab, line feed and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
Christian Heimes90540002008-05-08 14:29:10 +0000115
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000116static Py_ssize_t
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200117ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000118{
119 /* Escape unicode code point c to ASCII escape sequences
120 in char *output. output must have at least 12 bytes unused to
121 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000122 output[chars++] = '\\';
123 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000124 case '\\': output[chars++] = c; break;
125 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000126 case '\b': output[chars++] = 'b'; break;
127 case '\f': output[chars++] = 'f'; break;
128 case '\n': output[chars++] = 'n'; break;
129 case '\r': output[chars++] = 'r'; break;
130 case '\t': output[chars++] = 't'; break;
131 default:
Christian Heimes90540002008-05-08 14:29:10 +0000132 if (c >= 0x10000) {
133 /* UTF-16 surrogate pair */
Victor Stinner76df43d2012-10-30 01:42:39 +0100134 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000135 output[chars++] = 'u';
Victor Stinner76df43d2012-10-30 01:42:39 +0100136 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
137 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
138 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
139 output[chars++] = Py_hexdigits[(v ) & 0xf];
140 c = Py_UNICODE_LOW_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000141 output[chars++] = '\\';
142 }
Christian Heimes90540002008-05-08 14:29:10 +0000143 output[chars++] = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200144 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
145 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
146 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
147 output[chars++] = Py_hexdigits[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000148 }
149 return chars;
150}
151
152static PyObject *
153ascii_escape_unicode(PyObject *pystr)
154{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000155 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000156 Py_ssize_t i;
157 Py_ssize_t input_chars;
158 Py_ssize_t output_size;
159 Py_ssize_t chars;
160 PyObject *rval;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300161 const void *input;
162 Py_UCS1 *output;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200163 int kind;
Christian Heimes90540002008-05-08 14:29:10 +0000164
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200165 if (PyUnicode_READY(pystr) == -1)
166 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000167
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200168 input_chars = PyUnicode_GET_LENGTH(pystr);
169 input = PyUnicode_DATA(pystr);
170 kind = PyUnicode_KIND(pystr);
171
172 /* Compute the output size */
173 for (i = 0, output_size = 2; i < input_chars; i++) {
174 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500175 Py_ssize_t d;
176 if (S_CHAR(c)) {
177 d = 1;
178 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200179 else {
180 switch(c) {
Victor Stinnerd9c06312011-10-11 21:56:19 +0200181 case '\\': case '"': case '\b': case '\f':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200182 case '\n': case '\r': case '\t':
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500183 d = 2; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200184 default:
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500185 d = c >= 0x10000 ? 12 : 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200186 }
187 }
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500188 if (output_size > PY_SSIZE_T_MAX - d) {
189 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
190 return NULL;
191 }
192 output_size += d;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200193 }
194
195 rval = PyUnicode_New(output_size, 127);
Christian Heimes90540002008-05-08 14:29:10 +0000196 if (rval == NULL) {
197 return NULL;
198 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200199 output = PyUnicode_1BYTE_DATA(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000200 chars = 0;
201 output[chars++] = '"';
202 for (i = 0; i < input_chars; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200203 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Christian Heimes90540002008-05-08 14:29:10 +0000204 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000205 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000206 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000207 else {
208 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000209 }
Christian Heimes90540002008-05-08 14:29:10 +0000210 }
211 output[chars++] = '"';
Christian Heimesf402e922013-01-03 09:21:55 +0100212#ifdef Py_DEBUG
Victor Stinner8f825062012-04-27 13:55:39 +0200213 assert(_PyUnicode_CheckConsistency(rval, 1));
Christian Heimesf402e922013-01-03 09:21:55 +0100214#endif
Christian Heimes90540002008-05-08 14:29:10 +0000215 return rval;
216}
217
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100218static PyObject *
219escape_unicode(PyObject *pystr)
220{
221 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
222 Py_ssize_t i;
223 Py_ssize_t input_chars;
224 Py_ssize_t output_size;
225 Py_ssize_t chars;
226 PyObject *rval;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300227 const void *input;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100228 int kind;
229 Py_UCS4 maxchar;
230
231 if (PyUnicode_READY(pystr) == -1)
232 return NULL;
233
234 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
235 input_chars = PyUnicode_GET_LENGTH(pystr);
236 input = PyUnicode_DATA(pystr);
237 kind = PyUnicode_KIND(pystr);
238
239 /* Compute the output size */
240 for (i = 0, output_size = 2; i < input_chars; i++) {
241 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500242 Py_ssize_t d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100243 switch (c) {
244 case '\\': case '"': case '\b': case '\f':
245 case '\n': case '\r': case '\t':
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500246 d = 2;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100247 break;
248 default:
249 if (c <= 0x1f)
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500250 d = 6;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100251 else
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500252 d = 1;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100253 }
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500254 if (output_size > PY_SSIZE_T_MAX - d) {
255 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
256 return NULL;
257 }
258 output_size += d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100259 }
260
261 rval = PyUnicode_New(output_size, maxchar);
262 if (rval == NULL)
263 return NULL;
264
265 kind = PyUnicode_KIND(rval);
266
267#define ENCODE_OUTPUT do { \
268 chars = 0; \
269 output[chars++] = '"'; \
270 for (i = 0; i < input_chars; i++) { \
271 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
272 switch (c) { \
273 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
274 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
275 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
276 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
277 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
278 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
279 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
280 default: \
281 if (c <= 0x1f) { \
282 output[chars++] = '\\'; \
283 output[chars++] = 'u'; \
284 output[chars++] = '0'; \
285 output[chars++] = '0'; \
286 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
287 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
288 } else { \
289 output[chars++] = c; \
290 } \
291 } \
292 } \
293 output[chars++] = '"'; \
294 } while (0)
295
296 if (kind == PyUnicode_1BYTE_KIND) {
297 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
298 ENCODE_OUTPUT;
299 } else if (kind == PyUnicode_2BYTE_KIND) {
300 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
301 ENCODE_OUTPUT;
302 } else {
303 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
304 assert(kind == PyUnicode_4BYTE_KIND);
305 ENCODE_OUTPUT;
306 }
307#undef ENCODE_OUTPUT
308
309#ifdef Py_DEBUG
310 assert(_PyUnicode_CheckConsistency(rval, 1));
311#endif
312 return rval;
313}
314
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000315static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200316raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
Christian Heimes90540002008-05-08 14:29:10 +0000317{
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200318 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
Ken Jinb5931f12021-02-02 00:26:56 +0800319 _Py_static_string(PyId_decoder, "json.decoder");
320 PyObject *decoder = _PyImport_GetModuleId(&PyId_decoder);
321 if (decoder == NULL) {
322 return;
Christian Heimes90540002008-05-08 14:29:10 +0000323 }
Ken Jinb5931f12021-02-02 00:26:56 +0800324
325 _Py_IDENTIFIER(JSONDecodeError);
326 PyObject *JSONDecodeError = _PyObject_GetAttrId(decoder, &PyId_JSONDecodeError);
327 Py_DECREF(decoder);
328 if (JSONDecodeError == NULL) {
329 return;
330 }
331
332 PyObject *exc;
Victor Stinner4c381542016-12-09 00:33:39 +0100333 exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
Ken Jinb5931f12021-02-02 00:26:56 +0800334 Py_DECREF(JSONDecodeError);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200335 if (exc) {
336 PyErr_SetObject(JSONDecodeError, exc);
337 Py_DECREF(exc);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000338 }
Christian Heimes90540002008-05-08 14:29:10 +0000339}
340
Ezio Melotti37623ab2013-01-03 08:44:15 +0200341static void
342raise_stop_iteration(Py_ssize_t idx)
343{
344 PyObject *value = PyLong_FromSsize_t(idx);
345 if (value != NULL) {
346 PyErr_SetObject(PyExc_StopIteration, value);
347 Py_DECREF(value);
348 }
349}
350
Christian Heimes90540002008-05-08 14:29:10 +0000351static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000352_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
353 /* return (rval, idx) tuple, stealing reference to rval */
354 PyObject *tpl;
355 PyObject *pyidx;
356 /*
357 steal a reference to rval, returns (rval, idx)
358 */
359 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000360 return NULL;
361 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000362 pyidx = PyLong_FromSsize_t(idx);
363 if (pyidx == NULL) {
364 Py_DECREF(rval);
365 return NULL;
366 }
367 tpl = PyTuple_New(2);
368 if (tpl == NULL) {
369 Py_DECREF(pyidx);
370 Py_DECREF(rval);
371 return NULL;
372 }
373 PyTuple_SET_ITEM(tpl, 0, rval);
374 PyTuple_SET_ITEM(tpl, 1, pyidx);
375 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000376}
377
378static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000379scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000380{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000381 /* Read the JSON string from PyUnicode pystr.
382 end is the index of the first character after the quote.
383 if strict is zero then literal control characters are allowed
384 *next_end_ptr is a return-by-reference index of the character
385 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000386
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000387 Return value is a new PyUnicode
388 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000389 PyObject *rval = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200390 Py_ssize_t len;
Christian Heimes90540002008-05-08 14:29:10 +0000391 Py_ssize_t begin = end - 1;
Brett Cannonb94767f2011-02-22 20:15:44 +0000392 Py_ssize_t next /* = begin */;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200393 const void *buf;
394 int kind;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000395
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200396 if (PyUnicode_READY(pystr) == -1)
397 return 0;
398
Inada Naoki9c110292019-10-17 16:12:41 +0900399 _PyUnicodeWriter writer;
400 _PyUnicodeWriter_Init(&writer);
401 writer.overallocate = 1;
402
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200403 len = PyUnicode_GET_LENGTH(pystr);
404 buf = PyUnicode_DATA(pystr);
405 kind = PyUnicode_KIND(pystr);
406
Ezio Melotti37623ab2013-01-03 08:44:15 +0200407 if (end < 0 || len < end) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000408 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
409 goto bail;
410 }
Christian Heimes90540002008-05-08 14:29:10 +0000411 while (1) {
412 /* Find the end of the string or the next escape */
Inada Naoki2a570af2019-08-08 17:57:10 +0900413 Py_UCS4 c;
414 {
415 // Use tight scope variable to help register allocation.
416 Py_UCS4 d = 0;
417 for (next = end; next < len; next++) {
418 d = PyUnicode_READ(kind, buf, next);
419 if (d == '"' || d == '\\') {
420 break;
421 }
422 if (d <= 0x1f && strict) {
423 raise_errmsg("Invalid control character at", pystr, next);
424 goto bail;
425 }
Christian Heimes90540002008-05-08 14:29:10 +0000426 }
Inada Naoki2a570af2019-08-08 17:57:10 +0900427 c = d;
Christian Heimes90540002008-05-08 14:29:10 +0000428 }
Inada Naoki9c110292019-10-17 16:12:41 +0900429
430 if (c == '"') {
431 // Fast path for simple case.
432 if (writer.buffer == NULL) {
433 PyObject *ret = PyUnicode_Substring(pystr, end, next);
434 if (ret == NULL) {
435 goto bail;
436 }
437 *next_end_ptr = next + 1;;
438 return ret;
439 }
440 }
441 else if (c != '\\') {
Christian Heimes90540002008-05-08 14:29:10 +0000442 raise_errmsg("Unterminated string starting at", pystr, begin);
443 goto bail;
444 }
Inada Naoki9c110292019-10-17 16:12:41 +0900445
Christian Heimes90540002008-05-08 14:29:10 +0000446 /* Pick up this chunk if it's not zero length */
447 if (next != end) {
Inada Naoki9c110292019-10-17 16:12:41 +0900448 if (_PyUnicodeWriter_WriteSubstring(&writer, pystr, end, next) < 0) {
Christian Heimes90540002008-05-08 14:29:10 +0000449 goto bail;
450 }
Christian Heimes90540002008-05-08 14:29:10 +0000451 }
452 next++;
453 if (c == '"') {
454 end = next;
455 break;
456 }
457 if (next == len) {
458 raise_errmsg("Unterminated string starting at", pystr, begin);
459 goto bail;
460 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200461 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000462 if (c != 'u') {
463 /* Non-unicode backslash escapes */
464 end = next + 1;
465 switch (c) {
466 case '"': break;
467 case '\\': break;
468 case '/': break;
469 case 'b': c = '\b'; break;
470 case 'f': c = '\f'; break;
471 case 'n': c = '\n'; break;
472 case 'r': c = '\r'; break;
473 case 't': c = '\t'; break;
474 default: c = 0;
475 }
476 if (c == 0) {
477 raise_errmsg("Invalid \\escape", pystr, end - 2);
478 goto bail;
479 }
480 }
481 else {
482 c = 0;
483 next++;
484 end = next + 4;
485 if (end >= len) {
486 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
487 goto bail;
488 }
489 /* Decode 4 hex digits */
490 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200491 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000492 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000493 switch (digit) {
494 case '0': case '1': case '2': case '3': case '4':
495 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000496 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000497 case 'a': case 'b': case 'c': case 'd': case 'e':
498 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000499 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000500 case 'A': case 'B': case 'C': case 'D': case 'E':
501 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000502 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000503 default:
504 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
505 goto bail;
506 }
507 }
Christian Heimes90540002008-05-08 14:29:10 +0000508 /* Surrogate pair */
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200509 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
510 PyUnicode_READ(kind, buf, next++) == '\\' &&
511 PyUnicode_READ(kind, buf, next++) == 'u') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200512 Py_UCS4 c2 = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000513 end += 6;
514 /* Decode 4 hex digits */
515 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200516 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000517 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000518 switch (digit) {
519 case '0': case '1': case '2': case '3': case '4':
520 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000521 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000522 case 'a': case 'b': case 'c': case 'd': case 'e':
523 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000524 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000525 case 'A': case 'B': case 'C': case 'D': case 'E':
526 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000527 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000528 default:
529 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
530 goto bail;
531 }
532 }
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200533 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
534 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
535 else
536 end -= 6;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000537 }
Christian Heimes90540002008-05-08 14:29:10 +0000538 }
Inada Naoki9c110292019-10-17 16:12:41 +0900539 if (_PyUnicodeWriter_WriteChar(&writer, c) < 0) {
Christian Heimes90540002008-05-08 14:29:10 +0000540 goto bail;
541 }
Christian Heimes90540002008-05-08 14:29:10 +0000542 }
543
Inada Naoki9c110292019-10-17 16:12:41 +0900544 rval = _PyUnicodeWriter_Finish(&writer);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000545 *next_end_ptr = end;
546 return rval;
Inada Naoki9c110292019-10-17 16:12:41 +0900547
Christian Heimes90540002008-05-08 14:29:10 +0000548bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000549 *next_end_ptr = -1;
Inada Naoki9c110292019-10-17 16:12:41 +0900550 _PyUnicodeWriter_Dealloc(&writer);
Christian Heimes90540002008-05-08 14:29:10 +0000551 return NULL;
552}
553
554PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000555 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000556 "\n"
557 "Scan the string s for a JSON string. End is the index of the\n"
558 "character in s after the quote that started the JSON string.\n"
559 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
560 "on attempt to decode an invalid string. If strict is False then literal\n"
561 "control characters are allowed in the string.\n"
562 "\n"
563 "Returns a tuple of the decoded string and the index of the character in s\n"
564 "after the end quote."
565);
Christian Heimes90540002008-05-08 14:29:10 +0000566
567static PyObject *
Victor Stinnerfbbfcce2019-11-05 11:44:28 +0100568py_scanstring(PyObject* Py_UNUSED(self), PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000569{
570 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000571 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000572 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000573 Py_ssize_t next_end = -1;
574 int strict = 1;
Antoine Pitroucbb02842012-12-01 19:34:16 +0100575 if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000576 return NULL;
577 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000578 if (PyUnicode_Check(pystr)) {
579 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000580 }
581 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000582 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000583 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000584 Py_TYPE(pystr)->tp_name);
585 return NULL;
586 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000587 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000588}
589
590PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000591 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000592 "\n"
593 "Return an ASCII-only JSON representation of a Python string"
594);
Christian Heimes90540002008-05-08 14:29:10 +0000595
596static PyObject *
Victor Stinnerfbbfcce2019-11-05 11:44:28 +0100597py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000598{
599 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000600 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000601 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000602 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000603 rval = ascii_escape_unicode(pystr);
604 }
605 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000606 PyErr_Format(PyExc_TypeError,
607 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000608 Py_TYPE(pystr)->tp_name);
609 return NULL;
610 }
Christian Heimes90540002008-05-08 14:29:10 +0000611 return rval;
612}
613
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100614
615PyDoc_STRVAR(pydoc_encode_basestring,
616 "encode_basestring(string) -> string\n"
617 "\n"
618 "Return a JSON representation of a Python string"
619);
620
621static PyObject *
Victor Stinnerfbbfcce2019-11-05 11:44:28 +0100622py_encode_basestring(PyObject* Py_UNUSED(self), PyObject *pystr)
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100623{
624 PyObject *rval;
625 /* Return a JSON representation of a Python string */
626 /* METH_O */
627 if (PyUnicode_Check(pystr)) {
628 rval = escape_unicode(pystr);
629 }
630 else {
631 PyErr_Format(PyExc_TypeError,
632 "first argument must be a string, not %.80s",
633 Py_TYPE(pystr)->tp_name);
634 return NULL;
635 }
636 return rval;
637}
638
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000639static void
640scanner_dealloc(PyObject *self)
641{
Dong-hee Na33f15a12020-03-27 19:59:59 +0900642 PyTypeObject *tp = Py_TYPE(self);
INADA Naokia6296d32017-08-24 14:55:17 +0900643 /* bpo-31095: UnTrack is needed before calling any callbacks */
644 PyObject_GC_UnTrack(self);
Dong-hee Na33f15a12020-03-27 19:59:59 +0900645 scanner_clear((PyScannerObject *)self);
646 tp->tp_free(self);
647 Py_DECREF(tp);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000648}
649
650static int
Dong-hee Na33f15a12020-03-27 19:59:59 +0900651scanner_traverse(PyScannerObject *self, visitproc visit, void *arg)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000652{
Pablo Galindo1cf15af2020-05-27 10:03:38 +0100653 Py_VISIT(Py_TYPE(self));
Dong-hee Na33f15a12020-03-27 19:59:59 +0900654 Py_VISIT(self->object_hook);
655 Py_VISIT(self->object_pairs_hook);
656 Py_VISIT(self->parse_float);
657 Py_VISIT(self->parse_int);
658 Py_VISIT(self->parse_constant);
Hai Shib7093022020-04-05 03:24:16 +0800659 Py_VISIT(self->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000660 return 0;
661}
662
663static int
Dong-hee Na33f15a12020-03-27 19:59:59 +0900664scanner_clear(PyScannerObject *self)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000665{
Dong-hee Na33f15a12020-03-27 19:59:59 +0900666 Py_CLEAR(self->object_hook);
667 Py_CLEAR(self->object_pairs_hook);
668 Py_CLEAR(self->parse_float);
669 Py_CLEAR(self->parse_int);
670 Py_CLEAR(self->parse_constant);
671 Py_CLEAR(self->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000672 return 0;
673}
674
675static PyObject *
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300676_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
677{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000678 /* Read a JSON object from PyUnicode pystr.
679 idx is the index of the first character after the opening curly brace.
680 *next_idx_ptr is a return-by-reference index to the first character after
681 the closing curly brace.
682
683 Returns a new PyObject (usually a dict, but object_hook can change that)
684 */
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300685 const void *str;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200686 int kind;
687 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000688 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000689 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000690 PyObject *key = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000691 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000692 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000693
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200694 if (PyUnicode_READY(pystr) == -1)
695 return NULL;
696
697 str = PyUnicode_DATA(pystr);
698 kind = PyUnicode_KIND(pystr);
699 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
700
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000701 if (has_pairs_hook)
702 rval = PyList_New(0);
703 else
704 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000705 if (rval == NULL)
706 return NULL;
707
708 /* skip whitespace after { */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200709 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000710
711 /* only loop if the object is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200712 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
713 while (1) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000714 PyObject *memokey;
715
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000716 /* read key */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200717 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200718 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000719 goto bail;
720 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300721 key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000722 if (key == NULL)
723 goto bail;
Inada Naoki2a570af2019-08-08 17:57:10 +0900724 memokey = PyDict_SetDefault(s->memo, key, key);
725 if (memokey == NULL) {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200726 goto bail;
727 }
Inada Naoki2a570af2019-08-08 17:57:10 +0900728 Py_INCREF(memokey);
729 Py_DECREF(key);
730 key = memokey;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000731 idx = next_idx;
732
733 /* skip whitespace between key and : delimiter, read :, skip whitespace */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200734 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
735 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200736 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000737 goto bail;
738 }
739 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200740 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000741
742 /* read any JSON term */
743 val = scan_once_unicode(s, pystr, idx, &next_idx);
744 if (val == NULL)
745 goto bail;
746
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000747 if (has_pairs_hook) {
748 PyObject *item = PyTuple_Pack(2, key, val);
749 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000750 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000751 Py_CLEAR(key);
752 Py_CLEAR(val);
753 if (PyList_Append(rval, item) == -1) {
754 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000755 goto bail;
756 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000757 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000758 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000759 else {
760 if (PyDict_SetItem(rval, key, val) < 0)
761 goto bail;
762 Py_CLEAR(key);
763 Py_CLEAR(val);
764 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000765 idx = next_idx;
766
767 /* skip whitespace before } or , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200768 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000769
770 /* bail if the object is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200771 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000772 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200773 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200774 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000775 goto bail;
776 }
777 idx++;
778
779 /* skip whitespace after , delimiter */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200780 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000781 }
782 }
783
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000784 *next_idx_ptr = idx + 1;
785
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000786 if (has_pairs_hook) {
Petr Viktorinffd97532020-02-11 17:46:57 +0100787 val = PyObject_CallOneArg(s->object_pairs_hook, rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000788 Py_DECREF(rval);
789 return val;
790 }
791
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000792 /* if object_hook is not None: rval = object_hook(rval) */
793 if (s->object_hook != Py_None) {
Petr Viktorinffd97532020-02-11 17:46:57 +0100794 val = PyObject_CallOneArg(s->object_hook, rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000795 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000796 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000797 }
798 return rval;
799bail:
800 Py_XDECREF(key);
801 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000802 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000803 return NULL;
804}
805
806static PyObject *
807_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200808 /* Read a JSON array from PyUnicode pystr.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000809 idx is the index of the first character after the opening brace.
810 *next_idx_ptr is a return-by-reference index to the first character after
811 the closing brace.
812
813 Returns a new PyList
814 */
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300815 const void *str;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200816 int kind;
817 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000818 PyObject *val = NULL;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200819 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000820 Py_ssize_t next_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000821
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200822 if (PyUnicode_READY(pystr) == -1)
823 return NULL;
824
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200825 rval = PyList_New(0);
826 if (rval == NULL)
827 return NULL;
828
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200829 str = PyUnicode_DATA(pystr);
830 kind = PyUnicode_KIND(pystr);
831 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
832
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000833 /* skip whitespace after [ */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200834 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000835
836 /* only loop if the array is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200837 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
838 while (1) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000839
840 /* read any JSON term */
841 val = scan_once_unicode(s, pystr, idx, &next_idx);
842 if (val == NULL)
843 goto bail;
844
845 if (PyList_Append(rval, val) == -1)
846 goto bail;
847
848 Py_CLEAR(val);
849 idx = next_idx;
850
851 /* skip whitespace between term and , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200852 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000853
854 /* bail if the array is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200855 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000856 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200857 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200858 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000859 goto bail;
860 }
861 idx++;
862
863 /* skip whitespace after , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200864 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000865 }
866 }
867
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200868 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
869 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200870 raise_errmsg("Expecting value", pystr, end_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000871 goto bail;
872 }
873 *next_idx_ptr = idx + 1;
874 return rval;
875bail:
876 Py_XDECREF(val);
877 Py_DECREF(rval);
878 return NULL;
879}
880
881static PyObject *
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200882_parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
883 /* Read a JSON constant.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000884 constant is the constant string that was found
885 ("NaN", "Infinity", "-Infinity").
886 idx is the index of the first character of the constant
887 *next_idx_ptr is a return-by-reference index to the first character after
888 the constant.
889
890 Returns the result of parse_constant
891 */
892 PyObject *cstr;
893 PyObject *rval;
894 /* constant is "NaN", "Infinity", or "-Infinity" */
895 cstr = PyUnicode_InternFromString(constant);
896 if (cstr == NULL)
897 return NULL;
898
899 /* rval = parse_constant(constant) */
Petr Viktorinffd97532020-02-11 17:46:57 +0100900 rval = PyObject_CallOneArg(s->parse_constant, cstr);
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200901 idx += PyUnicode_GET_LENGTH(cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000902 Py_DECREF(cstr);
903 *next_idx_ptr = idx;
904 return rval;
905}
906
907static PyObject *
908_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
909 /* Read a JSON number from PyUnicode pystr.
910 idx is the index of the first character of the number
911 *next_idx_ptr is a return-by-reference index to the first character after
912 the number.
913
914 Returns a new PyObject representation of that number:
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200915 PyLong, or PyFloat.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000916 May return other types if parse_int or parse_float are set
917 */
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300918 const void *str;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200919 int kind;
920 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000921 Py_ssize_t idx = start;
922 int is_float = 0;
923 PyObject *rval;
Antoine Pitrouf6454512011-04-25 19:16:06 +0200924 PyObject *numstr = NULL;
925 PyObject *custom_func;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000926
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200927 if (PyUnicode_READY(pystr) == -1)
928 return NULL;
929
930 str = PyUnicode_DATA(pystr);
931 kind = PyUnicode_KIND(pystr);
932 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
933
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000934 /* read a sign if it's there, make sure it's not the end of the string */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200935 if (PyUnicode_READ(kind, str, idx) == '-') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000936 idx++;
937 if (idx > end_idx) {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200938 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000939 return NULL;
940 }
941 }
942
943 /* read as many integer digits as we find as long as it doesn't start with 0 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200944 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000945 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200946 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000947 }
948 /* if it starts with 0 we only expect one integer digit */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200949 else if (PyUnicode_READ(kind, str, idx) == '0') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000950 idx++;
951 }
952 /* no integer digits, error */
953 else {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200954 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000955 return NULL;
956 }
957
958 /* if the next char is '.' followed by a digit then read all float digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200959 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000960 is_float = 1;
961 idx += 2;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200962 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000963 }
964
965 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200966 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000967 Py_ssize_t e_start = idx;
968 idx++;
969
970 /* read an exponent sign if present */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200971 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000972
973 /* read all digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200974 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000975
976 /* if we got a digit, then parse as float. if not, backtrack */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200977 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000978 is_float = 1;
979 }
980 else {
981 idx = e_start;
982 }
983 }
984
Antoine Pitrouf6454512011-04-25 19:16:06 +0200985 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
986 custom_func = s->parse_float;
987 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
988 custom_func = s->parse_int;
989 else
990 custom_func = NULL;
991
992 if (custom_func) {
993 /* copy the section we determined to be a number */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200994 numstr = PyUnicode_FromKindAndData(kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200995 (char*)str + kind * start,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200996 idx - start);
Antoine Pitrouf6454512011-04-25 19:16:06 +0200997 if (numstr == NULL)
998 return NULL;
Petr Viktorinffd97532020-02-11 17:46:57 +0100999 rval = PyObject_CallOneArg(custom_func, numstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001000 }
1001 else {
Antoine Pitrouf6454512011-04-25 19:16:06 +02001002 Py_ssize_t i, n;
1003 char *buf;
1004 /* Straight conversion to ASCII, to avoid costly conversion of
1005 decimal unicode digits (which cannot appear here) */
1006 n = idx - start;
1007 numstr = PyBytes_FromStringAndSize(NULL, n);
1008 if (numstr == NULL)
1009 return NULL;
1010 buf = PyBytes_AS_STRING(numstr);
1011 for (i = 0; i < n; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001012 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001013 }
1014 if (is_float)
1015 rval = PyFloat_FromString(numstr);
1016 else
1017 rval = PyLong_FromString(buf, NULL, 10);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001018 }
1019 Py_DECREF(numstr);
1020 *next_idx_ptr = idx;
1021 return rval;
1022}
1023
1024static PyObject *
1025scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1026{
1027 /* Read one JSON term (of any kind) from PyUnicode pystr.
1028 idx is the index of the first character of the term
1029 *next_idx_ptr is a return-by-reference index to the first character after
1030 the number.
1031
1032 Returns a new PyObject representation of the term.
1033 */
Ezio Melotti362b9512011-05-07 17:58:09 +03001034 PyObject *res;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001035 const void *str;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001036 int kind;
1037 Py_ssize_t length;
1038
1039 if (PyUnicode_READY(pystr) == -1)
1040 return NULL;
1041
1042 str = PyUnicode_DATA(pystr);
1043 kind = PyUnicode_KIND(pystr);
1044 length = PyUnicode_GET_LENGTH(pystr);
1045
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001046 if (idx < 0) {
Benjamin Peterson9beee042014-04-14 11:46:51 -04001047 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001048 return NULL;
1049 }
1050 if (idx >= length) {
Ezio Melotti37623ab2013-01-03 08:44:15 +02001051 raise_stop_iteration(idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001052 return NULL;
1053 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001054
1055 switch (PyUnicode_READ(kind, str, idx)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001056 case '"':
1057 /* string */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001058 return scanstring_unicode(pystr, idx + 1, s->strict, next_idx_ptr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001059 case '{':
1060 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +03001061 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1062 "from a unicode string"))
1063 return NULL;
1064 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1065 Py_LeaveRecursiveCall();
1066 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001067 case '[':
1068 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +03001069 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1070 "from a unicode string"))
1071 return NULL;
1072 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1073 Py_LeaveRecursiveCall();
1074 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001075 case 'n':
1076 /* null */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001077 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001078 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001079 Py_RETURN_NONE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001080 }
1081 break;
1082 case 't':
1083 /* true */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001084 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001085 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001086 Py_RETURN_TRUE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001087 }
1088 break;
1089 case 'f':
1090 /* false */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001091 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1092 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1093 PyUnicode_READ(kind, str, idx + 3) == 's' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001094 PyUnicode_READ(kind, str, idx + 4) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001095 *next_idx_ptr = idx + 5;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001096 Py_RETURN_FALSE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001097 }
1098 break;
1099 case 'N':
1100 /* NaN */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001101 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001102 PyUnicode_READ(kind, str, idx + 2) == 'N') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001103 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1104 }
1105 break;
1106 case 'I':
1107 /* Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001108 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1109 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1110 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001111 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001112 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1113 PyUnicode_READ(kind, str, idx + 6) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001114 PyUnicode_READ(kind, str, idx + 7) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001115 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1116 }
1117 break;
1118 case '-':
1119 /* -Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001120 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001121 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1122 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001123 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001124 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001125 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1126 PyUnicode_READ(kind, str, idx + 7) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001127 PyUnicode_READ(kind, str, idx + 8) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001128 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1129 }
1130 break;
1131 }
1132 /* Didn't find a string, object, array, or named constant. Look for a number. */
1133 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1134}
1135
1136static PyObject *
Dong-hee Na33f15a12020-03-27 19:59:59 +09001137scanner_call(PyScannerObject *self, PyObject *args, PyObject *kwds)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001138{
1139 /* Python callable interface to scan_once_{str,unicode} */
1140 PyObject *pystr;
1141 PyObject *rval;
1142 Py_ssize_t idx;
1143 Py_ssize_t next_idx = -1;
1144 static char *kwlist[] = {"string", "idx", NULL};
Antoine Pitroucbb02842012-12-01 19:34:16 +01001145 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001146 return NULL;
1147
1148 if (PyUnicode_Check(pystr)) {
Dong-hee Na33f15a12020-03-27 19:59:59 +09001149 rval = scan_once_unicode(self, pystr, idx, &next_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001150 }
1151 else {
1152 PyErr_Format(PyExc_TypeError,
1153 "first argument must be a string, not %.80s",
1154 Py_TYPE(pystr)->tp_name);
1155 return NULL;
1156 }
Dong-hee Na33f15a12020-03-27 19:59:59 +09001157 PyDict_Clear(self->memo);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001158 if (rval == NULL)
1159 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001160 return _build_rval_index_tuple(rval, next_idx);
1161}
1162
1163static PyObject *
1164scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1165{
1166 PyScannerObject *s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001167 PyObject *ctx;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001168 PyObject *strict;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001169 static char *kwlist[] = {"context", NULL};
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001170
1171 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001172 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001173
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001174 s = (PyScannerObject *)type->tp_alloc(type, 0);
1175 if (s == NULL) {
1176 return NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001177 }
1178
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001179 s->memo = PyDict_New();
1180 if (s->memo == NULL)
1181 goto bail;
1182
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001183 /* All of these will fail "gracefully" so we don't need to verify them */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001184 strict = PyObject_GetAttrString(ctx, "strict");
1185 if (strict == NULL)
1186 goto bail;
1187 s->strict = PyObject_IsTrue(strict);
1188 Py_DECREF(strict);
1189 if (s->strict < 0)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001190 goto bail;
1191 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1192 if (s->object_hook == NULL)
1193 goto bail;
1194 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1195 if (s->object_pairs_hook == NULL)
1196 goto bail;
1197 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1198 if (s->parse_float == NULL)
1199 goto bail;
1200 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1201 if (s->parse_int == NULL)
1202 goto bail;
1203 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1204 if (s->parse_constant == NULL)
1205 goto bail;
1206
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001207 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001208
1209bail:
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001210 Py_DECREF(s);
1211 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001212}
1213
1214PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1215
Dong-hee Na33f15a12020-03-27 19:59:59 +09001216static PyType_Slot PyScannerType_slots[] = {
1217 {Py_tp_doc, (void *)scanner_doc},
1218 {Py_tp_dealloc, scanner_dealloc},
1219 {Py_tp_call, scanner_call},
1220 {Py_tp_traverse, scanner_traverse},
1221 {Py_tp_clear, scanner_clear},
1222 {Py_tp_members, scanner_members},
1223 {Py_tp_new, scanner_new},
1224 {0, 0}
1225};
1226
1227static PyType_Spec PyScannerType_spec = {
1228 .name = "_json.Scanner",
1229 .basicsize = sizeof(PyScannerObject),
1230 .itemsize = 0,
1231 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
1232 .slots = PyScannerType_slots,
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001233};
1234
1235static PyObject *
1236encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1237{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001238 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1239
1240 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001241 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001242 PyObject *item_separator;
1243 int sort_keys, skipkeys, allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001244
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001245 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001246 &markers, &defaultfn, &encoder, &indent,
1247 &key_separator, &item_separator,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001248 &sort_keys, &skipkeys, &allow_nan))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001249 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001250
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001251 if (markers != Py_None && !PyDict_Check(markers)) {
1252 PyErr_Format(PyExc_TypeError,
1253 "make_encoder() argument 1 must be dict or None, "
1254 "not %.200s", Py_TYPE(markers)->tp_name);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001255 return NULL;
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001256 }
1257
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001258 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1259 if (s == NULL)
1260 return NULL;
1261
Antoine Pitrou781eba72009-12-08 15:57:31 +00001262 s->markers = markers;
1263 s->defaultfn = defaultfn;
1264 s->encoder = encoder;
1265 s->indent = indent;
1266 s->key_separator = key_separator;
1267 s->item_separator = item_separator;
1268 s->sort_keys = sort_keys;
1269 s->skipkeys = skipkeys;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001270 s->allow_nan = allow_nan;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001271 s->fast_encode = NULL;
1272 if (PyCFunction_Check(s->encoder)) {
1273 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1274 if (f == (PyCFunction)py_encode_basestring_ascii ||
1275 f == (PyCFunction)py_encode_basestring) {
1276 s->fast_encode = f;
1277 }
1278 }
Antoine Pitrou781eba72009-12-08 15:57:31 +00001279
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001280 Py_INCREF(s->markers);
1281 Py_INCREF(s->defaultfn);
1282 Py_INCREF(s->encoder);
1283 Py_INCREF(s->indent);
1284 Py_INCREF(s->key_separator);
1285 Py_INCREF(s->item_separator);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001286 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001287}
1288
1289static PyObject *
Dong-hee Na33f15a12020-03-27 19:59:59 +09001290encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001291{
1292 /* Python callable interface to encode_listencode_obj */
1293 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1294 PyObject *obj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001295 Py_ssize_t indent_level;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001296 _PyAccu acc;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001297 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1298 &obj, &indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001299 return NULL;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001300 if (_PyAccu_Init(&acc))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001301 return NULL;
Dong-hee Na33f15a12020-03-27 19:59:59 +09001302 if (encoder_listencode_obj(self, &acc, obj, indent_level)) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001303 _PyAccu_Destroy(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001304 return NULL;
1305 }
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001306 return _PyAccu_FinishAsList(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001307}
1308
1309static PyObject *
1310_encoded_const(PyObject *obj)
1311{
1312 /* Return the JSON string representation of None, True, False */
1313 if (obj == Py_None) {
Ken Jinb5931f12021-02-02 00:26:56 +08001314 _Py_static_string(PyId_null, "null");
1315 PyObject *s_null = _PyUnicode_FromId(&PyId_null);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001316 if (s_null == NULL) {
Ken Jinb5931f12021-02-02 00:26:56 +08001317 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001318 }
Ken Jinb5931f12021-02-02 00:26:56 +08001319 return Py_NewRef(s_null);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001320 }
1321 else if (obj == Py_True) {
Ken Jinb5931f12021-02-02 00:26:56 +08001322 _Py_static_string(PyId_true, "true");
1323 PyObject *s_true = _PyUnicode_FromId(&PyId_true);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001324 if (s_true == NULL) {
Ken Jinb5931f12021-02-02 00:26:56 +08001325 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001326 }
Ken Jinb5931f12021-02-02 00:26:56 +08001327 return Py_NewRef(s_true);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001328 }
1329 else if (obj == Py_False) {
Ken Jinb5931f12021-02-02 00:26:56 +08001330 _Py_static_string(PyId_false, "false");
1331 PyObject *s_false = _PyUnicode_FromId(&PyId_false);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001332 if (s_false == NULL) {
Ken Jinb5931f12021-02-02 00:26:56 +08001333 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001334 }
Ken Jinb5931f12021-02-02 00:26:56 +08001335 return Py_NewRef(s_false);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001336 }
1337 else {
1338 PyErr_SetString(PyExc_ValueError, "not a const");
1339 return NULL;
1340 }
1341}
1342
1343static PyObject *
1344encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1345{
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001346 /* Return the JSON representation of a PyFloat. */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001347 double i = PyFloat_AS_DOUBLE(obj);
1348 if (!Py_IS_FINITE(i)) {
1349 if (!s->allow_nan) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001350 PyErr_SetString(
1351 PyExc_ValueError,
1352 "Out of range float values are not JSON compliant"
1353 );
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001354 return NULL;
1355 }
1356 if (i > 0) {
1357 return PyUnicode_FromString("Infinity");
1358 }
1359 else if (i < 0) {
1360 return PyUnicode_FromString("-Infinity");
1361 }
1362 else {
1363 return PyUnicode_FromString("NaN");
1364 }
1365 }
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001366 return PyFloat_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001367}
1368
1369static PyObject *
1370encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1371{
1372 /* Return the JSON representation of a string */
Oren Milman2b382dd2017-09-24 12:07:12 +03001373 PyObject *encoded;
1374
1375 if (s->fast_encode) {
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001376 return s->fast_encode(NULL, obj);
Oren Milman2b382dd2017-09-24 12:07:12 +03001377 }
Petr Viktorinffd97532020-02-11 17:46:57 +01001378 encoded = PyObject_CallOneArg(s->encoder, obj);
Oren Milman2b382dd2017-09-24 12:07:12 +03001379 if (encoded != NULL && !PyUnicode_Check(encoded)) {
1380 PyErr_Format(PyExc_TypeError,
1381 "encoder() must return a string, not %.80s",
1382 Py_TYPE(encoded)->tp_name);
1383 Py_DECREF(encoded);
1384 return NULL;
1385 }
1386 return encoded;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001387}
1388
1389static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001390_steal_accumulate(_PyAccu *acc, PyObject *stolen)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001391{
1392 /* Append stolen and then decrement its reference count */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001393 int rval = _PyAccu_Accumulate(acc, stolen);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001394 Py_DECREF(stolen);
1395 return rval;
1396}
1397
1398static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001399encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001400 PyObject *obj, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001401{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001402 /* Encode Python object obj to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001403 PyObject *newobj;
1404 int rv;
1405
1406 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1407 PyObject *cstr = _encoded_const(obj);
1408 if (cstr == NULL)
1409 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001410 return _steal_accumulate(acc, cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001411 }
1412 else if (PyUnicode_Check(obj))
1413 {
1414 PyObject *encoded = encoder_encode_string(s, obj);
1415 if (encoded == NULL)
1416 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001417 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001418 }
1419 else if (PyLong_Check(obj)) {
Serhiy Storchaka96aeaec2019-05-06 22:29:40 +03001420 PyObject *encoded = PyLong_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001421 if (encoded == NULL)
1422 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001423 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001424 }
1425 else if (PyFloat_Check(obj)) {
1426 PyObject *encoded = encoder_encode_float(s, obj);
1427 if (encoded == NULL)
1428 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001429 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001430 }
1431 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001432 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1433 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001434 rv = encoder_listencode_list(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001435 Py_LeaveRecursiveCall();
1436 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001437 }
1438 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001439 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1440 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001441 rv = encoder_listencode_dict(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001442 Py_LeaveRecursiveCall();
1443 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001444 }
1445 else {
1446 PyObject *ident = NULL;
1447 if (s->markers != Py_None) {
1448 int has_key;
1449 ident = PyLong_FromVoidPtr(obj);
1450 if (ident == NULL)
1451 return -1;
1452 has_key = PyDict_Contains(s->markers, ident);
1453 if (has_key) {
1454 if (has_key != -1)
1455 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1456 Py_DECREF(ident);
1457 return -1;
1458 }
1459 if (PyDict_SetItem(s->markers, ident, obj)) {
1460 Py_DECREF(ident);
1461 return -1;
1462 }
1463 }
Petr Viktorinffd97532020-02-11 17:46:57 +01001464 newobj = PyObject_CallOneArg(s->defaultfn, obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001465 if (newobj == NULL) {
1466 Py_XDECREF(ident);
1467 return -1;
1468 }
Ezio Melotti13672652011-05-11 01:02:56 +03001469
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001470 if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
1471 Py_DECREF(newobj);
1472 Py_XDECREF(ident);
Ezio Melotti13672652011-05-11 01:02:56 +03001473 return -1;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001474 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001475 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001476 Py_LeaveRecursiveCall();
1477
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001478 Py_DECREF(newobj);
1479 if (rv) {
1480 Py_XDECREF(ident);
1481 return -1;
1482 }
1483 if (ident != NULL) {
1484 if (PyDict_DelItem(s->markers, ident)) {
1485 Py_XDECREF(ident);
1486 return -1;
1487 }
1488 Py_XDECREF(ident);
1489 }
1490 return rv;
1491 }
1492}
1493
1494static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001495encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001496 PyObject *dct, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001497{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001498 /* Encode Python dict dct a JSON term */
Ken Jinb5931f12021-02-02 00:26:56 +08001499 _Py_static_string(PyId_open_dict, "{");
1500 _Py_static_string(PyId_close_dict, "}");
1501 _Py_static_string(PyId_empty_dict, "{}");
1502 PyObject *open_dict = _PyUnicode_FromId(&PyId_open_dict); // borrowed ref
1503 PyObject *close_dict = _PyUnicode_FromId(&PyId_close_dict); // borrowed ref
1504 PyObject *empty_dict = _PyUnicode_FromId(&PyId_empty_dict); // borrowed ref
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001505 PyObject *kstr = NULL;
1506 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001507 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001508 PyObject *items;
1509 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001510 Py_ssize_t idx;
1511
1512 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
Ken Jinb5931f12021-02-02 00:26:56 +08001513 return -1;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001514 }
Serhiy Storchaka8cbc51a2017-01-13 08:38:15 +02001515 if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001516 return _PyAccu_Accumulate(acc, empty_dict);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001517
1518 if (s->markers != Py_None) {
1519 int has_key;
1520 ident = PyLong_FromVoidPtr(dct);
1521 if (ident == NULL)
1522 goto bail;
1523 has_key = PyDict_Contains(s->markers, ident);
1524 if (has_key) {
1525 if (has_key != -1)
1526 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1527 goto bail;
1528 }
1529 if (PyDict_SetItem(s->markers, ident, dct)) {
1530 goto bail;
1531 }
1532 }
1533
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001534 if (_PyAccu_Accumulate(acc, open_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001535 goto bail;
1536
1537 if (s->indent != Py_None) {
1538 /* TODO: DOES NOT RUN */
1539 indent_level += 1;
1540 /*
1541 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1542 separator = _item_separator + newline_indent
1543 buf += newline_indent
1544 */
1545 }
1546
Benjamin Peterson501182a2015-05-02 22:28:04 -04001547 items = PyMapping_Items(dct);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001548 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001549 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001550 if (s->sort_keys && PyList_Sort(items) < 0) {
1551 Py_DECREF(items);
Benjamin Peterson501182a2015-05-02 22:28:04 -04001552 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001553 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001554 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001555 Py_DECREF(items);
1556 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001557 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001558 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001559 while ((item = PyIter_Next(it)) != NULL) {
1560 PyObject *encoded, *key, *value;
Serhiy Storchakafff9a312017-03-21 08:53:25 +02001561 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001562 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1563 goto bail;
1564 }
1565 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001566 if (PyUnicode_Check(key)) {
1567 Py_INCREF(key);
1568 kstr = key;
1569 }
1570 else if (PyFloat_Check(key)) {
1571 kstr = encoder_encode_float(s, key);
1572 if (kstr == NULL)
1573 goto bail;
1574 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001575 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001576 /* This must come before the PyLong_Check because
1577 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001578 kstr = _encoded_const(key);
1579 if (kstr == NULL)
1580 goto bail;
1581 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001582 else if (PyLong_Check(key)) {
Serhiy Storchaka96aeaec2019-05-06 22:29:40 +03001583 kstr = PyLong_Type.tp_repr(key);
Ethan Furmana4998a72013-08-10 13:01:45 -07001584 if (kstr == NULL) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001585 goto bail;
Ethan Furmana4998a72013-08-10 13:01:45 -07001586 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001587 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001588 else if (s->skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001589 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001590 continue;
1591 }
1592 else {
Serhiy Storchakacfa797c2017-11-25 17:38:20 +02001593 PyErr_Format(PyExc_TypeError,
1594 "keys must be str, int, float, bool or None, "
Victor Stinnerdaa97562020-02-07 03:37:06 +01001595 "not %.100s", Py_TYPE(key)->tp_name);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001596 goto bail;
1597 }
1598
1599 if (idx) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001600 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001601 goto bail;
1602 }
1603
1604 encoded = encoder_encode_string(s, kstr);
1605 Py_CLEAR(kstr);
1606 if (encoded == NULL)
1607 goto bail;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001608 if (_PyAccu_Accumulate(acc, encoded)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001609 Py_DECREF(encoded);
1610 goto bail;
1611 }
1612 Py_DECREF(encoded);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001613 if (_PyAccu_Accumulate(acc, s->key_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001614 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001615
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001616 value = PyTuple_GET_ITEM(item, 1);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001617 if (encoder_listencode_obj(s, acc, value, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001618 goto bail;
1619 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001620 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001621 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001622 if (PyErr_Occurred())
1623 goto bail;
1624 Py_CLEAR(it);
1625
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001626 if (ident != NULL) {
1627 if (PyDict_DelItem(s->markers, ident))
1628 goto bail;
1629 Py_CLEAR(ident);
1630 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001631 /* TODO DOES NOT RUN; dead code
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001632 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001633 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001634
1635 yield '\n' + (' ' * (_indent * _current_indent_level))
1636 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001637 if (_PyAccu_Accumulate(acc, close_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001638 goto bail;
1639 return 0;
1640
1641bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001642 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001643 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001644 Py_XDECREF(kstr);
1645 Py_XDECREF(ident);
1646 return -1;
1647}
1648
1649
1650static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001651encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001652 PyObject *seq, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001653{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001654 /* Encode Python list seq to a JSON term */
Ken Jinb5931f12021-02-02 00:26:56 +08001655 _Py_static_string(PyId_open_array, "[");
1656 _Py_static_string(PyId_close_array, "]");
1657 _Py_static_string(PyId_empty_array, "[]");
1658 PyObject *open_array = _PyUnicode_FromId(&PyId_open_array); // borrowed ref
1659 PyObject *close_array = _PyUnicode_FromId(&PyId_close_array); // borrowed ref
1660 PyObject *empty_array = _PyUnicode_FromId(&PyId_empty_array); // borrowed ref
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001661 PyObject *ident = NULL;
1662 PyObject *s_fast = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001663 Py_ssize_t i;
1664
1665 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
Ken Jinb5931f12021-02-02 00:26:56 +08001666 return -1;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001667 }
1668 ident = NULL;
1669 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1670 if (s_fast == NULL)
1671 return -1;
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001672 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001673 Py_DECREF(s_fast);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001674 return _PyAccu_Accumulate(acc, empty_array);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001675 }
1676
1677 if (s->markers != Py_None) {
1678 int has_key;
1679 ident = PyLong_FromVoidPtr(seq);
1680 if (ident == NULL)
1681 goto bail;
1682 has_key = PyDict_Contains(s->markers, ident);
1683 if (has_key) {
1684 if (has_key != -1)
1685 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1686 goto bail;
1687 }
1688 if (PyDict_SetItem(s->markers, ident, seq)) {
1689 goto bail;
1690 }
1691 }
1692
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001693 if (_PyAccu_Accumulate(acc, open_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001694 goto bail;
1695 if (s->indent != Py_None) {
1696 /* TODO: DOES NOT RUN */
1697 indent_level += 1;
1698 /*
1699 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1700 separator = _item_separator + newline_indent
1701 buf += newline_indent
1702 */
1703 }
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001704 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1705 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001706 if (i) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001707 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001708 goto bail;
1709 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001710 if (encoder_listencode_obj(s, acc, obj, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001711 goto bail;
1712 }
1713 if (ident != NULL) {
1714 if (PyDict_DelItem(s->markers, ident))
1715 goto bail;
1716 Py_CLEAR(ident);
1717 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001718
1719 /* TODO: DOES NOT RUN
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001720 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001721 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001722
1723 yield '\n' + (' ' * (_indent * _current_indent_level))
1724 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001725 if (_PyAccu_Accumulate(acc, close_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001726 goto bail;
1727 Py_DECREF(s_fast);
1728 return 0;
1729
1730bail:
1731 Py_XDECREF(ident);
1732 Py_DECREF(s_fast);
1733 return -1;
1734}
1735
1736static void
1737encoder_dealloc(PyObject *self)
1738{
Dong-hee Na33f15a12020-03-27 19:59:59 +09001739 PyTypeObject *tp = Py_TYPE(self);
INADA Naokia6296d32017-08-24 14:55:17 +09001740 /* bpo-31095: UnTrack is needed before calling any callbacks */
1741 PyObject_GC_UnTrack(self);
Dong-hee Na33f15a12020-03-27 19:59:59 +09001742 encoder_clear((PyEncoderObject *)self);
1743 tp->tp_free(self);
1744 Py_DECREF(tp);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001745}
1746
1747static int
Dong-hee Na33f15a12020-03-27 19:59:59 +09001748encoder_traverse(PyEncoderObject *self, visitproc visit, void *arg)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001749{
Pablo Galindo1cf15af2020-05-27 10:03:38 +01001750 Py_VISIT(Py_TYPE(self));
Dong-hee Na33f15a12020-03-27 19:59:59 +09001751 Py_VISIT(self->markers);
1752 Py_VISIT(self->defaultfn);
1753 Py_VISIT(self->encoder);
1754 Py_VISIT(self->indent);
1755 Py_VISIT(self->key_separator);
1756 Py_VISIT(self->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001757 return 0;
1758}
1759
1760static int
Dong-hee Na33f15a12020-03-27 19:59:59 +09001761encoder_clear(PyEncoderObject *self)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001762{
1763 /* Deallocate Encoder */
Dong-hee Na33f15a12020-03-27 19:59:59 +09001764 Py_CLEAR(self->markers);
1765 Py_CLEAR(self->defaultfn);
1766 Py_CLEAR(self->encoder);
1767 Py_CLEAR(self->indent);
1768 Py_CLEAR(self->key_separator);
1769 Py_CLEAR(self->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001770 return 0;
1771}
1772
1773PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1774
Dong-hee Na33f15a12020-03-27 19:59:59 +09001775static PyType_Slot PyEncoderType_slots[] = {
1776 {Py_tp_doc, (void *)encoder_doc},
1777 {Py_tp_dealloc, encoder_dealloc},
1778 {Py_tp_call, encoder_call},
1779 {Py_tp_traverse, encoder_traverse},
1780 {Py_tp_clear, encoder_clear},
1781 {Py_tp_members, encoder_members},
1782 {Py_tp_new, encoder_new},
1783 {0, 0}
1784};
1785
1786static PyType_Spec PyEncoderType_spec = {
1787 .name = "_json.Encoder",
1788 .basicsize = sizeof(PyEncoderObject),
1789 .itemsize = 0,
1790 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
1791 .slots = PyEncoderType_slots
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001792};
1793
1794static PyMethodDef speedups_methods[] = {
1795 {"encode_basestring_ascii",
1796 (PyCFunction)py_encode_basestring_ascii,
1797 METH_O,
1798 pydoc_encode_basestring_ascii},
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001799 {"encode_basestring",
1800 (PyCFunction)py_encode_basestring,
1801 METH_O,
1802 pydoc_encode_basestring},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001803 {"scanstring",
1804 (PyCFunction)py_scanstring,
1805 METH_VARARGS,
1806 pydoc_scanstring},
Christian Heimes90540002008-05-08 14:29:10 +00001807 {NULL, NULL, 0, NULL}
1808};
1809
1810PyDoc_STRVAR(module_doc,
1811"json speedups\n");
1812
Hai Shied154c32020-01-16 00:32:51 +08001813static int
1814_json_exec(PyObject *module)
1815{
Dong-hee Na33f15a12020-03-27 19:59:59 +09001816 _jsonmodulestate *state = get_json_state(module);
1817
1818 state->PyScannerType = PyType_FromSpec(&PyScannerType_spec);
1819 if (state->PyScannerType == NULL) {
Hai Shied154c32020-01-16 00:32:51 +08001820 return -1;
1821 }
Dong-hee Na33f15a12020-03-27 19:59:59 +09001822 Py_INCREF(state->PyScannerType);
1823 if (PyModule_AddObject(module, "make_scanner", state->PyScannerType) < 0) {
Hai Shidcb04d92020-04-09 23:10:29 +08001824 Py_DECREF(state->PyScannerType);
Hai Shied154c32020-01-16 00:32:51 +08001825 return -1;
1826 }
Dong-hee Na33f15a12020-03-27 19:59:59 +09001827
1828 state->PyEncoderType = PyType_FromSpec(&PyEncoderType_spec);
1829 if (state->PyEncoderType == NULL) {
Hai Shied154c32020-01-16 00:32:51 +08001830 return -1;
1831 }
Dong-hee Na33f15a12020-03-27 19:59:59 +09001832 Py_INCREF(state->PyEncoderType);
1833 if (PyModule_AddObject(module, "make_encoder", state->PyEncoderType) < 0) {
Hai Shidcb04d92020-04-09 23:10:29 +08001834 Py_DECREF(state->PyEncoderType);
Hai Shied154c32020-01-16 00:32:51 +08001835 return -1;
1836 }
Dong-hee Na33f15a12020-03-27 19:59:59 +09001837
Hai Shied154c32020-01-16 00:32:51 +08001838 return 0;
1839}
1840
Dong-hee Na33f15a12020-03-27 19:59:59 +09001841static int
1842_jsonmodule_traverse(PyObject *module, visitproc visit, void *arg)
1843{
1844 _jsonmodulestate *state = get_json_state(module);
1845 Py_VISIT(state->PyScannerType);
1846 Py_VISIT(state->PyEncoderType);
1847 return 0;
1848}
1849
1850static int
1851_jsonmodule_clear(PyObject *module)
1852{
1853 _jsonmodulestate *state = get_json_state(module);
1854 Py_CLEAR(state->PyScannerType);
1855 Py_CLEAR(state->PyEncoderType);
1856 return 0;
1857}
1858
1859static void
1860_jsonmodule_free(void *module)
1861{
1862 _jsonmodule_clear((PyObject *)module);
1863}
1864
Hai Shied154c32020-01-16 00:32:51 +08001865static PyModuleDef_Slot _json_slots[] = {
1866 {Py_mod_exec, _json_exec},
1867 {0, NULL}
1868};
1869
Martin v. Löwis1a214512008-06-11 05:26:20 +00001870static struct PyModuleDef jsonmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001871 PyModuleDef_HEAD_INIT,
1872 "_json",
1873 module_doc,
Dong-hee Na33f15a12020-03-27 19:59:59 +09001874 sizeof(_jsonmodulestate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001875 speedups_methods,
Hai Shied154c32020-01-16 00:32:51 +08001876 _json_slots,
Dong-hee Na33f15a12020-03-27 19:59:59 +09001877 _jsonmodule_traverse,
1878 _jsonmodule_clear,
1879 _jsonmodule_free,
Martin v. Löwis1a214512008-06-11 05:26:20 +00001880};
1881
Victor Stinnerf024d262015-03-17 17:48:27 +01001882PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001883PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00001884{
Hai Shied154c32020-01-16 00:32:51 +08001885 return PyModuleDef_Init(&jsonmodule);
Christian Heimes90540002008-05-08 14:29:10 +00001886}