blob: 8117d1601bd3f86079950af2c97ab7f0c9e5a77b [file] [log] [blame]
/* JSON accelerator C extension: _json module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
 * platforms. */
Eric Snow2ebc5ce2017-09-07 23:51:28 -06006
Victor Stinner5c75f372019-04-17 23:02:26 +02007#if !defined(Py_BUILD_CORE_BUILTIN) && !defined(Py_BUILD_CORE_MODULE)
8# error "Py_BUILD_CORE_BUILTIN or Py_BUILD_CORE_MODULE must be defined"
Eric Snow2ebc5ce2017-09-07 23:51:28 -06009#endif
10
Christian Heimes90540002008-05-08 14:29:10 +000011#include "Python.h"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000012#include "structmember.h"
Victor Stinnere281f7d2018-11-01 02:30:36 +010013#include "pycore_accu.h"
Antoine Pitroud0acb412012-03-22 14:42:18 +010014
Dong-hee Na33f15a12020-03-27 19:59:59 +090015typedef struct {
16 PyObject *PyScannerType;
17 PyObject *PyEncoderType;
18} _jsonmodulestate;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000019
Dong-hee Na33f15a12020-03-27 19:59:59 +090020static inline _jsonmodulestate*
21get_json_state(PyObject *module)
22{
23 void *state = PyModule_GetState(module);
24 assert(state != NULL);
25 return (_jsonmodulestate *)state;
26}
27
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000028
29typedef struct _PyScannerObject {
30 PyObject_HEAD
Segev Finer541bd282017-07-13 08:52:08 +030031 signed char strict;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000032 PyObject *object_hook;
33 PyObject *object_pairs_hook;
34 PyObject *parse_float;
35 PyObject *parse_int;
36 PyObject *parse_constant;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +000037 PyObject *memo;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000038} PyScannerObject;
39
40static PyMemberDef scanner_members[] = {
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030041 {"strict", T_BOOL, offsetof(PyScannerObject, strict), READONLY, "strict"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000042 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
43 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
44 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
45 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
46 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
47 {NULL}
48};
49
50typedef struct _PyEncoderObject {
51 PyObject_HEAD
52 PyObject *markers;
53 PyObject *defaultfn;
54 PyObject *encoder;
55 PyObject *indent;
56 PyObject *key_separator;
57 PyObject *item_separator;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030058 char sort_keys;
59 char skipkeys;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000060 int allow_nan;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030061 PyCFunction fast_encode;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000062} PyEncoderObject;
63
64static PyMemberDef encoder_members[] = {
65 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
66 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
67 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
68 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
69 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
70 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030071 {"sort_keys", T_BOOL, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
72 {"skipkeys", T_BOOL, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000073 {NULL}
74};
75
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020076/* Forward decls */
77
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000078static PyObject *
79ascii_escape_unicode(PyObject *pystr);
80static PyObject *
Victor Stinnerfbbfcce2019-11-05 11:44:28 +010081py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000082void init_json(void);
83static PyObject *
84scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
85static PyObject *
86_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
87static PyObject *
88scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000089static void
90scanner_dealloc(PyObject *self);
91static int
Dong-hee Na33f15a12020-03-27 19:59:59 +090092scanner_clear(PyScannerObject *self);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000093static PyObject *
94encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000095static void
96encoder_dealloc(PyObject *self);
97static int
Dong-hee Na33f15a12020-03-27 19:59:59 +090098encoder_clear(PyEncoderObject *self);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000099static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200100encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000101static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200102encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000103static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200104encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000105static PyObject *
Hirokazu Yamamotofecf5d12009-05-02 15:55:19 +0000106_encoded_const(PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000107static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200108raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000109static PyObject *
110encoder_encode_string(PyEncoderObject *s, PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000111static PyObject *
112encoder_encode_float(PyEncoderObject *s, PyObject *obj);
113
/* S_CHAR(c): true when c is printable ASCII (' '..'~') that can be copied
   into a JSON string verbatim, i.e. it is neither a backslash nor a double
   quote.  The argument is parenthesized so expression arguments parse
   correctly; note that it is still evaluated more than once. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* IS_WHITESPACE(c): true for space, tab, line feed and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
Christian Heimes90540002008-05-08 14:29:10 +0000116
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000117static Py_ssize_t
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200118ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000119{
120 /* Escape unicode code point c to ASCII escape sequences
121 in char *output. output must have at least 12 bytes unused to
122 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000123 output[chars++] = '\\';
124 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000125 case '\\': output[chars++] = c; break;
126 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000127 case '\b': output[chars++] = 'b'; break;
128 case '\f': output[chars++] = 'f'; break;
129 case '\n': output[chars++] = 'n'; break;
130 case '\r': output[chars++] = 'r'; break;
131 case '\t': output[chars++] = 't'; break;
132 default:
Christian Heimes90540002008-05-08 14:29:10 +0000133 if (c >= 0x10000) {
134 /* UTF-16 surrogate pair */
Victor Stinner76df43d2012-10-30 01:42:39 +0100135 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000136 output[chars++] = 'u';
Victor Stinner76df43d2012-10-30 01:42:39 +0100137 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
138 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
139 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
140 output[chars++] = Py_hexdigits[(v ) & 0xf];
141 c = Py_UNICODE_LOW_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000142 output[chars++] = '\\';
143 }
Christian Heimes90540002008-05-08 14:29:10 +0000144 output[chars++] = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200145 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
146 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
147 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
148 output[chars++] = Py_hexdigits[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000149 }
150 return chars;
151}
152
153static PyObject *
154ascii_escape_unicode(PyObject *pystr)
155{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000156 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000157 Py_ssize_t i;
158 Py_ssize_t input_chars;
159 Py_ssize_t output_size;
160 Py_ssize_t chars;
161 PyObject *rval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200162 void *input;
163 unsigned char *output;
164 int kind;
Christian Heimes90540002008-05-08 14:29:10 +0000165
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200166 if (PyUnicode_READY(pystr) == -1)
167 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000168
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200169 input_chars = PyUnicode_GET_LENGTH(pystr);
170 input = PyUnicode_DATA(pystr);
171 kind = PyUnicode_KIND(pystr);
172
173 /* Compute the output size */
174 for (i = 0, output_size = 2; i < input_chars; i++) {
175 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500176 Py_ssize_t d;
177 if (S_CHAR(c)) {
178 d = 1;
179 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200180 else {
181 switch(c) {
Victor Stinnerd9c06312011-10-11 21:56:19 +0200182 case '\\': case '"': case '\b': case '\f':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200183 case '\n': case '\r': case '\t':
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500184 d = 2; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200185 default:
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500186 d = c >= 0x10000 ? 12 : 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200187 }
188 }
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500189 if (output_size > PY_SSIZE_T_MAX - d) {
190 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
191 return NULL;
192 }
193 output_size += d;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200194 }
195
196 rval = PyUnicode_New(output_size, 127);
Christian Heimes90540002008-05-08 14:29:10 +0000197 if (rval == NULL) {
198 return NULL;
199 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200200 output = PyUnicode_1BYTE_DATA(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000201 chars = 0;
202 output[chars++] = '"';
203 for (i = 0; i < input_chars; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200204 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Christian Heimes90540002008-05-08 14:29:10 +0000205 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000206 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000207 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000208 else {
209 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000210 }
Christian Heimes90540002008-05-08 14:29:10 +0000211 }
212 output[chars++] = '"';
Christian Heimesf402e922013-01-03 09:21:55 +0100213#ifdef Py_DEBUG
Victor Stinner8f825062012-04-27 13:55:39 +0200214 assert(_PyUnicode_CheckConsistency(rval, 1));
Christian Heimesf402e922013-01-03 09:21:55 +0100215#endif
Christian Heimes90540002008-05-08 14:29:10 +0000216 return rval;
217}
218
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100219static PyObject *
220escape_unicode(PyObject *pystr)
221{
222 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
223 Py_ssize_t i;
224 Py_ssize_t input_chars;
225 Py_ssize_t output_size;
226 Py_ssize_t chars;
227 PyObject *rval;
228 void *input;
229 int kind;
230 Py_UCS4 maxchar;
231
232 if (PyUnicode_READY(pystr) == -1)
233 return NULL;
234
235 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
236 input_chars = PyUnicode_GET_LENGTH(pystr);
237 input = PyUnicode_DATA(pystr);
238 kind = PyUnicode_KIND(pystr);
239
240 /* Compute the output size */
241 for (i = 0, output_size = 2; i < input_chars; i++) {
242 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500243 Py_ssize_t d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100244 switch (c) {
245 case '\\': case '"': case '\b': case '\f':
246 case '\n': case '\r': case '\t':
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500247 d = 2;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100248 break;
249 default:
250 if (c <= 0x1f)
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500251 d = 6;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100252 else
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500253 d = 1;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100254 }
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500255 if (output_size > PY_SSIZE_T_MAX - d) {
256 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
257 return NULL;
258 }
259 output_size += d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100260 }
261
262 rval = PyUnicode_New(output_size, maxchar);
263 if (rval == NULL)
264 return NULL;
265
266 kind = PyUnicode_KIND(rval);
267
268#define ENCODE_OUTPUT do { \
269 chars = 0; \
270 output[chars++] = '"'; \
271 for (i = 0; i < input_chars; i++) { \
272 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
273 switch (c) { \
274 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
275 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
276 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
277 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
278 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
279 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
280 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
281 default: \
282 if (c <= 0x1f) { \
283 output[chars++] = '\\'; \
284 output[chars++] = 'u'; \
285 output[chars++] = '0'; \
286 output[chars++] = '0'; \
287 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
288 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
289 } else { \
290 output[chars++] = c; \
291 } \
292 } \
293 } \
294 output[chars++] = '"'; \
295 } while (0)
296
297 if (kind == PyUnicode_1BYTE_KIND) {
298 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
299 ENCODE_OUTPUT;
300 } else if (kind == PyUnicode_2BYTE_KIND) {
301 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
302 ENCODE_OUTPUT;
303 } else {
304 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
305 assert(kind == PyUnicode_4BYTE_KIND);
306 ENCODE_OUTPUT;
307 }
308#undef ENCODE_OUTPUT
309
310#ifdef Py_DEBUG
311 assert(_PyUnicode_CheckConsistency(rval, 1));
312#endif
313 return rval;
314}
315
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000316static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200317raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
Christian Heimes90540002008-05-08 14:29:10 +0000318{
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200319 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
320 static PyObject *JSONDecodeError = NULL;
321 PyObject *exc;
322 if (JSONDecodeError == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000323 PyObject *decoder = PyImport_ImportModule("json.decoder");
324 if (decoder == NULL)
325 return;
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200326 JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000327 Py_DECREF(decoder);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200328 if (JSONDecodeError == NULL)
Christian Heimes90540002008-05-08 14:29:10 +0000329 return;
Christian Heimes90540002008-05-08 14:29:10 +0000330 }
Victor Stinner4c381542016-12-09 00:33:39 +0100331 exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200332 if (exc) {
333 PyErr_SetObject(JSONDecodeError, exc);
334 Py_DECREF(exc);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000335 }
Christian Heimes90540002008-05-08 14:29:10 +0000336}
337
Ezio Melotti37623ab2013-01-03 08:44:15 +0200338static void
339raise_stop_iteration(Py_ssize_t idx)
340{
341 PyObject *value = PyLong_FromSsize_t(idx);
342 if (value != NULL) {
343 PyErr_SetObject(PyExc_StopIteration, value);
344 Py_DECREF(value);
345 }
346}
347
Christian Heimes90540002008-05-08 14:29:10 +0000348static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000349_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
350 /* return (rval, idx) tuple, stealing reference to rval */
351 PyObject *tpl;
352 PyObject *pyidx;
353 /*
354 steal a reference to rval, returns (rval, idx)
355 */
356 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000357 return NULL;
358 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000359 pyidx = PyLong_FromSsize_t(idx);
360 if (pyidx == NULL) {
361 Py_DECREF(rval);
362 return NULL;
363 }
364 tpl = PyTuple_New(2);
365 if (tpl == NULL) {
366 Py_DECREF(pyidx);
367 Py_DECREF(rval);
368 return NULL;
369 }
370 PyTuple_SET_ITEM(tpl, 0, rval);
371 PyTuple_SET_ITEM(tpl, 1, pyidx);
372 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000373}
374
375static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000376scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000377{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000378 /* Read the JSON string from PyUnicode pystr.
379 end is the index of the first character after the quote.
380 if strict is zero then literal control characters are allowed
381 *next_end_ptr is a return-by-reference index of the character
382 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000383
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000384 Return value is a new PyUnicode
385 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000386 PyObject *rval = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200387 Py_ssize_t len;
Christian Heimes90540002008-05-08 14:29:10 +0000388 Py_ssize_t begin = end - 1;
Brett Cannonb94767f2011-02-22 20:15:44 +0000389 Py_ssize_t next /* = begin */;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200390 const void *buf;
391 int kind;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000392
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200393 if (PyUnicode_READY(pystr) == -1)
394 return 0;
395
Inada Naoki9c110292019-10-17 16:12:41 +0900396 _PyUnicodeWriter writer;
397 _PyUnicodeWriter_Init(&writer);
398 writer.overallocate = 1;
399
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200400 len = PyUnicode_GET_LENGTH(pystr);
401 buf = PyUnicode_DATA(pystr);
402 kind = PyUnicode_KIND(pystr);
403
Ezio Melotti37623ab2013-01-03 08:44:15 +0200404 if (end < 0 || len < end) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000405 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
406 goto bail;
407 }
Christian Heimes90540002008-05-08 14:29:10 +0000408 while (1) {
409 /* Find the end of the string or the next escape */
Inada Naoki2a570af2019-08-08 17:57:10 +0900410 Py_UCS4 c;
411 {
412 // Use tight scope variable to help register allocation.
413 Py_UCS4 d = 0;
414 for (next = end; next < len; next++) {
415 d = PyUnicode_READ(kind, buf, next);
416 if (d == '"' || d == '\\') {
417 break;
418 }
419 if (d <= 0x1f && strict) {
420 raise_errmsg("Invalid control character at", pystr, next);
421 goto bail;
422 }
Christian Heimes90540002008-05-08 14:29:10 +0000423 }
Inada Naoki2a570af2019-08-08 17:57:10 +0900424 c = d;
Christian Heimes90540002008-05-08 14:29:10 +0000425 }
Inada Naoki9c110292019-10-17 16:12:41 +0900426
427 if (c == '"') {
428 // Fast path for simple case.
429 if (writer.buffer == NULL) {
430 PyObject *ret = PyUnicode_Substring(pystr, end, next);
431 if (ret == NULL) {
432 goto bail;
433 }
434 *next_end_ptr = next + 1;;
435 return ret;
436 }
437 }
438 else if (c != '\\') {
Christian Heimes90540002008-05-08 14:29:10 +0000439 raise_errmsg("Unterminated string starting at", pystr, begin);
440 goto bail;
441 }
Inada Naoki9c110292019-10-17 16:12:41 +0900442
Christian Heimes90540002008-05-08 14:29:10 +0000443 /* Pick up this chunk if it's not zero length */
444 if (next != end) {
Inada Naoki9c110292019-10-17 16:12:41 +0900445 if (_PyUnicodeWriter_WriteSubstring(&writer, pystr, end, next) < 0) {
Christian Heimes90540002008-05-08 14:29:10 +0000446 goto bail;
447 }
Christian Heimes90540002008-05-08 14:29:10 +0000448 }
449 next++;
450 if (c == '"') {
451 end = next;
452 break;
453 }
454 if (next == len) {
455 raise_errmsg("Unterminated string starting at", pystr, begin);
456 goto bail;
457 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200458 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000459 if (c != 'u') {
460 /* Non-unicode backslash escapes */
461 end = next + 1;
462 switch (c) {
463 case '"': break;
464 case '\\': break;
465 case '/': break;
466 case 'b': c = '\b'; break;
467 case 'f': c = '\f'; break;
468 case 'n': c = '\n'; break;
469 case 'r': c = '\r'; break;
470 case 't': c = '\t'; break;
471 default: c = 0;
472 }
473 if (c == 0) {
474 raise_errmsg("Invalid \\escape", pystr, end - 2);
475 goto bail;
476 }
477 }
478 else {
479 c = 0;
480 next++;
481 end = next + 4;
482 if (end >= len) {
483 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
484 goto bail;
485 }
486 /* Decode 4 hex digits */
487 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200488 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000489 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000490 switch (digit) {
491 case '0': case '1': case '2': case '3': case '4':
492 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000493 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000494 case 'a': case 'b': case 'c': case 'd': case 'e':
495 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000496 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000497 case 'A': case 'B': case 'C': case 'D': case 'E':
498 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000499 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000500 default:
501 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
502 goto bail;
503 }
504 }
Christian Heimes90540002008-05-08 14:29:10 +0000505 /* Surrogate pair */
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200506 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
507 PyUnicode_READ(kind, buf, next++) == '\\' &&
508 PyUnicode_READ(kind, buf, next++) == 'u') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200509 Py_UCS4 c2 = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000510 end += 6;
511 /* Decode 4 hex digits */
512 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200513 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000514 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000515 switch (digit) {
516 case '0': case '1': case '2': case '3': case '4':
517 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000518 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000519 case 'a': case 'b': case 'c': case 'd': case 'e':
520 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000521 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000522 case 'A': case 'B': case 'C': case 'D': case 'E':
523 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000524 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000525 default:
526 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
527 goto bail;
528 }
529 }
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200530 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
531 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
532 else
533 end -= 6;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000534 }
Christian Heimes90540002008-05-08 14:29:10 +0000535 }
Inada Naoki9c110292019-10-17 16:12:41 +0900536 if (_PyUnicodeWriter_WriteChar(&writer, c) < 0) {
Christian Heimes90540002008-05-08 14:29:10 +0000537 goto bail;
538 }
Christian Heimes90540002008-05-08 14:29:10 +0000539 }
540
Inada Naoki9c110292019-10-17 16:12:41 +0900541 rval = _PyUnicodeWriter_Finish(&writer);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000542 *next_end_ptr = end;
543 return rval;
Inada Naoki9c110292019-10-17 16:12:41 +0900544
Christian Heimes90540002008-05-08 14:29:10 +0000545bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000546 *next_end_ptr = -1;
Inada Naoki9c110292019-10-17 16:12:41 +0900547 _PyUnicodeWriter_Dealloc(&writer);
Christian Heimes90540002008-05-08 14:29:10 +0000548 return NULL;
549}
550
551PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000552 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000553 "\n"
554 "Scan the string s for a JSON string. End is the index of the\n"
555 "character in s after the quote that started the JSON string.\n"
556 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
557 "on attempt to decode an invalid string. If strict is False then literal\n"
558 "control characters are allowed in the string.\n"
559 "\n"
560 "Returns a tuple of the decoded string and the index of the character in s\n"
561 "after the end quote."
562);
Christian Heimes90540002008-05-08 14:29:10 +0000563
564static PyObject *
Victor Stinnerfbbfcce2019-11-05 11:44:28 +0100565py_scanstring(PyObject* Py_UNUSED(self), PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000566{
567 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000568 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000569 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000570 Py_ssize_t next_end = -1;
571 int strict = 1;
Antoine Pitroucbb02842012-12-01 19:34:16 +0100572 if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000573 return NULL;
574 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000575 if (PyUnicode_Check(pystr)) {
576 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000577 }
578 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000579 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000580 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000581 Py_TYPE(pystr)->tp_name);
582 return NULL;
583 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000584 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000585}
586
587PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000588 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000589 "\n"
590 "Return an ASCII-only JSON representation of a Python string"
591);
Christian Heimes90540002008-05-08 14:29:10 +0000592
593static PyObject *
Victor Stinnerfbbfcce2019-11-05 11:44:28 +0100594py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000595{
596 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000597 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000598 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000599 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000600 rval = ascii_escape_unicode(pystr);
601 }
602 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000603 PyErr_Format(PyExc_TypeError,
604 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000605 Py_TYPE(pystr)->tp_name);
606 return NULL;
607 }
Christian Heimes90540002008-05-08 14:29:10 +0000608 return rval;
609}
610
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100611
612PyDoc_STRVAR(pydoc_encode_basestring,
613 "encode_basestring(string) -> string\n"
614 "\n"
615 "Return a JSON representation of a Python string"
616);
617
618static PyObject *
Victor Stinnerfbbfcce2019-11-05 11:44:28 +0100619py_encode_basestring(PyObject* Py_UNUSED(self), PyObject *pystr)
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100620{
621 PyObject *rval;
622 /* Return a JSON representation of a Python string */
623 /* METH_O */
624 if (PyUnicode_Check(pystr)) {
625 rval = escape_unicode(pystr);
626 }
627 else {
628 PyErr_Format(PyExc_TypeError,
629 "first argument must be a string, not %.80s",
630 Py_TYPE(pystr)->tp_name);
631 return NULL;
632 }
633 return rval;
634}
635
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000636static void
637scanner_dealloc(PyObject *self)
638{
Dong-hee Na33f15a12020-03-27 19:59:59 +0900639 PyTypeObject *tp = Py_TYPE(self);
INADA Naokia6296d32017-08-24 14:55:17 +0900640 /* bpo-31095: UnTrack is needed before calling any callbacks */
641 PyObject_GC_UnTrack(self);
Dong-hee Na33f15a12020-03-27 19:59:59 +0900642 scanner_clear((PyScannerObject *)self);
643 tp->tp_free(self);
644 Py_DECREF(tp);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000645}
646
647static int
Dong-hee Na33f15a12020-03-27 19:59:59 +0900648scanner_traverse(PyScannerObject *self, visitproc visit, void *arg)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000649{
Dong-hee Na33f15a12020-03-27 19:59:59 +0900650 Py_VISIT(self->object_hook);
651 Py_VISIT(self->object_pairs_hook);
652 Py_VISIT(self->parse_float);
653 Py_VISIT(self->parse_int);
654 Py_VISIT(self->parse_constant);
Hai Shib7093022020-04-05 03:24:16 +0800655 Py_VISIT(self->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000656 return 0;
657}
658
659static int
Dong-hee Na33f15a12020-03-27 19:59:59 +0900660scanner_clear(PyScannerObject *self)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000661{
Dong-hee Na33f15a12020-03-27 19:59:59 +0900662 Py_CLEAR(self->object_hook);
663 Py_CLEAR(self->object_pairs_hook);
664 Py_CLEAR(self->parse_float);
665 Py_CLEAR(self->parse_int);
666 Py_CLEAR(self->parse_constant);
667 Py_CLEAR(self->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000668 return 0;
669}
670
671static PyObject *
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300672_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
673{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000674 /* Read a JSON object from PyUnicode pystr.
675 idx is the index of the first character after the opening curly brace.
676 *next_idx_ptr is a return-by-reference index to the first character after
677 the closing curly brace.
678
679 Returns a new PyObject (usually a dict, but object_hook can change that)
680 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200681 void *str;
682 int kind;
683 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000684 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000685 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000686 PyObject *key = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000687 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000688 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000689
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200690 if (PyUnicode_READY(pystr) == -1)
691 return NULL;
692
693 str = PyUnicode_DATA(pystr);
694 kind = PyUnicode_KIND(pystr);
695 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
696
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000697 if (has_pairs_hook)
698 rval = PyList_New(0);
699 else
700 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000701 if (rval == NULL)
702 return NULL;
703
704 /* skip whitespace after { */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200705 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000706
707 /* only loop if the object is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200708 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
709 while (1) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000710 PyObject *memokey;
711
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000712 /* read key */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200713 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200714 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000715 goto bail;
716 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300717 key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000718 if (key == NULL)
719 goto bail;
Inada Naoki2a570af2019-08-08 17:57:10 +0900720 memokey = PyDict_SetDefault(s->memo, key, key);
721 if (memokey == NULL) {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200722 goto bail;
723 }
Inada Naoki2a570af2019-08-08 17:57:10 +0900724 Py_INCREF(memokey);
725 Py_DECREF(key);
726 key = memokey;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000727 idx = next_idx;
728
729 /* skip whitespace between key and : delimiter, read :, skip whitespace */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200730 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
731 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200732 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000733 goto bail;
734 }
735 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200736 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000737
738 /* read any JSON term */
739 val = scan_once_unicode(s, pystr, idx, &next_idx);
740 if (val == NULL)
741 goto bail;
742
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000743 if (has_pairs_hook) {
744 PyObject *item = PyTuple_Pack(2, key, val);
745 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000746 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000747 Py_CLEAR(key);
748 Py_CLEAR(val);
749 if (PyList_Append(rval, item) == -1) {
750 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000751 goto bail;
752 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000753 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000754 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000755 else {
756 if (PyDict_SetItem(rval, key, val) < 0)
757 goto bail;
758 Py_CLEAR(key);
759 Py_CLEAR(val);
760 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000761 idx = next_idx;
762
763 /* skip whitespace before } or , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200764 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000765
766 /* bail if the object is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200767 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000768 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200769 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200770 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000771 goto bail;
772 }
773 idx++;
774
775 /* skip whitespace after , delimiter */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200776 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000777 }
778 }
779
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000780 *next_idx_ptr = idx + 1;
781
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000782 if (has_pairs_hook) {
Petr Viktorinffd97532020-02-11 17:46:57 +0100783 val = PyObject_CallOneArg(s->object_pairs_hook, rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000784 Py_DECREF(rval);
785 return val;
786 }
787
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000788 /* if object_hook is not None: rval = object_hook(rval) */
789 if (s->object_hook != Py_None) {
Petr Viktorinffd97532020-02-11 17:46:57 +0100790 val = PyObject_CallOneArg(s->object_hook, rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000791 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000792 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000793 }
794 return rval;
795bail:
796 Py_XDECREF(key);
797 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000798 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000799 return NULL;
800}
801
802static PyObject *
803_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200804 /* Read a JSON array from PyUnicode pystr.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000805 idx is the index of the first character after the opening brace.
806 *next_idx_ptr is a return-by-reference index to the first character after
807 the closing brace.
808
809 Returns a new PyList
810 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200811 void *str;
812 int kind;
813 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000814 PyObject *val = NULL;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200815 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000816 Py_ssize_t next_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000817
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200818 if (PyUnicode_READY(pystr) == -1)
819 return NULL;
820
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200821 rval = PyList_New(0);
822 if (rval == NULL)
823 return NULL;
824
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200825 str = PyUnicode_DATA(pystr);
826 kind = PyUnicode_KIND(pystr);
827 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
828
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000829 /* skip whitespace after [ */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200830 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000831
832 /* only loop if the array is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200833 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
834 while (1) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000835
836 /* read any JSON term */
837 val = scan_once_unicode(s, pystr, idx, &next_idx);
838 if (val == NULL)
839 goto bail;
840
841 if (PyList_Append(rval, val) == -1)
842 goto bail;
843
844 Py_CLEAR(val);
845 idx = next_idx;
846
847 /* skip whitespace between term and , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200848 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000849
850 /* bail if the array is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200851 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000852 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200853 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200854 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000855 goto bail;
856 }
857 idx++;
858
859 /* skip whitespace after , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200860 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000861 }
862 }
863
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200864 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
865 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200866 raise_errmsg("Expecting value", pystr, end_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000867 goto bail;
868 }
869 *next_idx_ptr = idx + 1;
870 return rval;
871bail:
872 Py_XDECREF(val);
873 Py_DECREF(rval);
874 return NULL;
875}
876
877static PyObject *
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200878_parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
879 /* Read a JSON constant.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000880 constant is the constant string that was found
881 ("NaN", "Infinity", "-Infinity").
882 idx is the index of the first character of the constant
883 *next_idx_ptr is a return-by-reference index to the first character after
884 the constant.
885
886 Returns the result of parse_constant
887 */
888 PyObject *cstr;
889 PyObject *rval;
890 /* constant is "NaN", "Infinity", or "-Infinity" */
891 cstr = PyUnicode_InternFromString(constant);
892 if (cstr == NULL)
893 return NULL;
894
895 /* rval = parse_constant(constant) */
Petr Viktorinffd97532020-02-11 17:46:57 +0100896 rval = PyObject_CallOneArg(s->parse_constant, cstr);
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200897 idx += PyUnicode_GET_LENGTH(cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000898 Py_DECREF(cstr);
899 *next_idx_ptr = idx;
900 return rval;
901}
902
903static PyObject *
904_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
905 /* Read a JSON number from PyUnicode pystr.
906 idx is the index of the first character of the number
907 *next_idx_ptr is a return-by-reference index to the first character after
908 the number.
909
910 Returns a new PyObject representation of that number:
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200911 PyLong, or PyFloat.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000912 May return other types if parse_int or parse_float are set
913 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200914 void *str;
915 int kind;
916 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000917 Py_ssize_t idx = start;
918 int is_float = 0;
919 PyObject *rval;
Antoine Pitrouf6454512011-04-25 19:16:06 +0200920 PyObject *numstr = NULL;
921 PyObject *custom_func;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000922
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200923 if (PyUnicode_READY(pystr) == -1)
924 return NULL;
925
926 str = PyUnicode_DATA(pystr);
927 kind = PyUnicode_KIND(pystr);
928 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
929
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000930 /* read a sign if it's there, make sure it's not the end of the string */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200931 if (PyUnicode_READ(kind, str, idx) == '-') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000932 idx++;
933 if (idx > end_idx) {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200934 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000935 return NULL;
936 }
937 }
938
939 /* read as many integer digits as we find as long as it doesn't start with 0 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200940 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000941 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200942 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000943 }
944 /* if it starts with 0 we only expect one integer digit */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200945 else if (PyUnicode_READ(kind, str, idx) == '0') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000946 idx++;
947 }
948 /* no integer digits, error */
949 else {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200950 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000951 return NULL;
952 }
953
954 /* if the next char is '.' followed by a digit then read all float digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200955 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000956 is_float = 1;
957 idx += 2;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200958 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000959 }
960
961 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200962 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000963 Py_ssize_t e_start = idx;
964 idx++;
965
966 /* read an exponent sign if present */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200967 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000968
969 /* read all digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200970 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000971
972 /* if we got a digit, then parse as float. if not, backtrack */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200973 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000974 is_float = 1;
975 }
976 else {
977 idx = e_start;
978 }
979 }
980
Antoine Pitrouf6454512011-04-25 19:16:06 +0200981 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
982 custom_func = s->parse_float;
983 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
984 custom_func = s->parse_int;
985 else
986 custom_func = NULL;
987
988 if (custom_func) {
989 /* copy the section we determined to be a number */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200990 numstr = PyUnicode_FromKindAndData(kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200991 (char*)str + kind * start,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200992 idx - start);
Antoine Pitrouf6454512011-04-25 19:16:06 +0200993 if (numstr == NULL)
994 return NULL;
Petr Viktorinffd97532020-02-11 17:46:57 +0100995 rval = PyObject_CallOneArg(custom_func, numstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000996 }
997 else {
Antoine Pitrouf6454512011-04-25 19:16:06 +0200998 Py_ssize_t i, n;
999 char *buf;
1000 /* Straight conversion to ASCII, to avoid costly conversion of
1001 decimal unicode digits (which cannot appear here) */
1002 n = idx - start;
1003 numstr = PyBytes_FromStringAndSize(NULL, n);
1004 if (numstr == NULL)
1005 return NULL;
1006 buf = PyBytes_AS_STRING(numstr);
1007 for (i = 0; i < n; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001008 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001009 }
1010 if (is_float)
1011 rval = PyFloat_FromString(numstr);
1012 else
1013 rval = PyLong_FromString(buf, NULL, 10);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001014 }
1015 Py_DECREF(numstr);
1016 *next_idx_ptr = idx;
1017 return rval;
1018}
1019
1020static PyObject *
1021scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1022{
1023 /* Read one JSON term (of any kind) from PyUnicode pystr.
1024 idx is the index of the first character of the term
1025 *next_idx_ptr is a return-by-reference index to the first character after
1026 the number.
1027
1028 Returns a new PyObject representation of the term.
1029 */
Ezio Melotti362b9512011-05-07 17:58:09 +03001030 PyObject *res;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001031 void *str;
1032 int kind;
1033 Py_ssize_t length;
1034
1035 if (PyUnicode_READY(pystr) == -1)
1036 return NULL;
1037
1038 str = PyUnicode_DATA(pystr);
1039 kind = PyUnicode_KIND(pystr);
1040 length = PyUnicode_GET_LENGTH(pystr);
1041
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001042 if (idx < 0) {
Benjamin Peterson9beee042014-04-14 11:46:51 -04001043 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001044 return NULL;
1045 }
1046 if (idx >= length) {
Ezio Melotti37623ab2013-01-03 08:44:15 +02001047 raise_stop_iteration(idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001048 return NULL;
1049 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001050
1051 switch (PyUnicode_READ(kind, str, idx)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001052 case '"':
1053 /* string */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001054 return scanstring_unicode(pystr, idx + 1, s->strict, next_idx_ptr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001055 case '{':
1056 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +03001057 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1058 "from a unicode string"))
1059 return NULL;
1060 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1061 Py_LeaveRecursiveCall();
1062 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001063 case '[':
1064 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +03001065 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1066 "from a unicode string"))
1067 return NULL;
1068 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1069 Py_LeaveRecursiveCall();
1070 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001071 case 'n':
1072 /* null */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001073 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001074 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001075 Py_RETURN_NONE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001076 }
1077 break;
1078 case 't':
1079 /* true */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001080 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001081 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001082 Py_RETURN_TRUE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001083 }
1084 break;
1085 case 'f':
1086 /* false */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001087 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1088 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1089 PyUnicode_READ(kind, str, idx + 3) == 's' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001090 PyUnicode_READ(kind, str, idx + 4) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001091 *next_idx_ptr = idx + 5;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001092 Py_RETURN_FALSE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001093 }
1094 break;
1095 case 'N':
1096 /* NaN */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001097 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001098 PyUnicode_READ(kind, str, idx + 2) == 'N') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001099 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1100 }
1101 break;
1102 case 'I':
1103 /* Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001104 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1105 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1106 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001107 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001108 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1109 PyUnicode_READ(kind, str, idx + 6) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001110 PyUnicode_READ(kind, str, idx + 7) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001111 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1112 }
1113 break;
1114 case '-':
1115 /* -Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001116 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001117 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1118 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001119 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001120 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001121 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1122 PyUnicode_READ(kind, str, idx + 7) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001123 PyUnicode_READ(kind, str, idx + 8) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001124 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1125 }
1126 break;
1127 }
1128 /* Didn't find a string, object, array, or named constant. Look for a number. */
1129 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1130}
1131
1132static PyObject *
Dong-hee Na33f15a12020-03-27 19:59:59 +09001133scanner_call(PyScannerObject *self, PyObject *args, PyObject *kwds)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001134{
1135 /* Python callable interface to scan_once_{str,unicode} */
1136 PyObject *pystr;
1137 PyObject *rval;
1138 Py_ssize_t idx;
1139 Py_ssize_t next_idx = -1;
1140 static char *kwlist[] = {"string", "idx", NULL};
Antoine Pitroucbb02842012-12-01 19:34:16 +01001141 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001142 return NULL;
1143
1144 if (PyUnicode_Check(pystr)) {
Dong-hee Na33f15a12020-03-27 19:59:59 +09001145 rval = scan_once_unicode(self, pystr, idx, &next_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001146 }
1147 else {
1148 PyErr_Format(PyExc_TypeError,
1149 "first argument must be a string, not %.80s",
1150 Py_TYPE(pystr)->tp_name);
1151 return NULL;
1152 }
Dong-hee Na33f15a12020-03-27 19:59:59 +09001153 PyDict_Clear(self->memo);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001154 if (rval == NULL)
1155 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001156 return _build_rval_index_tuple(rval, next_idx);
1157}
1158
1159static PyObject *
1160scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1161{
1162 PyScannerObject *s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001163 PyObject *ctx;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001164 PyObject *strict;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001165 static char *kwlist[] = {"context", NULL};
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001166
1167 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001168 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001169
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001170 s = (PyScannerObject *)type->tp_alloc(type, 0);
1171 if (s == NULL) {
1172 return NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001173 }
1174
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001175 s->memo = PyDict_New();
1176 if (s->memo == NULL)
1177 goto bail;
1178
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001179 /* All of these will fail "gracefully" so we don't need to verify them */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001180 strict = PyObject_GetAttrString(ctx, "strict");
1181 if (strict == NULL)
1182 goto bail;
1183 s->strict = PyObject_IsTrue(strict);
1184 Py_DECREF(strict);
1185 if (s->strict < 0)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001186 goto bail;
1187 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1188 if (s->object_hook == NULL)
1189 goto bail;
1190 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1191 if (s->object_pairs_hook == NULL)
1192 goto bail;
1193 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1194 if (s->parse_float == NULL)
1195 goto bail;
1196 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1197 if (s->parse_int == NULL)
1198 goto bail;
1199 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1200 if (s->parse_constant == NULL)
1201 goto bail;
1202
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001203 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001204
1205bail:
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001206 Py_DECREF(s);
1207 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001208}
1209
1210PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1211
Dong-hee Na33f15a12020-03-27 19:59:59 +09001212static PyType_Slot PyScannerType_slots[] = {
1213 {Py_tp_doc, (void *)scanner_doc},
1214 {Py_tp_dealloc, scanner_dealloc},
1215 {Py_tp_call, scanner_call},
1216 {Py_tp_traverse, scanner_traverse},
1217 {Py_tp_clear, scanner_clear},
1218 {Py_tp_members, scanner_members},
1219 {Py_tp_new, scanner_new},
1220 {0, 0}
1221};
1222
1223static PyType_Spec PyScannerType_spec = {
1224 .name = "_json.Scanner",
1225 .basicsize = sizeof(PyScannerObject),
1226 .itemsize = 0,
1227 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
1228 .slots = PyScannerType_slots,
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001229};
1230
1231static PyObject *
1232encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1233{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001234 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1235
1236 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001237 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001238 PyObject *item_separator;
1239 int sort_keys, skipkeys, allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001240
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001241 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001242 &markers, &defaultfn, &encoder, &indent,
1243 &key_separator, &item_separator,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001244 &sort_keys, &skipkeys, &allow_nan))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001245 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001246
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001247 if (markers != Py_None && !PyDict_Check(markers)) {
1248 PyErr_Format(PyExc_TypeError,
1249 "make_encoder() argument 1 must be dict or None, "
1250 "not %.200s", Py_TYPE(markers)->tp_name);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001251 return NULL;
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001252 }
1253
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001254 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1255 if (s == NULL)
1256 return NULL;
1257
Antoine Pitrou781eba72009-12-08 15:57:31 +00001258 s->markers = markers;
1259 s->defaultfn = defaultfn;
1260 s->encoder = encoder;
1261 s->indent = indent;
1262 s->key_separator = key_separator;
1263 s->item_separator = item_separator;
1264 s->sort_keys = sort_keys;
1265 s->skipkeys = skipkeys;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001266 s->allow_nan = allow_nan;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001267 s->fast_encode = NULL;
1268 if (PyCFunction_Check(s->encoder)) {
1269 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1270 if (f == (PyCFunction)py_encode_basestring_ascii ||
1271 f == (PyCFunction)py_encode_basestring) {
1272 s->fast_encode = f;
1273 }
1274 }
Antoine Pitrou781eba72009-12-08 15:57:31 +00001275
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001276 Py_INCREF(s->markers);
1277 Py_INCREF(s->defaultfn);
1278 Py_INCREF(s->encoder);
1279 Py_INCREF(s->indent);
1280 Py_INCREF(s->key_separator);
1281 Py_INCREF(s->item_separator);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001282 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001283}
1284
1285static PyObject *
Dong-hee Na33f15a12020-03-27 19:59:59 +09001286encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001287{
1288 /* Python callable interface to encode_listencode_obj */
1289 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1290 PyObject *obj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001291 Py_ssize_t indent_level;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001292 _PyAccu acc;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001293 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1294 &obj, &indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001295 return NULL;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001296 if (_PyAccu_Init(&acc))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001297 return NULL;
Dong-hee Na33f15a12020-03-27 19:59:59 +09001298 if (encoder_listencode_obj(self, &acc, obj, indent_level)) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001299 _PyAccu_Destroy(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001300 return NULL;
1301 }
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001302 return _PyAccu_FinishAsList(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001303}
1304
1305static PyObject *
1306_encoded_const(PyObject *obj)
1307{
1308 /* Return the JSON string representation of None, True, False */
1309 if (obj == Py_None) {
1310 static PyObject *s_null = NULL;
1311 if (s_null == NULL) {
1312 s_null = PyUnicode_InternFromString("null");
1313 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001314 Py_XINCREF(s_null);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001315 return s_null;
1316 }
1317 else if (obj == Py_True) {
1318 static PyObject *s_true = NULL;
1319 if (s_true == NULL) {
1320 s_true = PyUnicode_InternFromString("true");
1321 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001322 Py_XINCREF(s_true);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001323 return s_true;
1324 }
1325 else if (obj == Py_False) {
1326 static PyObject *s_false = NULL;
1327 if (s_false == NULL) {
1328 s_false = PyUnicode_InternFromString("false");
1329 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001330 Py_XINCREF(s_false);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001331 return s_false;
1332 }
1333 else {
1334 PyErr_SetString(PyExc_ValueError, "not a const");
1335 return NULL;
1336 }
1337}
1338
1339static PyObject *
1340encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1341{
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001342 /* Return the JSON representation of a PyFloat. */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001343 double i = PyFloat_AS_DOUBLE(obj);
1344 if (!Py_IS_FINITE(i)) {
1345 if (!s->allow_nan) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001346 PyErr_SetString(
1347 PyExc_ValueError,
1348 "Out of range float values are not JSON compliant"
1349 );
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001350 return NULL;
1351 }
1352 if (i > 0) {
1353 return PyUnicode_FromString("Infinity");
1354 }
1355 else if (i < 0) {
1356 return PyUnicode_FromString("-Infinity");
1357 }
1358 else {
1359 return PyUnicode_FromString("NaN");
1360 }
1361 }
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001362 return PyFloat_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001363}
1364
1365static PyObject *
1366encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1367{
1368 /* Return the JSON representation of a string */
Oren Milman2b382dd2017-09-24 12:07:12 +03001369 PyObject *encoded;
1370
1371 if (s->fast_encode) {
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001372 return s->fast_encode(NULL, obj);
Oren Milman2b382dd2017-09-24 12:07:12 +03001373 }
Petr Viktorinffd97532020-02-11 17:46:57 +01001374 encoded = PyObject_CallOneArg(s->encoder, obj);
Oren Milman2b382dd2017-09-24 12:07:12 +03001375 if (encoded != NULL && !PyUnicode_Check(encoded)) {
1376 PyErr_Format(PyExc_TypeError,
1377 "encoder() must return a string, not %.80s",
1378 Py_TYPE(encoded)->tp_name);
1379 Py_DECREF(encoded);
1380 return NULL;
1381 }
1382 return encoded;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001383}
1384
1385static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001386_steal_accumulate(_PyAccu *acc, PyObject *stolen)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001387{
1388 /* Append stolen and then decrement its reference count */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001389 int rval = _PyAccu_Accumulate(acc, stolen);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001390 Py_DECREF(stolen);
1391 return rval;
1392}
1393
1394static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001395encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001396 PyObject *obj, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001397{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001398 /* Encode Python object obj to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001399 PyObject *newobj;
1400 int rv;
1401
1402 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1403 PyObject *cstr = _encoded_const(obj);
1404 if (cstr == NULL)
1405 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001406 return _steal_accumulate(acc, cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001407 }
1408 else if (PyUnicode_Check(obj))
1409 {
1410 PyObject *encoded = encoder_encode_string(s, obj);
1411 if (encoded == NULL)
1412 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001413 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001414 }
1415 else if (PyLong_Check(obj)) {
Serhiy Storchaka96aeaec2019-05-06 22:29:40 +03001416 PyObject *encoded = PyLong_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001417 if (encoded == NULL)
1418 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001419 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001420 }
1421 else if (PyFloat_Check(obj)) {
1422 PyObject *encoded = encoder_encode_float(s, obj);
1423 if (encoded == NULL)
1424 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001425 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001426 }
1427 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001428 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1429 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001430 rv = encoder_listencode_list(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001431 Py_LeaveRecursiveCall();
1432 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001433 }
1434 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001435 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1436 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001437 rv = encoder_listencode_dict(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001438 Py_LeaveRecursiveCall();
1439 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001440 }
1441 else {
1442 PyObject *ident = NULL;
1443 if (s->markers != Py_None) {
1444 int has_key;
1445 ident = PyLong_FromVoidPtr(obj);
1446 if (ident == NULL)
1447 return -1;
1448 has_key = PyDict_Contains(s->markers, ident);
1449 if (has_key) {
1450 if (has_key != -1)
1451 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1452 Py_DECREF(ident);
1453 return -1;
1454 }
1455 if (PyDict_SetItem(s->markers, ident, obj)) {
1456 Py_DECREF(ident);
1457 return -1;
1458 }
1459 }
Petr Viktorinffd97532020-02-11 17:46:57 +01001460 newobj = PyObject_CallOneArg(s->defaultfn, obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001461 if (newobj == NULL) {
1462 Py_XDECREF(ident);
1463 return -1;
1464 }
Ezio Melotti13672652011-05-11 01:02:56 +03001465
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001466 if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
1467 Py_DECREF(newobj);
1468 Py_XDECREF(ident);
Ezio Melotti13672652011-05-11 01:02:56 +03001469 return -1;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001470 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001471 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001472 Py_LeaveRecursiveCall();
1473
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001474 Py_DECREF(newobj);
1475 if (rv) {
1476 Py_XDECREF(ident);
1477 return -1;
1478 }
1479 if (ident != NULL) {
1480 if (PyDict_DelItem(s->markers, ident)) {
1481 Py_XDECREF(ident);
1482 return -1;
1483 }
1484 Py_XDECREF(ident);
1485 }
1486 return rv;
1487 }
1488}
1489
1490static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001491encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001492 PyObject *dct, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001493{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001494 /* Encode Python dict dct a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001495 static PyObject *open_dict = NULL;
1496 static PyObject *close_dict = NULL;
1497 static PyObject *empty_dict = NULL;
1498 PyObject *kstr = NULL;
1499 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001500 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001501 PyObject *items;
1502 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001503 Py_ssize_t idx;
1504
1505 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1506 open_dict = PyUnicode_InternFromString("{");
1507 close_dict = PyUnicode_InternFromString("}");
1508 empty_dict = PyUnicode_InternFromString("{}");
1509 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1510 return -1;
1511 }
Serhiy Storchaka8cbc51a2017-01-13 08:38:15 +02001512 if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001513 return _PyAccu_Accumulate(acc, empty_dict);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001514
1515 if (s->markers != Py_None) {
1516 int has_key;
1517 ident = PyLong_FromVoidPtr(dct);
1518 if (ident == NULL)
1519 goto bail;
1520 has_key = PyDict_Contains(s->markers, ident);
1521 if (has_key) {
1522 if (has_key != -1)
1523 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1524 goto bail;
1525 }
1526 if (PyDict_SetItem(s->markers, ident, dct)) {
1527 goto bail;
1528 }
1529 }
1530
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001531 if (_PyAccu_Accumulate(acc, open_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001532 goto bail;
1533
1534 if (s->indent != Py_None) {
1535 /* TODO: DOES NOT RUN */
1536 indent_level += 1;
1537 /*
1538 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1539 separator = _item_separator + newline_indent
1540 buf += newline_indent
1541 */
1542 }
1543
Benjamin Peterson501182a2015-05-02 22:28:04 -04001544 items = PyMapping_Items(dct);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001545 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001546 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001547 if (s->sort_keys && PyList_Sort(items) < 0) {
1548 Py_DECREF(items);
Benjamin Peterson501182a2015-05-02 22:28:04 -04001549 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001550 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001551 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001552 Py_DECREF(items);
1553 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001554 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001555 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001556 while ((item = PyIter_Next(it)) != NULL) {
1557 PyObject *encoded, *key, *value;
Serhiy Storchakafff9a312017-03-21 08:53:25 +02001558 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001559 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1560 goto bail;
1561 }
1562 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001563 if (PyUnicode_Check(key)) {
1564 Py_INCREF(key);
1565 kstr = key;
1566 }
1567 else if (PyFloat_Check(key)) {
1568 kstr = encoder_encode_float(s, key);
1569 if (kstr == NULL)
1570 goto bail;
1571 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001572 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001573 /* This must come before the PyLong_Check because
1574 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001575 kstr = _encoded_const(key);
1576 if (kstr == NULL)
1577 goto bail;
1578 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001579 else if (PyLong_Check(key)) {
Serhiy Storchaka96aeaec2019-05-06 22:29:40 +03001580 kstr = PyLong_Type.tp_repr(key);
Ethan Furmana4998a72013-08-10 13:01:45 -07001581 if (kstr == NULL) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001582 goto bail;
Ethan Furmana4998a72013-08-10 13:01:45 -07001583 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001584 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001585 else if (s->skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001586 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001587 continue;
1588 }
1589 else {
Serhiy Storchakacfa797c2017-11-25 17:38:20 +02001590 PyErr_Format(PyExc_TypeError,
1591 "keys must be str, int, float, bool or None, "
Victor Stinnerdaa97562020-02-07 03:37:06 +01001592 "not %.100s", Py_TYPE(key)->tp_name);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001593 goto bail;
1594 }
1595
1596 if (idx) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001597 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001598 goto bail;
1599 }
1600
1601 encoded = encoder_encode_string(s, kstr);
1602 Py_CLEAR(kstr);
1603 if (encoded == NULL)
1604 goto bail;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001605 if (_PyAccu_Accumulate(acc, encoded)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001606 Py_DECREF(encoded);
1607 goto bail;
1608 }
1609 Py_DECREF(encoded);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001610 if (_PyAccu_Accumulate(acc, s->key_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001611 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001612
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001613 value = PyTuple_GET_ITEM(item, 1);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001614 if (encoder_listencode_obj(s, acc, value, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001615 goto bail;
1616 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001617 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001618 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001619 if (PyErr_Occurred())
1620 goto bail;
1621 Py_CLEAR(it);
1622
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001623 if (ident != NULL) {
1624 if (PyDict_DelItem(s->markers, ident))
1625 goto bail;
1626 Py_CLEAR(ident);
1627 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001628 /* TODO DOES NOT RUN; dead code
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001629 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001630 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001631
1632 yield '\n' + (' ' * (_indent * _current_indent_level))
1633 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001634 if (_PyAccu_Accumulate(acc, close_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001635 goto bail;
1636 return 0;
1637
1638bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001639 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001640 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001641 Py_XDECREF(kstr);
1642 Py_XDECREF(ident);
1643 return -1;
1644}
1645
1646
1647static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001648encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001649 PyObject *seq, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001650{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001651 /* Encode Python list seq to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001652 static PyObject *open_array = NULL;
1653 static PyObject *close_array = NULL;
1654 static PyObject *empty_array = NULL;
1655 PyObject *ident = NULL;
1656 PyObject *s_fast = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001657 Py_ssize_t i;
1658
1659 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1660 open_array = PyUnicode_InternFromString("[");
1661 close_array = PyUnicode_InternFromString("]");
1662 empty_array = PyUnicode_InternFromString("[]");
1663 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1664 return -1;
1665 }
1666 ident = NULL;
1667 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1668 if (s_fast == NULL)
1669 return -1;
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001670 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001671 Py_DECREF(s_fast);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001672 return _PyAccu_Accumulate(acc, empty_array);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001673 }
1674
1675 if (s->markers != Py_None) {
1676 int has_key;
1677 ident = PyLong_FromVoidPtr(seq);
1678 if (ident == NULL)
1679 goto bail;
1680 has_key = PyDict_Contains(s->markers, ident);
1681 if (has_key) {
1682 if (has_key != -1)
1683 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1684 goto bail;
1685 }
1686 if (PyDict_SetItem(s->markers, ident, seq)) {
1687 goto bail;
1688 }
1689 }
1690
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001691 if (_PyAccu_Accumulate(acc, open_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001692 goto bail;
1693 if (s->indent != Py_None) {
1694 /* TODO: DOES NOT RUN */
1695 indent_level += 1;
1696 /*
1697 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1698 separator = _item_separator + newline_indent
1699 buf += newline_indent
1700 */
1701 }
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001702 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1703 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001704 if (i) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001705 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001706 goto bail;
1707 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001708 if (encoder_listencode_obj(s, acc, obj, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001709 goto bail;
1710 }
1711 if (ident != NULL) {
1712 if (PyDict_DelItem(s->markers, ident))
1713 goto bail;
1714 Py_CLEAR(ident);
1715 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001716
1717 /* TODO: DOES NOT RUN
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001718 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001719 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001720
1721 yield '\n' + (' ' * (_indent * _current_indent_level))
1722 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001723 if (_PyAccu_Accumulate(acc, close_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001724 goto bail;
1725 Py_DECREF(s_fast);
1726 return 0;
1727
1728bail:
1729 Py_XDECREF(ident);
1730 Py_DECREF(s_fast);
1731 return -1;
1732}
1733
1734static void
1735encoder_dealloc(PyObject *self)
1736{
Dong-hee Na33f15a12020-03-27 19:59:59 +09001737 PyTypeObject *tp = Py_TYPE(self);
INADA Naokia6296d32017-08-24 14:55:17 +09001738 /* bpo-31095: UnTrack is needed before calling any callbacks */
1739 PyObject_GC_UnTrack(self);
Dong-hee Na33f15a12020-03-27 19:59:59 +09001740 encoder_clear((PyEncoderObject *)self);
1741 tp->tp_free(self);
1742 Py_DECREF(tp);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001743}
1744
1745static int
Dong-hee Na33f15a12020-03-27 19:59:59 +09001746encoder_traverse(PyEncoderObject *self, visitproc visit, void *arg)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001747{
Dong-hee Na33f15a12020-03-27 19:59:59 +09001748 Py_VISIT(self->markers);
1749 Py_VISIT(self->defaultfn);
1750 Py_VISIT(self->encoder);
1751 Py_VISIT(self->indent);
1752 Py_VISIT(self->key_separator);
1753 Py_VISIT(self->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001754 return 0;
1755}
1756
1757static int
Dong-hee Na33f15a12020-03-27 19:59:59 +09001758encoder_clear(PyEncoderObject *self)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001759{
1760 /* Deallocate Encoder */
Dong-hee Na33f15a12020-03-27 19:59:59 +09001761 Py_CLEAR(self->markers);
1762 Py_CLEAR(self->defaultfn);
1763 Py_CLEAR(self->encoder);
1764 Py_CLEAR(self->indent);
1765 Py_CLEAR(self->key_separator);
1766 Py_CLEAR(self->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001767 return 0;
1768}
1769
1770PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1771
Dong-hee Na33f15a12020-03-27 19:59:59 +09001772static PyType_Slot PyEncoderType_slots[] = {
1773 {Py_tp_doc, (void *)encoder_doc},
1774 {Py_tp_dealloc, encoder_dealloc},
1775 {Py_tp_call, encoder_call},
1776 {Py_tp_traverse, encoder_traverse},
1777 {Py_tp_clear, encoder_clear},
1778 {Py_tp_members, encoder_members},
1779 {Py_tp_new, encoder_new},
1780 {0, 0}
1781};
1782
1783static PyType_Spec PyEncoderType_spec = {
1784 .name = "_json.Encoder",
1785 .basicsize = sizeof(PyEncoderObject),
1786 .itemsize = 0,
1787 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
1788 .slots = PyEncoderType_slots
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001789};
1790
1791static PyMethodDef speedups_methods[] = {
1792 {"encode_basestring_ascii",
1793 (PyCFunction)py_encode_basestring_ascii,
1794 METH_O,
1795 pydoc_encode_basestring_ascii},
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001796 {"encode_basestring",
1797 (PyCFunction)py_encode_basestring,
1798 METH_O,
1799 pydoc_encode_basestring},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001800 {"scanstring",
1801 (PyCFunction)py_scanstring,
1802 METH_VARARGS,
1803 pydoc_scanstring},
Christian Heimes90540002008-05-08 14:29:10 +00001804 {NULL, NULL, 0, NULL}
1805};
1806
1807PyDoc_STRVAR(module_doc,
1808"json speedups\n");
1809
Hai Shied154c32020-01-16 00:32:51 +08001810static int
1811_json_exec(PyObject *module)
1812{
Dong-hee Na33f15a12020-03-27 19:59:59 +09001813 _jsonmodulestate *state = get_json_state(module);
1814
1815 state->PyScannerType = PyType_FromSpec(&PyScannerType_spec);
1816 if (state->PyScannerType == NULL) {
Hai Shied154c32020-01-16 00:32:51 +08001817 return -1;
1818 }
Dong-hee Na33f15a12020-03-27 19:59:59 +09001819 Py_INCREF(state->PyScannerType);
1820 if (PyModule_AddObject(module, "make_scanner", state->PyScannerType) < 0) {
1821 Py_DECREF((PyObject*)state->PyScannerType);
Hai Shied154c32020-01-16 00:32:51 +08001822 return -1;
1823 }
Dong-hee Na33f15a12020-03-27 19:59:59 +09001824
1825 state->PyEncoderType = PyType_FromSpec(&PyEncoderType_spec);
1826 if (state->PyEncoderType == NULL) {
Hai Shied154c32020-01-16 00:32:51 +08001827 return -1;
1828 }
Dong-hee Na33f15a12020-03-27 19:59:59 +09001829 Py_INCREF(state->PyEncoderType);
1830 if (PyModule_AddObject(module, "make_encoder", state->PyEncoderType) < 0) {
1831 Py_DECREF((PyObject*)state->PyEncoderType);
Hai Shied154c32020-01-16 00:32:51 +08001832 return -1;
1833 }
Dong-hee Na33f15a12020-03-27 19:59:59 +09001834
Hai Shied154c32020-01-16 00:32:51 +08001835 return 0;
1836}
1837
Dong-hee Na33f15a12020-03-27 19:59:59 +09001838static int
1839_jsonmodule_traverse(PyObject *module, visitproc visit, void *arg)
1840{
1841 _jsonmodulestate *state = get_json_state(module);
1842 Py_VISIT(state->PyScannerType);
1843 Py_VISIT(state->PyEncoderType);
1844 return 0;
1845}
1846
1847static int
1848_jsonmodule_clear(PyObject *module)
1849{
1850 _jsonmodulestate *state = get_json_state(module);
1851 Py_CLEAR(state->PyScannerType);
1852 Py_CLEAR(state->PyEncoderType);
1853 return 0;
1854}
1855
1856static void
1857_jsonmodule_free(void *module)
1858{
1859 _jsonmodule_clear((PyObject *)module);
1860}
1861
Hai Shied154c32020-01-16 00:32:51 +08001862static PyModuleDef_Slot _json_slots[] = {
1863 {Py_mod_exec, _json_exec},
1864 {0, NULL}
1865};
1866
Martin v. Löwis1a214512008-06-11 05:26:20 +00001867static struct PyModuleDef jsonmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001868 PyModuleDef_HEAD_INIT,
1869 "_json",
1870 module_doc,
Dong-hee Na33f15a12020-03-27 19:59:59 +09001871 sizeof(_jsonmodulestate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001872 speedups_methods,
Hai Shied154c32020-01-16 00:32:51 +08001873 _json_slots,
Dong-hee Na33f15a12020-03-27 19:59:59 +09001874 _jsonmodule_traverse,
1875 _jsonmodule_clear,
1876 _jsonmodule_free,
Martin v. Löwis1a214512008-06-11 05:26:20 +00001877};
1878
Victor Stinnerf024d262015-03-17 17:48:27 +01001879PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001880PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00001881{
Hai Shied154c32020-01-16 00:32:51 +08001882 return PyModuleDef_Init(&jsonmodule);
Christian Heimes90540002008-05-08 14:29:10 +00001883}