blob: 4682cf84621ee9b5f92119bd5ff756c96a7047d6 [file] [log] [blame]
/* JSON accelerator C extension: _json module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
 * platforms. */
Eric Snow2ebc5ce2017-09-07 23:51:28 -06006
Victor Stinner5c75f372019-04-17 23:02:26 +02007#if !defined(Py_BUILD_CORE_BUILTIN) && !defined(Py_BUILD_CORE_MODULE)
8# error "Py_BUILD_CORE_BUILTIN or Py_BUILD_CORE_MODULE must be defined"
Eric Snow2ebc5ce2017-09-07 23:51:28 -06009#endif
10
Christian Heimes90540002008-05-08 14:29:10 +000011#include "Python.h"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000012#include "structmember.h"
Victor Stinnere281f7d2018-11-01 02:30:36 +010013#include "pycore_accu.h"
Antoine Pitroud0acb412012-03-22 14:42:18 +010014
Dong-hee Na33f15a12020-03-27 19:59:59 +090015typedef struct {
16 PyObject *PyScannerType;
17 PyObject *PyEncoderType;
18} _jsonmodulestate;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000019
Dong-hee Na33f15a12020-03-27 19:59:59 +090020static inline _jsonmodulestate*
21get_json_state(PyObject *module)
22{
23 void *state = PyModule_GetState(module);
24 assert(state != NULL);
25 return (_jsonmodulestate *)state;
26}
27
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000028
29typedef struct _PyScannerObject {
30 PyObject_HEAD
Segev Finer541bd282017-07-13 08:52:08 +030031 signed char strict;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000032 PyObject *object_hook;
33 PyObject *object_pairs_hook;
34 PyObject *parse_float;
35 PyObject *parse_int;
36 PyObject *parse_constant;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +000037 PyObject *memo;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000038} PyScannerObject;
39
40static PyMemberDef scanner_members[] = {
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030041 {"strict", T_BOOL, offsetof(PyScannerObject, strict), READONLY, "strict"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000042 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
43 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
44 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
45 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
46 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
47 {NULL}
48};
49
50typedef struct _PyEncoderObject {
51 PyObject_HEAD
52 PyObject *markers;
53 PyObject *defaultfn;
54 PyObject *encoder;
55 PyObject *indent;
56 PyObject *key_separator;
57 PyObject *item_separator;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030058 char sort_keys;
59 char skipkeys;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000060 int allow_nan;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030061 PyCFunction fast_encode;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000062} PyEncoderObject;
63
64static PyMemberDef encoder_members[] = {
65 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
66 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
67 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
68 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
69 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
70 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030071 {"sort_keys", T_BOOL, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
72 {"skipkeys", T_BOOL, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000073 {NULL}
74};
75
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020076/* Forward decls */
77
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000078static PyObject *
79ascii_escape_unicode(PyObject *pystr);
80static PyObject *
Victor Stinnerfbbfcce2019-11-05 11:44:28 +010081py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000082void init_json(void);
83static PyObject *
84scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
85static PyObject *
86_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
87static PyObject *
88scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000089static void
90scanner_dealloc(PyObject *self);
91static int
Dong-hee Na33f15a12020-03-27 19:59:59 +090092scanner_clear(PyScannerObject *self);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000093static PyObject *
94encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000095static void
96encoder_dealloc(PyObject *self);
97static int
Dong-hee Na33f15a12020-03-27 19:59:59 +090098encoder_clear(PyEncoderObject *self);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000099static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200100encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000101static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200102encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000103static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200104encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000105static PyObject *
Hirokazu Yamamotofecf5d12009-05-02 15:55:19 +0000106_encoded_const(PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000107static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200108raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000109static PyObject *
110encoder_encode_string(PyEncoderObject *s, PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000111static PyObject *
112encoder_encode_float(PyEncoderObject *s, PyObject *obj);
113
/* True when c is printable ASCII (' ' through '~') other than backslash and
   double quote, i.e. a character ascii_escape_unicode() copies verbatim.
   The argument is parenthesized so expression arguments parse correctly;
   it is still evaluated more than once, so it must have no side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* True when c is one of the four whitespace characters skipped by the
   scanner: space, tab, line feed, carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
Christian Heimes90540002008-05-08 14:29:10 +0000116
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000117static Py_ssize_t
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200118ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000119{
120 /* Escape unicode code point c to ASCII escape sequences
121 in char *output. output must have at least 12 bytes unused to
122 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000123 output[chars++] = '\\';
124 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000125 case '\\': output[chars++] = c; break;
126 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000127 case '\b': output[chars++] = 'b'; break;
128 case '\f': output[chars++] = 'f'; break;
129 case '\n': output[chars++] = 'n'; break;
130 case '\r': output[chars++] = 'r'; break;
131 case '\t': output[chars++] = 't'; break;
132 default:
Christian Heimes90540002008-05-08 14:29:10 +0000133 if (c >= 0x10000) {
134 /* UTF-16 surrogate pair */
Victor Stinner76df43d2012-10-30 01:42:39 +0100135 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000136 output[chars++] = 'u';
Victor Stinner76df43d2012-10-30 01:42:39 +0100137 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
138 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
139 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
140 output[chars++] = Py_hexdigits[(v ) & 0xf];
141 c = Py_UNICODE_LOW_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000142 output[chars++] = '\\';
143 }
Christian Heimes90540002008-05-08 14:29:10 +0000144 output[chars++] = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200145 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
146 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
147 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
148 output[chars++] = Py_hexdigits[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000149 }
150 return chars;
151}
152
153static PyObject *
154ascii_escape_unicode(PyObject *pystr)
155{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000156 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000157 Py_ssize_t i;
158 Py_ssize_t input_chars;
159 Py_ssize_t output_size;
160 Py_ssize_t chars;
161 PyObject *rval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200162 void *input;
163 unsigned char *output;
164 int kind;
Christian Heimes90540002008-05-08 14:29:10 +0000165
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200166 if (PyUnicode_READY(pystr) == -1)
167 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000168
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200169 input_chars = PyUnicode_GET_LENGTH(pystr);
170 input = PyUnicode_DATA(pystr);
171 kind = PyUnicode_KIND(pystr);
172
173 /* Compute the output size */
174 for (i = 0, output_size = 2; i < input_chars; i++) {
175 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500176 Py_ssize_t d;
177 if (S_CHAR(c)) {
178 d = 1;
179 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200180 else {
181 switch(c) {
Victor Stinnerd9c06312011-10-11 21:56:19 +0200182 case '\\': case '"': case '\b': case '\f':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200183 case '\n': case '\r': case '\t':
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500184 d = 2; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200185 default:
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500186 d = c >= 0x10000 ? 12 : 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200187 }
188 }
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500189 if (output_size > PY_SSIZE_T_MAX - d) {
190 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
191 return NULL;
192 }
193 output_size += d;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200194 }
195
196 rval = PyUnicode_New(output_size, 127);
Christian Heimes90540002008-05-08 14:29:10 +0000197 if (rval == NULL) {
198 return NULL;
199 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200200 output = PyUnicode_1BYTE_DATA(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000201 chars = 0;
202 output[chars++] = '"';
203 for (i = 0; i < input_chars; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200204 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Christian Heimes90540002008-05-08 14:29:10 +0000205 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000206 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000207 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000208 else {
209 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000210 }
Christian Heimes90540002008-05-08 14:29:10 +0000211 }
212 output[chars++] = '"';
Christian Heimesf402e922013-01-03 09:21:55 +0100213#ifdef Py_DEBUG
Victor Stinner8f825062012-04-27 13:55:39 +0200214 assert(_PyUnicode_CheckConsistency(rval, 1));
Christian Heimesf402e922013-01-03 09:21:55 +0100215#endif
Christian Heimes90540002008-05-08 14:29:10 +0000216 return rval;
217}
218
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100219static PyObject *
220escape_unicode(PyObject *pystr)
221{
222 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
223 Py_ssize_t i;
224 Py_ssize_t input_chars;
225 Py_ssize_t output_size;
226 Py_ssize_t chars;
227 PyObject *rval;
228 void *input;
229 int kind;
230 Py_UCS4 maxchar;
231
232 if (PyUnicode_READY(pystr) == -1)
233 return NULL;
234
235 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
236 input_chars = PyUnicode_GET_LENGTH(pystr);
237 input = PyUnicode_DATA(pystr);
238 kind = PyUnicode_KIND(pystr);
239
240 /* Compute the output size */
241 for (i = 0, output_size = 2; i < input_chars; i++) {
242 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500243 Py_ssize_t d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100244 switch (c) {
245 case '\\': case '"': case '\b': case '\f':
246 case '\n': case '\r': case '\t':
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500247 d = 2;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100248 break;
249 default:
250 if (c <= 0x1f)
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500251 d = 6;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100252 else
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500253 d = 1;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100254 }
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500255 if (output_size > PY_SSIZE_T_MAX - d) {
256 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
257 return NULL;
258 }
259 output_size += d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100260 }
261
262 rval = PyUnicode_New(output_size, maxchar);
263 if (rval == NULL)
264 return NULL;
265
266 kind = PyUnicode_KIND(rval);
267
268#define ENCODE_OUTPUT do { \
269 chars = 0; \
270 output[chars++] = '"'; \
271 for (i = 0; i < input_chars; i++) { \
272 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
273 switch (c) { \
274 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
275 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
276 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
277 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
278 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
279 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
280 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
281 default: \
282 if (c <= 0x1f) { \
283 output[chars++] = '\\'; \
284 output[chars++] = 'u'; \
285 output[chars++] = '0'; \
286 output[chars++] = '0'; \
287 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
288 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
289 } else { \
290 output[chars++] = c; \
291 } \
292 } \
293 } \
294 output[chars++] = '"'; \
295 } while (0)
296
297 if (kind == PyUnicode_1BYTE_KIND) {
298 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
299 ENCODE_OUTPUT;
300 } else if (kind == PyUnicode_2BYTE_KIND) {
301 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
302 ENCODE_OUTPUT;
303 } else {
304 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
305 assert(kind == PyUnicode_4BYTE_KIND);
306 ENCODE_OUTPUT;
307 }
308#undef ENCODE_OUTPUT
309
310#ifdef Py_DEBUG
311 assert(_PyUnicode_CheckConsistency(rval, 1));
312#endif
313 return rval;
314}
315
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000316static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200317raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
Christian Heimes90540002008-05-08 14:29:10 +0000318{
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200319 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
320 static PyObject *JSONDecodeError = NULL;
321 PyObject *exc;
322 if (JSONDecodeError == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000323 PyObject *decoder = PyImport_ImportModule("json.decoder");
324 if (decoder == NULL)
325 return;
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200326 JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000327 Py_DECREF(decoder);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200328 if (JSONDecodeError == NULL)
Christian Heimes90540002008-05-08 14:29:10 +0000329 return;
Christian Heimes90540002008-05-08 14:29:10 +0000330 }
Victor Stinner4c381542016-12-09 00:33:39 +0100331 exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200332 if (exc) {
333 PyErr_SetObject(JSONDecodeError, exc);
334 Py_DECREF(exc);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000335 }
Christian Heimes90540002008-05-08 14:29:10 +0000336}
337
Ezio Melotti37623ab2013-01-03 08:44:15 +0200338static void
339raise_stop_iteration(Py_ssize_t idx)
340{
341 PyObject *value = PyLong_FromSsize_t(idx);
342 if (value != NULL) {
343 PyErr_SetObject(PyExc_StopIteration, value);
344 Py_DECREF(value);
345 }
346}
347
Christian Heimes90540002008-05-08 14:29:10 +0000348static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000349_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
350 /* return (rval, idx) tuple, stealing reference to rval */
351 PyObject *tpl;
352 PyObject *pyidx;
353 /*
354 steal a reference to rval, returns (rval, idx)
355 */
356 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000357 return NULL;
358 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000359 pyidx = PyLong_FromSsize_t(idx);
360 if (pyidx == NULL) {
361 Py_DECREF(rval);
362 return NULL;
363 }
364 tpl = PyTuple_New(2);
365 if (tpl == NULL) {
366 Py_DECREF(pyidx);
367 Py_DECREF(rval);
368 return NULL;
369 }
370 PyTuple_SET_ITEM(tpl, 0, rval);
371 PyTuple_SET_ITEM(tpl, 1, pyidx);
372 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000373}
374
375static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000376scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000377{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000378 /* Read the JSON string from PyUnicode pystr.
379 end is the index of the first character after the quote.
380 if strict is zero then literal control characters are allowed
381 *next_end_ptr is a return-by-reference index of the character
382 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000383
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000384 Return value is a new PyUnicode
385 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000386 PyObject *rval = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200387 Py_ssize_t len;
Christian Heimes90540002008-05-08 14:29:10 +0000388 Py_ssize_t begin = end - 1;
Brett Cannonb94767f2011-02-22 20:15:44 +0000389 Py_ssize_t next /* = begin */;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200390 const void *buf;
391 int kind;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000392
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200393 if (PyUnicode_READY(pystr) == -1)
394 return 0;
395
Inada Naoki9c110292019-10-17 16:12:41 +0900396 _PyUnicodeWriter writer;
397 _PyUnicodeWriter_Init(&writer);
398 writer.overallocate = 1;
399
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200400 len = PyUnicode_GET_LENGTH(pystr);
401 buf = PyUnicode_DATA(pystr);
402 kind = PyUnicode_KIND(pystr);
403
Ezio Melotti37623ab2013-01-03 08:44:15 +0200404 if (end < 0 || len < end) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000405 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
406 goto bail;
407 }
Christian Heimes90540002008-05-08 14:29:10 +0000408 while (1) {
409 /* Find the end of the string or the next escape */
Inada Naoki2a570af2019-08-08 17:57:10 +0900410 Py_UCS4 c;
411 {
412 // Use tight scope variable to help register allocation.
413 Py_UCS4 d = 0;
414 for (next = end; next < len; next++) {
415 d = PyUnicode_READ(kind, buf, next);
416 if (d == '"' || d == '\\') {
417 break;
418 }
419 if (d <= 0x1f && strict) {
420 raise_errmsg("Invalid control character at", pystr, next);
421 goto bail;
422 }
Christian Heimes90540002008-05-08 14:29:10 +0000423 }
Inada Naoki2a570af2019-08-08 17:57:10 +0900424 c = d;
Christian Heimes90540002008-05-08 14:29:10 +0000425 }
Inada Naoki9c110292019-10-17 16:12:41 +0900426
427 if (c == '"') {
428 // Fast path for simple case.
429 if (writer.buffer == NULL) {
430 PyObject *ret = PyUnicode_Substring(pystr, end, next);
431 if (ret == NULL) {
432 goto bail;
433 }
434 *next_end_ptr = next + 1;;
435 return ret;
436 }
437 }
438 else if (c != '\\') {
Christian Heimes90540002008-05-08 14:29:10 +0000439 raise_errmsg("Unterminated string starting at", pystr, begin);
440 goto bail;
441 }
Inada Naoki9c110292019-10-17 16:12:41 +0900442
Christian Heimes90540002008-05-08 14:29:10 +0000443 /* Pick up this chunk if it's not zero length */
444 if (next != end) {
Inada Naoki9c110292019-10-17 16:12:41 +0900445 if (_PyUnicodeWriter_WriteSubstring(&writer, pystr, end, next) < 0) {
Christian Heimes90540002008-05-08 14:29:10 +0000446 goto bail;
447 }
Christian Heimes90540002008-05-08 14:29:10 +0000448 }
449 next++;
450 if (c == '"') {
451 end = next;
452 break;
453 }
454 if (next == len) {
455 raise_errmsg("Unterminated string starting at", pystr, begin);
456 goto bail;
457 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200458 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000459 if (c != 'u') {
460 /* Non-unicode backslash escapes */
461 end = next + 1;
462 switch (c) {
463 case '"': break;
464 case '\\': break;
465 case '/': break;
466 case 'b': c = '\b'; break;
467 case 'f': c = '\f'; break;
468 case 'n': c = '\n'; break;
469 case 'r': c = '\r'; break;
470 case 't': c = '\t'; break;
471 default: c = 0;
472 }
473 if (c == 0) {
474 raise_errmsg("Invalid \\escape", pystr, end - 2);
475 goto bail;
476 }
477 }
478 else {
479 c = 0;
480 next++;
481 end = next + 4;
482 if (end >= len) {
483 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
484 goto bail;
485 }
486 /* Decode 4 hex digits */
487 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200488 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000489 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000490 switch (digit) {
491 case '0': case '1': case '2': case '3': case '4':
492 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000493 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000494 case 'a': case 'b': case 'c': case 'd': case 'e':
495 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000496 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000497 case 'A': case 'B': case 'C': case 'D': case 'E':
498 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000499 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000500 default:
501 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
502 goto bail;
503 }
504 }
Christian Heimes90540002008-05-08 14:29:10 +0000505 /* Surrogate pair */
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200506 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
507 PyUnicode_READ(kind, buf, next++) == '\\' &&
508 PyUnicode_READ(kind, buf, next++) == 'u') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200509 Py_UCS4 c2 = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000510 end += 6;
511 /* Decode 4 hex digits */
512 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200513 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000514 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000515 switch (digit) {
516 case '0': case '1': case '2': case '3': case '4':
517 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000518 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000519 case 'a': case 'b': case 'c': case 'd': case 'e':
520 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000521 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000522 case 'A': case 'B': case 'C': case 'D': case 'E':
523 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000524 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000525 default:
526 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
527 goto bail;
528 }
529 }
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200530 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
531 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
532 else
533 end -= 6;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000534 }
Christian Heimes90540002008-05-08 14:29:10 +0000535 }
Inada Naoki9c110292019-10-17 16:12:41 +0900536 if (_PyUnicodeWriter_WriteChar(&writer, c) < 0) {
Christian Heimes90540002008-05-08 14:29:10 +0000537 goto bail;
538 }
Christian Heimes90540002008-05-08 14:29:10 +0000539 }
540
Inada Naoki9c110292019-10-17 16:12:41 +0900541 rval = _PyUnicodeWriter_Finish(&writer);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000542 *next_end_ptr = end;
543 return rval;
Inada Naoki9c110292019-10-17 16:12:41 +0900544
Christian Heimes90540002008-05-08 14:29:10 +0000545bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000546 *next_end_ptr = -1;
Inada Naoki9c110292019-10-17 16:12:41 +0900547 _PyUnicodeWriter_Dealloc(&writer);
Christian Heimes90540002008-05-08 14:29:10 +0000548 return NULL;
549}
550
551PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000552 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000553 "\n"
554 "Scan the string s for a JSON string. End is the index of the\n"
555 "character in s after the quote that started the JSON string.\n"
556 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
557 "on attempt to decode an invalid string. If strict is False then literal\n"
558 "control characters are allowed in the string.\n"
559 "\n"
560 "Returns a tuple of the decoded string and the index of the character in s\n"
561 "after the end quote."
562);
Christian Heimes90540002008-05-08 14:29:10 +0000563
564static PyObject *
Victor Stinnerfbbfcce2019-11-05 11:44:28 +0100565py_scanstring(PyObject* Py_UNUSED(self), PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000566{
567 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000568 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000569 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000570 Py_ssize_t next_end = -1;
571 int strict = 1;
Antoine Pitroucbb02842012-12-01 19:34:16 +0100572 if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000573 return NULL;
574 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000575 if (PyUnicode_Check(pystr)) {
576 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000577 }
578 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000579 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000580 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000581 Py_TYPE(pystr)->tp_name);
582 return NULL;
583 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000584 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000585}
586
587PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000588 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000589 "\n"
590 "Return an ASCII-only JSON representation of a Python string"
591);
Christian Heimes90540002008-05-08 14:29:10 +0000592
593static PyObject *
Victor Stinnerfbbfcce2019-11-05 11:44:28 +0100594py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000595{
596 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000597 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000598 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000599 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000600 rval = ascii_escape_unicode(pystr);
601 }
602 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000603 PyErr_Format(PyExc_TypeError,
604 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000605 Py_TYPE(pystr)->tp_name);
606 return NULL;
607 }
Christian Heimes90540002008-05-08 14:29:10 +0000608 return rval;
609}
610
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100611
612PyDoc_STRVAR(pydoc_encode_basestring,
613 "encode_basestring(string) -> string\n"
614 "\n"
615 "Return a JSON representation of a Python string"
616);
617
618static PyObject *
Victor Stinnerfbbfcce2019-11-05 11:44:28 +0100619py_encode_basestring(PyObject* Py_UNUSED(self), PyObject *pystr)
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100620{
621 PyObject *rval;
622 /* Return a JSON representation of a Python string */
623 /* METH_O */
624 if (PyUnicode_Check(pystr)) {
625 rval = escape_unicode(pystr);
626 }
627 else {
628 PyErr_Format(PyExc_TypeError,
629 "first argument must be a string, not %.80s",
630 Py_TYPE(pystr)->tp_name);
631 return NULL;
632 }
633 return rval;
634}
635
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000636static void
637scanner_dealloc(PyObject *self)
638{
Dong-hee Na33f15a12020-03-27 19:59:59 +0900639 PyTypeObject *tp = Py_TYPE(self);
INADA Naokia6296d32017-08-24 14:55:17 +0900640 /* bpo-31095: UnTrack is needed before calling any callbacks */
641 PyObject_GC_UnTrack(self);
Dong-hee Na33f15a12020-03-27 19:59:59 +0900642 scanner_clear((PyScannerObject *)self);
643 tp->tp_free(self);
644 Py_DECREF(tp);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000645}
646
647static int
Dong-hee Na33f15a12020-03-27 19:59:59 +0900648scanner_traverse(PyScannerObject *self, visitproc visit, void *arg)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000649{
Dong-hee Na33f15a12020-03-27 19:59:59 +0900650 Py_VISIT(self->object_hook);
651 Py_VISIT(self->object_pairs_hook);
652 Py_VISIT(self->parse_float);
653 Py_VISIT(self->parse_int);
654 Py_VISIT(self->parse_constant);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000655 return 0;
656}
657
658static int
Dong-hee Na33f15a12020-03-27 19:59:59 +0900659scanner_clear(PyScannerObject *self)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000660{
Dong-hee Na33f15a12020-03-27 19:59:59 +0900661 Py_CLEAR(self->object_hook);
662 Py_CLEAR(self->object_pairs_hook);
663 Py_CLEAR(self->parse_float);
664 Py_CLEAR(self->parse_int);
665 Py_CLEAR(self->parse_constant);
666 Py_CLEAR(self->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000667 return 0;
668}
669
670static PyObject *
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300671_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
672{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000673 /* Read a JSON object from PyUnicode pystr.
674 idx is the index of the first character after the opening curly brace.
675 *next_idx_ptr is a return-by-reference index to the first character after
676 the closing curly brace.
677
678 Returns a new PyObject (usually a dict, but object_hook can change that)
679 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200680 void *str;
681 int kind;
682 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000683 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000684 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000685 PyObject *key = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000686 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000687 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000688
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200689 if (PyUnicode_READY(pystr) == -1)
690 return NULL;
691
692 str = PyUnicode_DATA(pystr);
693 kind = PyUnicode_KIND(pystr);
694 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
695
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000696 if (has_pairs_hook)
697 rval = PyList_New(0);
698 else
699 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000700 if (rval == NULL)
701 return NULL;
702
703 /* skip whitespace after { */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200704 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000705
706 /* only loop if the object is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200707 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
708 while (1) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000709 PyObject *memokey;
710
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000711 /* read key */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200712 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200713 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000714 goto bail;
715 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300716 key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000717 if (key == NULL)
718 goto bail;
Inada Naoki2a570af2019-08-08 17:57:10 +0900719 memokey = PyDict_SetDefault(s->memo, key, key);
720 if (memokey == NULL) {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200721 goto bail;
722 }
Inada Naoki2a570af2019-08-08 17:57:10 +0900723 Py_INCREF(memokey);
724 Py_DECREF(key);
725 key = memokey;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000726 idx = next_idx;
727
728 /* skip whitespace between key and : delimiter, read :, skip whitespace */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200729 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
730 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200731 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000732 goto bail;
733 }
734 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200735 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000736
737 /* read any JSON term */
738 val = scan_once_unicode(s, pystr, idx, &next_idx);
739 if (val == NULL)
740 goto bail;
741
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000742 if (has_pairs_hook) {
743 PyObject *item = PyTuple_Pack(2, key, val);
744 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000745 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000746 Py_CLEAR(key);
747 Py_CLEAR(val);
748 if (PyList_Append(rval, item) == -1) {
749 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000750 goto bail;
751 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000752 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000753 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000754 else {
755 if (PyDict_SetItem(rval, key, val) < 0)
756 goto bail;
757 Py_CLEAR(key);
758 Py_CLEAR(val);
759 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000760 idx = next_idx;
761
762 /* skip whitespace before } or , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200763 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000764
765 /* bail if the object is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200766 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000767 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200768 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200769 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000770 goto bail;
771 }
772 idx++;
773
774 /* skip whitespace after , delimiter */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200775 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000776 }
777 }
778
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000779 *next_idx_ptr = idx + 1;
780
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000781 if (has_pairs_hook) {
Petr Viktorinffd97532020-02-11 17:46:57 +0100782 val = PyObject_CallOneArg(s->object_pairs_hook, rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000783 Py_DECREF(rval);
784 return val;
785 }
786
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000787 /* if object_hook is not None: rval = object_hook(rval) */
788 if (s->object_hook != Py_None) {
Petr Viktorinffd97532020-02-11 17:46:57 +0100789 val = PyObject_CallOneArg(s->object_hook, rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000790 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000791 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000792 }
793 return rval;
794bail:
795 Py_XDECREF(key);
796 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000797 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000798 return NULL;
799}
800
801static PyObject *
802_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200803 /* Read a JSON array from PyUnicode pystr.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000804 idx is the index of the first character after the opening brace.
805 *next_idx_ptr is a return-by-reference index to the first character after
806 the closing brace.
807
808 Returns a new PyList
809 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200810 void *str;
811 int kind;
812 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000813 PyObject *val = NULL;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200814 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000815 Py_ssize_t next_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000816
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200817 if (PyUnicode_READY(pystr) == -1)
818 return NULL;
819
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200820 rval = PyList_New(0);
821 if (rval == NULL)
822 return NULL;
823
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200824 str = PyUnicode_DATA(pystr);
825 kind = PyUnicode_KIND(pystr);
826 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
827
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000828 /* skip whitespace after [ */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200829 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000830
831 /* only loop if the array is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200832 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
833 while (1) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000834
835 /* read any JSON term */
836 val = scan_once_unicode(s, pystr, idx, &next_idx);
837 if (val == NULL)
838 goto bail;
839
840 if (PyList_Append(rval, val) == -1)
841 goto bail;
842
843 Py_CLEAR(val);
844 idx = next_idx;
845
846 /* skip whitespace between term and , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200847 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000848
849 /* bail if the array is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200850 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000851 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200852 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200853 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000854 goto bail;
855 }
856 idx++;
857
858 /* skip whitespace after , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200859 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000860 }
861 }
862
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200863 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
864 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200865 raise_errmsg("Expecting value", pystr, end_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000866 goto bail;
867 }
868 *next_idx_ptr = idx + 1;
869 return rval;
870bail:
871 Py_XDECREF(val);
872 Py_DECREF(rval);
873 return NULL;
874}
875
876static PyObject *
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200877_parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
878 /* Read a JSON constant.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000879 constant is the constant string that was found
880 ("NaN", "Infinity", "-Infinity").
881 idx is the index of the first character of the constant
882 *next_idx_ptr is a return-by-reference index to the first character after
883 the constant.
884
885 Returns the result of parse_constant
886 */
887 PyObject *cstr;
888 PyObject *rval;
889 /* constant is "NaN", "Infinity", or "-Infinity" */
890 cstr = PyUnicode_InternFromString(constant);
891 if (cstr == NULL)
892 return NULL;
893
894 /* rval = parse_constant(constant) */
Petr Viktorinffd97532020-02-11 17:46:57 +0100895 rval = PyObject_CallOneArg(s->parse_constant, cstr);
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200896 idx += PyUnicode_GET_LENGTH(cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000897 Py_DECREF(cstr);
898 *next_idx_ptr = idx;
899 return rval;
900}
901
902static PyObject *
903_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
904 /* Read a JSON number from PyUnicode pystr.
905 idx is the index of the first character of the number
906 *next_idx_ptr is a return-by-reference index to the first character after
907 the number.
908
909 Returns a new PyObject representation of that number:
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200910 PyLong, or PyFloat.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000911 May return other types if parse_int or parse_float are set
912 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200913 void *str;
914 int kind;
915 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000916 Py_ssize_t idx = start;
917 int is_float = 0;
918 PyObject *rval;
Antoine Pitrouf6454512011-04-25 19:16:06 +0200919 PyObject *numstr = NULL;
920 PyObject *custom_func;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000921
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200922 if (PyUnicode_READY(pystr) == -1)
923 return NULL;
924
925 str = PyUnicode_DATA(pystr);
926 kind = PyUnicode_KIND(pystr);
927 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
928
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000929 /* read a sign if it's there, make sure it's not the end of the string */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200930 if (PyUnicode_READ(kind, str, idx) == '-') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000931 idx++;
932 if (idx > end_idx) {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200933 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000934 return NULL;
935 }
936 }
937
938 /* read as many integer digits as we find as long as it doesn't start with 0 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200939 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000940 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200941 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000942 }
943 /* if it starts with 0 we only expect one integer digit */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200944 else if (PyUnicode_READ(kind, str, idx) == '0') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000945 idx++;
946 }
947 /* no integer digits, error */
948 else {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200949 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000950 return NULL;
951 }
952
953 /* if the next char is '.' followed by a digit then read all float digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200954 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000955 is_float = 1;
956 idx += 2;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200957 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000958 }
959
960 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200961 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000962 Py_ssize_t e_start = idx;
963 idx++;
964
965 /* read an exponent sign if present */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200966 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000967
968 /* read all digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200969 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000970
971 /* if we got a digit, then parse as float. if not, backtrack */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200972 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000973 is_float = 1;
974 }
975 else {
976 idx = e_start;
977 }
978 }
979
Antoine Pitrouf6454512011-04-25 19:16:06 +0200980 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
981 custom_func = s->parse_float;
982 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
983 custom_func = s->parse_int;
984 else
985 custom_func = NULL;
986
987 if (custom_func) {
988 /* copy the section we determined to be a number */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200989 numstr = PyUnicode_FromKindAndData(kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200990 (char*)str + kind * start,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200991 idx - start);
Antoine Pitrouf6454512011-04-25 19:16:06 +0200992 if (numstr == NULL)
993 return NULL;
Petr Viktorinffd97532020-02-11 17:46:57 +0100994 rval = PyObject_CallOneArg(custom_func, numstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000995 }
996 else {
Antoine Pitrouf6454512011-04-25 19:16:06 +0200997 Py_ssize_t i, n;
998 char *buf;
999 /* Straight conversion to ASCII, to avoid costly conversion of
1000 decimal unicode digits (which cannot appear here) */
1001 n = idx - start;
1002 numstr = PyBytes_FromStringAndSize(NULL, n);
1003 if (numstr == NULL)
1004 return NULL;
1005 buf = PyBytes_AS_STRING(numstr);
1006 for (i = 0; i < n; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001007 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001008 }
1009 if (is_float)
1010 rval = PyFloat_FromString(numstr);
1011 else
1012 rval = PyLong_FromString(buf, NULL, 10);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001013 }
1014 Py_DECREF(numstr);
1015 *next_idx_ptr = idx;
1016 return rval;
1017}
1018
1019static PyObject *
1020scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1021{
1022 /* Read one JSON term (of any kind) from PyUnicode pystr.
1023 idx is the index of the first character of the term
1024 *next_idx_ptr is a return-by-reference index to the first character after
1025 the number.
1026
1027 Returns a new PyObject representation of the term.
1028 */
Ezio Melotti362b9512011-05-07 17:58:09 +03001029 PyObject *res;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001030 void *str;
1031 int kind;
1032 Py_ssize_t length;
1033
1034 if (PyUnicode_READY(pystr) == -1)
1035 return NULL;
1036
1037 str = PyUnicode_DATA(pystr);
1038 kind = PyUnicode_KIND(pystr);
1039 length = PyUnicode_GET_LENGTH(pystr);
1040
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001041 if (idx < 0) {
Benjamin Peterson9beee042014-04-14 11:46:51 -04001042 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001043 return NULL;
1044 }
1045 if (idx >= length) {
Ezio Melotti37623ab2013-01-03 08:44:15 +02001046 raise_stop_iteration(idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001047 return NULL;
1048 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001049
1050 switch (PyUnicode_READ(kind, str, idx)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001051 case '"':
1052 /* string */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001053 return scanstring_unicode(pystr, idx + 1, s->strict, next_idx_ptr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001054 case '{':
1055 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +03001056 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1057 "from a unicode string"))
1058 return NULL;
1059 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1060 Py_LeaveRecursiveCall();
1061 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001062 case '[':
1063 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +03001064 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1065 "from a unicode string"))
1066 return NULL;
1067 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1068 Py_LeaveRecursiveCall();
1069 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001070 case 'n':
1071 /* null */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001072 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001073 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001074 Py_RETURN_NONE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001075 }
1076 break;
1077 case 't':
1078 /* true */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001079 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001080 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001081 Py_RETURN_TRUE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001082 }
1083 break;
1084 case 'f':
1085 /* false */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001086 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1087 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1088 PyUnicode_READ(kind, str, idx + 3) == 's' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001089 PyUnicode_READ(kind, str, idx + 4) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001090 *next_idx_ptr = idx + 5;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001091 Py_RETURN_FALSE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001092 }
1093 break;
1094 case 'N':
1095 /* NaN */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001096 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001097 PyUnicode_READ(kind, str, idx + 2) == 'N') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001098 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1099 }
1100 break;
1101 case 'I':
1102 /* Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001103 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1104 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1105 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001106 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001107 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1108 PyUnicode_READ(kind, str, idx + 6) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001109 PyUnicode_READ(kind, str, idx + 7) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001110 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1111 }
1112 break;
1113 case '-':
1114 /* -Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001115 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001116 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1117 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001118 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001119 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001120 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1121 PyUnicode_READ(kind, str, idx + 7) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001122 PyUnicode_READ(kind, str, idx + 8) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001123 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1124 }
1125 break;
1126 }
1127 /* Didn't find a string, object, array, or named constant. Look for a number. */
1128 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1129}
1130
1131static PyObject *
Dong-hee Na33f15a12020-03-27 19:59:59 +09001132scanner_call(PyScannerObject *self, PyObject *args, PyObject *kwds)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001133{
1134 /* Python callable interface to scan_once_{str,unicode} */
1135 PyObject *pystr;
1136 PyObject *rval;
1137 Py_ssize_t idx;
1138 Py_ssize_t next_idx = -1;
1139 static char *kwlist[] = {"string", "idx", NULL};
Antoine Pitroucbb02842012-12-01 19:34:16 +01001140 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001141 return NULL;
1142
1143 if (PyUnicode_Check(pystr)) {
Dong-hee Na33f15a12020-03-27 19:59:59 +09001144 rval = scan_once_unicode(self, pystr, idx, &next_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001145 }
1146 else {
1147 PyErr_Format(PyExc_TypeError,
1148 "first argument must be a string, not %.80s",
1149 Py_TYPE(pystr)->tp_name);
1150 return NULL;
1151 }
Dong-hee Na33f15a12020-03-27 19:59:59 +09001152 PyDict_Clear(self->memo);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001153 if (rval == NULL)
1154 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001155 return _build_rval_index_tuple(rval, next_idx);
1156}
1157
1158static PyObject *
1159scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1160{
1161 PyScannerObject *s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001162 PyObject *ctx;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001163 PyObject *strict;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001164 static char *kwlist[] = {"context", NULL};
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001165
1166 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001167 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001168
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001169 s = (PyScannerObject *)type->tp_alloc(type, 0);
1170 if (s == NULL) {
1171 return NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001172 }
1173
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001174 s->memo = PyDict_New();
1175 if (s->memo == NULL)
1176 goto bail;
1177
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001178 /* All of these will fail "gracefully" so we don't need to verify them */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001179 strict = PyObject_GetAttrString(ctx, "strict");
1180 if (strict == NULL)
1181 goto bail;
1182 s->strict = PyObject_IsTrue(strict);
1183 Py_DECREF(strict);
1184 if (s->strict < 0)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001185 goto bail;
1186 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1187 if (s->object_hook == NULL)
1188 goto bail;
1189 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1190 if (s->object_pairs_hook == NULL)
1191 goto bail;
1192 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1193 if (s->parse_float == NULL)
1194 goto bail;
1195 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1196 if (s->parse_int == NULL)
1197 goto bail;
1198 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1199 if (s->parse_constant == NULL)
1200 goto bail;
1201
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001202 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001203
1204bail:
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001205 Py_DECREF(s);
1206 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001207}
1208
1209PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1210
Dong-hee Na33f15a12020-03-27 19:59:59 +09001211static PyType_Slot PyScannerType_slots[] = {
1212 {Py_tp_doc, (void *)scanner_doc},
1213 {Py_tp_dealloc, scanner_dealloc},
1214 {Py_tp_call, scanner_call},
1215 {Py_tp_traverse, scanner_traverse},
1216 {Py_tp_clear, scanner_clear},
1217 {Py_tp_members, scanner_members},
1218 {Py_tp_new, scanner_new},
1219 {0, 0}
1220};
1221
1222static PyType_Spec PyScannerType_spec = {
1223 .name = "_json.Scanner",
1224 .basicsize = sizeof(PyScannerObject),
1225 .itemsize = 0,
1226 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
1227 .slots = PyScannerType_slots,
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001228};
1229
1230static PyObject *
1231encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1232{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001233 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1234
1235 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001236 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001237 PyObject *item_separator;
1238 int sort_keys, skipkeys, allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001239
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001240 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001241 &markers, &defaultfn, &encoder, &indent,
1242 &key_separator, &item_separator,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001243 &sort_keys, &skipkeys, &allow_nan))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001244 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001245
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001246 if (markers != Py_None && !PyDict_Check(markers)) {
1247 PyErr_Format(PyExc_TypeError,
1248 "make_encoder() argument 1 must be dict or None, "
1249 "not %.200s", Py_TYPE(markers)->tp_name);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001250 return NULL;
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001251 }
1252
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001253 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1254 if (s == NULL)
1255 return NULL;
1256
Antoine Pitrou781eba72009-12-08 15:57:31 +00001257 s->markers = markers;
1258 s->defaultfn = defaultfn;
1259 s->encoder = encoder;
1260 s->indent = indent;
1261 s->key_separator = key_separator;
1262 s->item_separator = item_separator;
1263 s->sort_keys = sort_keys;
1264 s->skipkeys = skipkeys;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001265 s->allow_nan = allow_nan;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001266 s->fast_encode = NULL;
1267 if (PyCFunction_Check(s->encoder)) {
1268 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1269 if (f == (PyCFunction)py_encode_basestring_ascii ||
1270 f == (PyCFunction)py_encode_basestring) {
1271 s->fast_encode = f;
1272 }
1273 }
Antoine Pitrou781eba72009-12-08 15:57:31 +00001274
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001275 Py_INCREF(s->markers);
1276 Py_INCREF(s->defaultfn);
1277 Py_INCREF(s->encoder);
1278 Py_INCREF(s->indent);
1279 Py_INCREF(s->key_separator);
1280 Py_INCREF(s->item_separator);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001281 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001282}
1283
1284static PyObject *
Dong-hee Na33f15a12020-03-27 19:59:59 +09001285encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001286{
1287 /* Python callable interface to encode_listencode_obj */
1288 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1289 PyObject *obj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001290 Py_ssize_t indent_level;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001291 _PyAccu acc;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001292 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1293 &obj, &indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001294 return NULL;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001295 if (_PyAccu_Init(&acc))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001296 return NULL;
Dong-hee Na33f15a12020-03-27 19:59:59 +09001297 if (encoder_listencode_obj(self, &acc, obj, indent_level)) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001298 _PyAccu_Destroy(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001299 return NULL;
1300 }
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001301 return _PyAccu_FinishAsList(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001302}
1303
1304static PyObject *
1305_encoded_const(PyObject *obj)
1306{
1307 /* Return the JSON string representation of None, True, False */
1308 if (obj == Py_None) {
1309 static PyObject *s_null = NULL;
1310 if (s_null == NULL) {
1311 s_null = PyUnicode_InternFromString("null");
1312 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001313 Py_XINCREF(s_null);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001314 return s_null;
1315 }
1316 else if (obj == Py_True) {
1317 static PyObject *s_true = NULL;
1318 if (s_true == NULL) {
1319 s_true = PyUnicode_InternFromString("true");
1320 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001321 Py_XINCREF(s_true);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001322 return s_true;
1323 }
1324 else if (obj == Py_False) {
1325 static PyObject *s_false = NULL;
1326 if (s_false == NULL) {
1327 s_false = PyUnicode_InternFromString("false");
1328 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001329 Py_XINCREF(s_false);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001330 return s_false;
1331 }
1332 else {
1333 PyErr_SetString(PyExc_ValueError, "not a const");
1334 return NULL;
1335 }
1336}
1337
1338static PyObject *
1339encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1340{
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001341 /* Return the JSON representation of a PyFloat. */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001342 double i = PyFloat_AS_DOUBLE(obj);
1343 if (!Py_IS_FINITE(i)) {
1344 if (!s->allow_nan) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001345 PyErr_SetString(
1346 PyExc_ValueError,
1347 "Out of range float values are not JSON compliant"
1348 );
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001349 return NULL;
1350 }
1351 if (i > 0) {
1352 return PyUnicode_FromString("Infinity");
1353 }
1354 else if (i < 0) {
1355 return PyUnicode_FromString("-Infinity");
1356 }
1357 else {
1358 return PyUnicode_FromString("NaN");
1359 }
1360 }
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001361 return PyFloat_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001362}
1363
1364static PyObject *
1365encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1366{
1367 /* Return the JSON representation of a string */
Oren Milman2b382dd2017-09-24 12:07:12 +03001368 PyObject *encoded;
1369
1370 if (s->fast_encode) {
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001371 return s->fast_encode(NULL, obj);
Oren Milman2b382dd2017-09-24 12:07:12 +03001372 }
Petr Viktorinffd97532020-02-11 17:46:57 +01001373 encoded = PyObject_CallOneArg(s->encoder, obj);
Oren Milman2b382dd2017-09-24 12:07:12 +03001374 if (encoded != NULL && !PyUnicode_Check(encoded)) {
1375 PyErr_Format(PyExc_TypeError,
1376 "encoder() must return a string, not %.80s",
1377 Py_TYPE(encoded)->tp_name);
1378 Py_DECREF(encoded);
1379 return NULL;
1380 }
1381 return encoded;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001382}
1383
1384static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001385_steal_accumulate(_PyAccu *acc, PyObject *stolen)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001386{
1387 /* Append stolen and then decrement its reference count */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001388 int rval = _PyAccu_Accumulate(acc, stolen);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001389 Py_DECREF(stolen);
1390 return rval;
1391}
1392
1393static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001394encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001395 PyObject *obj, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001396{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001397 /* Encode Python object obj to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001398 PyObject *newobj;
1399 int rv;
1400
1401 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1402 PyObject *cstr = _encoded_const(obj);
1403 if (cstr == NULL)
1404 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001405 return _steal_accumulate(acc, cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001406 }
1407 else if (PyUnicode_Check(obj))
1408 {
1409 PyObject *encoded = encoder_encode_string(s, obj);
1410 if (encoded == NULL)
1411 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001412 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001413 }
1414 else if (PyLong_Check(obj)) {
Serhiy Storchaka96aeaec2019-05-06 22:29:40 +03001415 PyObject *encoded = PyLong_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001416 if (encoded == NULL)
1417 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001418 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001419 }
1420 else if (PyFloat_Check(obj)) {
1421 PyObject *encoded = encoder_encode_float(s, obj);
1422 if (encoded == NULL)
1423 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001424 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001425 }
1426 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001427 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1428 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001429 rv = encoder_listencode_list(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001430 Py_LeaveRecursiveCall();
1431 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001432 }
1433 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001434 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1435 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001436 rv = encoder_listencode_dict(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001437 Py_LeaveRecursiveCall();
1438 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001439 }
1440 else {
1441 PyObject *ident = NULL;
1442 if (s->markers != Py_None) {
1443 int has_key;
1444 ident = PyLong_FromVoidPtr(obj);
1445 if (ident == NULL)
1446 return -1;
1447 has_key = PyDict_Contains(s->markers, ident);
1448 if (has_key) {
1449 if (has_key != -1)
1450 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1451 Py_DECREF(ident);
1452 return -1;
1453 }
1454 if (PyDict_SetItem(s->markers, ident, obj)) {
1455 Py_DECREF(ident);
1456 return -1;
1457 }
1458 }
Petr Viktorinffd97532020-02-11 17:46:57 +01001459 newobj = PyObject_CallOneArg(s->defaultfn, obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001460 if (newobj == NULL) {
1461 Py_XDECREF(ident);
1462 return -1;
1463 }
Ezio Melotti13672652011-05-11 01:02:56 +03001464
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001465 if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
1466 Py_DECREF(newobj);
1467 Py_XDECREF(ident);
Ezio Melotti13672652011-05-11 01:02:56 +03001468 return -1;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001469 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001470 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001471 Py_LeaveRecursiveCall();
1472
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001473 Py_DECREF(newobj);
1474 if (rv) {
1475 Py_XDECREF(ident);
1476 return -1;
1477 }
1478 if (ident != NULL) {
1479 if (PyDict_DelItem(s->markers, ident)) {
1480 Py_XDECREF(ident);
1481 return -1;
1482 }
1483 Py_XDECREF(ident);
1484 }
1485 return rv;
1486 }
1487}
1488
1489static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001490encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001491 PyObject *dct, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001492{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001493 /* Encode Python dict dct a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001494 static PyObject *open_dict = NULL;
1495 static PyObject *close_dict = NULL;
1496 static PyObject *empty_dict = NULL;
1497 PyObject *kstr = NULL;
1498 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001499 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001500 PyObject *items;
1501 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001502 Py_ssize_t idx;
1503
1504 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1505 open_dict = PyUnicode_InternFromString("{");
1506 close_dict = PyUnicode_InternFromString("}");
1507 empty_dict = PyUnicode_InternFromString("{}");
1508 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1509 return -1;
1510 }
Serhiy Storchaka8cbc51a2017-01-13 08:38:15 +02001511 if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001512 return _PyAccu_Accumulate(acc, empty_dict);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001513
1514 if (s->markers != Py_None) {
1515 int has_key;
1516 ident = PyLong_FromVoidPtr(dct);
1517 if (ident == NULL)
1518 goto bail;
1519 has_key = PyDict_Contains(s->markers, ident);
1520 if (has_key) {
1521 if (has_key != -1)
1522 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1523 goto bail;
1524 }
1525 if (PyDict_SetItem(s->markers, ident, dct)) {
1526 goto bail;
1527 }
1528 }
1529
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001530 if (_PyAccu_Accumulate(acc, open_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001531 goto bail;
1532
1533 if (s->indent != Py_None) {
1534 /* TODO: DOES NOT RUN */
1535 indent_level += 1;
1536 /*
1537 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1538 separator = _item_separator + newline_indent
1539 buf += newline_indent
1540 */
1541 }
1542
Benjamin Peterson501182a2015-05-02 22:28:04 -04001543 items = PyMapping_Items(dct);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001544 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001545 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001546 if (s->sort_keys && PyList_Sort(items) < 0) {
1547 Py_DECREF(items);
Benjamin Peterson501182a2015-05-02 22:28:04 -04001548 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001549 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001550 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001551 Py_DECREF(items);
1552 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001553 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001554 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001555 while ((item = PyIter_Next(it)) != NULL) {
1556 PyObject *encoded, *key, *value;
Serhiy Storchakafff9a312017-03-21 08:53:25 +02001557 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001558 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1559 goto bail;
1560 }
1561 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001562 if (PyUnicode_Check(key)) {
1563 Py_INCREF(key);
1564 kstr = key;
1565 }
1566 else if (PyFloat_Check(key)) {
1567 kstr = encoder_encode_float(s, key);
1568 if (kstr == NULL)
1569 goto bail;
1570 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001571 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001572 /* This must come before the PyLong_Check because
1573 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001574 kstr = _encoded_const(key);
1575 if (kstr == NULL)
1576 goto bail;
1577 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001578 else if (PyLong_Check(key)) {
Serhiy Storchaka96aeaec2019-05-06 22:29:40 +03001579 kstr = PyLong_Type.tp_repr(key);
Ethan Furmana4998a72013-08-10 13:01:45 -07001580 if (kstr == NULL) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001581 goto bail;
Ethan Furmana4998a72013-08-10 13:01:45 -07001582 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001583 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001584 else if (s->skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001585 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001586 continue;
1587 }
1588 else {
Serhiy Storchakacfa797c2017-11-25 17:38:20 +02001589 PyErr_Format(PyExc_TypeError,
1590 "keys must be str, int, float, bool or None, "
Victor Stinnerdaa97562020-02-07 03:37:06 +01001591 "not %.100s", Py_TYPE(key)->tp_name);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001592 goto bail;
1593 }
1594
1595 if (idx) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001596 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001597 goto bail;
1598 }
1599
1600 encoded = encoder_encode_string(s, kstr);
1601 Py_CLEAR(kstr);
1602 if (encoded == NULL)
1603 goto bail;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001604 if (_PyAccu_Accumulate(acc, encoded)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001605 Py_DECREF(encoded);
1606 goto bail;
1607 }
1608 Py_DECREF(encoded);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001609 if (_PyAccu_Accumulate(acc, s->key_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001610 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001611
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001612 value = PyTuple_GET_ITEM(item, 1);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001613 if (encoder_listencode_obj(s, acc, value, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001614 goto bail;
1615 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001616 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001617 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001618 if (PyErr_Occurred())
1619 goto bail;
1620 Py_CLEAR(it);
1621
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001622 if (ident != NULL) {
1623 if (PyDict_DelItem(s->markers, ident))
1624 goto bail;
1625 Py_CLEAR(ident);
1626 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001627 /* TODO DOES NOT RUN; dead code
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001628 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001629 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001630
1631 yield '\n' + (' ' * (_indent * _current_indent_level))
1632 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001633 if (_PyAccu_Accumulate(acc, close_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001634 goto bail;
1635 return 0;
1636
1637bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001638 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001639 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001640 Py_XDECREF(kstr);
1641 Py_XDECREF(ident);
1642 return -1;
1643}
1644
1645
1646static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001647encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001648 PyObject *seq, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001649{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001650 /* Encode Python list seq to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001651 static PyObject *open_array = NULL;
1652 static PyObject *close_array = NULL;
1653 static PyObject *empty_array = NULL;
1654 PyObject *ident = NULL;
1655 PyObject *s_fast = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001656 Py_ssize_t i;
1657
1658 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1659 open_array = PyUnicode_InternFromString("[");
1660 close_array = PyUnicode_InternFromString("]");
1661 empty_array = PyUnicode_InternFromString("[]");
1662 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1663 return -1;
1664 }
1665 ident = NULL;
1666 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1667 if (s_fast == NULL)
1668 return -1;
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001669 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001670 Py_DECREF(s_fast);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001671 return _PyAccu_Accumulate(acc, empty_array);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001672 }
1673
1674 if (s->markers != Py_None) {
1675 int has_key;
1676 ident = PyLong_FromVoidPtr(seq);
1677 if (ident == NULL)
1678 goto bail;
1679 has_key = PyDict_Contains(s->markers, ident);
1680 if (has_key) {
1681 if (has_key != -1)
1682 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1683 goto bail;
1684 }
1685 if (PyDict_SetItem(s->markers, ident, seq)) {
1686 goto bail;
1687 }
1688 }
1689
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001690 if (_PyAccu_Accumulate(acc, open_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001691 goto bail;
1692 if (s->indent != Py_None) {
1693 /* TODO: DOES NOT RUN */
1694 indent_level += 1;
1695 /*
1696 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1697 separator = _item_separator + newline_indent
1698 buf += newline_indent
1699 */
1700 }
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001701 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1702 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001703 if (i) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001704 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001705 goto bail;
1706 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001707 if (encoder_listencode_obj(s, acc, obj, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001708 goto bail;
1709 }
1710 if (ident != NULL) {
1711 if (PyDict_DelItem(s->markers, ident))
1712 goto bail;
1713 Py_CLEAR(ident);
1714 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001715
1716 /* TODO: DOES NOT RUN
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001717 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001718 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001719
1720 yield '\n' + (' ' * (_indent * _current_indent_level))
1721 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001722 if (_PyAccu_Accumulate(acc, close_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001723 goto bail;
1724 Py_DECREF(s_fast);
1725 return 0;
1726
1727bail:
1728 Py_XDECREF(ident);
1729 Py_DECREF(s_fast);
1730 return -1;
1731}
1732
1733static void
1734encoder_dealloc(PyObject *self)
1735{
Dong-hee Na33f15a12020-03-27 19:59:59 +09001736 PyTypeObject *tp = Py_TYPE(self);
INADA Naokia6296d32017-08-24 14:55:17 +09001737 /* bpo-31095: UnTrack is needed before calling any callbacks */
1738 PyObject_GC_UnTrack(self);
Dong-hee Na33f15a12020-03-27 19:59:59 +09001739 encoder_clear((PyEncoderObject *)self);
1740 tp->tp_free(self);
1741 Py_DECREF(tp);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001742}
1743
1744static int
Dong-hee Na33f15a12020-03-27 19:59:59 +09001745encoder_traverse(PyEncoderObject *self, visitproc visit, void *arg)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001746{
Dong-hee Na33f15a12020-03-27 19:59:59 +09001747 Py_VISIT(self->markers);
1748 Py_VISIT(self->defaultfn);
1749 Py_VISIT(self->encoder);
1750 Py_VISIT(self->indent);
1751 Py_VISIT(self->key_separator);
1752 Py_VISIT(self->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001753 return 0;
1754}
1755
1756static int
Dong-hee Na33f15a12020-03-27 19:59:59 +09001757encoder_clear(PyEncoderObject *self)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001758{
1759 /* Deallocate Encoder */
Dong-hee Na33f15a12020-03-27 19:59:59 +09001760 Py_CLEAR(self->markers);
1761 Py_CLEAR(self->defaultfn);
1762 Py_CLEAR(self->encoder);
1763 Py_CLEAR(self->indent);
1764 Py_CLEAR(self->key_separator);
1765 Py_CLEAR(self->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001766 return 0;
1767}
1768
1769PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1770
Dong-hee Na33f15a12020-03-27 19:59:59 +09001771static PyType_Slot PyEncoderType_slots[] = {
1772 {Py_tp_doc, (void *)encoder_doc},
1773 {Py_tp_dealloc, encoder_dealloc},
1774 {Py_tp_call, encoder_call},
1775 {Py_tp_traverse, encoder_traverse},
1776 {Py_tp_clear, encoder_clear},
1777 {Py_tp_members, encoder_members},
1778 {Py_tp_new, encoder_new},
1779 {0, 0}
1780};
1781
1782static PyType_Spec PyEncoderType_spec = {
1783 .name = "_json.Encoder",
1784 .basicsize = sizeof(PyEncoderObject),
1785 .itemsize = 0,
1786 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
1787 .slots = PyEncoderType_slots
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001788};
1789
1790static PyMethodDef speedups_methods[] = {
1791 {"encode_basestring_ascii",
1792 (PyCFunction)py_encode_basestring_ascii,
1793 METH_O,
1794 pydoc_encode_basestring_ascii},
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001795 {"encode_basestring",
1796 (PyCFunction)py_encode_basestring,
1797 METH_O,
1798 pydoc_encode_basestring},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001799 {"scanstring",
1800 (PyCFunction)py_scanstring,
1801 METH_VARARGS,
1802 pydoc_scanstring},
Christian Heimes90540002008-05-08 14:29:10 +00001803 {NULL, NULL, 0, NULL}
1804};
1805
1806PyDoc_STRVAR(module_doc,
1807"json speedups\n");
1808
Hai Shied154c32020-01-16 00:32:51 +08001809static int
1810_json_exec(PyObject *module)
1811{
Dong-hee Na33f15a12020-03-27 19:59:59 +09001812 _jsonmodulestate *state = get_json_state(module);
1813
1814 state->PyScannerType = PyType_FromSpec(&PyScannerType_spec);
1815 if (state->PyScannerType == NULL) {
Hai Shied154c32020-01-16 00:32:51 +08001816 return -1;
1817 }
Dong-hee Na33f15a12020-03-27 19:59:59 +09001818 Py_INCREF(state->PyScannerType);
1819 if (PyModule_AddObject(module, "make_scanner", state->PyScannerType) < 0) {
1820 Py_DECREF((PyObject*)state->PyScannerType);
Hai Shied154c32020-01-16 00:32:51 +08001821 return -1;
1822 }
Dong-hee Na33f15a12020-03-27 19:59:59 +09001823
1824 state->PyEncoderType = PyType_FromSpec(&PyEncoderType_spec);
1825 if (state->PyEncoderType == NULL) {
Hai Shied154c32020-01-16 00:32:51 +08001826 return -1;
1827 }
Dong-hee Na33f15a12020-03-27 19:59:59 +09001828 Py_INCREF(state->PyEncoderType);
1829 if (PyModule_AddObject(module, "make_encoder", state->PyEncoderType) < 0) {
1830 Py_DECREF((PyObject*)state->PyEncoderType);
Hai Shied154c32020-01-16 00:32:51 +08001831 return -1;
1832 }
Dong-hee Na33f15a12020-03-27 19:59:59 +09001833
Hai Shied154c32020-01-16 00:32:51 +08001834 return 0;
1835}
1836
Dong-hee Na33f15a12020-03-27 19:59:59 +09001837static int
1838_jsonmodule_traverse(PyObject *module, visitproc visit, void *arg)
1839{
1840 _jsonmodulestate *state = get_json_state(module);
1841 Py_VISIT(state->PyScannerType);
1842 Py_VISIT(state->PyEncoderType);
1843 return 0;
1844}
1845
1846static int
1847_jsonmodule_clear(PyObject *module)
1848{
1849 _jsonmodulestate *state = get_json_state(module);
1850 Py_CLEAR(state->PyScannerType);
1851 Py_CLEAR(state->PyEncoderType);
1852 return 0;
1853}
1854
1855static void
1856_jsonmodule_free(void *module)
1857{
1858 _jsonmodule_clear((PyObject *)module);
1859}
1860
Hai Shied154c32020-01-16 00:32:51 +08001861static PyModuleDef_Slot _json_slots[] = {
1862 {Py_mod_exec, _json_exec},
1863 {0, NULL}
1864};
1865
Martin v. Löwis1a214512008-06-11 05:26:20 +00001866static struct PyModuleDef jsonmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001867 PyModuleDef_HEAD_INIT,
1868 "_json",
1869 module_doc,
Dong-hee Na33f15a12020-03-27 19:59:59 +09001870 sizeof(_jsonmodulestate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001871 speedups_methods,
Hai Shied154c32020-01-16 00:32:51 +08001872 _json_slots,
Dong-hee Na33f15a12020-03-27 19:59:59 +09001873 _jsonmodule_traverse,
1874 _jsonmodule_clear,
1875 _jsonmodule_free,
Martin v. Löwis1a214512008-06-11 05:26:20 +00001876};
1877
Victor Stinnerf024d262015-03-17 17:48:27 +01001878PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001879PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00001880{
Hai Shied154c32020-01-16 00:32:51 +08001881 return PyModuleDef_Init(&jsonmodule);
Christian Heimes90540002008-05-08 14:29:10 +00001882}