blob: a70043b605f6b1d9999ff8703bf80c12530c8837 [file] [log] [blame]
/* JSON accelerator C extension: _json module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
 * platforms. */
Eric Snow2ebc5ce2017-09-07 23:51:28 -06006
Victor Stinner5c75f372019-04-17 23:02:26 +02007#if !defined(Py_BUILD_CORE_BUILTIN) && !defined(Py_BUILD_CORE_MODULE)
8# error "Py_BUILD_CORE_BUILTIN or Py_BUILD_CORE_MODULE must be defined"
Eric Snow2ebc5ce2017-09-07 23:51:28 -06009#endif
10
Christian Heimes90540002008-05-08 14:29:10 +000011#include "Python.h"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000012#include "structmember.h"
Victor Stinnere281f7d2018-11-01 02:30:36 +010013#include "pycore_accu.h"
Antoine Pitroud0acb412012-03-22 14:42:18 +010014
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000015#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
16#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
17#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
18#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
19
20static PyTypeObject PyScannerType;
21static PyTypeObject PyEncoderType;
22
23typedef struct _PyScannerObject {
24 PyObject_HEAD
Segev Finer541bd282017-07-13 08:52:08 +030025 signed char strict;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000026 PyObject *object_hook;
27 PyObject *object_pairs_hook;
28 PyObject *parse_float;
29 PyObject *parse_int;
30 PyObject *parse_constant;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +000031 PyObject *memo;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000032} PyScannerObject;
33
34static PyMemberDef scanner_members[] = {
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030035 {"strict", T_BOOL, offsetof(PyScannerObject, strict), READONLY, "strict"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000036 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
37 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
38 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
39 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
40 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
41 {NULL}
42};
43
44typedef struct _PyEncoderObject {
45 PyObject_HEAD
46 PyObject *markers;
47 PyObject *defaultfn;
48 PyObject *encoder;
49 PyObject *indent;
50 PyObject *key_separator;
51 PyObject *item_separator;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030052 char sort_keys;
53 char skipkeys;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000054 int allow_nan;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030055 PyCFunction fast_encode;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000056} PyEncoderObject;
57
58static PyMemberDef encoder_members[] = {
59 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
60 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
61 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
62 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
63 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
64 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030065 {"sort_keys", T_BOOL, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
66 {"skipkeys", T_BOOL, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000067 {NULL}
68};
69
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020070/* Forward decls */
71
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000072static PyObject *
73ascii_escape_unicode(PyObject *pystr);
74static PyObject *
Victor Stinnerfbbfcce2019-11-05 11:44:28 +010075py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000076void init_json(void);
77static PyObject *
78scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
79static PyObject *
80_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
81static PyObject *
82scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000083static void
84scanner_dealloc(PyObject *self);
85static int
86scanner_clear(PyObject *self);
87static PyObject *
88encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000089static void
90encoder_dealloc(PyObject *self);
91static int
92encoder_clear(PyObject *self);
93static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +020094encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000095static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +020096encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000097static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +020098encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000099static PyObject *
Hirokazu Yamamotofecf5d12009-05-02 15:55:19 +0000100_encoded_const(PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000101static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200102raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000103static PyObject *
104encoder_encode_string(PyEncoderObject *s, PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000105static PyObject *
106encoder_encode_float(PyEncoderObject *s, PyObject *obj);
107
/* True when c is printable ASCII (' '..'~') and needs no escaping inside a
   JSON string, i.e. it is neither a backslash nor a double quote.  The
   argument is evaluated several times, so pass a side-effect-free expression. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* True for the four characters treated as whitespace: space, tab, LF, CR. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
Christian Heimes90540002008-05-08 14:29:10 +0000110
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000111static Py_ssize_t
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200112ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000113{
114 /* Escape unicode code point c to ASCII escape sequences
115 in char *output. output must have at least 12 bytes unused to
116 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000117 output[chars++] = '\\';
118 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000119 case '\\': output[chars++] = c; break;
120 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000121 case '\b': output[chars++] = 'b'; break;
122 case '\f': output[chars++] = 'f'; break;
123 case '\n': output[chars++] = 'n'; break;
124 case '\r': output[chars++] = 'r'; break;
125 case '\t': output[chars++] = 't'; break;
126 default:
Christian Heimes90540002008-05-08 14:29:10 +0000127 if (c >= 0x10000) {
128 /* UTF-16 surrogate pair */
Victor Stinner76df43d2012-10-30 01:42:39 +0100129 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000130 output[chars++] = 'u';
Victor Stinner76df43d2012-10-30 01:42:39 +0100131 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
132 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
133 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
134 output[chars++] = Py_hexdigits[(v ) & 0xf];
135 c = Py_UNICODE_LOW_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000136 output[chars++] = '\\';
137 }
Christian Heimes90540002008-05-08 14:29:10 +0000138 output[chars++] = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200139 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
140 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
141 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
142 output[chars++] = Py_hexdigits[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000143 }
144 return chars;
145}
146
147static PyObject *
148ascii_escape_unicode(PyObject *pystr)
149{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000150 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000151 Py_ssize_t i;
152 Py_ssize_t input_chars;
153 Py_ssize_t output_size;
154 Py_ssize_t chars;
155 PyObject *rval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200156 void *input;
157 unsigned char *output;
158 int kind;
Christian Heimes90540002008-05-08 14:29:10 +0000159
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200160 if (PyUnicode_READY(pystr) == -1)
161 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000162
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200163 input_chars = PyUnicode_GET_LENGTH(pystr);
164 input = PyUnicode_DATA(pystr);
165 kind = PyUnicode_KIND(pystr);
166
167 /* Compute the output size */
168 for (i = 0, output_size = 2; i < input_chars; i++) {
169 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500170 Py_ssize_t d;
171 if (S_CHAR(c)) {
172 d = 1;
173 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200174 else {
175 switch(c) {
Victor Stinnerd9c06312011-10-11 21:56:19 +0200176 case '\\': case '"': case '\b': case '\f':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200177 case '\n': case '\r': case '\t':
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500178 d = 2; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200179 default:
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500180 d = c >= 0x10000 ? 12 : 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200181 }
182 }
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500183 if (output_size > PY_SSIZE_T_MAX - d) {
184 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
185 return NULL;
186 }
187 output_size += d;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200188 }
189
190 rval = PyUnicode_New(output_size, 127);
Christian Heimes90540002008-05-08 14:29:10 +0000191 if (rval == NULL) {
192 return NULL;
193 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200194 output = PyUnicode_1BYTE_DATA(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000195 chars = 0;
196 output[chars++] = '"';
197 for (i = 0; i < input_chars; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200198 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Christian Heimes90540002008-05-08 14:29:10 +0000199 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000200 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000201 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000202 else {
203 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000204 }
Christian Heimes90540002008-05-08 14:29:10 +0000205 }
206 output[chars++] = '"';
Christian Heimesf402e922013-01-03 09:21:55 +0100207#ifdef Py_DEBUG
Victor Stinner8f825062012-04-27 13:55:39 +0200208 assert(_PyUnicode_CheckConsistency(rval, 1));
Christian Heimesf402e922013-01-03 09:21:55 +0100209#endif
Christian Heimes90540002008-05-08 14:29:10 +0000210 return rval;
211}
212
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100213static PyObject *
214escape_unicode(PyObject *pystr)
215{
216 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
217 Py_ssize_t i;
218 Py_ssize_t input_chars;
219 Py_ssize_t output_size;
220 Py_ssize_t chars;
221 PyObject *rval;
222 void *input;
223 int kind;
224 Py_UCS4 maxchar;
225
226 if (PyUnicode_READY(pystr) == -1)
227 return NULL;
228
229 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
230 input_chars = PyUnicode_GET_LENGTH(pystr);
231 input = PyUnicode_DATA(pystr);
232 kind = PyUnicode_KIND(pystr);
233
234 /* Compute the output size */
235 for (i = 0, output_size = 2; i < input_chars; i++) {
236 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500237 Py_ssize_t d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100238 switch (c) {
239 case '\\': case '"': case '\b': case '\f':
240 case '\n': case '\r': case '\t':
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500241 d = 2;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100242 break;
243 default:
244 if (c <= 0x1f)
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500245 d = 6;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100246 else
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500247 d = 1;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100248 }
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500249 if (output_size > PY_SSIZE_T_MAX - d) {
250 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
251 return NULL;
252 }
253 output_size += d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100254 }
255
256 rval = PyUnicode_New(output_size, maxchar);
257 if (rval == NULL)
258 return NULL;
259
260 kind = PyUnicode_KIND(rval);
261
262#define ENCODE_OUTPUT do { \
263 chars = 0; \
264 output[chars++] = '"'; \
265 for (i = 0; i < input_chars; i++) { \
266 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
267 switch (c) { \
268 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
269 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
270 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
271 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
272 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
273 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
274 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
275 default: \
276 if (c <= 0x1f) { \
277 output[chars++] = '\\'; \
278 output[chars++] = 'u'; \
279 output[chars++] = '0'; \
280 output[chars++] = '0'; \
281 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
282 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
283 } else { \
284 output[chars++] = c; \
285 } \
286 } \
287 } \
288 output[chars++] = '"'; \
289 } while (0)
290
291 if (kind == PyUnicode_1BYTE_KIND) {
292 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
293 ENCODE_OUTPUT;
294 } else if (kind == PyUnicode_2BYTE_KIND) {
295 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
296 ENCODE_OUTPUT;
297 } else {
298 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
299 assert(kind == PyUnicode_4BYTE_KIND);
300 ENCODE_OUTPUT;
301 }
302#undef ENCODE_OUTPUT
303
304#ifdef Py_DEBUG
305 assert(_PyUnicode_CheckConsistency(rval, 1));
306#endif
307 return rval;
308}
309
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000310static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200311raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
Christian Heimes90540002008-05-08 14:29:10 +0000312{
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200313 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
314 static PyObject *JSONDecodeError = NULL;
315 PyObject *exc;
316 if (JSONDecodeError == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000317 PyObject *decoder = PyImport_ImportModule("json.decoder");
318 if (decoder == NULL)
319 return;
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200320 JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000321 Py_DECREF(decoder);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200322 if (JSONDecodeError == NULL)
Christian Heimes90540002008-05-08 14:29:10 +0000323 return;
Christian Heimes90540002008-05-08 14:29:10 +0000324 }
Victor Stinner4c381542016-12-09 00:33:39 +0100325 exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200326 if (exc) {
327 PyErr_SetObject(JSONDecodeError, exc);
328 Py_DECREF(exc);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000329 }
Christian Heimes90540002008-05-08 14:29:10 +0000330}
331
Ezio Melotti37623ab2013-01-03 08:44:15 +0200332static void
333raise_stop_iteration(Py_ssize_t idx)
334{
335 PyObject *value = PyLong_FromSsize_t(idx);
336 if (value != NULL) {
337 PyErr_SetObject(PyExc_StopIteration, value);
338 Py_DECREF(value);
339 }
340}
341
Christian Heimes90540002008-05-08 14:29:10 +0000342static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000343_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
344 /* return (rval, idx) tuple, stealing reference to rval */
345 PyObject *tpl;
346 PyObject *pyidx;
347 /*
348 steal a reference to rval, returns (rval, idx)
349 */
350 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000351 return NULL;
352 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000353 pyidx = PyLong_FromSsize_t(idx);
354 if (pyidx == NULL) {
355 Py_DECREF(rval);
356 return NULL;
357 }
358 tpl = PyTuple_New(2);
359 if (tpl == NULL) {
360 Py_DECREF(pyidx);
361 Py_DECREF(rval);
362 return NULL;
363 }
364 PyTuple_SET_ITEM(tpl, 0, rval);
365 PyTuple_SET_ITEM(tpl, 1, pyidx);
366 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000367}
368
369static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000370scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000371{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000372 /* Read the JSON string from PyUnicode pystr.
373 end is the index of the first character after the quote.
374 if strict is zero then literal control characters are allowed
375 *next_end_ptr is a return-by-reference index of the character
376 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000377
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000378 Return value is a new PyUnicode
379 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000380 PyObject *rval = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200381 Py_ssize_t len;
Christian Heimes90540002008-05-08 14:29:10 +0000382 Py_ssize_t begin = end - 1;
Brett Cannonb94767f2011-02-22 20:15:44 +0000383 Py_ssize_t next /* = begin */;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200384 const void *buf;
385 int kind;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000386
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200387 if (PyUnicode_READY(pystr) == -1)
388 return 0;
389
Inada Naoki9c110292019-10-17 16:12:41 +0900390 _PyUnicodeWriter writer;
391 _PyUnicodeWriter_Init(&writer);
392 writer.overallocate = 1;
393
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200394 len = PyUnicode_GET_LENGTH(pystr);
395 buf = PyUnicode_DATA(pystr);
396 kind = PyUnicode_KIND(pystr);
397
Ezio Melotti37623ab2013-01-03 08:44:15 +0200398 if (end < 0 || len < end) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000399 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
400 goto bail;
401 }
Christian Heimes90540002008-05-08 14:29:10 +0000402 while (1) {
403 /* Find the end of the string or the next escape */
Inada Naoki2a570af2019-08-08 17:57:10 +0900404 Py_UCS4 c;
405 {
406 // Use tight scope variable to help register allocation.
407 Py_UCS4 d = 0;
408 for (next = end; next < len; next++) {
409 d = PyUnicode_READ(kind, buf, next);
410 if (d == '"' || d == '\\') {
411 break;
412 }
413 if (d <= 0x1f && strict) {
414 raise_errmsg("Invalid control character at", pystr, next);
415 goto bail;
416 }
Christian Heimes90540002008-05-08 14:29:10 +0000417 }
Inada Naoki2a570af2019-08-08 17:57:10 +0900418 c = d;
Christian Heimes90540002008-05-08 14:29:10 +0000419 }
Inada Naoki9c110292019-10-17 16:12:41 +0900420
421 if (c == '"') {
422 // Fast path for simple case.
423 if (writer.buffer == NULL) {
424 PyObject *ret = PyUnicode_Substring(pystr, end, next);
425 if (ret == NULL) {
426 goto bail;
427 }
428 *next_end_ptr = next + 1;;
429 return ret;
430 }
431 }
432 else if (c != '\\') {
Christian Heimes90540002008-05-08 14:29:10 +0000433 raise_errmsg("Unterminated string starting at", pystr, begin);
434 goto bail;
435 }
Inada Naoki9c110292019-10-17 16:12:41 +0900436
Christian Heimes90540002008-05-08 14:29:10 +0000437 /* Pick up this chunk if it's not zero length */
438 if (next != end) {
Inada Naoki9c110292019-10-17 16:12:41 +0900439 if (_PyUnicodeWriter_WriteSubstring(&writer, pystr, end, next) < 0) {
Christian Heimes90540002008-05-08 14:29:10 +0000440 goto bail;
441 }
Christian Heimes90540002008-05-08 14:29:10 +0000442 }
443 next++;
444 if (c == '"') {
445 end = next;
446 break;
447 }
448 if (next == len) {
449 raise_errmsg("Unterminated string starting at", pystr, begin);
450 goto bail;
451 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200452 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000453 if (c != 'u') {
454 /* Non-unicode backslash escapes */
455 end = next + 1;
456 switch (c) {
457 case '"': break;
458 case '\\': break;
459 case '/': break;
460 case 'b': c = '\b'; break;
461 case 'f': c = '\f'; break;
462 case 'n': c = '\n'; break;
463 case 'r': c = '\r'; break;
464 case 't': c = '\t'; break;
465 default: c = 0;
466 }
467 if (c == 0) {
468 raise_errmsg("Invalid \\escape", pystr, end - 2);
469 goto bail;
470 }
471 }
472 else {
473 c = 0;
474 next++;
475 end = next + 4;
476 if (end >= len) {
477 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
478 goto bail;
479 }
480 /* Decode 4 hex digits */
481 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200482 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000483 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000484 switch (digit) {
485 case '0': case '1': case '2': case '3': case '4':
486 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000487 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000488 case 'a': case 'b': case 'c': case 'd': case 'e':
489 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000490 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000491 case 'A': case 'B': case 'C': case 'D': case 'E':
492 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000493 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000494 default:
495 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
496 goto bail;
497 }
498 }
Christian Heimes90540002008-05-08 14:29:10 +0000499 /* Surrogate pair */
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200500 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
501 PyUnicode_READ(kind, buf, next++) == '\\' &&
502 PyUnicode_READ(kind, buf, next++) == 'u') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200503 Py_UCS4 c2 = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000504 end += 6;
505 /* Decode 4 hex digits */
506 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200507 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000508 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000509 switch (digit) {
510 case '0': case '1': case '2': case '3': case '4':
511 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000512 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000513 case 'a': case 'b': case 'c': case 'd': case 'e':
514 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000515 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000516 case 'A': case 'B': case 'C': case 'D': case 'E':
517 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000518 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000519 default:
520 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
521 goto bail;
522 }
523 }
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200524 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
525 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
526 else
527 end -= 6;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000528 }
Christian Heimes90540002008-05-08 14:29:10 +0000529 }
Inada Naoki9c110292019-10-17 16:12:41 +0900530 if (_PyUnicodeWriter_WriteChar(&writer, c) < 0) {
Christian Heimes90540002008-05-08 14:29:10 +0000531 goto bail;
532 }
Christian Heimes90540002008-05-08 14:29:10 +0000533 }
534
Inada Naoki9c110292019-10-17 16:12:41 +0900535 rval = _PyUnicodeWriter_Finish(&writer);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000536 *next_end_ptr = end;
537 return rval;
Inada Naoki9c110292019-10-17 16:12:41 +0900538
Christian Heimes90540002008-05-08 14:29:10 +0000539bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000540 *next_end_ptr = -1;
Inada Naoki9c110292019-10-17 16:12:41 +0900541 _PyUnicodeWriter_Dealloc(&writer);
Christian Heimes90540002008-05-08 14:29:10 +0000542 return NULL;
543}
544
545PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000546 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000547 "\n"
548 "Scan the string s for a JSON string. End is the index of the\n"
549 "character in s after the quote that started the JSON string.\n"
550 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
551 "on attempt to decode an invalid string. If strict is False then literal\n"
552 "control characters are allowed in the string.\n"
553 "\n"
554 "Returns a tuple of the decoded string and the index of the character in s\n"
555 "after the end quote."
556);
Christian Heimes90540002008-05-08 14:29:10 +0000557
558static PyObject *
Victor Stinnerfbbfcce2019-11-05 11:44:28 +0100559py_scanstring(PyObject* Py_UNUSED(self), PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000560{
561 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000562 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000563 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000564 Py_ssize_t next_end = -1;
565 int strict = 1;
Antoine Pitroucbb02842012-12-01 19:34:16 +0100566 if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000567 return NULL;
568 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000569 if (PyUnicode_Check(pystr)) {
570 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000571 }
572 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000573 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000574 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000575 Py_TYPE(pystr)->tp_name);
576 return NULL;
577 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000578 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000579}
580
581PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000582 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000583 "\n"
584 "Return an ASCII-only JSON representation of a Python string"
585);
Christian Heimes90540002008-05-08 14:29:10 +0000586
587static PyObject *
Victor Stinnerfbbfcce2019-11-05 11:44:28 +0100588py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000589{
590 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000591 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000592 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000593 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000594 rval = ascii_escape_unicode(pystr);
595 }
596 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000597 PyErr_Format(PyExc_TypeError,
598 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000599 Py_TYPE(pystr)->tp_name);
600 return NULL;
601 }
Christian Heimes90540002008-05-08 14:29:10 +0000602 return rval;
603}
604
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100605
606PyDoc_STRVAR(pydoc_encode_basestring,
607 "encode_basestring(string) -> string\n"
608 "\n"
609 "Return a JSON representation of a Python string"
610);
611
612static PyObject *
Victor Stinnerfbbfcce2019-11-05 11:44:28 +0100613py_encode_basestring(PyObject* Py_UNUSED(self), PyObject *pystr)
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100614{
615 PyObject *rval;
616 /* Return a JSON representation of a Python string */
617 /* METH_O */
618 if (PyUnicode_Check(pystr)) {
619 rval = escape_unicode(pystr);
620 }
621 else {
622 PyErr_Format(PyExc_TypeError,
623 "first argument must be a string, not %.80s",
624 Py_TYPE(pystr)->tp_name);
625 return NULL;
626 }
627 return rval;
628}
629
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000630static void
631scanner_dealloc(PyObject *self)
632{
INADA Naokia6296d32017-08-24 14:55:17 +0900633 /* bpo-31095: UnTrack is needed before calling any callbacks */
634 PyObject_GC_UnTrack(self);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000635 scanner_clear(self);
636 Py_TYPE(self)->tp_free(self);
637}
638
639static int
640scanner_traverse(PyObject *self, visitproc visit, void *arg)
641{
642 PyScannerObject *s;
643 assert(PyScanner_Check(self));
644 s = (PyScannerObject *)self;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000645 Py_VISIT(s->object_hook);
646 Py_VISIT(s->object_pairs_hook);
647 Py_VISIT(s->parse_float);
648 Py_VISIT(s->parse_int);
649 Py_VISIT(s->parse_constant);
650 return 0;
651}
652
653static int
654scanner_clear(PyObject *self)
655{
656 PyScannerObject *s;
657 assert(PyScanner_Check(self));
658 s = (PyScannerObject *)self;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000659 Py_CLEAR(s->object_hook);
660 Py_CLEAR(s->object_pairs_hook);
661 Py_CLEAR(s->parse_float);
662 Py_CLEAR(s->parse_int);
663 Py_CLEAR(s->parse_constant);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000664 Py_CLEAR(s->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000665 return 0;
666}
667
668static PyObject *
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300669_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
670{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000671 /* Read a JSON object from PyUnicode pystr.
672 idx is the index of the first character after the opening curly brace.
673 *next_idx_ptr is a return-by-reference index to the first character after
674 the closing curly brace.
675
676 Returns a new PyObject (usually a dict, but object_hook can change that)
677 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200678 void *str;
679 int kind;
680 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000681 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000682 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000683 PyObject *key = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000684 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000685 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000686
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200687 if (PyUnicode_READY(pystr) == -1)
688 return NULL;
689
690 str = PyUnicode_DATA(pystr);
691 kind = PyUnicode_KIND(pystr);
692 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
693
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000694 if (has_pairs_hook)
695 rval = PyList_New(0);
696 else
697 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000698 if (rval == NULL)
699 return NULL;
700
701 /* skip whitespace after { */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200702 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000703
704 /* only loop if the object is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200705 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
706 while (1) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000707 PyObject *memokey;
708
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000709 /* read key */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200710 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200711 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000712 goto bail;
713 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300714 key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000715 if (key == NULL)
716 goto bail;
Inada Naoki2a570af2019-08-08 17:57:10 +0900717 memokey = PyDict_SetDefault(s->memo, key, key);
718 if (memokey == NULL) {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200719 goto bail;
720 }
Inada Naoki2a570af2019-08-08 17:57:10 +0900721 Py_INCREF(memokey);
722 Py_DECREF(key);
723 key = memokey;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000724 idx = next_idx;
725
726 /* skip whitespace between key and : delimiter, read :, skip whitespace */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200727 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
728 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200729 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000730 goto bail;
731 }
732 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200733 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000734
735 /* read any JSON term */
736 val = scan_once_unicode(s, pystr, idx, &next_idx);
737 if (val == NULL)
738 goto bail;
739
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000740 if (has_pairs_hook) {
741 PyObject *item = PyTuple_Pack(2, key, val);
742 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000743 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000744 Py_CLEAR(key);
745 Py_CLEAR(val);
746 if (PyList_Append(rval, item) == -1) {
747 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000748 goto bail;
749 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000750 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000751 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000752 else {
753 if (PyDict_SetItem(rval, key, val) < 0)
754 goto bail;
755 Py_CLEAR(key);
756 Py_CLEAR(val);
757 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000758 idx = next_idx;
759
760 /* skip whitespace before } or , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200761 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000762
763 /* bail if the object is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200764 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000765 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200766 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200767 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000768 goto bail;
769 }
770 idx++;
771
772 /* skip whitespace after , delimiter */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200773 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000774 }
775 }
776
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000777 *next_idx_ptr = idx + 1;
778
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000779 if (has_pairs_hook) {
Jeroen Demeyer196a5302019-07-04 12:31:34 +0200780 val = _PyObject_CallOneArg(s->object_pairs_hook, rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000781 Py_DECREF(rval);
782 return val;
783 }
784
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000785 /* if object_hook is not None: rval = object_hook(rval) */
786 if (s->object_hook != Py_None) {
Jeroen Demeyer196a5302019-07-04 12:31:34 +0200787 val = _PyObject_CallOneArg(s->object_hook, rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000788 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000789 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000790 }
791 return rval;
792bail:
793 Py_XDECREF(key);
794 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000795 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000796 return NULL;
797}
798
799static PyObject *
800_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200801 /* Read a JSON array from PyUnicode pystr.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000802 idx is the index of the first character after the opening brace.
803 *next_idx_ptr is a return-by-reference index to the first character after
804 the closing brace.
805
806 Returns a new PyList
807 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200808 void *str;
809 int kind;
810 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000811 PyObject *val = NULL;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200812 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000813 Py_ssize_t next_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000814
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200815 if (PyUnicode_READY(pystr) == -1)
816 return NULL;
817
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200818 rval = PyList_New(0);
819 if (rval == NULL)
820 return NULL;
821
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200822 str = PyUnicode_DATA(pystr);
823 kind = PyUnicode_KIND(pystr);
824 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
825
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000826 /* skip whitespace after [ */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200827 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000828
829 /* only loop if the array is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200830 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
831 while (1) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000832
833 /* read any JSON term */
834 val = scan_once_unicode(s, pystr, idx, &next_idx);
835 if (val == NULL)
836 goto bail;
837
838 if (PyList_Append(rval, val) == -1)
839 goto bail;
840
841 Py_CLEAR(val);
842 idx = next_idx;
843
844 /* skip whitespace between term and , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200845 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000846
847 /* bail if the array is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200848 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000849 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200850 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200851 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000852 goto bail;
853 }
854 idx++;
855
856 /* skip whitespace after , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200857 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000858 }
859 }
860
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200861 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
862 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200863 raise_errmsg("Expecting value", pystr, end_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000864 goto bail;
865 }
866 *next_idx_ptr = idx + 1;
867 return rval;
868bail:
869 Py_XDECREF(val);
870 Py_DECREF(rval);
871 return NULL;
872}
873
874static PyObject *
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200875_parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
876 /* Read a JSON constant.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000877 constant is the constant string that was found
878 ("NaN", "Infinity", "-Infinity").
879 idx is the index of the first character of the constant
880 *next_idx_ptr is a return-by-reference index to the first character after
881 the constant.
882
883 Returns the result of parse_constant
884 */
885 PyObject *cstr;
886 PyObject *rval;
887 /* constant is "NaN", "Infinity", or "-Infinity" */
888 cstr = PyUnicode_InternFromString(constant);
889 if (cstr == NULL)
890 return NULL;
891
892 /* rval = parse_constant(constant) */
Jeroen Demeyer196a5302019-07-04 12:31:34 +0200893 rval = _PyObject_CallOneArg(s->parse_constant, cstr);
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200894 idx += PyUnicode_GET_LENGTH(cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000895 Py_DECREF(cstr);
896 *next_idx_ptr = idx;
897 return rval;
898}
899
900static PyObject *
901_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
902 /* Read a JSON number from PyUnicode pystr.
903 idx is the index of the first character of the number
904 *next_idx_ptr is a return-by-reference index to the first character after
905 the number.
906
907 Returns a new PyObject representation of that number:
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200908 PyLong, or PyFloat.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000909 May return other types if parse_int or parse_float are set
910 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200911 void *str;
912 int kind;
913 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000914 Py_ssize_t idx = start;
915 int is_float = 0;
916 PyObject *rval;
Antoine Pitrouf6454512011-04-25 19:16:06 +0200917 PyObject *numstr = NULL;
918 PyObject *custom_func;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000919
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200920 if (PyUnicode_READY(pystr) == -1)
921 return NULL;
922
923 str = PyUnicode_DATA(pystr);
924 kind = PyUnicode_KIND(pystr);
925 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
926
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000927 /* read a sign if it's there, make sure it's not the end of the string */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200928 if (PyUnicode_READ(kind, str, idx) == '-') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000929 idx++;
930 if (idx > end_idx) {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200931 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000932 return NULL;
933 }
934 }
935
936 /* read as many integer digits as we find as long as it doesn't start with 0 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200937 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000938 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200939 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000940 }
941 /* if it starts with 0 we only expect one integer digit */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200942 else if (PyUnicode_READ(kind, str, idx) == '0') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000943 idx++;
944 }
945 /* no integer digits, error */
946 else {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200947 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000948 return NULL;
949 }
950
951 /* if the next char is '.' followed by a digit then read all float digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200952 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000953 is_float = 1;
954 idx += 2;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200955 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000956 }
957
958 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200959 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000960 Py_ssize_t e_start = idx;
961 idx++;
962
963 /* read an exponent sign if present */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200964 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000965
966 /* read all digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200967 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000968
969 /* if we got a digit, then parse as float. if not, backtrack */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200970 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000971 is_float = 1;
972 }
973 else {
974 idx = e_start;
975 }
976 }
977
Antoine Pitrouf6454512011-04-25 19:16:06 +0200978 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
979 custom_func = s->parse_float;
980 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
981 custom_func = s->parse_int;
982 else
983 custom_func = NULL;
984
985 if (custom_func) {
986 /* copy the section we determined to be a number */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200987 numstr = PyUnicode_FromKindAndData(kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200988 (char*)str + kind * start,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200989 idx - start);
Antoine Pitrouf6454512011-04-25 19:16:06 +0200990 if (numstr == NULL)
991 return NULL;
Jeroen Demeyer196a5302019-07-04 12:31:34 +0200992 rval = _PyObject_CallOneArg(custom_func, numstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000993 }
994 else {
Antoine Pitrouf6454512011-04-25 19:16:06 +0200995 Py_ssize_t i, n;
996 char *buf;
997 /* Straight conversion to ASCII, to avoid costly conversion of
998 decimal unicode digits (which cannot appear here) */
999 n = idx - start;
1000 numstr = PyBytes_FromStringAndSize(NULL, n);
1001 if (numstr == NULL)
1002 return NULL;
1003 buf = PyBytes_AS_STRING(numstr);
1004 for (i = 0; i < n; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001005 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001006 }
1007 if (is_float)
1008 rval = PyFloat_FromString(numstr);
1009 else
1010 rval = PyLong_FromString(buf, NULL, 10);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001011 }
1012 Py_DECREF(numstr);
1013 *next_idx_ptr = idx;
1014 return rval;
1015}
1016
1017static PyObject *
1018scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1019{
1020 /* Read one JSON term (of any kind) from PyUnicode pystr.
1021 idx is the index of the first character of the term
1022 *next_idx_ptr is a return-by-reference index to the first character after
1023 the number.
1024
1025 Returns a new PyObject representation of the term.
1026 */
Ezio Melotti362b9512011-05-07 17:58:09 +03001027 PyObject *res;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001028 void *str;
1029 int kind;
1030 Py_ssize_t length;
1031
1032 if (PyUnicode_READY(pystr) == -1)
1033 return NULL;
1034
1035 str = PyUnicode_DATA(pystr);
1036 kind = PyUnicode_KIND(pystr);
1037 length = PyUnicode_GET_LENGTH(pystr);
1038
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001039 if (idx < 0) {
Benjamin Peterson9beee042014-04-14 11:46:51 -04001040 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001041 return NULL;
1042 }
1043 if (idx >= length) {
Ezio Melotti37623ab2013-01-03 08:44:15 +02001044 raise_stop_iteration(idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001045 return NULL;
1046 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001047
1048 switch (PyUnicode_READ(kind, str, idx)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001049 case '"':
1050 /* string */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001051 return scanstring_unicode(pystr, idx + 1, s->strict, next_idx_ptr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001052 case '{':
1053 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +03001054 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1055 "from a unicode string"))
1056 return NULL;
1057 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1058 Py_LeaveRecursiveCall();
1059 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001060 case '[':
1061 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +03001062 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1063 "from a unicode string"))
1064 return NULL;
1065 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1066 Py_LeaveRecursiveCall();
1067 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001068 case 'n':
1069 /* null */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001070 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001071 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001072 Py_RETURN_NONE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001073 }
1074 break;
1075 case 't':
1076 /* true */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001077 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001078 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001079 Py_RETURN_TRUE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001080 }
1081 break;
1082 case 'f':
1083 /* false */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001084 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1085 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1086 PyUnicode_READ(kind, str, idx + 3) == 's' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001087 PyUnicode_READ(kind, str, idx + 4) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001088 *next_idx_ptr = idx + 5;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001089 Py_RETURN_FALSE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001090 }
1091 break;
1092 case 'N':
1093 /* NaN */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001094 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001095 PyUnicode_READ(kind, str, idx + 2) == 'N') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001096 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1097 }
1098 break;
1099 case 'I':
1100 /* Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001101 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1102 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1103 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001104 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001105 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1106 PyUnicode_READ(kind, str, idx + 6) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001107 PyUnicode_READ(kind, str, idx + 7) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001108 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1109 }
1110 break;
1111 case '-':
1112 /* -Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001113 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001114 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1115 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001116 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001117 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001118 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1119 PyUnicode_READ(kind, str, idx + 7) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001120 PyUnicode_READ(kind, str, idx + 8) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001121 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1122 }
1123 break;
1124 }
1125 /* Didn't find a string, object, array, or named constant. Look for a number. */
1126 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1127}
1128
1129static PyObject *
1130scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1131{
1132 /* Python callable interface to scan_once_{str,unicode} */
1133 PyObject *pystr;
1134 PyObject *rval;
1135 Py_ssize_t idx;
1136 Py_ssize_t next_idx = -1;
1137 static char *kwlist[] = {"string", "idx", NULL};
1138 PyScannerObject *s;
1139 assert(PyScanner_Check(self));
1140 s = (PyScannerObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001141 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001142 return NULL;
1143
1144 if (PyUnicode_Check(pystr)) {
1145 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1146 }
1147 else {
1148 PyErr_Format(PyExc_TypeError,
1149 "first argument must be a string, not %.80s",
1150 Py_TYPE(pystr)->tp_name);
1151 return NULL;
1152 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001153 PyDict_Clear(s->memo);
1154 if (rval == NULL)
1155 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001156 return _build_rval_index_tuple(rval, next_idx);
1157}
1158
1159static PyObject *
1160scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1161{
1162 PyScannerObject *s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001163 PyObject *ctx;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001164 PyObject *strict;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001165 static char *kwlist[] = {"context", NULL};
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001166
1167 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001168 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001169
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001170 s = (PyScannerObject *)type->tp_alloc(type, 0);
1171 if (s == NULL) {
1172 return NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001173 }
1174
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001175 s->memo = PyDict_New();
1176 if (s->memo == NULL)
1177 goto bail;
1178
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001179 /* All of these will fail "gracefully" so we don't need to verify them */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001180 strict = PyObject_GetAttrString(ctx, "strict");
1181 if (strict == NULL)
1182 goto bail;
1183 s->strict = PyObject_IsTrue(strict);
1184 Py_DECREF(strict);
1185 if (s->strict < 0)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001186 goto bail;
1187 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1188 if (s->object_hook == NULL)
1189 goto bail;
1190 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1191 if (s->object_pairs_hook == NULL)
1192 goto bail;
1193 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1194 if (s->parse_float == NULL)
1195 goto bail;
1196 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1197 if (s->parse_int == NULL)
1198 goto bail;
1199 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1200 if (s->parse_constant == NULL)
1201 goto bail;
1202
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001203 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001204
1205bail:
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001206 Py_DECREF(s);
1207 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001208}
1209
1210PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1211
1212static
1213PyTypeObject PyScannerType = {
1214 PyVarObject_HEAD_INIT(NULL, 0)
1215 "_json.Scanner", /* tp_name */
1216 sizeof(PyScannerObject), /* tp_basicsize */
1217 0, /* tp_itemsize */
1218 scanner_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001219 0, /* tp_vectorcall_offset */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001220 0, /* tp_getattr */
1221 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001222 0, /* tp_as_async */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001223 0, /* tp_repr */
1224 0, /* tp_as_number */
1225 0, /* tp_as_sequence */
1226 0, /* tp_as_mapping */
1227 0, /* tp_hash */
1228 scanner_call, /* tp_call */
1229 0, /* tp_str */
1230 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1231 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1232 0, /* tp_as_buffer */
1233 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1234 scanner_doc, /* tp_doc */
1235 scanner_traverse, /* tp_traverse */
1236 scanner_clear, /* tp_clear */
1237 0, /* tp_richcompare */
1238 0, /* tp_weaklistoffset */
1239 0, /* tp_iter */
1240 0, /* tp_iternext */
1241 0, /* tp_methods */
1242 scanner_members, /* tp_members */
1243 0, /* tp_getset */
1244 0, /* tp_base */
1245 0, /* tp_dict */
1246 0, /* tp_descr_get */
1247 0, /* tp_descr_set */
1248 0, /* tp_dictoffset */
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001249 0, /* tp_init */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001250 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1251 scanner_new, /* tp_new */
1252 0,/* PyObject_GC_Del, */ /* tp_free */
1253};
1254
1255static PyObject *
1256encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1257{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001258 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1259
1260 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001261 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001262 PyObject *item_separator;
1263 int sort_keys, skipkeys, allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001264
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001265 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001266 &markers, &defaultfn, &encoder, &indent,
1267 &key_separator, &item_separator,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001268 &sort_keys, &skipkeys, &allow_nan))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001269 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001270
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001271 if (markers != Py_None && !PyDict_Check(markers)) {
1272 PyErr_Format(PyExc_TypeError,
1273 "make_encoder() argument 1 must be dict or None, "
1274 "not %.200s", Py_TYPE(markers)->tp_name);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001275 return NULL;
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001276 }
1277
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001278 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1279 if (s == NULL)
1280 return NULL;
1281
Antoine Pitrou781eba72009-12-08 15:57:31 +00001282 s->markers = markers;
1283 s->defaultfn = defaultfn;
1284 s->encoder = encoder;
1285 s->indent = indent;
1286 s->key_separator = key_separator;
1287 s->item_separator = item_separator;
1288 s->sort_keys = sort_keys;
1289 s->skipkeys = skipkeys;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001290 s->allow_nan = allow_nan;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001291 s->fast_encode = NULL;
1292 if (PyCFunction_Check(s->encoder)) {
1293 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1294 if (f == (PyCFunction)py_encode_basestring_ascii ||
1295 f == (PyCFunction)py_encode_basestring) {
1296 s->fast_encode = f;
1297 }
1298 }
Antoine Pitrou781eba72009-12-08 15:57:31 +00001299
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001300 Py_INCREF(s->markers);
1301 Py_INCREF(s->defaultfn);
1302 Py_INCREF(s->encoder);
1303 Py_INCREF(s->indent);
1304 Py_INCREF(s->key_separator);
1305 Py_INCREF(s->item_separator);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001306 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001307}
1308
1309static PyObject *
1310encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1311{
1312 /* Python callable interface to encode_listencode_obj */
1313 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1314 PyObject *obj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001315 Py_ssize_t indent_level;
1316 PyEncoderObject *s;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001317 _PyAccu acc;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001318
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001319 assert(PyEncoder_Check(self));
1320 s = (PyEncoderObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001321 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1322 &obj, &indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001323 return NULL;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001324 if (_PyAccu_Init(&acc))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001325 return NULL;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001326 if (encoder_listencode_obj(s, &acc, obj, indent_level)) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001327 _PyAccu_Destroy(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001328 return NULL;
1329 }
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001330 return _PyAccu_FinishAsList(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001331}
1332
1333static PyObject *
1334_encoded_const(PyObject *obj)
1335{
1336 /* Return the JSON string representation of None, True, False */
1337 if (obj == Py_None) {
1338 static PyObject *s_null = NULL;
1339 if (s_null == NULL) {
1340 s_null = PyUnicode_InternFromString("null");
1341 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001342 Py_XINCREF(s_null);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001343 return s_null;
1344 }
1345 else if (obj == Py_True) {
1346 static PyObject *s_true = NULL;
1347 if (s_true == NULL) {
1348 s_true = PyUnicode_InternFromString("true");
1349 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001350 Py_XINCREF(s_true);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001351 return s_true;
1352 }
1353 else if (obj == Py_False) {
1354 static PyObject *s_false = NULL;
1355 if (s_false == NULL) {
1356 s_false = PyUnicode_InternFromString("false");
1357 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001358 Py_XINCREF(s_false);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001359 return s_false;
1360 }
1361 else {
1362 PyErr_SetString(PyExc_ValueError, "not a const");
1363 return NULL;
1364 }
1365}
1366
1367static PyObject *
1368encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1369{
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001370 /* Return the JSON representation of a PyFloat. */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001371 double i = PyFloat_AS_DOUBLE(obj);
1372 if (!Py_IS_FINITE(i)) {
1373 if (!s->allow_nan) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001374 PyErr_SetString(
1375 PyExc_ValueError,
1376 "Out of range float values are not JSON compliant"
1377 );
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001378 return NULL;
1379 }
1380 if (i > 0) {
1381 return PyUnicode_FromString("Infinity");
1382 }
1383 else if (i < 0) {
1384 return PyUnicode_FromString("-Infinity");
1385 }
1386 else {
1387 return PyUnicode_FromString("NaN");
1388 }
1389 }
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001390 return PyFloat_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001391}
1392
1393static PyObject *
1394encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1395{
1396 /* Return the JSON representation of a string */
Oren Milman2b382dd2017-09-24 12:07:12 +03001397 PyObject *encoded;
1398
1399 if (s->fast_encode) {
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001400 return s->fast_encode(NULL, obj);
Oren Milman2b382dd2017-09-24 12:07:12 +03001401 }
Jeroen Demeyer196a5302019-07-04 12:31:34 +02001402 encoded = _PyObject_CallOneArg(s->encoder, obj);
Oren Milman2b382dd2017-09-24 12:07:12 +03001403 if (encoded != NULL && !PyUnicode_Check(encoded)) {
1404 PyErr_Format(PyExc_TypeError,
1405 "encoder() must return a string, not %.80s",
1406 Py_TYPE(encoded)->tp_name);
1407 Py_DECREF(encoded);
1408 return NULL;
1409 }
1410 return encoded;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001411}
1412
1413static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001414_steal_accumulate(_PyAccu *acc, PyObject *stolen)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001415{
1416 /* Append stolen and then decrement its reference count */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001417 int rval = _PyAccu_Accumulate(acc, stolen);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001418 Py_DECREF(stolen);
1419 return rval;
1420}
1421
1422static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001423encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001424 PyObject *obj, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001425{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001426 /* Encode Python object obj to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001427 PyObject *newobj;
1428 int rv;
1429
1430 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1431 PyObject *cstr = _encoded_const(obj);
1432 if (cstr == NULL)
1433 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001434 return _steal_accumulate(acc, cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001435 }
1436 else if (PyUnicode_Check(obj))
1437 {
1438 PyObject *encoded = encoder_encode_string(s, obj);
1439 if (encoded == NULL)
1440 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001441 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001442 }
1443 else if (PyLong_Check(obj)) {
Serhiy Storchaka96aeaec2019-05-06 22:29:40 +03001444 PyObject *encoded = PyLong_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001445 if (encoded == NULL)
1446 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001447 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001448 }
1449 else if (PyFloat_Check(obj)) {
1450 PyObject *encoded = encoder_encode_float(s, obj);
1451 if (encoded == NULL)
1452 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001453 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001454 }
1455 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001456 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1457 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001458 rv = encoder_listencode_list(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001459 Py_LeaveRecursiveCall();
1460 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001461 }
1462 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001463 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1464 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001465 rv = encoder_listencode_dict(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001466 Py_LeaveRecursiveCall();
1467 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001468 }
1469 else {
1470 PyObject *ident = NULL;
1471 if (s->markers != Py_None) {
1472 int has_key;
1473 ident = PyLong_FromVoidPtr(obj);
1474 if (ident == NULL)
1475 return -1;
1476 has_key = PyDict_Contains(s->markers, ident);
1477 if (has_key) {
1478 if (has_key != -1)
1479 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1480 Py_DECREF(ident);
1481 return -1;
1482 }
1483 if (PyDict_SetItem(s->markers, ident, obj)) {
1484 Py_DECREF(ident);
1485 return -1;
1486 }
1487 }
Jeroen Demeyer196a5302019-07-04 12:31:34 +02001488 newobj = _PyObject_CallOneArg(s->defaultfn, obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001489 if (newobj == NULL) {
1490 Py_XDECREF(ident);
1491 return -1;
1492 }
Ezio Melotti13672652011-05-11 01:02:56 +03001493
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001494 if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
1495 Py_DECREF(newobj);
1496 Py_XDECREF(ident);
Ezio Melotti13672652011-05-11 01:02:56 +03001497 return -1;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001498 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001499 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001500 Py_LeaveRecursiveCall();
1501
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001502 Py_DECREF(newobj);
1503 if (rv) {
1504 Py_XDECREF(ident);
1505 return -1;
1506 }
1507 if (ident != NULL) {
1508 if (PyDict_DelItem(s->markers, ident)) {
1509 Py_XDECREF(ident);
1510 return -1;
1511 }
1512 Py_XDECREF(ident);
1513 }
1514 return rv;
1515 }
1516}
1517
1518static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001519encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001520 PyObject *dct, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001521{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001522 /* Encode Python dict dct a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001523 static PyObject *open_dict = NULL;
1524 static PyObject *close_dict = NULL;
1525 static PyObject *empty_dict = NULL;
1526 PyObject *kstr = NULL;
1527 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001528 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001529 PyObject *items;
1530 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001531 Py_ssize_t idx;
1532
1533 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1534 open_dict = PyUnicode_InternFromString("{");
1535 close_dict = PyUnicode_InternFromString("}");
1536 empty_dict = PyUnicode_InternFromString("{}");
1537 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1538 return -1;
1539 }
Serhiy Storchaka8cbc51a2017-01-13 08:38:15 +02001540 if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001541 return _PyAccu_Accumulate(acc, empty_dict);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001542
1543 if (s->markers != Py_None) {
1544 int has_key;
1545 ident = PyLong_FromVoidPtr(dct);
1546 if (ident == NULL)
1547 goto bail;
1548 has_key = PyDict_Contains(s->markers, ident);
1549 if (has_key) {
1550 if (has_key != -1)
1551 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1552 goto bail;
1553 }
1554 if (PyDict_SetItem(s->markers, ident, dct)) {
1555 goto bail;
1556 }
1557 }
1558
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001559 if (_PyAccu_Accumulate(acc, open_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001560 goto bail;
1561
1562 if (s->indent != Py_None) {
1563 /* TODO: DOES NOT RUN */
1564 indent_level += 1;
1565 /*
1566 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1567 separator = _item_separator + newline_indent
1568 buf += newline_indent
1569 */
1570 }
1571
Benjamin Peterson501182a2015-05-02 22:28:04 -04001572 items = PyMapping_Items(dct);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001573 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001574 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001575 if (s->sort_keys && PyList_Sort(items) < 0) {
1576 Py_DECREF(items);
Benjamin Peterson501182a2015-05-02 22:28:04 -04001577 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001578 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001579 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001580 Py_DECREF(items);
1581 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001582 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001583 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001584 while ((item = PyIter_Next(it)) != NULL) {
1585 PyObject *encoded, *key, *value;
Serhiy Storchakafff9a312017-03-21 08:53:25 +02001586 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001587 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1588 goto bail;
1589 }
1590 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001591 if (PyUnicode_Check(key)) {
1592 Py_INCREF(key);
1593 kstr = key;
1594 }
1595 else if (PyFloat_Check(key)) {
1596 kstr = encoder_encode_float(s, key);
1597 if (kstr == NULL)
1598 goto bail;
1599 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001600 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001601 /* This must come before the PyLong_Check because
1602 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001603 kstr = _encoded_const(key);
1604 if (kstr == NULL)
1605 goto bail;
1606 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001607 else if (PyLong_Check(key)) {
Serhiy Storchaka96aeaec2019-05-06 22:29:40 +03001608 kstr = PyLong_Type.tp_repr(key);
Ethan Furmana4998a72013-08-10 13:01:45 -07001609 if (kstr == NULL) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001610 goto bail;
Ethan Furmana4998a72013-08-10 13:01:45 -07001611 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001612 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001613 else if (s->skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001614 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001615 continue;
1616 }
1617 else {
Serhiy Storchakacfa797c2017-11-25 17:38:20 +02001618 PyErr_Format(PyExc_TypeError,
1619 "keys must be str, int, float, bool or None, "
Victor Stinnerdaa97562020-02-07 03:37:06 +01001620 "not %.100s", Py_TYPE(key)->tp_name);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001621 goto bail;
1622 }
1623
1624 if (idx) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001625 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001626 goto bail;
1627 }
1628
1629 encoded = encoder_encode_string(s, kstr);
1630 Py_CLEAR(kstr);
1631 if (encoded == NULL)
1632 goto bail;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001633 if (_PyAccu_Accumulate(acc, encoded)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001634 Py_DECREF(encoded);
1635 goto bail;
1636 }
1637 Py_DECREF(encoded);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001638 if (_PyAccu_Accumulate(acc, s->key_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001639 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001640
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001641 value = PyTuple_GET_ITEM(item, 1);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001642 if (encoder_listencode_obj(s, acc, value, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001643 goto bail;
1644 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001645 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001646 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001647 if (PyErr_Occurred())
1648 goto bail;
1649 Py_CLEAR(it);
1650
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001651 if (ident != NULL) {
1652 if (PyDict_DelItem(s->markers, ident))
1653 goto bail;
1654 Py_CLEAR(ident);
1655 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001656 /* TODO DOES NOT RUN; dead code
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001657 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001658 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001659
1660 yield '\n' + (' ' * (_indent * _current_indent_level))
1661 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001662 if (_PyAccu_Accumulate(acc, close_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001663 goto bail;
1664 return 0;
1665
1666bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001667 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001668 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001669 Py_XDECREF(kstr);
1670 Py_XDECREF(ident);
1671 return -1;
1672}
1673
1674
1675static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001676encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001677 PyObject *seq, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001678{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001679 /* Encode Python list seq to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001680 static PyObject *open_array = NULL;
1681 static PyObject *close_array = NULL;
1682 static PyObject *empty_array = NULL;
1683 PyObject *ident = NULL;
1684 PyObject *s_fast = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001685 Py_ssize_t i;
1686
1687 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1688 open_array = PyUnicode_InternFromString("[");
1689 close_array = PyUnicode_InternFromString("]");
1690 empty_array = PyUnicode_InternFromString("[]");
1691 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1692 return -1;
1693 }
1694 ident = NULL;
1695 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1696 if (s_fast == NULL)
1697 return -1;
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001698 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001699 Py_DECREF(s_fast);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001700 return _PyAccu_Accumulate(acc, empty_array);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001701 }
1702
1703 if (s->markers != Py_None) {
1704 int has_key;
1705 ident = PyLong_FromVoidPtr(seq);
1706 if (ident == NULL)
1707 goto bail;
1708 has_key = PyDict_Contains(s->markers, ident);
1709 if (has_key) {
1710 if (has_key != -1)
1711 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1712 goto bail;
1713 }
1714 if (PyDict_SetItem(s->markers, ident, seq)) {
1715 goto bail;
1716 }
1717 }
1718
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001719 if (_PyAccu_Accumulate(acc, open_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001720 goto bail;
1721 if (s->indent != Py_None) {
1722 /* TODO: DOES NOT RUN */
1723 indent_level += 1;
1724 /*
1725 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1726 separator = _item_separator + newline_indent
1727 buf += newline_indent
1728 */
1729 }
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001730 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1731 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001732 if (i) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001733 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001734 goto bail;
1735 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001736 if (encoder_listencode_obj(s, acc, obj, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001737 goto bail;
1738 }
1739 if (ident != NULL) {
1740 if (PyDict_DelItem(s->markers, ident))
1741 goto bail;
1742 Py_CLEAR(ident);
1743 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001744
1745 /* TODO: DOES NOT RUN
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001746 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001747 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001748
1749 yield '\n' + (' ' * (_indent * _current_indent_level))
1750 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001751 if (_PyAccu_Accumulate(acc, close_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001752 goto bail;
1753 Py_DECREF(s_fast);
1754 return 0;
1755
1756bail:
1757 Py_XDECREF(ident);
1758 Py_DECREF(s_fast);
1759 return -1;
1760}
1761
1762static void
1763encoder_dealloc(PyObject *self)
1764{
INADA Naokia6296d32017-08-24 14:55:17 +09001765 /* bpo-31095: UnTrack is needed before calling any callbacks */
1766 PyObject_GC_UnTrack(self);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001767 encoder_clear(self);
1768 Py_TYPE(self)->tp_free(self);
1769}
1770
1771static int
1772encoder_traverse(PyObject *self, visitproc visit, void *arg)
1773{
1774 PyEncoderObject *s;
1775 assert(PyEncoder_Check(self));
1776 s = (PyEncoderObject *)self;
1777 Py_VISIT(s->markers);
1778 Py_VISIT(s->defaultfn);
1779 Py_VISIT(s->encoder);
1780 Py_VISIT(s->indent);
1781 Py_VISIT(s->key_separator);
1782 Py_VISIT(s->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001783 return 0;
1784}
1785
1786static int
1787encoder_clear(PyObject *self)
1788{
1789 /* Deallocate Encoder */
1790 PyEncoderObject *s;
1791 assert(PyEncoder_Check(self));
1792 s = (PyEncoderObject *)self;
1793 Py_CLEAR(s->markers);
1794 Py_CLEAR(s->defaultfn);
1795 Py_CLEAR(s->encoder);
1796 Py_CLEAR(s->indent);
1797 Py_CLEAR(s->key_separator);
1798 Py_CLEAR(s->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001799 return 0;
1800}
1801
1802PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1803
1804static
1805PyTypeObject PyEncoderType = {
1806 PyVarObject_HEAD_INIT(NULL, 0)
1807 "_json.Encoder", /* tp_name */
1808 sizeof(PyEncoderObject), /* tp_basicsize */
1809 0, /* tp_itemsize */
1810 encoder_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001811 0, /* tp_vectorcall_offset */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001812 0, /* tp_getattr */
1813 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001814 0, /* tp_as_async */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001815 0, /* tp_repr */
1816 0, /* tp_as_number */
1817 0, /* tp_as_sequence */
1818 0, /* tp_as_mapping */
1819 0, /* tp_hash */
1820 encoder_call, /* tp_call */
1821 0, /* tp_str */
1822 0, /* tp_getattro */
1823 0, /* tp_setattro */
1824 0, /* tp_as_buffer */
1825 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1826 encoder_doc, /* tp_doc */
1827 encoder_traverse, /* tp_traverse */
1828 encoder_clear, /* tp_clear */
1829 0, /* tp_richcompare */
1830 0, /* tp_weaklistoffset */
1831 0, /* tp_iter */
1832 0, /* tp_iternext */
1833 0, /* tp_methods */
1834 encoder_members, /* tp_members */
1835 0, /* tp_getset */
1836 0, /* tp_base */
1837 0, /* tp_dict */
1838 0, /* tp_descr_get */
1839 0, /* tp_descr_set */
1840 0, /* tp_dictoffset */
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001841 0, /* tp_init */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001842 0, /* tp_alloc */
1843 encoder_new, /* tp_new */
1844 0, /* tp_free */
1845};
1846
1847static PyMethodDef speedups_methods[] = {
1848 {"encode_basestring_ascii",
1849 (PyCFunction)py_encode_basestring_ascii,
1850 METH_O,
1851 pydoc_encode_basestring_ascii},
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001852 {"encode_basestring",
1853 (PyCFunction)py_encode_basestring,
1854 METH_O,
1855 pydoc_encode_basestring},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001856 {"scanstring",
1857 (PyCFunction)py_scanstring,
1858 METH_VARARGS,
1859 pydoc_scanstring},
Christian Heimes90540002008-05-08 14:29:10 +00001860 {NULL, NULL, 0, NULL}
1861};
1862
1863PyDoc_STRVAR(module_doc,
1864"json speedups\n");
1865
Hai Shied154c32020-01-16 00:32:51 +08001866static int
1867_json_exec(PyObject *module)
1868{
1869 if (PyType_Ready(&PyScannerType) < 0) {
1870 return -1;
1871 }
1872 if (PyType_Ready(&PyEncoderType) < 0) {
1873 return -1;
1874 }
1875 Py_INCREF((PyObject*)&PyScannerType);
1876 if (PyModule_AddObject(module, "make_scanner", (PyObject*)&PyScannerType) < 0) {
1877 Py_DECREF((PyObject*)&PyScannerType);
1878 return -1;
1879 }
1880 Py_INCREF((PyObject*)&PyEncoderType);
1881 if (PyModule_AddObject(module, "make_encoder", (PyObject*)&PyEncoderType) < 0) {
1882 Py_DECREF((PyObject*)&PyEncoderType);
1883 return -1;
1884 }
1885 return 0;
1886}
1887
1888static PyModuleDef_Slot _json_slots[] = {
1889 {Py_mod_exec, _json_exec},
1890 {0, NULL}
1891};
1892
Martin v. Löwis1a214512008-06-11 05:26:20 +00001893static struct PyModuleDef jsonmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001894 PyModuleDef_HEAD_INIT,
1895 "_json",
1896 module_doc,
Hai Shied154c32020-01-16 00:32:51 +08001897 0,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001898 speedups_methods,
Hai Shied154c32020-01-16 00:32:51 +08001899 _json_slots,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001900 NULL,
1901 NULL,
1902 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001903};
1904
Victor Stinnerf024d262015-03-17 17:48:27 +01001905PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001906PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00001907{
Hai Shied154c32020-01-16 00:32:51 +08001908 return PyModuleDef_Init(&jsonmodule);
Christian Heimes90540002008-05-08 14:29:10 +00001909}