blob: 54ac605fd7ef4223ffb042f54e0e7914e7495949 [file] [log] [blame]
/* JSON accelerator C extension: _json module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
 * platforms. */
Eric Snow2ebc5ce2017-09-07 23:51:28 -06006
Victor Stinner5c75f372019-04-17 23:02:26 +02007#if !defined(Py_BUILD_CORE_BUILTIN) && !defined(Py_BUILD_CORE_MODULE)
8# error "Py_BUILD_CORE_BUILTIN or Py_BUILD_CORE_MODULE must be defined"
Eric Snow2ebc5ce2017-09-07 23:51:28 -06009#endif
10
Christian Heimes90540002008-05-08 14:29:10 +000011#include "Python.h"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000012#include "structmember.h"
Victor Stinnere281f7d2018-11-01 02:30:36 +010013#include "pycore_accu.h"
Antoine Pitroud0acb412012-03-22 14:42:18 +010014
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000015#ifdef __GNUC__
16#define UNUSED __attribute__((__unused__))
17#else
18#define UNUSED
19#endif
20
21#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
22#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
23#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
24#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
25
26static PyTypeObject PyScannerType;
27static PyTypeObject PyEncoderType;
28
/* Instance layout of the C scanner (decoder) object. */
typedef struct _PyScannerObject {
    PyObject_HEAD
    /* Exposed read-only as a T_BOOL member; handed to scanstring_unicode()
       as its `strict` flag when object keys are scanned. */
    signed char strict;
    /* Exposed read-only.  NOTE(review): presumably the user callback applied
       to decoded objects -- its use is not visible in this part of the file. */
    PyObject *object_hook;
    /* Exposed read-only.  When this is not Py_None, _parse_object_unicode()
       accumulates (key, value) tuples in a list instead of filling a dict. */
    PyObject *object_pairs_hook;
    /* Exposed read-only.  NOTE(review): presumably the callables used for
       floats, ints and constants -- confirm against the number parser. */
    PyObject *parse_float;
    PyObject *parse_int;
    PyObject *parse_constant;
    /* Dict used with PyDict_SetDefault() so that equal object keys share a
       single string object.  Not exposed through scanner_members. */
    PyObject *memo;
} PyScannerObject;
39
40static PyMemberDef scanner_members[] = {
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030041 {"strict", T_BOOL, offsetof(PyScannerObject, strict), READONLY, "strict"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000042 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
43 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
44 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
45 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
46 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
47 {NULL}
48};
49
/* Instance layout of the C encoder object.  The six object fields and the
   sort_keys/skipkeys flags are exposed read-only through encoder_members
   (defaultfn under the attribute name "default"); allow_nan and fast_encode
   are not exposed there. */
typedef struct _PyEncoderObject {
    PyObject_HEAD
    PyObject *markers;
    PyObject *defaultfn;
    PyObject *encoder;
    PyObject *indent;
    PyObject *key_separator;
    PyObject *item_separator;
    /* Stored as char because they are exposed as T_BOOL members. */
    char sort_keys;
    char skipkeys;
    int allow_nan;
    /* NOTE(review): presumably a C-level shortcut for the string encoder;
       where it is set and called is not visible in this part of the file. */
    PyCFunction fast_encode;
} PyEncoderObject;
63
64static PyMemberDef encoder_members[] = {
65 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
66 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
67 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
68 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
69 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
70 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030071 {"sort_keys", T_BOOL, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
72 {"skipkeys", T_BOOL, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000073 {NULL}
74};
75
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020076/* Forward decls */
77
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000078static PyObject *
79ascii_escape_unicode(PyObject *pystr);
80static PyObject *
81py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
82void init_json(void);
83static PyObject *
84scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
85static PyObject *
86_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
87static PyObject *
88scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000089static void
90scanner_dealloc(PyObject *self);
91static int
92scanner_clear(PyObject *self);
93static PyObject *
94encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000095static void
96encoder_dealloc(PyObject *self);
97static int
98encoder_clear(PyObject *self);
99static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200100encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000101static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200102encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000103static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200104encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000105static PyObject *
Hirokazu Yamamotofecf5d12009-05-02 15:55:19 +0000106_encoded_const(PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000107static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200108raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000109static PyObject *
110encoder_encode_string(PyEncoderObject *s, PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000111static PyObject *
112encoder_encode_float(PyEncoderObject *s, PyObject *obj);
113
/* True for a printable ASCII character that needs no escaping inside a JSON
   string (everything from ' ' to '~' except backslash and double quote).
   The argument is parenthesized so that expression arguments are evaluated
   as a unit; note that it is still evaluated more than once. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* True for the four characters skipped as whitespace: space, tab, LF, CR. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
Christian Heimes90540002008-05-08 14:29:10 +0000116
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000117static Py_ssize_t
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200118ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000119{
120 /* Escape unicode code point c to ASCII escape sequences
121 in char *output. output must have at least 12 bytes unused to
122 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000123 output[chars++] = '\\';
124 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000125 case '\\': output[chars++] = c; break;
126 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000127 case '\b': output[chars++] = 'b'; break;
128 case '\f': output[chars++] = 'f'; break;
129 case '\n': output[chars++] = 'n'; break;
130 case '\r': output[chars++] = 'r'; break;
131 case '\t': output[chars++] = 't'; break;
132 default:
Christian Heimes90540002008-05-08 14:29:10 +0000133 if (c >= 0x10000) {
134 /* UTF-16 surrogate pair */
Victor Stinner76df43d2012-10-30 01:42:39 +0100135 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000136 output[chars++] = 'u';
Victor Stinner76df43d2012-10-30 01:42:39 +0100137 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
138 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
139 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
140 output[chars++] = Py_hexdigits[(v ) & 0xf];
141 c = Py_UNICODE_LOW_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000142 output[chars++] = '\\';
143 }
Christian Heimes90540002008-05-08 14:29:10 +0000144 output[chars++] = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200145 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
146 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
147 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
148 output[chars++] = Py_hexdigits[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000149 }
150 return chars;
151}
152
153static PyObject *
154ascii_escape_unicode(PyObject *pystr)
155{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000156 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000157 Py_ssize_t i;
158 Py_ssize_t input_chars;
159 Py_ssize_t output_size;
160 Py_ssize_t chars;
161 PyObject *rval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200162 void *input;
163 unsigned char *output;
164 int kind;
Christian Heimes90540002008-05-08 14:29:10 +0000165
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200166 if (PyUnicode_READY(pystr) == -1)
167 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000168
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200169 input_chars = PyUnicode_GET_LENGTH(pystr);
170 input = PyUnicode_DATA(pystr);
171 kind = PyUnicode_KIND(pystr);
172
173 /* Compute the output size */
174 for (i = 0, output_size = 2; i < input_chars; i++) {
175 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500176 Py_ssize_t d;
177 if (S_CHAR(c)) {
178 d = 1;
179 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200180 else {
181 switch(c) {
Victor Stinnerd9c06312011-10-11 21:56:19 +0200182 case '\\': case '"': case '\b': case '\f':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200183 case '\n': case '\r': case '\t':
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500184 d = 2; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200185 default:
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500186 d = c >= 0x10000 ? 12 : 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200187 }
188 }
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500189 if (output_size > PY_SSIZE_T_MAX - d) {
190 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
191 return NULL;
192 }
193 output_size += d;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200194 }
195
196 rval = PyUnicode_New(output_size, 127);
Christian Heimes90540002008-05-08 14:29:10 +0000197 if (rval == NULL) {
198 return NULL;
199 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200200 output = PyUnicode_1BYTE_DATA(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000201 chars = 0;
202 output[chars++] = '"';
203 for (i = 0; i < input_chars; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200204 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Christian Heimes90540002008-05-08 14:29:10 +0000205 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000206 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000207 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000208 else {
209 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000210 }
Christian Heimes90540002008-05-08 14:29:10 +0000211 }
212 output[chars++] = '"';
Christian Heimesf402e922013-01-03 09:21:55 +0100213#ifdef Py_DEBUG
Victor Stinner8f825062012-04-27 13:55:39 +0200214 assert(_PyUnicode_CheckConsistency(rval, 1));
Christian Heimesf402e922013-01-03 09:21:55 +0100215#endif
Christian Heimes90540002008-05-08 14:29:10 +0000216 return rval;
217}
218
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100219static PyObject *
220escape_unicode(PyObject *pystr)
221{
222 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
223 Py_ssize_t i;
224 Py_ssize_t input_chars;
225 Py_ssize_t output_size;
226 Py_ssize_t chars;
227 PyObject *rval;
228 void *input;
229 int kind;
230 Py_UCS4 maxchar;
231
232 if (PyUnicode_READY(pystr) == -1)
233 return NULL;
234
235 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
236 input_chars = PyUnicode_GET_LENGTH(pystr);
237 input = PyUnicode_DATA(pystr);
238 kind = PyUnicode_KIND(pystr);
239
240 /* Compute the output size */
241 for (i = 0, output_size = 2; i < input_chars; i++) {
242 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500243 Py_ssize_t d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100244 switch (c) {
245 case '\\': case '"': case '\b': case '\f':
246 case '\n': case '\r': case '\t':
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500247 d = 2;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100248 break;
249 default:
250 if (c <= 0x1f)
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500251 d = 6;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100252 else
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500253 d = 1;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100254 }
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500255 if (output_size > PY_SSIZE_T_MAX - d) {
256 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
257 return NULL;
258 }
259 output_size += d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100260 }
261
262 rval = PyUnicode_New(output_size, maxchar);
263 if (rval == NULL)
264 return NULL;
265
266 kind = PyUnicode_KIND(rval);
267
268#define ENCODE_OUTPUT do { \
269 chars = 0; \
270 output[chars++] = '"'; \
271 for (i = 0; i < input_chars; i++) { \
272 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
273 switch (c) { \
274 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
275 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
276 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
277 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
278 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
279 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
280 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
281 default: \
282 if (c <= 0x1f) { \
283 output[chars++] = '\\'; \
284 output[chars++] = 'u'; \
285 output[chars++] = '0'; \
286 output[chars++] = '0'; \
287 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
288 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
289 } else { \
290 output[chars++] = c; \
291 } \
292 } \
293 } \
294 output[chars++] = '"'; \
295 } while (0)
296
297 if (kind == PyUnicode_1BYTE_KIND) {
298 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
299 ENCODE_OUTPUT;
300 } else if (kind == PyUnicode_2BYTE_KIND) {
301 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
302 ENCODE_OUTPUT;
303 } else {
304 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
305 assert(kind == PyUnicode_4BYTE_KIND);
306 ENCODE_OUTPUT;
307 }
308#undef ENCODE_OUTPUT
309
310#ifdef Py_DEBUG
311 assert(_PyUnicode_CheckConsistency(rval, 1));
312#endif
313 return rval;
314}
315
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000316static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200317raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
Christian Heimes90540002008-05-08 14:29:10 +0000318{
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200319 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
320 static PyObject *JSONDecodeError = NULL;
321 PyObject *exc;
322 if (JSONDecodeError == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000323 PyObject *decoder = PyImport_ImportModule("json.decoder");
324 if (decoder == NULL)
325 return;
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200326 JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000327 Py_DECREF(decoder);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200328 if (JSONDecodeError == NULL)
Christian Heimes90540002008-05-08 14:29:10 +0000329 return;
Christian Heimes90540002008-05-08 14:29:10 +0000330 }
Victor Stinner4c381542016-12-09 00:33:39 +0100331 exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200332 if (exc) {
333 PyErr_SetObject(JSONDecodeError, exc);
334 Py_DECREF(exc);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000335 }
Christian Heimes90540002008-05-08 14:29:10 +0000336}
337
Ezio Melotti37623ab2013-01-03 08:44:15 +0200338static void
339raise_stop_iteration(Py_ssize_t idx)
340{
341 PyObject *value = PyLong_FromSsize_t(idx);
342 if (value != NULL) {
343 PyErr_SetObject(PyExc_StopIteration, value);
344 Py_DECREF(value);
345 }
346}
347
Christian Heimes90540002008-05-08 14:29:10 +0000348static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000349_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
350 /* return (rval, idx) tuple, stealing reference to rval */
351 PyObject *tpl;
352 PyObject *pyidx;
353 /*
354 steal a reference to rval, returns (rval, idx)
355 */
356 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000357 return NULL;
358 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000359 pyidx = PyLong_FromSsize_t(idx);
360 if (pyidx == NULL) {
361 Py_DECREF(rval);
362 return NULL;
363 }
364 tpl = PyTuple_New(2);
365 if (tpl == NULL) {
366 Py_DECREF(pyidx);
367 Py_DECREF(rval);
368 return NULL;
369 }
370 PyTuple_SET_ITEM(tpl, 0, rval);
371 PyTuple_SET_ITEM(tpl, 1, pyidx);
372 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000373}
374
375static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000376scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000377{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000378 /* Read the JSON string from PyUnicode pystr.
379 end is the index of the first character after the quote.
380 if strict is zero then literal control characters are allowed
381 *next_end_ptr is a return-by-reference index of the character
382 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000383
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000384 Return value is a new PyUnicode
385 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000386 PyObject *rval = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200387 Py_ssize_t len;
Christian Heimes90540002008-05-08 14:29:10 +0000388 Py_ssize_t begin = end - 1;
Brett Cannonb94767f2011-02-22 20:15:44 +0000389 Py_ssize_t next /* = begin */;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200390 const void *buf;
391 int kind;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000392
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200393 if (PyUnicode_READY(pystr) == -1)
394 return 0;
395
Inada Naoki9c110292019-10-17 16:12:41 +0900396 _PyUnicodeWriter writer;
397 _PyUnicodeWriter_Init(&writer);
398 writer.overallocate = 1;
399
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200400 len = PyUnicode_GET_LENGTH(pystr);
401 buf = PyUnicode_DATA(pystr);
402 kind = PyUnicode_KIND(pystr);
403
Ezio Melotti37623ab2013-01-03 08:44:15 +0200404 if (end < 0 || len < end) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000405 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
406 goto bail;
407 }
Christian Heimes90540002008-05-08 14:29:10 +0000408 while (1) {
409 /* Find the end of the string or the next escape */
Inada Naoki2a570af2019-08-08 17:57:10 +0900410 Py_UCS4 c;
411 {
412 // Use tight scope variable to help register allocation.
413 Py_UCS4 d = 0;
414 for (next = end; next < len; next++) {
415 d = PyUnicode_READ(kind, buf, next);
416 if (d == '"' || d == '\\') {
417 break;
418 }
419 if (d <= 0x1f && strict) {
420 raise_errmsg("Invalid control character at", pystr, next);
421 goto bail;
422 }
Christian Heimes90540002008-05-08 14:29:10 +0000423 }
Inada Naoki2a570af2019-08-08 17:57:10 +0900424 c = d;
Christian Heimes90540002008-05-08 14:29:10 +0000425 }
Inada Naoki9c110292019-10-17 16:12:41 +0900426
427 if (c == '"') {
428 // Fast path for simple case.
429 if (writer.buffer == NULL) {
430 PyObject *ret = PyUnicode_Substring(pystr, end, next);
431 if (ret == NULL) {
432 goto bail;
433 }
434 *next_end_ptr = next + 1;;
435 return ret;
436 }
437 }
438 else if (c != '\\') {
Christian Heimes90540002008-05-08 14:29:10 +0000439 raise_errmsg("Unterminated string starting at", pystr, begin);
440 goto bail;
441 }
Inada Naoki9c110292019-10-17 16:12:41 +0900442
Christian Heimes90540002008-05-08 14:29:10 +0000443 /* Pick up this chunk if it's not zero length */
444 if (next != end) {
Inada Naoki9c110292019-10-17 16:12:41 +0900445 if (_PyUnicodeWriter_WriteSubstring(&writer, pystr, end, next) < 0) {
Christian Heimes90540002008-05-08 14:29:10 +0000446 goto bail;
447 }
Christian Heimes90540002008-05-08 14:29:10 +0000448 }
449 next++;
450 if (c == '"') {
451 end = next;
452 break;
453 }
454 if (next == len) {
455 raise_errmsg("Unterminated string starting at", pystr, begin);
456 goto bail;
457 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200458 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000459 if (c != 'u') {
460 /* Non-unicode backslash escapes */
461 end = next + 1;
462 switch (c) {
463 case '"': break;
464 case '\\': break;
465 case '/': break;
466 case 'b': c = '\b'; break;
467 case 'f': c = '\f'; break;
468 case 'n': c = '\n'; break;
469 case 'r': c = '\r'; break;
470 case 't': c = '\t'; break;
471 default: c = 0;
472 }
473 if (c == 0) {
474 raise_errmsg("Invalid \\escape", pystr, end - 2);
475 goto bail;
476 }
477 }
478 else {
479 c = 0;
480 next++;
481 end = next + 4;
482 if (end >= len) {
483 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
484 goto bail;
485 }
486 /* Decode 4 hex digits */
487 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200488 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000489 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000490 switch (digit) {
491 case '0': case '1': case '2': case '3': case '4':
492 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000493 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000494 case 'a': case 'b': case 'c': case 'd': case 'e':
495 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000496 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000497 case 'A': case 'B': case 'C': case 'D': case 'E':
498 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000499 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000500 default:
501 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
502 goto bail;
503 }
504 }
Christian Heimes90540002008-05-08 14:29:10 +0000505 /* Surrogate pair */
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200506 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
507 PyUnicode_READ(kind, buf, next++) == '\\' &&
508 PyUnicode_READ(kind, buf, next++) == 'u') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200509 Py_UCS4 c2 = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000510 end += 6;
511 /* Decode 4 hex digits */
512 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200513 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000514 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000515 switch (digit) {
516 case '0': case '1': case '2': case '3': case '4':
517 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000518 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000519 case 'a': case 'b': case 'c': case 'd': case 'e':
520 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000521 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000522 case 'A': case 'B': case 'C': case 'D': case 'E':
523 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000524 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000525 default:
526 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
527 goto bail;
528 }
529 }
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200530 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
531 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
532 else
533 end -= 6;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000534 }
Christian Heimes90540002008-05-08 14:29:10 +0000535 }
Inada Naoki9c110292019-10-17 16:12:41 +0900536 if (_PyUnicodeWriter_WriteChar(&writer, c) < 0) {
Christian Heimes90540002008-05-08 14:29:10 +0000537 goto bail;
538 }
Christian Heimes90540002008-05-08 14:29:10 +0000539 }
540
Inada Naoki9c110292019-10-17 16:12:41 +0900541 rval = _PyUnicodeWriter_Finish(&writer);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000542 *next_end_ptr = end;
543 return rval;
Inada Naoki9c110292019-10-17 16:12:41 +0900544
Christian Heimes90540002008-05-08 14:29:10 +0000545bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000546 *next_end_ptr = -1;
Inada Naoki9c110292019-10-17 16:12:41 +0900547 _PyUnicodeWriter_Dealloc(&writer);
Christian Heimes90540002008-05-08 14:29:10 +0000548 return NULL;
549}
550
551PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000552 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000553 "\n"
554 "Scan the string s for a JSON string. End is the index of the\n"
555 "character in s after the quote that started the JSON string.\n"
556 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
557 "on attempt to decode an invalid string. If strict is False then literal\n"
558 "control characters are allowed in the string.\n"
559 "\n"
560 "Returns a tuple of the decoded string and the index of the character in s\n"
561 "after the end quote."
562);
Christian Heimes90540002008-05-08 14:29:10 +0000563
564static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000565py_scanstring(PyObject* self UNUSED, PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000566{
567 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000568 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000569 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000570 Py_ssize_t next_end = -1;
571 int strict = 1;
Antoine Pitroucbb02842012-12-01 19:34:16 +0100572 if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000573 return NULL;
574 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000575 if (PyUnicode_Check(pystr)) {
576 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000577 }
578 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000579 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000580 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000581 Py_TYPE(pystr)->tp_name);
582 return NULL;
583 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000584 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000585}
586
587PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000588 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000589 "\n"
590 "Return an ASCII-only JSON representation of a Python string"
591);
Christian Heimes90540002008-05-08 14:29:10 +0000592
593static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000594py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000595{
596 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000597 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000598 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000599 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000600 rval = ascii_escape_unicode(pystr);
601 }
602 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000603 PyErr_Format(PyExc_TypeError,
604 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000605 Py_TYPE(pystr)->tp_name);
606 return NULL;
607 }
Christian Heimes90540002008-05-08 14:29:10 +0000608 return rval;
609}
610
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100611
612PyDoc_STRVAR(pydoc_encode_basestring,
613 "encode_basestring(string) -> string\n"
614 "\n"
615 "Return a JSON representation of a Python string"
616);
617
618static PyObject *
619py_encode_basestring(PyObject* self UNUSED, PyObject *pystr)
620{
621 PyObject *rval;
622 /* Return a JSON representation of a Python string */
623 /* METH_O */
624 if (PyUnicode_Check(pystr)) {
625 rval = escape_unicode(pystr);
626 }
627 else {
628 PyErr_Format(PyExc_TypeError,
629 "first argument must be a string, not %.80s",
630 Py_TYPE(pystr)->tp_name);
631 return NULL;
632 }
633 return rval;
634}
635
/* Deallocator for scanner objects: stop GC tracking, drop every owned
   reference via scanner_clear(), then free the object with tp_free. */
static void
scanner_dealloc(PyObject *self)
{
    /* bpo-31095: UnTrack is needed before calling any callbacks */
    PyObject_GC_UnTrack(self);
    scanner_clear(self);
    Py_TYPE(self)->tp_free(self);
}
644
645static int
646scanner_traverse(PyObject *self, visitproc visit, void *arg)
647{
648 PyScannerObject *s;
649 assert(PyScanner_Check(self));
650 s = (PyScannerObject *)self;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000651 Py_VISIT(s->object_hook);
652 Py_VISIT(s->object_pairs_hook);
653 Py_VISIT(s->parse_float);
654 Py_VISIT(s->parse_int);
655 Py_VISIT(s->parse_constant);
656 return 0;
657}
658
/* Release every object reference held by the scanner, including the memo
   dict.  Each field is set to NULL by Py_CLEAR, so calling this again is
   harmless.  Called from scanner_dealloc().  Always returns 0. */
static int
scanner_clear(PyObject *self)
{
    PyScannerObject *s;
    assert(PyScanner_Check(self));
    s = (PyScannerObject *)self;
    Py_CLEAR(s->object_hook);
    Py_CLEAR(s->object_pairs_hook);
    Py_CLEAR(s->parse_float);
    Py_CLEAR(s->parse_int);
    Py_CLEAR(s->parse_constant);
    Py_CLEAR(s->memo);
    return 0;
}
673
674static PyObject *
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300675_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
676{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000677 /* Read a JSON object from PyUnicode pystr.
678 idx is the index of the first character after the opening curly brace.
679 *next_idx_ptr is a return-by-reference index to the first character after
680 the closing curly brace.
681
682 Returns a new PyObject (usually a dict, but object_hook can change that)
683 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200684 void *str;
685 int kind;
686 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000687 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000688 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000689 PyObject *key = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000690 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000691 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000692
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200693 if (PyUnicode_READY(pystr) == -1)
694 return NULL;
695
696 str = PyUnicode_DATA(pystr);
697 kind = PyUnicode_KIND(pystr);
698 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
699
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000700 if (has_pairs_hook)
701 rval = PyList_New(0);
702 else
703 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000704 if (rval == NULL)
705 return NULL;
706
707 /* skip whitespace after { */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200708 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000709
710 /* only loop if the object is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200711 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
712 while (1) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000713 PyObject *memokey;
714
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000715 /* read key */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200716 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200717 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000718 goto bail;
719 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300720 key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000721 if (key == NULL)
722 goto bail;
Inada Naoki2a570af2019-08-08 17:57:10 +0900723 memokey = PyDict_SetDefault(s->memo, key, key);
724 if (memokey == NULL) {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200725 goto bail;
726 }
Inada Naoki2a570af2019-08-08 17:57:10 +0900727 Py_INCREF(memokey);
728 Py_DECREF(key);
729 key = memokey;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000730 idx = next_idx;
731
732 /* skip whitespace between key and : delimiter, read :, skip whitespace */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200733 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
734 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200735 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000736 goto bail;
737 }
738 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200739 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000740
741 /* read any JSON term */
742 val = scan_once_unicode(s, pystr, idx, &next_idx);
743 if (val == NULL)
744 goto bail;
745
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000746 if (has_pairs_hook) {
747 PyObject *item = PyTuple_Pack(2, key, val);
748 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000749 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000750 Py_CLEAR(key);
751 Py_CLEAR(val);
752 if (PyList_Append(rval, item) == -1) {
753 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000754 goto bail;
755 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000756 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000757 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000758 else {
759 if (PyDict_SetItem(rval, key, val) < 0)
760 goto bail;
761 Py_CLEAR(key);
762 Py_CLEAR(val);
763 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000764 idx = next_idx;
765
766 /* skip whitespace before } or , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200767 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000768
769 /* bail if the object is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200770 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000771 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200772 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200773 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000774 goto bail;
775 }
776 idx++;
777
778 /* skip whitespace after , delimiter */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200779 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000780 }
781 }
782
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000783 *next_idx_ptr = idx + 1;
784
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000785 if (has_pairs_hook) {
Jeroen Demeyer196a5302019-07-04 12:31:34 +0200786 val = _PyObject_CallOneArg(s->object_pairs_hook, rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000787 Py_DECREF(rval);
788 return val;
789 }
790
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000791 /* if object_hook is not None: rval = object_hook(rval) */
792 if (s->object_hook != Py_None) {
Jeroen Demeyer196a5302019-07-04 12:31:34 +0200793 val = _PyObject_CallOneArg(s->object_hook, rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000794 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000795 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000796 }
797 return rval;
798bail:
799 Py_XDECREF(key);
800 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000801 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000802 return NULL;
803}
804
805static PyObject *
806_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200807 /* Read a JSON array from PyUnicode pystr.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000808 idx is the index of the first character after the opening brace.
809 *next_idx_ptr is a return-by-reference index to the first character after
810 the closing brace.
811
812 Returns a new PyList
813 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200814 void *str;
815 int kind;
816 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000817 PyObject *val = NULL;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200818 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000819 Py_ssize_t next_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000820
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200821 if (PyUnicode_READY(pystr) == -1)
822 return NULL;
823
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200824 rval = PyList_New(0);
825 if (rval == NULL)
826 return NULL;
827
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200828 str = PyUnicode_DATA(pystr);
829 kind = PyUnicode_KIND(pystr);
830 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
831
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000832 /* skip whitespace after [ */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200833 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000834
835 /* only loop if the array is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200836 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
837 while (1) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000838
839 /* read any JSON term */
840 val = scan_once_unicode(s, pystr, idx, &next_idx);
841 if (val == NULL)
842 goto bail;
843
844 if (PyList_Append(rval, val) == -1)
845 goto bail;
846
847 Py_CLEAR(val);
848 idx = next_idx;
849
850 /* skip whitespace between term and , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200851 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000852
853 /* bail if the array is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200854 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000855 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200856 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200857 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000858 goto bail;
859 }
860 idx++;
861
862 /* skip whitespace after , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200863 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000864 }
865 }
866
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200867 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
868 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200869 raise_errmsg("Expecting value", pystr, end_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000870 goto bail;
871 }
872 *next_idx_ptr = idx + 1;
873 return rval;
874bail:
875 Py_XDECREF(val);
876 Py_DECREF(rval);
877 return NULL;
878}
879
880static PyObject *
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200881_parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
882 /* Read a JSON constant.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000883 constant is the constant string that was found
884 ("NaN", "Infinity", "-Infinity").
885 idx is the index of the first character of the constant
886 *next_idx_ptr is a return-by-reference index to the first character after
887 the constant.
888
889 Returns the result of parse_constant
890 */
891 PyObject *cstr;
892 PyObject *rval;
893 /* constant is "NaN", "Infinity", or "-Infinity" */
894 cstr = PyUnicode_InternFromString(constant);
895 if (cstr == NULL)
896 return NULL;
897
898 /* rval = parse_constant(constant) */
Jeroen Demeyer196a5302019-07-04 12:31:34 +0200899 rval = _PyObject_CallOneArg(s->parse_constant, cstr);
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200900 idx += PyUnicode_GET_LENGTH(cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000901 Py_DECREF(cstr);
902 *next_idx_ptr = idx;
903 return rval;
904}
905
906static PyObject *
907_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
908 /* Read a JSON number from PyUnicode pystr.
909 idx is the index of the first character of the number
910 *next_idx_ptr is a return-by-reference index to the first character after
911 the number.
912
913 Returns a new PyObject representation of that number:
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200914 PyLong, or PyFloat.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000915 May return other types if parse_int or parse_float are set
916 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200917 void *str;
918 int kind;
919 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000920 Py_ssize_t idx = start;
921 int is_float = 0;
922 PyObject *rval;
Antoine Pitrouf6454512011-04-25 19:16:06 +0200923 PyObject *numstr = NULL;
924 PyObject *custom_func;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000925
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200926 if (PyUnicode_READY(pystr) == -1)
927 return NULL;
928
929 str = PyUnicode_DATA(pystr);
930 kind = PyUnicode_KIND(pystr);
931 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
932
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000933 /* read a sign if it's there, make sure it's not the end of the string */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200934 if (PyUnicode_READ(kind, str, idx) == '-') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000935 idx++;
936 if (idx > end_idx) {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200937 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000938 return NULL;
939 }
940 }
941
942 /* read as many integer digits as we find as long as it doesn't start with 0 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200943 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000944 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200945 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000946 }
947 /* if it starts with 0 we only expect one integer digit */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200948 else if (PyUnicode_READ(kind, str, idx) == '0') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000949 idx++;
950 }
951 /* no integer digits, error */
952 else {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200953 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000954 return NULL;
955 }
956
957 /* if the next char is '.' followed by a digit then read all float digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200958 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000959 is_float = 1;
960 idx += 2;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200961 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000962 }
963
964 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200965 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000966 Py_ssize_t e_start = idx;
967 idx++;
968
969 /* read an exponent sign if present */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200970 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000971
972 /* read all digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200973 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000974
975 /* if we got a digit, then parse as float. if not, backtrack */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200976 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000977 is_float = 1;
978 }
979 else {
980 idx = e_start;
981 }
982 }
983
Antoine Pitrouf6454512011-04-25 19:16:06 +0200984 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
985 custom_func = s->parse_float;
986 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
987 custom_func = s->parse_int;
988 else
989 custom_func = NULL;
990
991 if (custom_func) {
992 /* copy the section we determined to be a number */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200993 numstr = PyUnicode_FromKindAndData(kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200994 (char*)str + kind * start,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200995 idx - start);
Antoine Pitrouf6454512011-04-25 19:16:06 +0200996 if (numstr == NULL)
997 return NULL;
Jeroen Demeyer196a5302019-07-04 12:31:34 +0200998 rval = _PyObject_CallOneArg(custom_func, numstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000999 }
1000 else {
Antoine Pitrouf6454512011-04-25 19:16:06 +02001001 Py_ssize_t i, n;
1002 char *buf;
1003 /* Straight conversion to ASCII, to avoid costly conversion of
1004 decimal unicode digits (which cannot appear here) */
1005 n = idx - start;
1006 numstr = PyBytes_FromStringAndSize(NULL, n);
1007 if (numstr == NULL)
1008 return NULL;
1009 buf = PyBytes_AS_STRING(numstr);
1010 for (i = 0; i < n; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001011 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001012 }
1013 if (is_float)
1014 rval = PyFloat_FromString(numstr);
1015 else
1016 rval = PyLong_FromString(buf, NULL, 10);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001017 }
1018 Py_DECREF(numstr);
1019 *next_idx_ptr = idx;
1020 return rval;
1021}
1022
1023static PyObject *
1024scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1025{
1026 /* Read one JSON term (of any kind) from PyUnicode pystr.
1027 idx is the index of the first character of the term
1028 *next_idx_ptr is a return-by-reference index to the first character after
1029 the number.
1030
1031 Returns a new PyObject representation of the term.
1032 */
Ezio Melotti362b9512011-05-07 17:58:09 +03001033 PyObject *res;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001034 void *str;
1035 int kind;
1036 Py_ssize_t length;
1037
1038 if (PyUnicode_READY(pystr) == -1)
1039 return NULL;
1040
1041 str = PyUnicode_DATA(pystr);
1042 kind = PyUnicode_KIND(pystr);
1043 length = PyUnicode_GET_LENGTH(pystr);
1044
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001045 if (idx < 0) {
Benjamin Peterson9beee042014-04-14 11:46:51 -04001046 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001047 return NULL;
1048 }
1049 if (idx >= length) {
Ezio Melotti37623ab2013-01-03 08:44:15 +02001050 raise_stop_iteration(idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001051 return NULL;
1052 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001053
1054 switch (PyUnicode_READ(kind, str, idx)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001055 case '"':
1056 /* string */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001057 return scanstring_unicode(pystr, idx + 1, s->strict, next_idx_ptr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001058 case '{':
1059 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +03001060 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1061 "from a unicode string"))
1062 return NULL;
1063 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1064 Py_LeaveRecursiveCall();
1065 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001066 case '[':
1067 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +03001068 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1069 "from a unicode string"))
1070 return NULL;
1071 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1072 Py_LeaveRecursiveCall();
1073 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001074 case 'n':
1075 /* null */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001076 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001077 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001078 Py_RETURN_NONE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001079 }
1080 break;
1081 case 't':
1082 /* true */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001083 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001084 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001085 Py_RETURN_TRUE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001086 }
1087 break;
1088 case 'f':
1089 /* false */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001090 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1091 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1092 PyUnicode_READ(kind, str, idx + 3) == 's' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001093 PyUnicode_READ(kind, str, idx + 4) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001094 *next_idx_ptr = idx + 5;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001095 Py_RETURN_FALSE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001096 }
1097 break;
1098 case 'N':
1099 /* NaN */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001100 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001101 PyUnicode_READ(kind, str, idx + 2) == 'N') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001102 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1103 }
1104 break;
1105 case 'I':
1106 /* Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001107 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1108 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1109 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001110 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001111 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1112 PyUnicode_READ(kind, str, idx + 6) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001113 PyUnicode_READ(kind, str, idx + 7) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001114 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1115 }
1116 break;
1117 case '-':
1118 /* -Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001119 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001120 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1121 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001122 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001123 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001124 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1125 PyUnicode_READ(kind, str, idx + 7) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001126 PyUnicode_READ(kind, str, idx + 8) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001127 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1128 }
1129 break;
1130 }
1131 /* Didn't find a string, object, array, or named constant. Look for a number. */
1132 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1133}
1134
1135static PyObject *
1136scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1137{
1138 /* Python callable interface to scan_once_{str,unicode} */
1139 PyObject *pystr;
1140 PyObject *rval;
1141 Py_ssize_t idx;
1142 Py_ssize_t next_idx = -1;
1143 static char *kwlist[] = {"string", "idx", NULL};
1144 PyScannerObject *s;
1145 assert(PyScanner_Check(self));
1146 s = (PyScannerObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001147 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001148 return NULL;
1149
1150 if (PyUnicode_Check(pystr)) {
1151 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1152 }
1153 else {
1154 PyErr_Format(PyExc_TypeError,
1155 "first argument must be a string, not %.80s",
1156 Py_TYPE(pystr)->tp_name);
1157 return NULL;
1158 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001159 PyDict_Clear(s->memo);
1160 if (rval == NULL)
1161 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001162 return _build_rval_index_tuple(rval, next_idx);
1163}
1164
1165static PyObject *
1166scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1167{
1168 PyScannerObject *s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001169 PyObject *ctx;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001170 PyObject *strict;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001171 static char *kwlist[] = {"context", NULL};
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001172
1173 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001174 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001175
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001176 s = (PyScannerObject *)type->tp_alloc(type, 0);
1177 if (s == NULL) {
1178 return NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001179 }
1180
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001181 s->memo = PyDict_New();
1182 if (s->memo == NULL)
1183 goto bail;
1184
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001185 /* All of these will fail "gracefully" so we don't need to verify them */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001186 strict = PyObject_GetAttrString(ctx, "strict");
1187 if (strict == NULL)
1188 goto bail;
1189 s->strict = PyObject_IsTrue(strict);
1190 Py_DECREF(strict);
1191 if (s->strict < 0)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001192 goto bail;
1193 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1194 if (s->object_hook == NULL)
1195 goto bail;
1196 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1197 if (s->object_pairs_hook == NULL)
1198 goto bail;
1199 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1200 if (s->parse_float == NULL)
1201 goto bail;
1202 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1203 if (s->parse_int == NULL)
1204 goto bail;
1205 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1206 if (s->parse_constant == NULL)
1207 goto bail;
1208
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001209 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001210
1211bail:
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001212 Py_DECREF(s);
1213 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001214}
1215
1216PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1217
1218static
1219PyTypeObject PyScannerType = {
1220 PyVarObject_HEAD_INIT(NULL, 0)
1221 "_json.Scanner", /* tp_name */
1222 sizeof(PyScannerObject), /* tp_basicsize */
1223 0, /* tp_itemsize */
1224 scanner_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001225 0, /* tp_vectorcall_offset */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001226 0, /* tp_getattr */
1227 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001228 0, /* tp_as_async */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001229 0, /* tp_repr */
1230 0, /* tp_as_number */
1231 0, /* tp_as_sequence */
1232 0, /* tp_as_mapping */
1233 0, /* tp_hash */
1234 scanner_call, /* tp_call */
1235 0, /* tp_str */
1236 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1237 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1238 0, /* tp_as_buffer */
1239 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1240 scanner_doc, /* tp_doc */
1241 scanner_traverse, /* tp_traverse */
1242 scanner_clear, /* tp_clear */
1243 0, /* tp_richcompare */
1244 0, /* tp_weaklistoffset */
1245 0, /* tp_iter */
1246 0, /* tp_iternext */
1247 0, /* tp_methods */
1248 scanner_members, /* tp_members */
1249 0, /* tp_getset */
1250 0, /* tp_base */
1251 0, /* tp_dict */
1252 0, /* tp_descr_get */
1253 0, /* tp_descr_set */
1254 0, /* tp_dictoffset */
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001255 0, /* tp_init */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001256 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1257 scanner_new, /* tp_new */
1258 0,/* PyObject_GC_Del, */ /* tp_free */
1259};
1260
1261static PyObject *
1262encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1263{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001264 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1265
1266 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001267 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001268 PyObject *item_separator;
1269 int sort_keys, skipkeys, allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001270
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001271 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001272 &markers, &defaultfn, &encoder, &indent,
1273 &key_separator, &item_separator,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001274 &sort_keys, &skipkeys, &allow_nan))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001275 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001276
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001277 if (markers != Py_None && !PyDict_Check(markers)) {
1278 PyErr_Format(PyExc_TypeError,
1279 "make_encoder() argument 1 must be dict or None, "
1280 "not %.200s", Py_TYPE(markers)->tp_name);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001281 return NULL;
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001282 }
1283
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001284 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1285 if (s == NULL)
1286 return NULL;
1287
Antoine Pitrou781eba72009-12-08 15:57:31 +00001288 s->markers = markers;
1289 s->defaultfn = defaultfn;
1290 s->encoder = encoder;
1291 s->indent = indent;
1292 s->key_separator = key_separator;
1293 s->item_separator = item_separator;
1294 s->sort_keys = sort_keys;
1295 s->skipkeys = skipkeys;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001296 s->allow_nan = allow_nan;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001297 s->fast_encode = NULL;
1298 if (PyCFunction_Check(s->encoder)) {
1299 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1300 if (f == (PyCFunction)py_encode_basestring_ascii ||
1301 f == (PyCFunction)py_encode_basestring) {
1302 s->fast_encode = f;
1303 }
1304 }
Antoine Pitrou781eba72009-12-08 15:57:31 +00001305
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001306 Py_INCREF(s->markers);
1307 Py_INCREF(s->defaultfn);
1308 Py_INCREF(s->encoder);
1309 Py_INCREF(s->indent);
1310 Py_INCREF(s->key_separator);
1311 Py_INCREF(s->item_separator);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001312 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001313}
1314
1315static PyObject *
1316encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1317{
1318 /* Python callable interface to encode_listencode_obj */
1319 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1320 PyObject *obj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001321 Py_ssize_t indent_level;
1322 PyEncoderObject *s;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001323 _PyAccu acc;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001324
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001325 assert(PyEncoder_Check(self));
1326 s = (PyEncoderObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001327 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1328 &obj, &indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001329 return NULL;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001330 if (_PyAccu_Init(&acc))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001331 return NULL;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001332 if (encoder_listencode_obj(s, &acc, obj, indent_level)) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001333 _PyAccu_Destroy(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001334 return NULL;
1335 }
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001336 return _PyAccu_FinishAsList(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001337}
1338
1339static PyObject *
1340_encoded_const(PyObject *obj)
1341{
1342 /* Return the JSON string representation of None, True, False */
1343 if (obj == Py_None) {
1344 static PyObject *s_null = NULL;
1345 if (s_null == NULL) {
1346 s_null = PyUnicode_InternFromString("null");
1347 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001348 Py_XINCREF(s_null);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001349 return s_null;
1350 }
1351 else if (obj == Py_True) {
1352 static PyObject *s_true = NULL;
1353 if (s_true == NULL) {
1354 s_true = PyUnicode_InternFromString("true");
1355 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001356 Py_XINCREF(s_true);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001357 return s_true;
1358 }
1359 else if (obj == Py_False) {
1360 static PyObject *s_false = NULL;
1361 if (s_false == NULL) {
1362 s_false = PyUnicode_InternFromString("false");
1363 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001364 Py_XINCREF(s_false);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001365 return s_false;
1366 }
1367 else {
1368 PyErr_SetString(PyExc_ValueError, "not a const");
1369 return NULL;
1370 }
1371}
1372
1373static PyObject *
1374encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1375{
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001376 /* Return the JSON representation of a PyFloat. */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001377 double i = PyFloat_AS_DOUBLE(obj);
1378 if (!Py_IS_FINITE(i)) {
1379 if (!s->allow_nan) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001380 PyErr_SetString(
1381 PyExc_ValueError,
1382 "Out of range float values are not JSON compliant"
1383 );
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001384 return NULL;
1385 }
1386 if (i > 0) {
1387 return PyUnicode_FromString("Infinity");
1388 }
1389 else if (i < 0) {
1390 return PyUnicode_FromString("-Infinity");
1391 }
1392 else {
1393 return PyUnicode_FromString("NaN");
1394 }
1395 }
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001396 return PyFloat_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001397}
1398
1399static PyObject *
1400encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1401{
1402 /* Return the JSON representation of a string */
Oren Milman2b382dd2017-09-24 12:07:12 +03001403 PyObject *encoded;
1404
1405 if (s->fast_encode) {
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001406 return s->fast_encode(NULL, obj);
Oren Milman2b382dd2017-09-24 12:07:12 +03001407 }
Jeroen Demeyer196a5302019-07-04 12:31:34 +02001408 encoded = _PyObject_CallOneArg(s->encoder, obj);
Oren Milman2b382dd2017-09-24 12:07:12 +03001409 if (encoded != NULL && !PyUnicode_Check(encoded)) {
1410 PyErr_Format(PyExc_TypeError,
1411 "encoder() must return a string, not %.80s",
1412 Py_TYPE(encoded)->tp_name);
1413 Py_DECREF(encoded);
1414 return NULL;
1415 }
1416 return encoded;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001417}
1418
1419static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001420_steal_accumulate(_PyAccu *acc, PyObject *stolen)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001421{
1422 /* Append stolen and then decrement its reference count */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001423 int rval = _PyAccu_Accumulate(acc, stolen);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001424 Py_DECREF(stolen);
1425 return rval;
1426}
1427
1428static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001429encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001430 PyObject *obj, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001431{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001432 /* Encode Python object obj to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001433 PyObject *newobj;
1434 int rv;
1435
1436 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1437 PyObject *cstr = _encoded_const(obj);
1438 if (cstr == NULL)
1439 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001440 return _steal_accumulate(acc, cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001441 }
1442 else if (PyUnicode_Check(obj))
1443 {
1444 PyObject *encoded = encoder_encode_string(s, obj);
1445 if (encoded == NULL)
1446 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001447 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001448 }
1449 else if (PyLong_Check(obj)) {
Serhiy Storchaka96aeaec2019-05-06 22:29:40 +03001450 PyObject *encoded = PyLong_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001451 if (encoded == NULL)
1452 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001453 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001454 }
1455 else if (PyFloat_Check(obj)) {
1456 PyObject *encoded = encoder_encode_float(s, obj);
1457 if (encoded == NULL)
1458 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001459 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001460 }
1461 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001462 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1463 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001464 rv = encoder_listencode_list(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001465 Py_LeaveRecursiveCall();
1466 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001467 }
1468 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001469 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1470 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001471 rv = encoder_listencode_dict(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001472 Py_LeaveRecursiveCall();
1473 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001474 }
1475 else {
1476 PyObject *ident = NULL;
1477 if (s->markers != Py_None) {
1478 int has_key;
1479 ident = PyLong_FromVoidPtr(obj);
1480 if (ident == NULL)
1481 return -1;
1482 has_key = PyDict_Contains(s->markers, ident);
1483 if (has_key) {
1484 if (has_key != -1)
1485 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1486 Py_DECREF(ident);
1487 return -1;
1488 }
1489 if (PyDict_SetItem(s->markers, ident, obj)) {
1490 Py_DECREF(ident);
1491 return -1;
1492 }
1493 }
Jeroen Demeyer196a5302019-07-04 12:31:34 +02001494 newobj = _PyObject_CallOneArg(s->defaultfn, obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001495 if (newobj == NULL) {
1496 Py_XDECREF(ident);
1497 return -1;
1498 }
Ezio Melotti13672652011-05-11 01:02:56 +03001499
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001500 if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
1501 Py_DECREF(newobj);
1502 Py_XDECREF(ident);
Ezio Melotti13672652011-05-11 01:02:56 +03001503 return -1;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001504 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001505 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001506 Py_LeaveRecursiveCall();
1507
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001508 Py_DECREF(newobj);
1509 if (rv) {
1510 Py_XDECREF(ident);
1511 return -1;
1512 }
1513 if (ident != NULL) {
1514 if (PyDict_DelItem(s->markers, ident)) {
1515 Py_XDECREF(ident);
1516 return -1;
1517 }
1518 Py_XDECREF(ident);
1519 }
1520 return rv;
1521 }
1522}
1523
1524static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001525encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001526 PyObject *dct, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001527{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001528 /* Encode Python dict dct a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001529 static PyObject *open_dict = NULL;
1530 static PyObject *close_dict = NULL;
1531 static PyObject *empty_dict = NULL;
1532 PyObject *kstr = NULL;
1533 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001534 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001535 PyObject *items;
1536 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001537 Py_ssize_t idx;
1538
1539 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1540 open_dict = PyUnicode_InternFromString("{");
1541 close_dict = PyUnicode_InternFromString("}");
1542 empty_dict = PyUnicode_InternFromString("{}");
1543 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1544 return -1;
1545 }
Serhiy Storchaka8cbc51a2017-01-13 08:38:15 +02001546 if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001547 return _PyAccu_Accumulate(acc, empty_dict);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001548
1549 if (s->markers != Py_None) {
1550 int has_key;
1551 ident = PyLong_FromVoidPtr(dct);
1552 if (ident == NULL)
1553 goto bail;
1554 has_key = PyDict_Contains(s->markers, ident);
1555 if (has_key) {
1556 if (has_key != -1)
1557 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1558 goto bail;
1559 }
1560 if (PyDict_SetItem(s->markers, ident, dct)) {
1561 goto bail;
1562 }
1563 }
1564
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001565 if (_PyAccu_Accumulate(acc, open_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001566 goto bail;
1567
1568 if (s->indent != Py_None) {
1569 /* TODO: DOES NOT RUN */
1570 indent_level += 1;
1571 /*
1572 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1573 separator = _item_separator + newline_indent
1574 buf += newline_indent
1575 */
1576 }
1577
Benjamin Peterson501182a2015-05-02 22:28:04 -04001578 items = PyMapping_Items(dct);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001579 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001580 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001581 if (s->sort_keys && PyList_Sort(items) < 0) {
1582 Py_DECREF(items);
Benjamin Peterson501182a2015-05-02 22:28:04 -04001583 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001584 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001585 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001586 Py_DECREF(items);
1587 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001588 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001589 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001590 while ((item = PyIter_Next(it)) != NULL) {
1591 PyObject *encoded, *key, *value;
Serhiy Storchakafff9a312017-03-21 08:53:25 +02001592 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001593 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1594 goto bail;
1595 }
1596 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001597 if (PyUnicode_Check(key)) {
1598 Py_INCREF(key);
1599 kstr = key;
1600 }
1601 else if (PyFloat_Check(key)) {
1602 kstr = encoder_encode_float(s, key);
1603 if (kstr == NULL)
1604 goto bail;
1605 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001606 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001607 /* This must come before the PyLong_Check because
1608 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001609 kstr = _encoded_const(key);
1610 if (kstr == NULL)
1611 goto bail;
1612 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001613 else if (PyLong_Check(key)) {
Serhiy Storchaka96aeaec2019-05-06 22:29:40 +03001614 kstr = PyLong_Type.tp_repr(key);
Ethan Furmana4998a72013-08-10 13:01:45 -07001615 if (kstr == NULL) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001616 goto bail;
Ethan Furmana4998a72013-08-10 13:01:45 -07001617 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001618 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001619 else if (s->skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001620 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001621 continue;
1622 }
1623 else {
Serhiy Storchakacfa797c2017-11-25 17:38:20 +02001624 PyErr_Format(PyExc_TypeError,
1625 "keys must be str, int, float, bool or None, "
1626 "not %.100s", key->ob_type->tp_name);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001627 goto bail;
1628 }
1629
1630 if (idx) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001631 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001632 goto bail;
1633 }
1634
1635 encoded = encoder_encode_string(s, kstr);
1636 Py_CLEAR(kstr);
1637 if (encoded == NULL)
1638 goto bail;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001639 if (_PyAccu_Accumulate(acc, encoded)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001640 Py_DECREF(encoded);
1641 goto bail;
1642 }
1643 Py_DECREF(encoded);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001644 if (_PyAccu_Accumulate(acc, s->key_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001645 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001646
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001647 value = PyTuple_GET_ITEM(item, 1);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001648 if (encoder_listencode_obj(s, acc, value, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001649 goto bail;
1650 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001651 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001652 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001653 if (PyErr_Occurred())
1654 goto bail;
1655 Py_CLEAR(it);
1656
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001657 if (ident != NULL) {
1658 if (PyDict_DelItem(s->markers, ident))
1659 goto bail;
1660 Py_CLEAR(ident);
1661 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001662 /* TODO DOES NOT RUN; dead code
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001663 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001664 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001665
1666 yield '\n' + (' ' * (_indent * _current_indent_level))
1667 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001668 if (_PyAccu_Accumulate(acc, close_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001669 goto bail;
1670 return 0;
1671
1672bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001673 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001674 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001675 Py_XDECREF(kstr);
1676 Py_XDECREF(ident);
1677 return -1;
1678}
1679
1680
1681static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001682encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001683 PyObject *seq, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001684{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001685 /* Encode Python list seq to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001686 static PyObject *open_array = NULL;
1687 static PyObject *close_array = NULL;
1688 static PyObject *empty_array = NULL;
1689 PyObject *ident = NULL;
1690 PyObject *s_fast = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001691 Py_ssize_t i;
1692
1693 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1694 open_array = PyUnicode_InternFromString("[");
1695 close_array = PyUnicode_InternFromString("]");
1696 empty_array = PyUnicode_InternFromString("[]");
1697 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1698 return -1;
1699 }
1700 ident = NULL;
1701 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1702 if (s_fast == NULL)
1703 return -1;
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001704 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001705 Py_DECREF(s_fast);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001706 return _PyAccu_Accumulate(acc, empty_array);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001707 }
1708
1709 if (s->markers != Py_None) {
1710 int has_key;
1711 ident = PyLong_FromVoidPtr(seq);
1712 if (ident == NULL)
1713 goto bail;
1714 has_key = PyDict_Contains(s->markers, ident);
1715 if (has_key) {
1716 if (has_key != -1)
1717 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1718 goto bail;
1719 }
1720 if (PyDict_SetItem(s->markers, ident, seq)) {
1721 goto bail;
1722 }
1723 }
1724
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001725 if (_PyAccu_Accumulate(acc, open_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001726 goto bail;
1727 if (s->indent != Py_None) {
1728 /* TODO: DOES NOT RUN */
1729 indent_level += 1;
1730 /*
1731 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1732 separator = _item_separator + newline_indent
1733 buf += newline_indent
1734 */
1735 }
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001736 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1737 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001738 if (i) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001739 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001740 goto bail;
1741 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001742 if (encoder_listencode_obj(s, acc, obj, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001743 goto bail;
1744 }
1745 if (ident != NULL) {
1746 if (PyDict_DelItem(s->markers, ident))
1747 goto bail;
1748 Py_CLEAR(ident);
1749 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001750
1751 /* TODO: DOES NOT RUN
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001752 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001753 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001754
1755 yield '\n' + (' ' * (_indent * _current_indent_level))
1756 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001757 if (_PyAccu_Accumulate(acc, close_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001758 goto bail;
1759 Py_DECREF(s_fast);
1760 return 0;
1761
1762bail:
1763 Py_XDECREF(ident);
1764 Py_DECREF(s_fast);
1765 return -1;
1766}
1767
1768static void
1769encoder_dealloc(PyObject *self)
1770{
INADA Naokia6296d32017-08-24 14:55:17 +09001771 /* bpo-31095: UnTrack is needed before calling any callbacks */
1772 PyObject_GC_UnTrack(self);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001773 encoder_clear(self);
1774 Py_TYPE(self)->tp_free(self);
1775}
1776
1777static int
1778encoder_traverse(PyObject *self, visitproc visit, void *arg)
1779{
1780 PyEncoderObject *s;
1781 assert(PyEncoder_Check(self));
1782 s = (PyEncoderObject *)self;
1783 Py_VISIT(s->markers);
1784 Py_VISIT(s->defaultfn);
1785 Py_VISIT(s->encoder);
1786 Py_VISIT(s->indent);
1787 Py_VISIT(s->key_separator);
1788 Py_VISIT(s->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001789 return 0;
1790}
1791
1792static int
1793encoder_clear(PyObject *self)
1794{
1795 /* Deallocate Encoder */
1796 PyEncoderObject *s;
1797 assert(PyEncoder_Check(self));
1798 s = (PyEncoderObject *)self;
1799 Py_CLEAR(s->markers);
1800 Py_CLEAR(s->defaultfn);
1801 Py_CLEAR(s->encoder);
1802 Py_CLEAR(s->indent);
1803 Py_CLEAR(s->key_separator);
1804 Py_CLEAR(s->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001805 return 0;
1806}
1807
1808PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1809
1810static
1811PyTypeObject PyEncoderType = {
1812 PyVarObject_HEAD_INIT(NULL, 0)
1813 "_json.Encoder", /* tp_name */
1814 sizeof(PyEncoderObject), /* tp_basicsize */
1815 0, /* tp_itemsize */
1816 encoder_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001817 0, /* tp_vectorcall_offset */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001818 0, /* tp_getattr */
1819 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001820 0, /* tp_as_async */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001821 0, /* tp_repr */
1822 0, /* tp_as_number */
1823 0, /* tp_as_sequence */
1824 0, /* tp_as_mapping */
1825 0, /* tp_hash */
1826 encoder_call, /* tp_call */
1827 0, /* tp_str */
1828 0, /* tp_getattro */
1829 0, /* tp_setattro */
1830 0, /* tp_as_buffer */
1831 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1832 encoder_doc, /* tp_doc */
1833 encoder_traverse, /* tp_traverse */
1834 encoder_clear, /* tp_clear */
1835 0, /* tp_richcompare */
1836 0, /* tp_weaklistoffset */
1837 0, /* tp_iter */
1838 0, /* tp_iternext */
1839 0, /* tp_methods */
1840 encoder_members, /* tp_members */
1841 0, /* tp_getset */
1842 0, /* tp_base */
1843 0, /* tp_dict */
1844 0, /* tp_descr_get */
1845 0, /* tp_descr_set */
1846 0, /* tp_dictoffset */
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001847 0, /* tp_init */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001848 0, /* tp_alloc */
1849 encoder_new, /* tp_new */
1850 0, /* tp_free */
1851};
1852
1853static PyMethodDef speedups_methods[] = {
1854 {"encode_basestring_ascii",
1855 (PyCFunction)py_encode_basestring_ascii,
1856 METH_O,
1857 pydoc_encode_basestring_ascii},
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001858 {"encode_basestring",
1859 (PyCFunction)py_encode_basestring,
1860 METH_O,
1861 pydoc_encode_basestring},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001862 {"scanstring",
1863 (PyCFunction)py_scanstring,
1864 METH_VARARGS,
1865 pydoc_scanstring},
Christian Heimes90540002008-05-08 14:29:10 +00001866 {NULL, NULL, 0, NULL}
1867};
1868
1869PyDoc_STRVAR(module_doc,
1870"json speedups\n");
1871
Martin v. Löwis1a214512008-06-11 05:26:20 +00001872static struct PyModuleDef jsonmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001873 PyModuleDef_HEAD_INIT,
1874 "_json",
1875 module_doc,
1876 -1,
1877 speedups_methods,
1878 NULL,
1879 NULL,
1880 NULL,
1881 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001882};
1883
Victor Stinnerf024d262015-03-17 17:48:27 +01001884PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001885PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00001886{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001887 PyObject *m = PyModule_Create(&jsonmodule);
1888 if (!m)
1889 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001890 if (PyType_Ready(&PyScannerType) < 0)
1891 goto fail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001892 if (PyType_Ready(&PyEncoderType) < 0)
1893 goto fail;
1894 Py_INCREF((PyObject*)&PyScannerType);
1895 if (PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType) < 0) {
1896 Py_DECREF((PyObject*)&PyScannerType);
1897 goto fail;
1898 }
1899 Py_INCREF((PyObject*)&PyEncoderType);
1900 if (PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType) < 0) {
1901 Py_DECREF((PyObject*)&PyEncoderType);
1902 goto fail;
1903 }
1904 return m;
1905 fail:
1906 Py_DECREF(m);
1907 return NULL;
Christian Heimes90540002008-05-08 14:29:10 +00001908}