blob: 4faa9cc22edf01d8f880ef7a123f328cb8ba7bc8 [file] [log] [blame]
/* JSON accelerator C extension: _json module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
 * platforms. */
Eric Snow2ebc5ce2017-09-07 23:51:28 -06006
Victor Stinner5c75f372019-04-17 23:02:26 +02007#if !defined(Py_BUILD_CORE_BUILTIN) && !defined(Py_BUILD_CORE_MODULE)
8# error "Py_BUILD_CORE_BUILTIN or Py_BUILD_CORE_MODULE must be defined"
Eric Snow2ebc5ce2017-09-07 23:51:28 -06009#endif
10
Christian Heimes90540002008-05-08 14:29:10 +000011#include "Python.h"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000012#include "structmember.h"
Victor Stinnere281f7d2018-11-01 02:30:36 +010013#include "pycore_accu.h"
Antoine Pitroud0acb412012-03-22 14:42:18 +010014
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000015#ifdef __GNUC__
16#define UNUSED __attribute__((__unused__))
17#else
18#define UNUSED
19#endif
20
21#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
22#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
23#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
24#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
25
26static PyTypeObject PyScannerType;
27static PyTypeObject PyEncoderType;
28
29typedef struct _PyScannerObject {
30 PyObject_HEAD
Segev Finer541bd282017-07-13 08:52:08 +030031 signed char strict;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000032 PyObject *object_hook;
33 PyObject *object_pairs_hook;
34 PyObject *parse_float;
35 PyObject *parse_int;
36 PyObject *parse_constant;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +000037 PyObject *memo;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000038} PyScannerObject;
39
40static PyMemberDef scanner_members[] = {
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030041 {"strict", T_BOOL, offsetof(PyScannerObject, strict), READONLY, "strict"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000042 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
43 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
44 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
45 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
46 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
47 {NULL}
48};
49
50typedef struct _PyEncoderObject {
51 PyObject_HEAD
52 PyObject *markers;
53 PyObject *defaultfn;
54 PyObject *encoder;
55 PyObject *indent;
56 PyObject *key_separator;
57 PyObject *item_separator;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030058 char sort_keys;
59 char skipkeys;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000060 int allow_nan;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030061 PyCFunction fast_encode;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000062} PyEncoderObject;
63
64static PyMemberDef encoder_members[] = {
65 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
66 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
67 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
68 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
69 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
70 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030071 {"sort_keys", T_BOOL, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
72 {"skipkeys", T_BOOL, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000073 {NULL}
74};
75
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020076static PyObject *
77join_list_unicode(PyObject *lst)
78{
79 /* return u''.join(lst) */
80 static PyObject *sep = NULL;
81 if (sep == NULL) {
82 sep = PyUnicode_FromStringAndSize("", 0);
83 if (sep == NULL)
84 return NULL;
85 }
86 return PyUnicode_Join(sep, lst);
87}
88
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020089/* Forward decls */
90
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000091static PyObject *
92ascii_escape_unicode(PyObject *pystr);
93static PyObject *
94py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
95void init_json(void);
96static PyObject *
97scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
98static PyObject *
99_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
100static PyObject *
101scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000102static void
103scanner_dealloc(PyObject *self);
104static int
105scanner_clear(PyObject *self);
106static PyObject *
107encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000108static void
109encoder_dealloc(PyObject *self);
110static int
111encoder_clear(PyObject *self);
112static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200113encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000114static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200115encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000116static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200117encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000118static PyObject *
Hirokazu Yamamotofecf5d12009-05-02 15:55:19 +0000119_encoded_const(PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000120static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200121raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000122static PyObject *
123encoder_encode_string(PyEncoderObject *s, PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000124static PyObject *
125encoder_encode_float(PyEncoderObject *s, PyObject *obj);
126
/* S_CHAR(c): true when c is a printable ASCII character (' ' through '~')
   other than backslash and double quote, i.e. one that the escaping code
   copies to the output unchanged.  The argument is parenthesized at every
   use so that an expression argument is compared as a whole; it is still
   evaluated several times, so it must not have side effects. */
#define S_CHAR(c) (((c) >= ' ') && ((c) <= '~') && ((c) != '\\') && ((c) != '"'))
/* IS_WHITESPACE(c): true for space, tab, line feed and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
Christian Heimes90540002008-05-08 14:29:10 +0000129
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000130static Py_ssize_t
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200131ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000132{
133 /* Escape unicode code point c to ASCII escape sequences
134 in char *output. output must have at least 12 bytes unused to
135 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000136 output[chars++] = '\\';
137 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000138 case '\\': output[chars++] = c; break;
139 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000140 case '\b': output[chars++] = 'b'; break;
141 case '\f': output[chars++] = 'f'; break;
142 case '\n': output[chars++] = 'n'; break;
143 case '\r': output[chars++] = 'r'; break;
144 case '\t': output[chars++] = 't'; break;
145 default:
Christian Heimes90540002008-05-08 14:29:10 +0000146 if (c >= 0x10000) {
147 /* UTF-16 surrogate pair */
Victor Stinner76df43d2012-10-30 01:42:39 +0100148 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000149 output[chars++] = 'u';
Victor Stinner76df43d2012-10-30 01:42:39 +0100150 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
151 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
152 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
153 output[chars++] = Py_hexdigits[(v ) & 0xf];
154 c = Py_UNICODE_LOW_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000155 output[chars++] = '\\';
156 }
Christian Heimes90540002008-05-08 14:29:10 +0000157 output[chars++] = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200158 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
159 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
160 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
161 output[chars++] = Py_hexdigits[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000162 }
163 return chars;
164}
165
166static PyObject *
167ascii_escape_unicode(PyObject *pystr)
168{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000169 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000170 Py_ssize_t i;
171 Py_ssize_t input_chars;
172 Py_ssize_t output_size;
173 Py_ssize_t chars;
174 PyObject *rval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200175 void *input;
176 unsigned char *output;
177 int kind;
Christian Heimes90540002008-05-08 14:29:10 +0000178
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200179 if (PyUnicode_READY(pystr) == -1)
180 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000181
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200182 input_chars = PyUnicode_GET_LENGTH(pystr);
183 input = PyUnicode_DATA(pystr);
184 kind = PyUnicode_KIND(pystr);
185
186 /* Compute the output size */
187 for (i = 0, output_size = 2; i < input_chars; i++) {
188 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500189 Py_ssize_t d;
190 if (S_CHAR(c)) {
191 d = 1;
192 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200193 else {
194 switch(c) {
Victor Stinnerd9c06312011-10-11 21:56:19 +0200195 case '\\': case '"': case '\b': case '\f':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200196 case '\n': case '\r': case '\t':
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500197 d = 2; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200198 default:
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500199 d = c >= 0x10000 ? 12 : 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200200 }
201 }
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500202 if (output_size > PY_SSIZE_T_MAX - d) {
203 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
204 return NULL;
205 }
206 output_size += d;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200207 }
208
209 rval = PyUnicode_New(output_size, 127);
Christian Heimes90540002008-05-08 14:29:10 +0000210 if (rval == NULL) {
211 return NULL;
212 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200213 output = PyUnicode_1BYTE_DATA(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000214 chars = 0;
215 output[chars++] = '"';
216 for (i = 0; i < input_chars; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200217 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Christian Heimes90540002008-05-08 14:29:10 +0000218 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000219 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000220 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000221 else {
222 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000223 }
Christian Heimes90540002008-05-08 14:29:10 +0000224 }
225 output[chars++] = '"';
Christian Heimesf402e922013-01-03 09:21:55 +0100226#ifdef Py_DEBUG
Victor Stinner8f825062012-04-27 13:55:39 +0200227 assert(_PyUnicode_CheckConsistency(rval, 1));
Christian Heimesf402e922013-01-03 09:21:55 +0100228#endif
Christian Heimes90540002008-05-08 14:29:10 +0000229 return rval;
230}
231
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100232static PyObject *
233escape_unicode(PyObject *pystr)
234{
235 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
236 Py_ssize_t i;
237 Py_ssize_t input_chars;
238 Py_ssize_t output_size;
239 Py_ssize_t chars;
240 PyObject *rval;
241 void *input;
242 int kind;
243 Py_UCS4 maxchar;
244
245 if (PyUnicode_READY(pystr) == -1)
246 return NULL;
247
248 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
249 input_chars = PyUnicode_GET_LENGTH(pystr);
250 input = PyUnicode_DATA(pystr);
251 kind = PyUnicode_KIND(pystr);
252
253 /* Compute the output size */
254 for (i = 0, output_size = 2; i < input_chars; i++) {
255 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500256 Py_ssize_t d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100257 switch (c) {
258 case '\\': case '"': case '\b': case '\f':
259 case '\n': case '\r': case '\t':
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500260 d = 2;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100261 break;
262 default:
263 if (c <= 0x1f)
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500264 d = 6;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100265 else
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500266 d = 1;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100267 }
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500268 if (output_size > PY_SSIZE_T_MAX - d) {
269 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
270 return NULL;
271 }
272 output_size += d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100273 }
274
275 rval = PyUnicode_New(output_size, maxchar);
276 if (rval == NULL)
277 return NULL;
278
279 kind = PyUnicode_KIND(rval);
280
281#define ENCODE_OUTPUT do { \
282 chars = 0; \
283 output[chars++] = '"'; \
284 for (i = 0; i < input_chars; i++) { \
285 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
286 switch (c) { \
287 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
288 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
289 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
290 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
291 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
292 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
293 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
294 default: \
295 if (c <= 0x1f) { \
296 output[chars++] = '\\'; \
297 output[chars++] = 'u'; \
298 output[chars++] = '0'; \
299 output[chars++] = '0'; \
300 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
301 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
302 } else { \
303 output[chars++] = c; \
304 } \
305 } \
306 } \
307 output[chars++] = '"'; \
308 } while (0)
309
310 if (kind == PyUnicode_1BYTE_KIND) {
311 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
312 ENCODE_OUTPUT;
313 } else if (kind == PyUnicode_2BYTE_KIND) {
314 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
315 ENCODE_OUTPUT;
316 } else {
317 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
318 assert(kind == PyUnicode_4BYTE_KIND);
319 ENCODE_OUTPUT;
320 }
321#undef ENCODE_OUTPUT
322
323#ifdef Py_DEBUG
324 assert(_PyUnicode_CheckConsistency(rval, 1));
325#endif
326 return rval;
327}
328
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000329static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200330raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
Christian Heimes90540002008-05-08 14:29:10 +0000331{
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200332 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
333 static PyObject *JSONDecodeError = NULL;
334 PyObject *exc;
335 if (JSONDecodeError == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000336 PyObject *decoder = PyImport_ImportModule("json.decoder");
337 if (decoder == NULL)
338 return;
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200339 JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000340 Py_DECREF(decoder);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200341 if (JSONDecodeError == NULL)
Christian Heimes90540002008-05-08 14:29:10 +0000342 return;
Christian Heimes90540002008-05-08 14:29:10 +0000343 }
Victor Stinner4c381542016-12-09 00:33:39 +0100344 exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200345 if (exc) {
346 PyErr_SetObject(JSONDecodeError, exc);
347 Py_DECREF(exc);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000348 }
Christian Heimes90540002008-05-08 14:29:10 +0000349}
350
Ezio Melotti37623ab2013-01-03 08:44:15 +0200351static void
352raise_stop_iteration(Py_ssize_t idx)
353{
354 PyObject *value = PyLong_FromSsize_t(idx);
355 if (value != NULL) {
356 PyErr_SetObject(PyExc_StopIteration, value);
357 Py_DECREF(value);
358 }
359}
360
Christian Heimes90540002008-05-08 14:29:10 +0000361static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000362_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
363 /* return (rval, idx) tuple, stealing reference to rval */
364 PyObject *tpl;
365 PyObject *pyidx;
366 /*
367 steal a reference to rval, returns (rval, idx)
368 */
369 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000370 return NULL;
371 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000372 pyidx = PyLong_FromSsize_t(idx);
373 if (pyidx == NULL) {
374 Py_DECREF(rval);
375 return NULL;
376 }
377 tpl = PyTuple_New(2);
378 if (tpl == NULL) {
379 Py_DECREF(pyidx);
380 Py_DECREF(rval);
381 return NULL;
382 }
383 PyTuple_SET_ITEM(tpl, 0, rval);
384 PyTuple_SET_ITEM(tpl, 1, pyidx);
385 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000386}
387
/* APPEND_OLD_CHUNK: if a pending `chunk` exists, append it to the `chunks`
   list (creating the list on first use) and reset `chunk` to NULL.  On any
   failure control jumps to the enclosing function's `bail` label.

   Expects `chunk`, `chunks` and `bail` to exist at the point of use.  The
   expansion is a bare if-statement and is written without a trailing
   semicolon at its call sites. */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        if (chunks == NULL && (chunks = PyList_New(0)) == NULL) { \
            goto bail; \
        } \
        if (PyList_Append(chunks, chunk) != 0) { \
            Py_CLEAR(chunk); \
            goto bail; \
        } \
        Py_CLEAR(chunk); \
    }
402
Christian Heimes90540002008-05-08 14:29:10 +0000403static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000404scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000405{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000406 /* Read the JSON string from PyUnicode pystr.
407 end is the index of the first character after the quote.
408 if strict is zero then literal control characters are allowed
409 *next_end_ptr is a return-by-reference index of the character
410 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000411
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000412 Return value is a new PyUnicode
413 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000414 PyObject *rval = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200415 Py_ssize_t len;
Christian Heimes90540002008-05-08 14:29:10 +0000416 Py_ssize_t begin = end - 1;
Brett Cannonb94767f2011-02-22 20:15:44 +0000417 Py_ssize_t next /* = begin */;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200418 const void *buf;
419 int kind;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000420 PyObject *chunks = NULL;
421 PyObject *chunk = NULL;
422
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200423 if (PyUnicode_READY(pystr) == -1)
424 return 0;
425
426 len = PyUnicode_GET_LENGTH(pystr);
427 buf = PyUnicode_DATA(pystr);
428 kind = PyUnicode_KIND(pystr);
429
Ezio Melotti37623ab2013-01-03 08:44:15 +0200430 if (end < 0 || len < end) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000431 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
432 goto bail;
433 }
Christian Heimes90540002008-05-08 14:29:10 +0000434 while (1) {
435 /* Find the end of the string or the next escape */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200436 Py_UCS4 c = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000437 for (next = end; next < len; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200438 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000439 if (c == '"' || c == '\\') {
440 break;
441 }
442 else if (strict && c <= 0x1f) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000443 raise_errmsg("Invalid control character at", pystr, next);
Christian Heimes90540002008-05-08 14:29:10 +0000444 goto bail;
445 }
446 }
447 if (!(c == '"' || c == '\\')) {
448 raise_errmsg("Unterminated string starting at", pystr, begin);
449 goto bail;
450 }
451 /* Pick up this chunk if it's not zero length */
452 if (next != end) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000453 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200454 chunk = PyUnicode_FromKindAndData(
455 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200456 (char*)buf + kind * end,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200457 next - end);
Christian Heimes90540002008-05-08 14:29:10 +0000458 if (chunk == NULL) {
459 goto bail;
460 }
Christian Heimes90540002008-05-08 14:29:10 +0000461 }
462 next++;
463 if (c == '"') {
464 end = next;
465 break;
466 }
467 if (next == len) {
468 raise_errmsg("Unterminated string starting at", pystr, begin);
469 goto bail;
470 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200471 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000472 if (c != 'u') {
473 /* Non-unicode backslash escapes */
474 end = next + 1;
475 switch (c) {
476 case '"': break;
477 case '\\': break;
478 case '/': break;
479 case 'b': c = '\b'; break;
480 case 'f': c = '\f'; break;
481 case 'n': c = '\n'; break;
482 case 'r': c = '\r'; break;
483 case 't': c = '\t'; break;
484 default: c = 0;
485 }
486 if (c == 0) {
487 raise_errmsg("Invalid \\escape", pystr, end - 2);
488 goto bail;
489 }
490 }
491 else {
492 c = 0;
493 next++;
494 end = next + 4;
495 if (end >= len) {
496 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
497 goto bail;
498 }
499 /* Decode 4 hex digits */
500 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200501 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000502 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000503 switch (digit) {
504 case '0': case '1': case '2': case '3': case '4':
505 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000506 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000507 case 'a': case 'b': case 'c': case 'd': case 'e':
508 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000509 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000510 case 'A': case 'B': case 'C': case 'D': case 'E':
511 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000512 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000513 default:
514 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
515 goto bail;
516 }
517 }
Christian Heimes90540002008-05-08 14:29:10 +0000518 /* Surrogate pair */
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200519 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
520 PyUnicode_READ(kind, buf, next++) == '\\' &&
521 PyUnicode_READ(kind, buf, next++) == 'u') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200522 Py_UCS4 c2 = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000523 end += 6;
524 /* Decode 4 hex digits */
525 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200526 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000527 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000528 switch (digit) {
529 case '0': case '1': case '2': case '3': case '4':
530 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000531 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000532 case 'a': case 'b': case 'c': case 'd': case 'e':
533 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000534 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000535 case 'A': case 'B': case 'C': case 'D': case 'E':
536 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000537 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000538 default:
539 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
540 goto bail;
541 }
542 }
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200543 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
544 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
545 else
546 end -= 6;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000547 }
Christian Heimes90540002008-05-08 14:29:10 +0000548 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000549 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200550 chunk = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &c, 1);
Christian Heimes90540002008-05-08 14:29:10 +0000551 if (chunk == NULL) {
552 goto bail;
553 }
Christian Heimes90540002008-05-08 14:29:10 +0000554 }
555
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000556 if (chunks == NULL) {
557 if (chunk != NULL)
558 rval = chunk;
559 else
560 rval = PyUnicode_FromStringAndSize("", 0);
Christian Heimes90540002008-05-08 14:29:10 +0000561 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000562 else {
563 APPEND_OLD_CHUNK
564 rval = join_list_unicode(chunks);
565 if (rval == NULL) {
566 goto bail;
567 }
568 Py_CLEAR(chunks);
569 }
570
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000571 *next_end_ptr = end;
572 return rval;
Christian Heimes90540002008-05-08 14:29:10 +0000573bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000574 *next_end_ptr = -1;
Christian Heimes90540002008-05-08 14:29:10 +0000575 Py_XDECREF(chunks);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000576 Py_XDECREF(chunk);
Christian Heimes90540002008-05-08 14:29:10 +0000577 return NULL;
578}
579
580PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000581 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000582 "\n"
583 "Scan the string s for a JSON string. End is the index of the\n"
584 "character in s after the quote that started the JSON string.\n"
585 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
586 "on attempt to decode an invalid string. If strict is False then literal\n"
587 "control characters are allowed in the string.\n"
588 "\n"
589 "Returns a tuple of the decoded string and the index of the character in s\n"
590 "after the end quote."
591);
Christian Heimes90540002008-05-08 14:29:10 +0000592
593static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000594py_scanstring(PyObject* self UNUSED, PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000595{
596 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000597 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000598 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000599 Py_ssize_t next_end = -1;
600 int strict = 1;
Antoine Pitroucbb02842012-12-01 19:34:16 +0100601 if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000602 return NULL;
603 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000604 if (PyUnicode_Check(pystr)) {
605 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000606 }
607 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000608 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000609 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000610 Py_TYPE(pystr)->tp_name);
611 return NULL;
612 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000613 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000614}
615
616PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000617 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000618 "\n"
619 "Return an ASCII-only JSON representation of a Python string"
620);
Christian Heimes90540002008-05-08 14:29:10 +0000621
622static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000623py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000624{
625 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000626 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000627 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000628 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000629 rval = ascii_escape_unicode(pystr);
630 }
631 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000632 PyErr_Format(PyExc_TypeError,
633 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000634 Py_TYPE(pystr)->tp_name);
635 return NULL;
636 }
Christian Heimes90540002008-05-08 14:29:10 +0000637 return rval;
638}
639
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100640
641PyDoc_STRVAR(pydoc_encode_basestring,
642 "encode_basestring(string) -> string\n"
643 "\n"
644 "Return a JSON representation of a Python string"
645);
646
647static PyObject *
648py_encode_basestring(PyObject* self UNUSED, PyObject *pystr)
649{
650 PyObject *rval;
651 /* Return a JSON representation of a Python string */
652 /* METH_O */
653 if (PyUnicode_Check(pystr)) {
654 rval = escape_unicode(pystr);
655 }
656 else {
657 PyErr_Format(PyExc_TypeError,
658 "first argument must be a string, not %.80s",
659 Py_TYPE(pystr)->tp_name);
660 return NULL;
661 }
662 return rval;
663}
664
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000665static void
666scanner_dealloc(PyObject *self)
667{
INADA Naokia6296d32017-08-24 14:55:17 +0900668 /* bpo-31095: UnTrack is needed before calling any callbacks */
669 PyObject_GC_UnTrack(self);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000670 scanner_clear(self);
671 Py_TYPE(self)->tp_free(self);
672}
673
674static int
675scanner_traverse(PyObject *self, visitproc visit, void *arg)
676{
677 PyScannerObject *s;
678 assert(PyScanner_Check(self));
679 s = (PyScannerObject *)self;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000680 Py_VISIT(s->object_hook);
681 Py_VISIT(s->object_pairs_hook);
682 Py_VISIT(s->parse_float);
683 Py_VISIT(s->parse_int);
684 Py_VISIT(s->parse_constant);
685 return 0;
686}
687
688static int
689scanner_clear(PyObject *self)
690{
691 PyScannerObject *s;
692 assert(PyScanner_Check(self));
693 s = (PyScannerObject *)self;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000694 Py_CLEAR(s->object_hook);
695 Py_CLEAR(s->object_pairs_hook);
696 Py_CLEAR(s->parse_float);
697 Py_CLEAR(s->parse_int);
698 Py_CLEAR(s->parse_constant);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000699 Py_CLEAR(s->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000700 return 0;
701}
702
703static PyObject *
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300704_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
705{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000706 /* Read a JSON object from PyUnicode pystr.
707 idx is the index of the first character after the opening curly brace.
708 *next_idx_ptr is a return-by-reference index to the first character after
709 the closing curly brace.
710
711 Returns a new PyObject (usually a dict, but object_hook can change that)
712 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200713 void *str;
714 int kind;
715 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000716 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000717 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000718 PyObject *key = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000719 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000720 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000721
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200722 if (PyUnicode_READY(pystr) == -1)
723 return NULL;
724
725 str = PyUnicode_DATA(pystr);
726 kind = PyUnicode_KIND(pystr);
727 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
728
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000729 if (has_pairs_hook)
730 rval = PyList_New(0);
731 else
732 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000733 if (rval == NULL)
734 return NULL;
735
736 /* skip whitespace after { */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200737 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000738
739 /* only loop if the object is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200740 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
741 while (1) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000742 PyObject *memokey;
743
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000744 /* read key */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200745 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200746 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000747 goto bail;
748 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300749 key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000750 if (key == NULL)
751 goto bail;
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200752 memokey = PyDict_GetItemWithError(s->memo, key);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000753 if (memokey != NULL) {
754 Py_INCREF(memokey);
755 Py_DECREF(key);
756 key = memokey;
757 }
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200758 else if (PyErr_Occurred()) {
759 goto bail;
760 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000761 else {
762 if (PyDict_SetItem(s->memo, key, key) < 0)
763 goto bail;
764 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000765 idx = next_idx;
766
767 /* skip whitespace between key and : delimiter, read :, skip whitespace */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200768 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
769 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200770 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000771 goto bail;
772 }
773 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200774 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000775
776 /* read any JSON term */
777 val = scan_once_unicode(s, pystr, idx, &next_idx);
778 if (val == NULL)
779 goto bail;
780
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000781 if (has_pairs_hook) {
782 PyObject *item = PyTuple_Pack(2, key, val);
783 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000784 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000785 Py_CLEAR(key);
786 Py_CLEAR(val);
787 if (PyList_Append(rval, item) == -1) {
788 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000789 goto bail;
790 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000791 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000792 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000793 else {
794 if (PyDict_SetItem(rval, key, val) < 0)
795 goto bail;
796 Py_CLEAR(key);
797 Py_CLEAR(val);
798 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000799 idx = next_idx;
800
801 /* skip whitespace before } or , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200802 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000803
804 /* bail if the object is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200805 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000806 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200807 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200808 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000809 goto bail;
810 }
811 idx++;
812
813 /* skip whitespace after , delimiter */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200814 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000815 }
816 }
817
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000818 *next_idx_ptr = idx + 1;
819
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000820 if (has_pairs_hook) {
Victor Stinnerde4ae3d2016-12-04 22:59:09 +0100821 val = PyObject_CallFunctionObjArgs(s->object_pairs_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000822 Py_DECREF(rval);
823 return val;
824 }
825
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000826 /* if object_hook is not None: rval = object_hook(rval) */
827 if (s->object_hook != Py_None) {
Victor Stinnerde4ae3d2016-12-04 22:59:09 +0100828 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000829 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000830 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000831 }
832 return rval;
833bail:
834 Py_XDECREF(key);
835 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000836 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000837 return NULL;
838}
839
840static PyObject *
841_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200842 /* Read a JSON array from PyUnicode pystr.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000843 idx is the index of the first character after the opening brace.
844 *next_idx_ptr is a return-by-reference index to the first character after
845 the closing brace.
846
847 Returns a new PyList
848 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200849 void *str;
850 int kind;
851 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000852 PyObject *val = NULL;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200853 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000854 Py_ssize_t next_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000855
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200856 if (PyUnicode_READY(pystr) == -1)
857 return NULL;
858
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200859 rval = PyList_New(0);
860 if (rval == NULL)
861 return NULL;
862
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200863 str = PyUnicode_DATA(pystr);
864 kind = PyUnicode_KIND(pystr);
865 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
866
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000867 /* skip whitespace after [ */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200868 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000869
870 /* only loop if the array is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200871 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
872 while (1) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000873
874 /* read any JSON term */
875 val = scan_once_unicode(s, pystr, idx, &next_idx);
876 if (val == NULL)
877 goto bail;
878
879 if (PyList_Append(rval, val) == -1)
880 goto bail;
881
882 Py_CLEAR(val);
883 idx = next_idx;
884
885 /* skip whitespace between term and , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200886 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000887
888 /* bail if the array is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200889 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000890 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200891 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200892 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000893 goto bail;
894 }
895 idx++;
896
897 /* skip whitespace after , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200898 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000899 }
900 }
901
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200902 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
903 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200904 raise_errmsg("Expecting value", pystr, end_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000905 goto bail;
906 }
907 *next_idx_ptr = idx + 1;
908 return rval;
909bail:
910 Py_XDECREF(val);
911 Py_DECREF(rval);
912 return NULL;
913}
914
915static PyObject *
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200916_parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
917 /* Read a JSON constant.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000918 constant is the constant string that was found
919 ("NaN", "Infinity", "-Infinity").
920 idx is the index of the first character of the constant
921 *next_idx_ptr is a return-by-reference index to the first character after
922 the constant.
923
924 Returns the result of parse_constant
925 */
926 PyObject *cstr;
927 PyObject *rval;
928 /* constant is "NaN", "Infinity", or "-Infinity" */
929 cstr = PyUnicode_InternFromString(constant);
930 if (cstr == NULL)
931 return NULL;
932
933 /* rval = parse_constant(constant) */
Victor Stinnerde4ae3d2016-12-04 22:59:09 +0100934 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200935 idx += PyUnicode_GET_LENGTH(cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000936 Py_DECREF(cstr);
937 *next_idx_ptr = idx;
938 return rval;
939}
940
941static PyObject *
942_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
943 /* Read a JSON number from PyUnicode pystr.
944 idx is the index of the first character of the number
945 *next_idx_ptr is a return-by-reference index to the first character after
946 the number.
947
948 Returns a new PyObject representation of that number:
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200949 PyLong, or PyFloat.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000950 May return other types if parse_int or parse_float are set
951 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200952 void *str;
953 int kind;
954 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000955 Py_ssize_t idx = start;
956 int is_float = 0;
957 PyObject *rval;
Antoine Pitrouf6454512011-04-25 19:16:06 +0200958 PyObject *numstr = NULL;
959 PyObject *custom_func;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000960
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200961 if (PyUnicode_READY(pystr) == -1)
962 return NULL;
963
964 str = PyUnicode_DATA(pystr);
965 kind = PyUnicode_KIND(pystr);
966 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
967
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000968 /* read a sign if it's there, make sure it's not the end of the string */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200969 if (PyUnicode_READ(kind, str, idx) == '-') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000970 idx++;
971 if (idx > end_idx) {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200972 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000973 return NULL;
974 }
975 }
976
977 /* read as many integer digits as we find as long as it doesn't start with 0 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200978 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000979 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200980 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000981 }
982 /* if it starts with 0 we only expect one integer digit */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200983 else if (PyUnicode_READ(kind, str, idx) == '0') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000984 idx++;
985 }
986 /* no integer digits, error */
987 else {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200988 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000989 return NULL;
990 }
991
992 /* if the next char is '.' followed by a digit then read all float digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200993 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000994 is_float = 1;
995 idx += 2;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200996 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000997 }
998
999 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001000 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001001 Py_ssize_t e_start = idx;
1002 idx++;
1003
1004 /* read an exponent sign if present */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001005 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001006
1007 /* read all digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001008 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001009
1010 /* if we got a digit, then parse as float. if not, backtrack */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001011 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001012 is_float = 1;
1013 }
1014 else {
1015 idx = e_start;
1016 }
1017 }
1018
Antoine Pitrouf6454512011-04-25 19:16:06 +02001019 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
1020 custom_func = s->parse_float;
1021 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
1022 custom_func = s->parse_int;
1023 else
1024 custom_func = NULL;
1025
1026 if (custom_func) {
1027 /* copy the section we determined to be a number */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001028 numstr = PyUnicode_FromKindAndData(kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001029 (char*)str + kind * start,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001030 idx - start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001031 if (numstr == NULL)
1032 return NULL;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001033 rval = PyObject_CallFunctionObjArgs(custom_func, numstr, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001034 }
1035 else {
Antoine Pitrouf6454512011-04-25 19:16:06 +02001036 Py_ssize_t i, n;
1037 char *buf;
1038 /* Straight conversion to ASCII, to avoid costly conversion of
1039 decimal unicode digits (which cannot appear here) */
1040 n = idx - start;
1041 numstr = PyBytes_FromStringAndSize(NULL, n);
1042 if (numstr == NULL)
1043 return NULL;
1044 buf = PyBytes_AS_STRING(numstr);
1045 for (i = 0; i < n; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001046 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001047 }
1048 if (is_float)
1049 rval = PyFloat_FromString(numstr);
1050 else
1051 rval = PyLong_FromString(buf, NULL, 10);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001052 }
1053 Py_DECREF(numstr);
1054 *next_idx_ptr = idx;
1055 return rval;
1056}
1057
1058static PyObject *
1059scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1060{
1061 /* Read one JSON term (of any kind) from PyUnicode pystr.
1062 idx is the index of the first character of the term
1063 *next_idx_ptr is a return-by-reference index to the first character after
1064 the number.
1065
1066 Returns a new PyObject representation of the term.
1067 */
Ezio Melotti362b9512011-05-07 17:58:09 +03001068 PyObject *res;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001069 void *str;
1070 int kind;
1071 Py_ssize_t length;
1072
1073 if (PyUnicode_READY(pystr) == -1)
1074 return NULL;
1075
1076 str = PyUnicode_DATA(pystr);
1077 kind = PyUnicode_KIND(pystr);
1078 length = PyUnicode_GET_LENGTH(pystr);
1079
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001080 if (idx < 0) {
Benjamin Peterson9beee042014-04-14 11:46:51 -04001081 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001082 return NULL;
1083 }
1084 if (idx >= length) {
Ezio Melotti37623ab2013-01-03 08:44:15 +02001085 raise_stop_iteration(idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001086 return NULL;
1087 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001088
1089 switch (PyUnicode_READ(kind, str, idx)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001090 case '"':
1091 /* string */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001092 return scanstring_unicode(pystr, idx + 1, s->strict, next_idx_ptr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001093 case '{':
1094 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +03001095 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1096 "from a unicode string"))
1097 return NULL;
1098 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1099 Py_LeaveRecursiveCall();
1100 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001101 case '[':
1102 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +03001103 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1104 "from a unicode string"))
1105 return NULL;
1106 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1107 Py_LeaveRecursiveCall();
1108 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001109 case 'n':
1110 /* null */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001111 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001112 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001113 Py_RETURN_NONE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001114 }
1115 break;
1116 case 't':
1117 /* true */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001118 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001119 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001120 Py_RETURN_TRUE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001121 }
1122 break;
1123 case 'f':
1124 /* false */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001125 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1126 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1127 PyUnicode_READ(kind, str, idx + 3) == 's' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001128 PyUnicode_READ(kind, str, idx + 4) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001129 *next_idx_ptr = idx + 5;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001130 Py_RETURN_FALSE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001131 }
1132 break;
1133 case 'N':
1134 /* NaN */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001135 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001136 PyUnicode_READ(kind, str, idx + 2) == 'N') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001137 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1138 }
1139 break;
1140 case 'I':
1141 /* Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001142 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1143 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1144 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001145 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001146 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1147 PyUnicode_READ(kind, str, idx + 6) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001148 PyUnicode_READ(kind, str, idx + 7) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001149 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1150 }
1151 break;
1152 case '-':
1153 /* -Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001154 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001155 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1156 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001157 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001158 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001159 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1160 PyUnicode_READ(kind, str, idx + 7) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001161 PyUnicode_READ(kind, str, idx + 8) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001162 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1163 }
1164 break;
1165 }
1166 /* Didn't find a string, object, array, or named constant. Look for a number. */
1167 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1168}
1169
1170static PyObject *
1171scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1172{
1173 /* Python callable interface to scan_once_{str,unicode} */
1174 PyObject *pystr;
1175 PyObject *rval;
1176 Py_ssize_t idx;
1177 Py_ssize_t next_idx = -1;
1178 static char *kwlist[] = {"string", "idx", NULL};
1179 PyScannerObject *s;
1180 assert(PyScanner_Check(self));
1181 s = (PyScannerObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001182 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001183 return NULL;
1184
1185 if (PyUnicode_Check(pystr)) {
1186 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1187 }
1188 else {
1189 PyErr_Format(PyExc_TypeError,
1190 "first argument must be a string, not %.80s",
1191 Py_TYPE(pystr)->tp_name);
1192 return NULL;
1193 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001194 PyDict_Clear(s->memo);
1195 if (rval == NULL)
1196 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001197 return _build_rval_index_tuple(rval, next_idx);
1198}
1199
1200static PyObject *
1201scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1202{
1203 PyScannerObject *s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001204 PyObject *ctx;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001205 PyObject *strict;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001206 static char *kwlist[] = {"context", NULL};
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001207
1208 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001209 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001210
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001211 s = (PyScannerObject *)type->tp_alloc(type, 0);
1212 if (s == NULL) {
1213 return NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001214 }
1215
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001216 s->memo = PyDict_New();
1217 if (s->memo == NULL)
1218 goto bail;
1219
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001220 /* All of these will fail "gracefully" so we don't need to verify them */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001221 strict = PyObject_GetAttrString(ctx, "strict");
1222 if (strict == NULL)
1223 goto bail;
1224 s->strict = PyObject_IsTrue(strict);
1225 Py_DECREF(strict);
1226 if (s->strict < 0)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001227 goto bail;
1228 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1229 if (s->object_hook == NULL)
1230 goto bail;
1231 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1232 if (s->object_pairs_hook == NULL)
1233 goto bail;
1234 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1235 if (s->parse_float == NULL)
1236 goto bail;
1237 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1238 if (s->parse_int == NULL)
1239 goto bail;
1240 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1241 if (s->parse_constant == NULL)
1242 goto bail;
1243
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001244 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001245
1246bail:
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001247 Py_DECREF(s);
1248 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001249}
1250
1251PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1252
1253static
1254PyTypeObject PyScannerType = {
1255 PyVarObject_HEAD_INIT(NULL, 0)
1256 "_json.Scanner", /* tp_name */
1257 sizeof(PyScannerObject), /* tp_basicsize */
1258 0, /* tp_itemsize */
1259 scanner_dealloc, /* tp_dealloc */
1260 0, /* tp_print */
1261 0, /* tp_getattr */
1262 0, /* tp_setattr */
1263 0, /* tp_compare */
1264 0, /* tp_repr */
1265 0, /* tp_as_number */
1266 0, /* tp_as_sequence */
1267 0, /* tp_as_mapping */
1268 0, /* tp_hash */
1269 scanner_call, /* tp_call */
1270 0, /* tp_str */
1271 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1272 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1273 0, /* tp_as_buffer */
1274 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1275 scanner_doc, /* tp_doc */
1276 scanner_traverse, /* tp_traverse */
1277 scanner_clear, /* tp_clear */
1278 0, /* tp_richcompare */
1279 0, /* tp_weaklistoffset */
1280 0, /* tp_iter */
1281 0, /* tp_iternext */
1282 0, /* tp_methods */
1283 scanner_members, /* tp_members */
1284 0, /* tp_getset */
1285 0, /* tp_base */
1286 0, /* tp_dict */
1287 0, /* tp_descr_get */
1288 0, /* tp_descr_set */
1289 0, /* tp_dictoffset */
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001290 0, /* tp_init */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001291 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1292 scanner_new, /* tp_new */
1293 0,/* PyObject_GC_Del, */ /* tp_free */
1294};
1295
1296static PyObject *
1297encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1298{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001299 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1300
1301 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001302 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001303 PyObject *item_separator;
1304 int sort_keys, skipkeys, allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001305
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001306 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001307 &markers, &defaultfn, &encoder, &indent,
1308 &key_separator, &item_separator,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001309 &sort_keys, &skipkeys, &allow_nan))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001310 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001311
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001312 if (markers != Py_None && !PyDict_Check(markers)) {
1313 PyErr_Format(PyExc_TypeError,
1314 "make_encoder() argument 1 must be dict or None, "
1315 "not %.200s", Py_TYPE(markers)->tp_name);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001316 return NULL;
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001317 }
1318
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001319 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1320 if (s == NULL)
1321 return NULL;
1322
Antoine Pitrou781eba72009-12-08 15:57:31 +00001323 s->markers = markers;
1324 s->defaultfn = defaultfn;
1325 s->encoder = encoder;
1326 s->indent = indent;
1327 s->key_separator = key_separator;
1328 s->item_separator = item_separator;
1329 s->sort_keys = sort_keys;
1330 s->skipkeys = skipkeys;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001331 s->allow_nan = allow_nan;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001332 s->fast_encode = NULL;
1333 if (PyCFunction_Check(s->encoder)) {
1334 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1335 if (f == (PyCFunction)py_encode_basestring_ascii ||
1336 f == (PyCFunction)py_encode_basestring) {
1337 s->fast_encode = f;
1338 }
1339 }
Antoine Pitrou781eba72009-12-08 15:57:31 +00001340
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001341 Py_INCREF(s->markers);
1342 Py_INCREF(s->defaultfn);
1343 Py_INCREF(s->encoder);
1344 Py_INCREF(s->indent);
1345 Py_INCREF(s->key_separator);
1346 Py_INCREF(s->item_separator);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001347 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001348}
1349
1350static PyObject *
1351encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1352{
1353 /* Python callable interface to encode_listencode_obj */
1354 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1355 PyObject *obj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001356 Py_ssize_t indent_level;
1357 PyEncoderObject *s;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001358 _PyAccu acc;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001359
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001360 assert(PyEncoder_Check(self));
1361 s = (PyEncoderObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001362 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1363 &obj, &indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001364 return NULL;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001365 if (_PyAccu_Init(&acc))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001366 return NULL;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001367 if (encoder_listencode_obj(s, &acc, obj, indent_level)) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001368 _PyAccu_Destroy(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001369 return NULL;
1370 }
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001371 return _PyAccu_FinishAsList(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001372}
1373
1374static PyObject *
1375_encoded_const(PyObject *obj)
1376{
1377 /* Return the JSON string representation of None, True, False */
1378 if (obj == Py_None) {
1379 static PyObject *s_null = NULL;
1380 if (s_null == NULL) {
1381 s_null = PyUnicode_InternFromString("null");
1382 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001383 Py_XINCREF(s_null);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001384 return s_null;
1385 }
1386 else if (obj == Py_True) {
1387 static PyObject *s_true = NULL;
1388 if (s_true == NULL) {
1389 s_true = PyUnicode_InternFromString("true");
1390 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001391 Py_XINCREF(s_true);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001392 return s_true;
1393 }
1394 else if (obj == Py_False) {
1395 static PyObject *s_false = NULL;
1396 if (s_false == NULL) {
1397 s_false = PyUnicode_InternFromString("false");
1398 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001399 Py_XINCREF(s_false);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001400 return s_false;
1401 }
1402 else {
1403 PyErr_SetString(PyExc_ValueError, "not a const");
1404 return NULL;
1405 }
1406}
1407
1408static PyObject *
1409encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1410{
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001411 /* Return the JSON representation of a PyFloat. */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001412 double i = PyFloat_AS_DOUBLE(obj);
1413 if (!Py_IS_FINITE(i)) {
1414 if (!s->allow_nan) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001415 PyErr_SetString(
1416 PyExc_ValueError,
1417 "Out of range float values are not JSON compliant"
1418 );
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001419 return NULL;
1420 }
1421 if (i > 0) {
1422 return PyUnicode_FromString("Infinity");
1423 }
1424 else if (i < 0) {
1425 return PyUnicode_FromString("-Infinity");
1426 }
1427 else {
1428 return PyUnicode_FromString("NaN");
1429 }
1430 }
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001431 return PyFloat_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001432}
1433
1434static PyObject *
1435encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1436{
1437 /* Return the JSON representation of a string */
Oren Milman2b382dd2017-09-24 12:07:12 +03001438 PyObject *encoded;
1439
1440 if (s->fast_encode) {
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001441 return s->fast_encode(NULL, obj);
Oren Milman2b382dd2017-09-24 12:07:12 +03001442 }
1443 encoded = PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1444 if (encoded != NULL && !PyUnicode_Check(encoded)) {
1445 PyErr_Format(PyExc_TypeError,
1446 "encoder() must return a string, not %.80s",
1447 Py_TYPE(encoded)->tp_name);
1448 Py_DECREF(encoded);
1449 return NULL;
1450 }
1451 return encoded;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001452}
1453
1454static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001455_steal_accumulate(_PyAccu *acc, PyObject *stolen)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001456{
1457 /* Append stolen and then decrement its reference count */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001458 int rval = _PyAccu_Accumulate(acc, stolen);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001459 Py_DECREF(stolen);
1460 return rval;
1461}
1462
1463static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001464encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001465 PyObject *obj, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001466{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001467 /* Encode Python object obj to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001468 PyObject *newobj;
1469 int rv;
1470
1471 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1472 PyObject *cstr = _encoded_const(obj);
1473 if (cstr == NULL)
1474 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001475 return _steal_accumulate(acc, cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001476 }
1477 else if (PyUnicode_Check(obj))
1478 {
1479 PyObject *encoded = encoder_encode_string(s, obj);
1480 if (encoded == NULL)
1481 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001482 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001483 }
1484 else if (PyLong_Check(obj)) {
Serhiy Storchaka96aeaec2019-05-06 22:29:40 +03001485 PyObject *encoded = PyLong_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001486 if (encoded == NULL)
1487 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001488 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001489 }
1490 else if (PyFloat_Check(obj)) {
1491 PyObject *encoded = encoder_encode_float(s, obj);
1492 if (encoded == NULL)
1493 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001494 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001495 }
1496 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001497 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1498 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001499 rv = encoder_listencode_list(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001500 Py_LeaveRecursiveCall();
1501 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001502 }
1503 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001504 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1505 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001506 rv = encoder_listencode_dict(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001507 Py_LeaveRecursiveCall();
1508 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001509 }
1510 else {
1511 PyObject *ident = NULL;
1512 if (s->markers != Py_None) {
1513 int has_key;
1514 ident = PyLong_FromVoidPtr(obj);
1515 if (ident == NULL)
1516 return -1;
1517 has_key = PyDict_Contains(s->markers, ident);
1518 if (has_key) {
1519 if (has_key != -1)
1520 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1521 Py_DECREF(ident);
1522 return -1;
1523 }
1524 if (PyDict_SetItem(s->markers, ident, obj)) {
1525 Py_DECREF(ident);
1526 return -1;
1527 }
1528 }
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001529 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001530 if (newobj == NULL) {
1531 Py_XDECREF(ident);
1532 return -1;
1533 }
Ezio Melotti13672652011-05-11 01:02:56 +03001534
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001535 if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
1536 Py_DECREF(newobj);
1537 Py_XDECREF(ident);
Ezio Melotti13672652011-05-11 01:02:56 +03001538 return -1;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001539 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001540 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001541 Py_LeaveRecursiveCall();
1542
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001543 Py_DECREF(newobj);
1544 if (rv) {
1545 Py_XDECREF(ident);
1546 return -1;
1547 }
1548 if (ident != NULL) {
1549 if (PyDict_DelItem(s->markers, ident)) {
1550 Py_XDECREF(ident);
1551 return -1;
1552 }
1553 Py_XDECREF(ident);
1554 }
1555 return rv;
1556 }
1557}
1558
1559static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001560encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001561 PyObject *dct, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001562{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001563 /* Encode Python dict dct a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001564 static PyObject *open_dict = NULL;
1565 static PyObject *close_dict = NULL;
1566 static PyObject *empty_dict = NULL;
1567 PyObject *kstr = NULL;
1568 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001569 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001570 PyObject *items;
1571 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001572 Py_ssize_t idx;
1573
1574 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1575 open_dict = PyUnicode_InternFromString("{");
1576 close_dict = PyUnicode_InternFromString("}");
1577 empty_dict = PyUnicode_InternFromString("{}");
1578 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1579 return -1;
1580 }
Serhiy Storchaka8cbc51a2017-01-13 08:38:15 +02001581 if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001582 return _PyAccu_Accumulate(acc, empty_dict);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001583
1584 if (s->markers != Py_None) {
1585 int has_key;
1586 ident = PyLong_FromVoidPtr(dct);
1587 if (ident == NULL)
1588 goto bail;
1589 has_key = PyDict_Contains(s->markers, ident);
1590 if (has_key) {
1591 if (has_key != -1)
1592 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1593 goto bail;
1594 }
1595 if (PyDict_SetItem(s->markers, ident, dct)) {
1596 goto bail;
1597 }
1598 }
1599
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001600 if (_PyAccu_Accumulate(acc, open_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001601 goto bail;
1602
1603 if (s->indent != Py_None) {
1604 /* TODO: DOES NOT RUN */
1605 indent_level += 1;
1606 /*
1607 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1608 separator = _item_separator + newline_indent
1609 buf += newline_indent
1610 */
1611 }
1612
Benjamin Peterson501182a2015-05-02 22:28:04 -04001613 items = PyMapping_Items(dct);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001614 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001615 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001616 if (s->sort_keys && PyList_Sort(items) < 0) {
1617 Py_DECREF(items);
Benjamin Peterson501182a2015-05-02 22:28:04 -04001618 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001619 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001620 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001621 Py_DECREF(items);
1622 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001623 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001624 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001625 while ((item = PyIter_Next(it)) != NULL) {
1626 PyObject *encoded, *key, *value;
Serhiy Storchakafff9a312017-03-21 08:53:25 +02001627 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001628 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1629 goto bail;
1630 }
1631 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001632 if (PyUnicode_Check(key)) {
1633 Py_INCREF(key);
1634 kstr = key;
1635 }
1636 else if (PyFloat_Check(key)) {
1637 kstr = encoder_encode_float(s, key);
1638 if (kstr == NULL)
1639 goto bail;
1640 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001641 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001642 /* This must come before the PyLong_Check because
1643 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001644 kstr = _encoded_const(key);
1645 if (kstr == NULL)
1646 goto bail;
1647 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001648 else if (PyLong_Check(key)) {
Serhiy Storchaka96aeaec2019-05-06 22:29:40 +03001649 kstr = PyLong_Type.tp_repr(key);
Ethan Furmana4998a72013-08-10 13:01:45 -07001650 if (kstr == NULL) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001651 goto bail;
Ethan Furmana4998a72013-08-10 13:01:45 -07001652 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001653 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001654 else if (s->skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001655 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001656 continue;
1657 }
1658 else {
Serhiy Storchakacfa797c2017-11-25 17:38:20 +02001659 PyErr_Format(PyExc_TypeError,
1660 "keys must be str, int, float, bool or None, "
1661 "not %.100s", key->ob_type->tp_name);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001662 goto bail;
1663 }
1664
1665 if (idx) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001666 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001667 goto bail;
1668 }
1669
1670 encoded = encoder_encode_string(s, kstr);
1671 Py_CLEAR(kstr);
1672 if (encoded == NULL)
1673 goto bail;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001674 if (_PyAccu_Accumulate(acc, encoded)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001675 Py_DECREF(encoded);
1676 goto bail;
1677 }
1678 Py_DECREF(encoded);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001679 if (_PyAccu_Accumulate(acc, s->key_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001680 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001681
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001682 value = PyTuple_GET_ITEM(item, 1);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001683 if (encoder_listencode_obj(s, acc, value, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001684 goto bail;
1685 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001686 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001687 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001688 if (PyErr_Occurred())
1689 goto bail;
1690 Py_CLEAR(it);
1691
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001692 if (ident != NULL) {
1693 if (PyDict_DelItem(s->markers, ident))
1694 goto bail;
1695 Py_CLEAR(ident);
1696 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001697 /* TODO DOES NOT RUN; dead code
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001698 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001699 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001700
1701 yield '\n' + (' ' * (_indent * _current_indent_level))
1702 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001703 if (_PyAccu_Accumulate(acc, close_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001704 goto bail;
1705 return 0;
1706
1707bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001708 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001709 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001710 Py_XDECREF(kstr);
1711 Py_XDECREF(ident);
1712 return -1;
1713}
1714
1715
1716static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001717encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001718 PyObject *seq, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001719{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001720 /* Encode Python list seq to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001721 static PyObject *open_array = NULL;
1722 static PyObject *close_array = NULL;
1723 static PyObject *empty_array = NULL;
1724 PyObject *ident = NULL;
1725 PyObject *s_fast = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001726 Py_ssize_t i;
1727
1728 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1729 open_array = PyUnicode_InternFromString("[");
1730 close_array = PyUnicode_InternFromString("]");
1731 empty_array = PyUnicode_InternFromString("[]");
1732 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1733 return -1;
1734 }
1735 ident = NULL;
1736 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1737 if (s_fast == NULL)
1738 return -1;
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001739 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001740 Py_DECREF(s_fast);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001741 return _PyAccu_Accumulate(acc, empty_array);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001742 }
1743
1744 if (s->markers != Py_None) {
1745 int has_key;
1746 ident = PyLong_FromVoidPtr(seq);
1747 if (ident == NULL)
1748 goto bail;
1749 has_key = PyDict_Contains(s->markers, ident);
1750 if (has_key) {
1751 if (has_key != -1)
1752 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1753 goto bail;
1754 }
1755 if (PyDict_SetItem(s->markers, ident, seq)) {
1756 goto bail;
1757 }
1758 }
1759
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001760 if (_PyAccu_Accumulate(acc, open_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001761 goto bail;
1762 if (s->indent != Py_None) {
1763 /* TODO: DOES NOT RUN */
1764 indent_level += 1;
1765 /*
1766 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1767 separator = _item_separator + newline_indent
1768 buf += newline_indent
1769 */
1770 }
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001771 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1772 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001773 if (i) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001774 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001775 goto bail;
1776 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001777 if (encoder_listencode_obj(s, acc, obj, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001778 goto bail;
1779 }
1780 if (ident != NULL) {
1781 if (PyDict_DelItem(s->markers, ident))
1782 goto bail;
1783 Py_CLEAR(ident);
1784 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001785
1786 /* TODO: DOES NOT RUN
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001787 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001788 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001789
1790 yield '\n' + (' ' * (_indent * _current_indent_level))
1791 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001792 if (_PyAccu_Accumulate(acc, close_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001793 goto bail;
1794 Py_DECREF(s_fast);
1795 return 0;
1796
1797bail:
1798 Py_XDECREF(ident);
1799 Py_DECREF(s_fast);
1800 return -1;
1801}
1802
1803static void
1804encoder_dealloc(PyObject *self)
1805{
INADA Naokia6296d32017-08-24 14:55:17 +09001806 /* bpo-31095: UnTrack is needed before calling any callbacks */
1807 PyObject_GC_UnTrack(self);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001808 encoder_clear(self);
1809 Py_TYPE(self)->tp_free(self);
1810}
1811
1812static int
1813encoder_traverse(PyObject *self, visitproc visit, void *arg)
1814{
1815 PyEncoderObject *s;
1816 assert(PyEncoder_Check(self));
1817 s = (PyEncoderObject *)self;
1818 Py_VISIT(s->markers);
1819 Py_VISIT(s->defaultfn);
1820 Py_VISIT(s->encoder);
1821 Py_VISIT(s->indent);
1822 Py_VISIT(s->key_separator);
1823 Py_VISIT(s->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001824 return 0;
1825}
1826
1827static int
1828encoder_clear(PyObject *self)
1829{
1830 /* Deallocate Encoder */
1831 PyEncoderObject *s;
1832 assert(PyEncoder_Check(self));
1833 s = (PyEncoderObject *)self;
1834 Py_CLEAR(s->markers);
1835 Py_CLEAR(s->defaultfn);
1836 Py_CLEAR(s->encoder);
1837 Py_CLEAR(s->indent);
1838 Py_CLEAR(s->key_separator);
1839 Py_CLEAR(s->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001840 return 0;
1841}
1842
1843PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1844
1845static
1846PyTypeObject PyEncoderType = {
1847 PyVarObject_HEAD_INIT(NULL, 0)
1848 "_json.Encoder", /* tp_name */
1849 sizeof(PyEncoderObject), /* tp_basicsize */
1850 0, /* tp_itemsize */
1851 encoder_dealloc, /* tp_dealloc */
1852 0, /* tp_print */
1853 0, /* tp_getattr */
1854 0, /* tp_setattr */
1855 0, /* tp_compare */
1856 0, /* tp_repr */
1857 0, /* tp_as_number */
1858 0, /* tp_as_sequence */
1859 0, /* tp_as_mapping */
1860 0, /* tp_hash */
1861 encoder_call, /* tp_call */
1862 0, /* tp_str */
1863 0, /* tp_getattro */
1864 0, /* tp_setattro */
1865 0, /* tp_as_buffer */
1866 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1867 encoder_doc, /* tp_doc */
1868 encoder_traverse, /* tp_traverse */
1869 encoder_clear, /* tp_clear */
1870 0, /* tp_richcompare */
1871 0, /* tp_weaklistoffset */
1872 0, /* tp_iter */
1873 0, /* tp_iternext */
1874 0, /* tp_methods */
1875 encoder_members, /* tp_members */
1876 0, /* tp_getset */
1877 0, /* tp_base */
1878 0, /* tp_dict */
1879 0, /* tp_descr_get */
1880 0, /* tp_descr_set */
1881 0, /* tp_dictoffset */
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001882 0, /* tp_init */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001883 0, /* tp_alloc */
1884 encoder_new, /* tp_new */
1885 0, /* tp_free */
1886};
1887
1888static PyMethodDef speedups_methods[] = {
1889 {"encode_basestring_ascii",
1890 (PyCFunction)py_encode_basestring_ascii,
1891 METH_O,
1892 pydoc_encode_basestring_ascii},
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001893 {"encode_basestring",
1894 (PyCFunction)py_encode_basestring,
1895 METH_O,
1896 pydoc_encode_basestring},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001897 {"scanstring",
1898 (PyCFunction)py_scanstring,
1899 METH_VARARGS,
1900 pydoc_scanstring},
Christian Heimes90540002008-05-08 14:29:10 +00001901 {NULL, NULL, 0, NULL}
1902};
1903
1904PyDoc_STRVAR(module_doc,
1905"json speedups\n");
1906
Martin v. Löwis1a214512008-06-11 05:26:20 +00001907static struct PyModuleDef jsonmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001908 PyModuleDef_HEAD_INIT,
1909 "_json",
1910 module_doc,
1911 -1,
1912 speedups_methods,
1913 NULL,
1914 NULL,
1915 NULL,
1916 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001917};
1918
Victor Stinnerf024d262015-03-17 17:48:27 +01001919PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001920PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00001921{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001922 PyObject *m = PyModule_Create(&jsonmodule);
1923 if (!m)
1924 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001925 if (PyType_Ready(&PyScannerType) < 0)
1926 goto fail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001927 if (PyType_Ready(&PyEncoderType) < 0)
1928 goto fail;
1929 Py_INCREF((PyObject*)&PyScannerType);
1930 if (PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType) < 0) {
1931 Py_DECREF((PyObject*)&PyScannerType);
1932 goto fail;
1933 }
1934 Py_INCREF((PyObject*)&PyEncoderType);
1935 if (PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType) < 0) {
1936 Py_DECREF((PyObject*)&PyEncoderType);
1937 goto fail;
1938 }
1939 return m;
1940 fail:
1941 Py_DECREF(m);
1942 return NULL;
Christian Heimes90540002008-05-08 14:29:10 +00001943}