/* Core extension modules are built-in on some platforms (e.g. Windows). */
3#ifdef Py_BUILD_CORE
Eric Snowfc1bf872017-09-11 18:30:43 -07004#define Py_BUILD_CORE_BUILTIN
Eric Snow2ebc5ce2017-09-07 23:51:28 -06005#undef Py_BUILD_CORE
6#endif
7
Christian Heimes90540002008-05-08 14:29:10 +00008#include "Python.h"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00009#include "structmember.h"
Antoine Pitroud0acb412012-03-22 14:42:18 +010010#include "accu.h"
11
/* UNUSED marks a parameter or variable as intentionally unused.  With a
   GCC-compatible compiler it expands to the __unused__ attribute; with any
   other compiler it expands to nothing. */
#ifdef __GNUC__
#define UNUSED __attribute__((__unused__))
#else
#define UNUSED
#endif
17
18#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
19#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
20#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
21#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
22
23static PyTypeObject PyScannerType;
24static PyTypeObject PyEncoderType;
25
26typedef struct _PyScannerObject {
27 PyObject_HEAD
Segev Finer541bd282017-07-13 08:52:08 +030028 signed char strict;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000029 PyObject *object_hook;
30 PyObject *object_pairs_hook;
31 PyObject *parse_float;
32 PyObject *parse_int;
33 PyObject *parse_constant;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +000034 PyObject *memo;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000035} PyScannerObject;
36
37static PyMemberDef scanner_members[] = {
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030038 {"strict", T_BOOL, offsetof(PyScannerObject, strict), READONLY, "strict"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000039 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
40 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
41 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
42 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
43 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
44 {NULL}
45};
46
47typedef struct _PyEncoderObject {
48 PyObject_HEAD
49 PyObject *markers;
50 PyObject *defaultfn;
51 PyObject *encoder;
52 PyObject *indent;
53 PyObject *key_separator;
54 PyObject *item_separator;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030055 char sort_keys;
56 char skipkeys;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000057 int allow_nan;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030058 PyCFunction fast_encode;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000059} PyEncoderObject;
60
61static PyMemberDef encoder_members[] = {
62 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
63 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
64 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
65 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
66 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
67 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030068 {"sort_keys", T_BOOL, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
69 {"skipkeys", T_BOOL, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000070 {NULL}
71};
72
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020073static PyObject *
74join_list_unicode(PyObject *lst)
75{
76 /* return u''.join(lst) */
77 static PyObject *sep = NULL;
78 if (sep == NULL) {
79 sep = PyUnicode_FromStringAndSize("", 0);
80 if (sep == NULL)
81 return NULL;
82 }
83 return PyUnicode_Join(sep, lst);
84}
85
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020086/* Forward decls */
87
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000088static PyObject *
89ascii_escape_unicode(PyObject *pystr);
90static PyObject *
91py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
92void init_json(void);
93static PyObject *
94scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
95static PyObject *
96_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
97static PyObject *
98scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000099static void
100scanner_dealloc(PyObject *self);
101static int
102scanner_clear(PyObject *self);
103static PyObject *
104encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000105static void
106encoder_dealloc(PyObject *self);
107static int
108encoder_clear(PyObject *self);
109static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200110encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000111static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200112encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000113static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200114encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000115static PyObject *
Hirokazu Yamamotofecf5d12009-05-02 15:55:19 +0000116_encoded_const(PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000117static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200118raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000119static PyObject *
120encoder_encode_string(PyEncoderObject *s, PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000121static PyObject *
122encoder_encode_float(PyEncoderObject *s, PyObject *obj);
123
/* S_CHAR(c): true when c is printable ASCII (' ' through '~') other than
   backslash and double quote, i.e. it can be copied into a JSON string
   unescaped.  The argument is parenthesized so that an expression argument
   is evaluated as a unit; note that it is evaluated more than once. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* IS_WHITESPACE(c): true for space, tab, newline and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
Christian Heimes90540002008-05-08 14:29:10 +0000126
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000127static Py_ssize_t
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200128ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000129{
130 /* Escape unicode code point c to ASCII escape sequences
131 in char *output. output must have at least 12 bytes unused to
132 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000133 output[chars++] = '\\';
134 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000135 case '\\': output[chars++] = c; break;
136 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000137 case '\b': output[chars++] = 'b'; break;
138 case '\f': output[chars++] = 'f'; break;
139 case '\n': output[chars++] = 'n'; break;
140 case '\r': output[chars++] = 'r'; break;
141 case '\t': output[chars++] = 't'; break;
142 default:
Christian Heimes90540002008-05-08 14:29:10 +0000143 if (c >= 0x10000) {
144 /* UTF-16 surrogate pair */
Victor Stinner76df43d2012-10-30 01:42:39 +0100145 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000146 output[chars++] = 'u';
Victor Stinner76df43d2012-10-30 01:42:39 +0100147 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
148 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
149 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
150 output[chars++] = Py_hexdigits[(v ) & 0xf];
151 c = Py_UNICODE_LOW_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000152 output[chars++] = '\\';
153 }
Christian Heimes90540002008-05-08 14:29:10 +0000154 output[chars++] = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200155 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
156 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
157 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
158 output[chars++] = Py_hexdigits[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000159 }
160 return chars;
161}
162
163static PyObject *
164ascii_escape_unicode(PyObject *pystr)
165{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000166 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000167 Py_ssize_t i;
168 Py_ssize_t input_chars;
169 Py_ssize_t output_size;
170 Py_ssize_t chars;
171 PyObject *rval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200172 void *input;
173 unsigned char *output;
174 int kind;
Christian Heimes90540002008-05-08 14:29:10 +0000175
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200176 if (PyUnicode_READY(pystr) == -1)
177 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000178
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200179 input_chars = PyUnicode_GET_LENGTH(pystr);
180 input = PyUnicode_DATA(pystr);
181 kind = PyUnicode_KIND(pystr);
182
183 /* Compute the output size */
184 for (i = 0, output_size = 2; i < input_chars; i++) {
185 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500186 Py_ssize_t d;
187 if (S_CHAR(c)) {
188 d = 1;
189 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200190 else {
191 switch(c) {
Victor Stinnerd9c06312011-10-11 21:56:19 +0200192 case '\\': case '"': case '\b': case '\f':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200193 case '\n': case '\r': case '\t':
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500194 d = 2; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200195 default:
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500196 d = c >= 0x10000 ? 12 : 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200197 }
198 }
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500199 if (output_size > PY_SSIZE_T_MAX - d) {
200 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
201 return NULL;
202 }
203 output_size += d;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200204 }
205
206 rval = PyUnicode_New(output_size, 127);
Christian Heimes90540002008-05-08 14:29:10 +0000207 if (rval == NULL) {
208 return NULL;
209 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200210 output = PyUnicode_1BYTE_DATA(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000211 chars = 0;
212 output[chars++] = '"';
213 for (i = 0; i < input_chars; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200214 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Christian Heimes90540002008-05-08 14:29:10 +0000215 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000216 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000217 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000218 else {
219 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000220 }
Christian Heimes90540002008-05-08 14:29:10 +0000221 }
222 output[chars++] = '"';
Christian Heimesf402e922013-01-03 09:21:55 +0100223#ifdef Py_DEBUG
Victor Stinner8f825062012-04-27 13:55:39 +0200224 assert(_PyUnicode_CheckConsistency(rval, 1));
Christian Heimesf402e922013-01-03 09:21:55 +0100225#endif
Christian Heimes90540002008-05-08 14:29:10 +0000226 return rval;
227}
228
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100229static PyObject *
230escape_unicode(PyObject *pystr)
231{
232 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
233 Py_ssize_t i;
234 Py_ssize_t input_chars;
235 Py_ssize_t output_size;
236 Py_ssize_t chars;
237 PyObject *rval;
238 void *input;
239 int kind;
240 Py_UCS4 maxchar;
241
242 if (PyUnicode_READY(pystr) == -1)
243 return NULL;
244
245 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
246 input_chars = PyUnicode_GET_LENGTH(pystr);
247 input = PyUnicode_DATA(pystr);
248 kind = PyUnicode_KIND(pystr);
249
250 /* Compute the output size */
251 for (i = 0, output_size = 2; i < input_chars; i++) {
252 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500253 Py_ssize_t d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100254 switch (c) {
255 case '\\': case '"': case '\b': case '\f':
256 case '\n': case '\r': case '\t':
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500257 d = 2;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100258 break;
259 default:
260 if (c <= 0x1f)
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500261 d = 6;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100262 else
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500263 d = 1;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100264 }
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500265 if (output_size > PY_SSIZE_T_MAX - d) {
266 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
267 return NULL;
268 }
269 output_size += d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100270 }
271
272 rval = PyUnicode_New(output_size, maxchar);
273 if (rval == NULL)
274 return NULL;
275
276 kind = PyUnicode_KIND(rval);
277
278#define ENCODE_OUTPUT do { \
279 chars = 0; \
280 output[chars++] = '"'; \
281 for (i = 0; i < input_chars; i++) { \
282 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
283 switch (c) { \
284 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
285 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
286 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
287 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
288 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
289 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
290 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
291 default: \
292 if (c <= 0x1f) { \
293 output[chars++] = '\\'; \
294 output[chars++] = 'u'; \
295 output[chars++] = '0'; \
296 output[chars++] = '0'; \
297 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
298 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
299 } else { \
300 output[chars++] = c; \
301 } \
302 } \
303 } \
304 output[chars++] = '"'; \
305 } while (0)
306
307 if (kind == PyUnicode_1BYTE_KIND) {
308 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
309 ENCODE_OUTPUT;
310 } else if (kind == PyUnicode_2BYTE_KIND) {
311 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
312 ENCODE_OUTPUT;
313 } else {
314 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
315 assert(kind == PyUnicode_4BYTE_KIND);
316 ENCODE_OUTPUT;
317 }
318#undef ENCODE_OUTPUT
319
320#ifdef Py_DEBUG
321 assert(_PyUnicode_CheckConsistency(rval, 1));
322#endif
323 return rval;
324}
325
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000326static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200327raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
Christian Heimes90540002008-05-08 14:29:10 +0000328{
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200329 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
330 static PyObject *JSONDecodeError = NULL;
331 PyObject *exc;
332 if (JSONDecodeError == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000333 PyObject *decoder = PyImport_ImportModule("json.decoder");
334 if (decoder == NULL)
335 return;
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200336 JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000337 Py_DECREF(decoder);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200338 if (JSONDecodeError == NULL)
Christian Heimes90540002008-05-08 14:29:10 +0000339 return;
Christian Heimes90540002008-05-08 14:29:10 +0000340 }
Victor Stinner4c381542016-12-09 00:33:39 +0100341 exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200342 if (exc) {
343 PyErr_SetObject(JSONDecodeError, exc);
344 Py_DECREF(exc);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000345 }
Christian Heimes90540002008-05-08 14:29:10 +0000346}
347
Ezio Melotti37623ab2013-01-03 08:44:15 +0200348static void
349raise_stop_iteration(Py_ssize_t idx)
350{
351 PyObject *value = PyLong_FromSsize_t(idx);
352 if (value != NULL) {
353 PyErr_SetObject(PyExc_StopIteration, value);
354 Py_DECREF(value);
355 }
356}
357
Christian Heimes90540002008-05-08 14:29:10 +0000358static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000359_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
360 /* return (rval, idx) tuple, stealing reference to rval */
361 PyObject *tpl;
362 PyObject *pyidx;
363 /*
364 steal a reference to rval, returns (rval, idx)
365 */
366 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000367 return NULL;
368 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000369 pyidx = PyLong_FromSsize_t(idx);
370 if (pyidx == NULL) {
371 Py_DECREF(rval);
372 return NULL;
373 }
374 tpl = PyTuple_New(2);
375 if (tpl == NULL) {
376 Py_DECREF(pyidx);
377 Py_DECREF(rval);
378 return NULL;
379 }
380 PyTuple_SET_ITEM(tpl, 0, rval);
381 PyTuple_SET_ITEM(tpl, 1, pyidx);
382 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000383}
384
/* Move the pending `chunk` (if any) onto the `chunks` list, creating the
   list on first use, and reset `chunk` to NULL.  Jumps to `bail` on
   failure.  Relies on locals named chunk and chunks and a label named bail
   in the enclosing function.  The expansion is a bare if-statement and the
   call sites are written without a trailing semicolon. */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        if (chunks == NULL) { \
            chunks = PyList_New(0); \
            if (chunks == NULL) { \
                goto bail; \
            } \
        } \
        if (PyList_Append(chunks, chunk)) { \
            Py_CLEAR(chunk); \
            goto bail; \
        } \
        Py_CLEAR(chunk); \
    }
399
Christian Heimes90540002008-05-08 14:29:10 +0000400static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000401scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000402{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000403 /* Read the JSON string from PyUnicode pystr.
404 end is the index of the first character after the quote.
405 if strict is zero then literal control characters are allowed
406 *next_end_ptr is a return-by-reference index of the character
407 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000408
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000409 Return value is a new PyUnicode
410 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000411 PyObject *rval = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200412 Py_ssize_t len;
Christian Heimes90540002008-05-08 14:29:10 +0000413 Py_ssize_t begin = end - 1;
Brett Cannonb94767f2011-02-22 20:15:44 +0000414 Py_ssize_t next /* = begin */;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200415 const void *buf;
416 int kind;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000417 PyObject *chunks = NULL;
418 PyObject *chunk = NULL;
419
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200420 if (PyUnicode_READY(pystr) == -1)
421 return 0;
422
423 len = PyUnicode_GET_LENGTH(pystr);
424 buf = PyUnicode_DATA(pystr);
425 kind = PyUnicode_KIND(pystr);
426
Ezio Melotti37623ab2013-01-03 08:44:15 +0200427 if (end < 0 || len < end) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000428 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
429 goto bail;
430 }
Christian Heimes90540002008-05-08 14:29:10 +0000431 while (1) {
432 /* Find the end of the string or the next escape */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200433 Py_UCS4 c = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000434 for (next = end; next < len; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200435 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000436 if (c == '"' || c == '\\') {
437 break;
438 }
439 else if (strict && c <= 0x1f) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000440 raise_errmsg("Invalid control character at", pystr, next);
Christian Heimes90540002008-05-08 14:29:10 +0000441 goto bail;
442 }
443 }
444 if (!(c == '"' || c == '\\')) {
445 raise_errmsg("Unterminated string starting at", pystr, begin);
446 goto bail;
447 }
448 /* Pick up this chunk if it's not zero length */
449 if (next != end) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000450 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200451 chunk = PyUnicode_FromKindAndData(
452 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200453 (char*)buf + kind * end,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200454 next - end);
Christian Heimes90540002008-05-08 14:29:10 +0000455 if (chunk == NULL) {
456 goto bail;
457 }
Christian Heimes90540002008-05-08 14:29:10 +0000458 }
459 next++;
460 if (c == '"') {
461 end = next;
462 break;
463 }
464 if (next == len) {
465 raise_errmsg("Unterminated string starting at", pystr, begin);
466 goto bail;
467 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200468 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000469 if (c != 'u') {
470 /* Non-unicode backslash escapes */
471 end = next + 1;
472 switch (c) {
473 case '"': break;
474 case '\\': break;
475 case '/': break;
476 case 'b': c = '\b'; break;
477 case 'f': c = '\f'; break;
478 case 'n': c = '\n'; break;
479 case 'r': c = '\r'; break;
480 case 't': c = '\t'; break;
481 default: c = 0;
482 }
483 if (c == 0) {
484 raise_errmsg("Invalid \\escape", pystr, end - 2);
485 goto bail;
486 }
487 }
488 else {
489 c = 0;
490 next++;
491 end = next + 4;
492 if (end >= len) {
493 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
494 goto bail;
495 }
496 /* Decode 4 hex digits */
497 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200498 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000499 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000500 switch (digit) {
501 case '0': case '1': case '2': case '3': case '4':
502 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000503 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000504 case 'a': case 'b': case 'c': case 'd': case 'e':
505 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000506 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000507 case 'A': case 'B': case 'C': case 'D': case 'E':
508 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000509 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000510 default:
511 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
512 goto bail;
513 }
514 }
Christian Heimes90540002008-05-08 14:29:10 +0000515 /* Surrogate pair */
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200516 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
517 PyUnicode_READ(kind, buf, next++) == '\\' &&
518 PyUnicode_READ(kind, buf, next++) == 'u') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200519 Py_UCS4 c2 = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000520 end += 6;
521 /* Decode 4 hex digits */
522 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200523 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000524 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000525 switch (digit) {
526 case '0': case '1': case '2': case '3': case '4':
527 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000528 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000529 case 'a': case 'b': case 'c': case 'd': case 'e':
530 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000531 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000532 case 'A': case 'B': case 'C': case 'D': case 'E':
533 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000534 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000535 default:
536 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
537 goto bail;
538 }
539 }
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200540 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
541 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
542 else
543 end -= 6;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000544 }
Christian Heimes90540002008-05-08 14:29:10 +0000545 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000546 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200547 chunk = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &c, 1);
Christian Heimes90540002008-05-08 14:29:10 +0000548 if (chunk == NULL) {
549 goto bail;
550 }
Christian Heimes90540002008-05-08 14:29:10 +0000551 }
552
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000553 if (chunks == NULL) {
554 if (chunk != NULL)
555 rval = chunk;
556 else
557 rval = PyUnicode_FromStringAndSize("", 0);
Christian Heimes90540002008-05-08 14:29:10 +0000558 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000559 else {
560 APPEND_OLD_CHUNK
561 rval = join_list_unicode(chunks);
562 if (rval == NULL) {
563 goto bail;
564 }
565 Py_CLEAR(chunks);
566 }
567
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000568 *next_end_ptr = end;
569 return rval;
Christian Heimes90540002008-05-08 14:29:10 +0000570bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000571 *next_end_ptr = -1;
Christian Heimes90540002008-05-08 14:29:10 +0000572 Py_XDECREF(chunks);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000573 Py_XDECREF(chunk);
Christian Heimes90540002008-05-08 14:29:10 +0000574 return NULL;
575}
576
577PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000578 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000579 "\n"
580 "Scan the string s for a JSON string. End is the index of the\n"
581 "character in s after the quote that started the JSON string.\n"
582 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
583 "on attempt to decode an invalid string. If strict is False then literal\n"
584 "control characters are allowed in the string.\n"
585 "\n"
586 "Returns a tuple of the decoded string and the index of the character in s\n"
587 "after the end quote."
588);
Christian Heimes90540002008-05-08 14:29:10 +0000589
590static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000591py_scanstring(PyObject* self UNUSED, PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000592{
593 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000594 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000595 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000596 Py_ssize_t next_end = -1;
597 int strict = 1;
Antoine Pitroucbb02842012-12-01 19:34:16 +0100598 if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000599 return NULL;
600 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000601 if (PyUnicode_Check(pystr)) {
602 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000603 }
604 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000605 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000606 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000607 Py_TYPE(pystr)->tp_name);
608 return NULL;
609 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000610 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000611}
612
613PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000614 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000615 "\n"
616 "Return an ASCII-only JSON representation of a Python string"
617);
Christian Heimes90540002008-05-08 14:29:10 +0000618
619static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000620py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000621{
622 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000623 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000624 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000625 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000626 rval = ascii_escape_unicode(pystr);
627 }
628 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000629 PyErr_Format(PyExc_TypeError,
630 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000631 Py_TYPE(pystr)->tp_name);
632 return NULL;
633 }
Christian Heimes90540002008-05-08 14:29:10 +0000634 return rval;
635}
636
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100637
638PyDoc_STRVAR(pydoc_encode_basestring,
639 "encode_basestring(string) -> string\n"
640 "\n"
641 "Return a JSON representation of a Python string"
642);
643
644static PyObject *
645py_encode_basestring(PyObject* self UNUSED, PyObject *pystr)
646{
647 PyObject *rval;
648 /* Return a JSON representation of a Python string */
649 /* METH_O */
650 if (PyUnicode_Check(pystr)) {
651 rval = escape_unicode(pystr);
652 }
653 else {
654 PyErr_Format(PyExc_TypeError,
655 "first argument must be a string, not %.80s",
656 Py_TYPE(pystr)->tp_name);
657 return NULL;
658 }
659 return rval;
660}
661
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000662static void
663scanner_dealloc(PyObject *self)
664{
INADA Naokia6296d32017-08-24 14:55:17 +0900665 /* bpo-31095: UnTrack is needed before calling any callbacks */
666 PyObject_GC_UnTrack(self);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000667 scanner_clear(self);
668 Py_TYPE(self)->tp_free(self);
669}
670
671static int
672scanner_traverse(PyObject *self, visitproc visit, void *arg)
673{
674 PyScannerObject *s;
675 assert(PyScanner_Check(self));
676 s = (PyScannerObject *)self;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000677 Py_VISIT(s->object_hook);
678 Py_VISIT(s->object_pairs_hook);
679 Py_VISIT(s->parse_float);
680 Py_VISIT(s->parse_int);
681 Py_VISIT(s->parse_constant);
682 return 0;
683}
684
685static int
686scanner_clear(PyObject *self)
687{
688 PyScannerObject *s;
689 assert(PyScanner_Check(self));
690 s = (PyScannerObject *)self;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000691 Py_CLEAR(s->object_hook);
692 Py_CLEAR(s->object_pairs_hook);
693 Py_CLEAR(s->parse_float);
694 Py_CLEAR(s->parse_int);
695 Py_CLEAR(s->parse_constant);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000696 Py_CLEAR(s->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000697 return 0;
698}
699
700static PyObject *
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300701_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
702{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000703 /* Read a JSON object from PyUnicode pystr.
704 idx is the index of the first character after the opening curly brace.
705 *next_idx_ptr is a return-by-reference index to the first character after
706 the closing curly brace.
707
708 Returns a new PyObject (usually a dict, but object_hook can change that)
709 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200710 void *str;
711 int kind;
712 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000713 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000714 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000715 PyObject *key = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000716 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000717 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000718
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200719 if (PyUnicode_READY(pystr) == -1)
720 return NULL;
721
722 str = PyUnicode_DATA(pystr);
723 kind = PyUnicode_KIND(pystr);
724 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
725
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000726 if (has_pairs_hook)
727 rval = PyList_New(0);
728 else
729 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000730 if (rval == NULL)
731 return NULL;
732
733 /* skip whitespace after { */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200734 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000735
736 /* only loop if the object is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200737 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
738 while (1) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000739 PyObject *memokey;
740
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000741 /* read key */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200742 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200743 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000744 goto bail;
745 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300746 key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000747 if (key == NULL)
748 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000749 memokey = PyDict_GetItem(s->memo, key);
750 if (memokey != NULL) {
751 Py_INCREF(memokey);
752 Py_DECREF(key);
753 key = memokey;
754 }
755 else {
756 if (PyDict_SetItem(s->memo, key, key) < 0)
757 goto bail;
758 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000759 idx = next_idx;
760
761 /* skip whitespace between key and : delimiter, read :, skip whitespace */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200762 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
763 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200764 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000765 goto bail;
766 }
767 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200768 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000769
770 /* read any JSON term */
771 val = scan_once_unicode(s, pystr, idx, &next_idx);
772 if (val == NULL)
773 goto bail;
774
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000775 if (has_pairs_hook) {
776 PyObject *item = PyTuple_Pack(2, key, val);
777 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000778 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000779 Py_CLEAR(key);
780 Py_CLEAR(val);
781 if (PyList_Append(rval, item) == -1) {
782 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000783 goto bail;
784 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000785 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000786 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000787 else {
788 if (PyDict_SetItem(rval, key, val) < 0)
789 goto bail;
790 Py_CLEAR(key);
791 Py_CLEAR(val);
792 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000793 idx = next_idx;
794
795 /* skip whitespace before } or , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200796 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000797
798 /* bail if the object is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200799 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000800 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200801 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200802 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000803 goto bail;
804 }
805 idx++;
806
807 /* skip whitespace after , delimiter */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200808 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000809 }
810 }
811
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000812 *next_idx_ptr = idx + 1;
813
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000814 if (has_pairs_hook) {
Victor Stinnerde4ae3d2016-12-04 22:59:09 +0100815 val = PyObject_CallFunctionObjArgs(s->object_pairs_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000816 Py_DECREF(rval);
817 return val;
818 }
819
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000820 /* if object_hook is not None: rval = object_hook(rval) */
821 if (s->object_hook != Py_None) {
Victor Stinnerde4ae3d2016-12-04 22:59:09 +0100822 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000823 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000824 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000825 }
826 return rval;
827bail:
828 Py_XDECREF(key);
829 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000830 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000831 return NULL;
832}
833
834static PyObject *
835_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200836 /* Read a JSON array from PyUnicode pystr.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000837 idx is the index of the first character after the opening brace.
838 *next_idx_ptr is a return-by-reference index to the first character after
839 the closing brace.
840
841 Returns a new PyList
842 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200843 void *str;
844 int kind;
845 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000846 PyObject *val = NULL;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200847 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000848 Py_ssize_t next_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000849
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200850 if (PyUnicode_READY(pystr) == -1)
851 return NULL;
852
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200853 rval = PyList_New(0);
854 if (rval == NULL)
855 return NULL;
856
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200857 str = PyUnicode_DATA(pystr);
858 kind = PyUnicode_KIND(pystr);
859 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
860
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000861 /* skip whitespace after [ */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200862 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000863
864 /* only loop if the array is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200865 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
866 while (1) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000867
868 /* read any JSON term */
869 val = scan_once_unicode(s, pystr, idx, &next_idx);
870 if (val == NULL)
871 goto bail;
872
873 if (PyList_Append(rval, val) == -1)
874 goto bail;
875
876 Py_CLEAR(val);
877 idx = next_idx;
878
879 /* skip whitespace between term and , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200880 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000881
882 /* bail if the array is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200883 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000884 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200885 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200886 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000887 goto bail;
888 }
889 idx++;
890
891 /* skip whitespace after , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200892 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000893 }
894 }
895
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200896 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
897 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200898 raise_errmsg("Expecting value", pystr, end_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000899 goto bail;
900 }
901 *next_idx_ptr = idx + 1;
902 return rval;
903bail:
904 Py_XDECREF(val);
905 Py_DECREF(rval);
906 return NULL;
907}
908
909static PyObject *
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200910_parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
911 /* Read a JSON constant.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000912 constant is the constant string that was found
913 ("NaN", "Infinity", "-Infinity").
914 idx is the index of the first character of the constant
915 *next_idx_ptr is a return-by-reference index to the first character after
916 the constant.
917
918 Returns the result of parse_constant
919 */
920 PyObject *cstr;
921 PyObject *rval;
922 /* constant is "NaN", "Infinity", or "-Infinity" */
923 cstr = PyUnicode_InternFromString(constant);
924 if (cstr == NULL)
925 return NULL;
926
927 /* rval = parse_constant(constant) */
Victor Stinnerde4ae3d2016-12-04 22:59:09 +0100928 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200929 idx += PyUnicode_GET_LENGTH(cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000930 Py_DECREF(cstr);
931 *next_idx_ptr = idx;
932 return rval;
933}
934
935static PyObject *
936_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
937 /* Read a JSON number from PyUnicode pystr.
938 idx is the index of the first character of the number
939 *next_idx_ptr is a return-by-reference index to the first character after
940 the number.
941
942 Returns a new PyObject representation of that number:
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200943 PyLong, or PyFloat.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000944 May return other types if parse_int or parse_float are set
945 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200946 void *str;
947 int kind;
948 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000949 Py_ssize_t idx = start;
950 int is_float = 0;
951 PyObject *rval;
Antoine Pitrouf6454512011-04-25 19:16:06 +0200952 PyObject *numstr = NULL;
953 PyObject *custom_func;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000954
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200955 if (PyUnicode_READY(pystr) == -1)
956 return NULL;
957
958 str = PyUnicode_DATA(pystr);
959 kind = PyUnicode_KIND(pystr);
960 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
961
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000962 /* read a sign if it's there, make sure it's not the end of the string */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200963 if (PyUnicode_READ(kind, str, idx) == '-') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000964 idx++;
965 if (idx > end_idx) {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200966 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000967 return NULL;
968 }
969 }
970
971 /* read as many integer digits as we find as long as it doesn't start with 0 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200972 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000973 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200974 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000975 }
976 /* if it starts with 0 we only expect one integer digit */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200977 else if (PyUnicode_READ(kind, str, idx) == '0') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000978 idx++;
979 }
980 /* no integer digits, error */
981 else {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200982 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000983 return NULL;
984 }
985
986 /* if the next char is '.' followed by a digit then read all float digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200987 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000988 is_float = 1;
989 idx += 2;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200990 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000991 }
992
993 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200994 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000995 Py_ssize_t e_start = idx;
996 idx++;
997
998 /* read an exponent sign if present */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200999 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001000
1001 /* read all digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001002 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001003
1004 /* if we got a digit, then parse as float. if not, backtrack */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001005 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001006 is_float = 1;
1007 }
1008 else {
1009 idx = e_start;
1010 }
1011 }
1012
Antoine Pitrouf6454512011-04-25 19:16:06 +02001013 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
1014 custom_func = s->parse_float;
1015 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
1016 custom_func = s->parse_int;
1017 else
1018 custom_func = NULL;
1019
1020 if (custom_func) {
1021 /* copy the section we determined to be a number */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001022 numstr = PyUnicode_FromKindAndData(kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001023 (char*)str + kind * start,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001024 idx - start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001025 if (numstr == NULL)
1026 return NULL;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001027 rval = PyObject_CallFunctionObjArgs(custom_func, numstr, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001028 }
1029 else {
Antoine Pitrouf6454512011-04-25 19:16:06 +02001030 Py_ssize_t i, n;
1031 char *buf;
1032 /* Straight conversion to ASCII, to avoid costly conversion of
1033 decimal unicode digits (which cannot appear here) */
1034 n = idx - start;
1035 numstr = PyBytes_FromStringAndSize(NULL, n);
1036 if (numstr == NULL)
1037 return NULL;
1038 buf = PyBytes_AS_STRING(numstr);
1039 for (i = 0; i < n; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001040 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001041 }
1042 if (is_float)
1043 rval = PyFloat_FromString(numstr);
1044 else
1045 rval = PyLong_FromString(buf, NULL, 10);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001046 }
1047 Py_DECREF(numstr);
1048 *next_idx_ptr = idx;
1049 return rval;
1050}
1051
1052static PyObject *
1053scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1054{
1055 /* Read one JSON term (of any kind) from PyUnicode pystr.
1056 idx is the index of the first character of the term
1057 *next_idx_ptr is a return-by-reference index to the first character after
1058 the number.
1059
1060 Returns a new PyObject representation of the term.
1061 */
Ezio Melotti362b9512011-05-07 17:58:09 +03001062 PyObject *res;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001063 void *str;
1064 int kind;
1065 Py_ssize_t length;
1066
1067 if (PyUnicode_READY(pystr) == -1)
1068 return NULL;
1069
1070 str = PyUnicode_DATA(pystr);
1071 kind = PyUnicode_KIND(pystr);
1072 length = PyUnicode_GET_LENGTH(pystr);
1073
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001074 if (idx < 0) {
Benjamin Peterson9beee042014-04-14 11:46:51 -04001075 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001076 return NULL;
1077 }
1078 if (idx >= length) {
Ezio Melotti37623ab2013-01-03 08:44:15 +02001079 raise_stop_iteration(idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001080 return NULL;
1081 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001082
1083 switch (PyUnicode_READ(kind, str, idx)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001084 case '"':
1085 /* string */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001086 return scanstring_unicode(pystr, idx + 1, s->strict, next_idx_ptr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001087 case '{':
1088 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +03001089 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1090 "from a unicode string"))
1091 return NULL;
1092 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1093 Py_LeaveRecursiveCall();
1094 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001095 case '[':
1096 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +03001097 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1098 "from a unicode string"))
1099 return NULL;
1100 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1101 Py_LeaveRecursiveCall();
1102 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001103 case 'n':
1104 /* null */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001105 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001106 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001107 Py_RETURN_NONE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001108 }
1109 break;
1110 case 't':
1111 /* true */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001112 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001113 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001114 Py_RETURN_TRUE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001115 }
1116 break;
1117 case 'f':
1118 /* false */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001119 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1120 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1121 PyUnicode_READ(kind, str, idx + 3) == 's' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001122 PyUnicode_READ(kind, str, idx + 4) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001123 *next_idx_ptr = idx + 5;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001124 Py_RETURN_FALSE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001125 }
1126 break;
1127 case 'N':
1128 /* NaN */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001129 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001130 PyUnicode_READ(kind, str, idx + 2) == 'N') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001131 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1132 }
1133 break;
1134 case 'I':
1135 /* Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001136 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1137 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1138 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001139 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001140 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1141 PyUnicode_READ(kind, str, idx + 6) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001142 PyUnicode_READ(kind, str, idx + 7) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001143 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1144 }
1145 break;
1146 case '-':
1147 /* -Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001148 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001149 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1150 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001151 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001152 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001153 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1154 PyUnicode_READ(kind, str, idx + 7) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001155 PyUnicode_READ(kind, str, idx + 8) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001156 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1157 }
1158 break;
1159 }
1160 /* Didn't find a string, object, array, or named constant. Look for a number. */
1161 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1162}
1163
1164static PyObject *
1165scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1166{
1167 /* Python callable interface to scan_once_{str,unicode} */
1168 PyObject *pystr;
1169 PyObject *rval;
1170 Py_ssize_t idx;
1171 Py_ssize_t next_idx = -1;
1172 static char *kwlist[] = {"string", "idx", NULL};
1173 PyScannerObject *s;
1174 assert(PyScanner_Check(self));
1175 s = (PyScannerObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001176 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001177 return NULL;
1178
1179 if (PyUnicode_Check(pystr)) {
1180 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1181 }
1182 else {
1183 PyErr_Format(PyExc_TypeError,
1184 "first argument must be a string, not %.80s",
1185 Py_TYPE(pystr)->tp_name);
1186 return NULL;
1187 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001188 PyDict_Clear(s->memo);
1189 if (rval == NULL)
1190 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001191 return _build_rval_index_tuple(rval, next_idx);
1192}
1193
1194static PyObject *
1195scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1196{
1197 PyScannerObject *s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001198 PyObject *ctx;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001199 PyObject *strict;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001200 static char *kwlist[] = {"context", NULL};
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001201
1202 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001203 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001204
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001205 s = (PyScannerObject *)type->tp_alloc(type, 0);
1206 if (s == NULL) {
1207 return NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001208 }
1209
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001210 s->memo = PyDict_New();
1211 if (s->memo == NULL)
1212 goto bail;
1213
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001214 /* All of these will fail "gracefully" so we don't need to verify them */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001215 strict = PyObject_GetAttrString(ctx, "strict");
1216 if (strict == NULL)
1217 goto bail;
1218 s->strict = PyObject_IsTrue(strict);
1219 Py_DECREF(strict);
1220 if (s->strict < 0)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001221 goto bail;
1222 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1223 if (s->object_hook == NULL)
1224 goto bail;
1225 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1226 if (s->object_pairs_hook == NULL)
1227 goto bail;
1228 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1229 if (s->parse_float == NULL)
1230 goto bail;
1231 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1232 if (s->parse_int == NULL)
1233 goto bail;
1234 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1235 if (s->parse_constant == NULL)
1236 goto bail;
1237
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001238 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001239
1240bail:
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001241 Py_DECREF(s);
1242 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001243}
1244
1245PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1246
1247static
1248PyTypeObject PyScannerType = {
1249 PyVarObject_HEAD_INIT(NULL, 0)
1250 "_json.Scanner", /* tp_name */
1251 sizeof(PyScannerObject), /* tp_basicsize */
1252 0, /* tp_itemsize */
1253 scanner_dealloc, /* tp_dealloc */
1254 0, /* tp_print */
1255 0, /* tp_getattr */
1256 0, /* tp_setattr */
1257 0, /* tp_compare */
1258 0, /* tp_repr */
1259 0, /* tp_as_number */
1260 0, /* tp_as_sequence */
1261 0, /* tp_as_mapping */
1262 0, /* tp_hash */
1263 scanner_call, /* tp_call */
1264 0, /* tp_str */
1265 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1266 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1267 0, /* tp_as_buffer */
1268 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1269 scanner_doc, /* tp_doc */
1270 scanner_traverse, /* tp_traverse */
1271 scanner_clear, /* tp_clear */
1272 0, /* tp_richcompare */
1273 0, /* tp_weaklistoffset */
1274 0, /* tp_iter */
1275 0, /* tp_iternext */
1276 0, /* tp_methods */
1277 scanner_members, /* tp_members */
1278 0, /* tp_getset */
1279 0, /* tp_base */
1280 0, /* tp_dict */
1281 0, /* tp_descr_get */
1282 0, /* tp_descr_set */
1283 0, /* tp_dictoffset */
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001284 0, /* tp_init */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001285 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1286 scanner_new, /* tp_new */
1287 0,/* PyObject_GC_Del, */ /* tp_free */
1288};
1289
1290static PyObject *
1291encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1292{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001293 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1294
1295 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001296 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001297 PyObject *item_separator;
1298 int sort_keys, skipkeys, allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001299
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001300 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001301 &markers, &defaultfn, &encoder, &indent,
1302 &key_separator, &item_separator,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001303 &sort_keys, &skipkeys, &allow_nan))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001304 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001305
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001306 if (markers != Py_None && !PyDict_Check(markers)) {
1307 PyErr_Format(PyExc_TypeError,
1308 "make_encoder() argument 1 must be dict or None, "
1309 "not %.200s", Py_TYPE(markers)->tp_name);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001310 return NULL;
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001311 }
1312
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001313 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1314 if (s == NULL)
1315 return NULL;
1316
Antoine Pitrou781eba72009-12-08 15:57:31 +00001317 s->markers = markers;
1318 s->defaultfn = defaultfn;
1319 s->encoder = encoder;
1320 s->indent = indent;
1321 s->key_separator = key_separator;
1322 s->item_separator = item_separator;
1323 s->sort_keys = sort_keys;
1324 s->skipkeys = skipkeys;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001325 s->allow_nan = allow_nan;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001326 s->fast_encode = NULL;
1327 if (PyCFunction_Check(s->encoder)) {
1328 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1329 if (f == (PyCFunction)py_encode_basestring_ascii ||
1330 f == (PyCFunction)py_encode_basestring) {
1331 s->fast_encode = f;
1332 }
1333 }
Antoine Pitrou781eba72009-12-08 15:57:31 +00001334
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001335 Py_INCREF(s->markers);
1336 Py_INCREF(s->defaultfn);
1337 Py_INCREF(s->encoder);
1338 Py_INCREF(s->indent);
1339 Py_INCREF(s->key_separator);
1340 Py_INCREF(s->item_separator);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001341 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001342}
1343
1344static PyObject *
1345encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1346{
1347 /* Python callable interface to encode_listencode_obj */
1348 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1349 PyObject *obj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001350 Py_ssize_t indent_level;
1351 PyEncoderObject *s;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001352 _PyAccu acc;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001353
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001354 assert(PyEncoder_Check(self));
1355 s = (PyEncoderObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001356 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1357 &obj, &indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001358 return NULL;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001359 if (_PyAccu_Init(&acc))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001360 return NULL;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001361 if (encoder_listencode_obj(s, &acc, obj, indent_level)) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001362 _PyAccu_Destroy(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001363 return NULL;
1364 }
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001365 return _PyAccu_FinishAsList(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001366}
1367
1368static PyObject *
1369_encoded_const(PyObject *obj)
1370{
1371 /* Return the JSON string representation of None, True, False */
1372 if (obj == Py_None) {
1373 static PyObject *s_null = NULL;
1374 if (s_null == NULL) {
1375 s_null = PyUnicode_InternFromString("null");
1376 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001377 Py_XINCREF(s_null);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001378 return s_null;
1379 }
1380 else if (obj == Py_True) {
1381 static PyObject *s_true = NULL;
1382 if (s_true == NULL) {
1383 s_true = PyUnicode_InternFromString("true");
1384 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001385 Py_XINCREF(s_true);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001386 return s_true;
1387 }
1388 else if (obj == Py_False) {
1389 static PyObject *s_false = NULL;
1390 if (s_false == NULL) {
1391 s_false = PyUnicode_InternFromString("false");
1392 }
Alexey Izbyshev6f82bff2018-09-13 00:05:20 +03001393 Py_XINCREF(s_false);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001394 return s_false;
1395 }
1396 else {
1397 PyErr_SetString(PyExc_ValueError, "not a const");
1398 return NULL;
1399 }
1400}
1401
1402static PyObject *
1403encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1404{
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001405 /* Return the JSON representation of a PyFloat. */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001406 double i = PyFloat_AS_DOUBLE(obj);
1407 if (!Py_IS_FINITE(i)) {
1408 if (!s->allow_nan) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001409 PyErr_SetString(
1410 PyExc_ValueError,
1411 "Out of range float values are not JSON compliant"
1412 );
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001413 return NULL;
1414 }
1415 if (i > 0) {
1416 return PyUnicode_FromString("Infinity");
1417 }
1418 else if (i < 0) {
1419 return PyUnicode_FromString("-Infinity");
1420 }
1421 else {
1422 return PyUnicode_FromString("NaN");
1423 }
1424 }
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001425 return PyFloat_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001426}
1427
1428static PyObject *
1429encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1430{
1431 /* Return the JSON representation of a string */
Oren Milman2b382dd2017-09-24 12:07:12 +03001432 PyObject *encoded;
1433
1434 if (s->fast_encode) {
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001435 return s->fast_encode(NULL, obj);
Oren Milman2b382dd2017-09-24 12:07:12 +03001436 }
1437 encoded = PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1438 if (encoded != NULL && !PyUnicode_Check(encoded)) {
1439 PyErr_Format(PyExc_TypeError,
1440 "encoder() must return a string, not %.80s",
1441 Py_TYPE(encoded)->tp_name);
1442 Py_DECREF(encoded);
1443 return NULL;
1444 }
1445 return encoded;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001446}
1447
1448static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001449_steal_accumulate(_PyAccu *acc, PyObject *stolen)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001450{
1451 /* Append stolen and then decrement its reference count */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001452 int rval = _PyAccu_Accumulate(acc, stolen);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001453 Py_DECREF(stolen);
1454 return rval;
1455}
1456
1457static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001458encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001459 PyObject *obj, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001460{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001461 /* Encode Python object obj to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001462 PyObject *newobj;
1463 int rv;
1464
1465 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1466 PyObject *cstr = _encoded_const(obj);
1467 if (cstr == NULL)
1468 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001469 return _steal_accumulate(acc, cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001470 }
1471 else if (PyUnicode_Check(obj))
1472 {
1473 PyObject *encoded = encoder_encode_string(s, obj);
1474 if (encoded == NULL)
1475 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001476 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001477 }
1478 else if (PyLong_Check(obj)) {
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001479 PyObject *encoded = PyLong_Type.tp_str(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001480 if (encoded == NULL)
1481 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001482 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001483 }
1484 else if (PyFloat_Check(obj)) {
1485 PyObject *encoded = encoder_encode_float(s, obj);
1486 if (encoded == NULL)
1487 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001488 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001489 }
1490 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001491 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1492 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001493 rv = encoder_listencode_list(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001494 Py_LeaveRecursiveCall();
1495 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001496 }
1497 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001498 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1499 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001500 rv = encoder_listencode_dict(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001501 Py_LeaveRecursiveCall();
1502 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001503 }
1504 else {
1505 PyObject *ident = NULL;
1506 if (s->markers != Py_None) {
1507 int has_key;
1508 ident = PyLong_FromVoidPtr(obj);
1509 if (ident == NULL)
1510 return -1;
1511 has_key = PyDict_Contains(s->markers, ident);
1512 if (has_key) {
1513 if (has_key != -1)
1514 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1515 Py_DECREF(ident);
1516 return -1;
1517 }
1518 if (PyDict_SetItem(s->markers, ident, obj)) {
1519 Py_DECREF(ident);
1520 return -1;
1521 }
1522 }
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001523 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001524 if (newobj == NULL) {
1525 Py_XDECREF(ident);
1526 return -1;
1527 }
Ezio Melotti13672652011-05-11 01:02:56 +03001528
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001529 if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
1530 Py_DECREF(newobj);
1531 Py_XDECREF(ident);
Ezio Melotti13672652011-05-11 01:02:56 +03001532 return -1;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001533 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001534 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001535 Py_LeaveRecursiveCall();
1536
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001537 Py_DECREF(newobj);
1538 if (rv) {
1539 Py_XDECREF(ident);
1540 return -1;
1541 }
1542 if (ident != NULL) {
1543 if (PyDict_DelItem(s->markers, ident)) {
1544 Py_XDECREF(ident);
1545 return -1;
1546 }
1547 Py_XDECREF(ident);
1548 }
1549 return rv;
1550 }
1551}
1552
1553static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001554encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001555 PyObject *dct, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001556{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001557 /* Encode Python dict dct a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001558 static PyObject *open_dict = NULL;
1559 static PyObject *close_dict = NULL;
1560 static PyObject *empty_dict = NULL;
1561 PyObject *kstr = NULL;
1562 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001563 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001564 PyObject *items;
1565 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001566 Py_ssize_t idx;
1567
1568 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1569 open_dict = PyUnicode_InternFromString("{");
1570 close_dict = PyUnicode_InternFromString("}");
1571 empty_dict = PyUnicode_InternFromString("{}");
1572 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1573 return -1;
1574 }
Serhiy Storchaka8cbc51a2017-01-13 08:38:15 +02001575 if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001576 return _PyAccu_Accumulate(acc, empty_dict);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001577
1578 if (s->markers != Py_None) {
1579 int has_key;
1580 ident = PyLong_FromVoidPtr(dct);
1581 if (ident == NULL)
1582 goto bail;
1583 has_key = PyDict_Contains(s->markers, ident);
1584 if (has_key) {
1585 if (has_key != -1)
1586 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1587 goto bail;
1588 }
1589 if (PyDict_SetItem(s->markers, ident, dct)) {
1590 goto bail;
1591 }
1592 }
1593
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001594 if (_PyAccu_Accumulate(acc, open_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001595 goto bail;
1596
1597 if (s->indent != Py_None) {
1598 /* TODO: DOES NOT RUN */
1599 indent_level += 1;
1600 /*
1601 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1602 separator = _item_separator + newline_indent
1603 buf += newline_indent
1604 */
1605 }
1606
Benjamin Peterson501182a2015-05-02 22:28:04 -04001607 items = PyMapping_Items(dct);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001608 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001609 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001610 if (s->sort_keys && PyList_Sort(items) < 0) {
1611 Py_DECREF(items);
Benjamin Peterson501182a2015-05-02 22:28:04 -04001612 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001613 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001614 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001615 Py_DECREF(items);
1616 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001617 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001618 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001619 while ((item = PyIter_Next(it)) != NULL) {
1620 PyObject *encoded, *key, *value;
Serhiy Storchakafff9a312017-03-21 08:53:25 +02001621 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001622 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1623 goto bail;
1624 }
1625 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001626 if (PyUnicode_Check(key)) {
1627 Py_INCREF(key);
1628 kstr = key;
1629 }
1630 else if (PyFloat_Check(key)) {
1631 kstr = encoder_encode_float(s, key);
1632 if (kstr == NULL)
1633 goto bail;
1634 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001635 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001636 /* This must come before the PyLong_Check because
1637 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001638 kstr = _encoded_const(key);
1639 if (kstr == NULL)
1640 goto bail;
1641 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001642 else if (PyLong_Check(key)) {
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001643 kstr = PyLong_Type.tp_str(key);
Ethan Furmana4998a72013-08-10 13:01:45 -07001644 if (kstr == NULL) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001645 goto bail;
Ethan Furmana4998a72013-08-10 13:01:45 -07001646 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001647 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001648 else if (s->skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001649 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001650 continue;
1651 }
1652 else {
Serhiy Storchakacfa797c2017-11-25 17:38:20 +02001653 PyErr_Format(PyExc_TypeError,
1654 "keys must be str, int, float, bool or None, "
1655 "not %.100s", key->ob_type->tp_name);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001656 goto bail;
1657 }
1658
1659 if (idx) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001660 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001661 goto bail;
1662 }
1663
1664 encoded = encoder_encode_string(s, kstr);
1665 Py_CLEAR(kstr);
1666 if (encoded == NULL)
1667 goto bail;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001668 if (_PyAccu_Accumulate(acc, encoded)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001669 Py_DECREF(encoded);
1670 goto bail;
1671 }
1672 Py_DECREF(encoded);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001673 if (_PyAccu_Accumulate(acc, s->key_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001674 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001675
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001676 value = PyTuple_GET_ITEM(item, 1);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001677 if (encoder_listencode_obj(s, acc, value, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001678 goto bail;
1679 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001680 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001681 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001682 if (PyErr_Occurred())
1683 goto bail;
1684 Py_CLEAR(it);
1685
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001686 if (ident != NULL) {
1687 if (PyDict_DelItem(s->markers, ident))
1688 goto bail;
1689 Py_CLEAR(ident);
1690 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001691 /* TODO DOES NOT RUN; dead code
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001692 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001693 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001694
1695 yield '\n' + (' ' * (_indent * _current_indent_level))
1696 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001697 if (_PyAccu_Accumulate(acc, close_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001698 goto bail;
1699 return 0;
1700
1701bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001702 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001703 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001704 Py_XDECREF(kstr);
1705 Py_XDECREF(ident);
1706 return -1;
1707}
1708
1709
1710static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001711encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001712 PyObject *seq, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001713{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001714 /* Encode Python list seq to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001715 static PyObject *open_array = NULL;
1716 static PyObject *close_array = NULL;
1717 static PyObject *empty_array = NULL;
1718 PyObject *ident = NULL;
1719 PyObject *s_fast = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001720 Py_ssize_t i;
1721
1722 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1723 open_array = PyUnicode_InternFromString("[");
1724 close_array = PyUnicode_InternFromString("]");
1725 empty_array = PyUnicode_InternFromString("[]");
1726 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1727 return -1;
1728 }
1729 ident = NULL;
1730 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1731 if (s_fast == NULL)
1732 return -1;
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001733 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001734 Py_DECREF(s_fast);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001735 return _PyAccu_Accumulate(acc, empty_array);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001736 }
1737
1738 if (s->markers != Py_None) {
1739 int has_key;
1740 ident = PyLong_FromVoidPtr(seq);
1741 if (ident == NULL)
1742 goto bail;
1743 has_key = PyDict_Contains(s->markers, ident);
1744 if (has_key) {
1745 if (has_key != -1)
1746 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1747 goto bail;
1748 }
1749 if (PyDict_SetItem(s->markers, ident, seq)) {
1750 goto bail;
1751 }
1752 }
1753
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001754 if (_PyAccu_Accumulate(acc, open_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001755 goto bail;
1756 if (s->indent != Py_None) {
1757 /* TODO: DOES NOT RUN */
1758 indent_level += 1;
1759 /*
1760 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1761 separator = _item_separator + newline_indent
1762 buf += newline_indent
1763 */
1764 }
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001765 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1766 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001767 if (i) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001768 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001769 goto bail;
1770 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001771 if (encoder_listencode_obj(s, acc, obj, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001772 goto bail;
1773 }
1774 if (ident != NULL) {
1775 if (PyDict_DelItem(s->markers, ident))
1776 goto bail;
1777 Py_CLEAR(ident);
1778 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001779
1780 /* TODO: DOES NOT RUN
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001781 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001782 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001783
1784 yield '\n' + (' ' * (_indent * _current_indent_level))
1785 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001786 if (_PyAccu_Accumulate(acc, close_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001787 goto bail;
1788 Py_DECREF(s_fast);
1789 return 0;
1790
1791bail:
1792 Py_XDECREF(ident);
1793 Py_DECREF(s_fast);
1794 return -1;
1795}
1796
1797static void
1798encoder_dealloc(PyObject *self)
1799{
INADA Naokia6296d32017-08-24 14:55:17 +09001800 /* bpo-31095: UnTrack is needed before calling any callbacks */
1801 PyObject_GC_UnTrack(self);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001802 encoder_clear(self);
1803 Py_TYPE(self)->tp_free(self);
1804}
1805
1806static int
1807encoder_traverse(PyObject *self, visitproc visit, void *arg)
1808{
1809 PyEncoderObject *s;
1810 assert(PyEncoder_Check(self));
1811 s = (PyEncoderObject *)self;
1812 Py_VISIT(s->markers);
1813 Py_VISIT(s->defaultfn);
1814 Py_VISIT(s->encoder);
1815 Py_VISIT(s->indent);
1816 Py_VISIT(s->key_separator);
1817 Py_VISIT(s->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001818 return 0;
1819}
1820
1821static int
1822encoder_clear(PyObject *self)
1823{
1824 /* Deallocate Encoder */
1825 PyEncoderObject *s;
1826 assert(PyEncoder_Check(self));
1827 s = (PyEncoderObject *)self;
1828 Py_CLEAR(s->markers);
1829 Py_CLEAR(s->defaultfn);
1830 Py_CLEAR(s->encoder);
1831 Py_CLEAR(s->indent);
1832 Py_CLEAR(s->key_separator);
1833 Py_CLEAR(s->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001834 return 0;
1835}
1836
1837PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1838
1839static
1840PyTypeObject PyEncoderType = {
1841 PyVarObject_HEAD_INIT(NULL, 0)
1842 "_json.Encoder", /* tp_name */
1843 sizeof(PyEncoderObject), /* tp_basicsize */
1844 0, /* tp_itemsize */
1845 encoder_dealloc, /* tp_dealloc */
1846 0, /* tp_print */
1847 0, /* tp_getattr */
1848 0, /* tp_setattr */
1849 0, /* tp_compare */
1850 0, /* tp_repr */
1851 0, /* tp_as_number */
1852 0, /* tp_as_sequence */
1853 0, /* tp_as_mapping */
1854 0, /* tp_hash */
1855 encoder_call, /* tp_call */
1856 0, /* tp_str */
1857 0, /* tp_getattro */
1858 0, /* tp_setattro */
1859 0, /* tp_as_buffer */
1860 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1861 encoder_doc, /* tp_doc */
1862 encoder_traverse, /* tp_traverse */
1863 encoder_clear, /* tp_clear */
1864 0, /* tp_richcompare */
1865 0, /* tp_weaklistoffset */
1866 0, /* tp_iter */
1867 0, /* tp_iternext */
1868 0, /* tp_methods */
1869 encoder_members, /* tp_members */
1870 0, /* tp_getset */
1871 0, /* tp_base */
1872 0, /* tp_dict */
1873 0, /* tp_descr_get */
1874 0, /* tp_descr_set */
1875 0, /* tp_dictoffset */
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001876 0, /* tp_init */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001877 0, /* tp_alloc */
1878 encoder_new, /* tp_new */
1879 0, /* tp_free */
1880};
1881
1882static PyMethodDef speedups_methods[] = {
1883 {"encode_basestring_ascii",
1884 (PyCFunction)py_encode_basestring_ascii,
1885 METH_O,
1886 pydoc_encode_basestring_ascii},
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001887 {"encode_basestring",
1888 (PyCFunction)py_encode_basestring,
1889 METH_O,
1890 pydoc_encode_basestring},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001891 {"scanstring",
1892 (PyCFunction)py_scanstring,
1893 METH_VARARGS,
1894 pydoc_scanstring},
Christian Heimes90540002008-05-08 14:29:10 +00001895 {NULL, NULL, 0, NULL}
1896};
1897
1898PyDoc_STRVAR(module_doc,
1899"json speedups\n");
1900
Martin v. Löwis1a214512008-06-11 05:26:20 +00001901static struct PyModuleDef jsonmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001902 PyModuleDef_HEAD_INIT,
1903 "_json",
1904 module_doc,
1905 -1,
1906 speedups_methods,
1907 NULL,
1908 NULL,
1909 NULL,
1910 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001911};
1912
Victor Stinnerf024d262015-03-17 17:48:27 +01001913PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001914PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00001915{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001916 PyObject *m = PyModule_Create(&jsonmodule);
1917 if (!m)
1918 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001919 if (PyType_Ready(&PyScannerType) < 0)
1920 goto fail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001921 if (PyType_Ready(&PyEncoderType) < 0)
1922 goto fail;
1923 Py_INCREF((PyObject*)&PyScannerType);
1924 if (PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType) < 0) {
1925 Py_DECREF((PyObject*)&PyScannerType);
1926 goto fail;
1927 }
1928 Py_INCREF((PyObject*)&PyEncoderType);
1929 if (PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType) < 0) {
1930 Py_DECREF((PyObject*)&PyEncoderType);
1931 goto fail;
1932 }
1933 return m;
1934 fail:
1935 Py_DECREF(m);
1936 return NULL;
Christian Heimes90540002008-05-08 14:29:10 +00001937}