blob: 769696d9d6855869f545665483a9a991de4014b5 [file] [log] [blame]
Eric Snow2ebc5ce2017-09-07 23:51:28 -06001
2/* Core extension modules are built-in on some platforms (e.g. Windows). */
3#ifdef Py_BUILD_CORE
Eric Snowfc1bf872017-09-11 18:30:43 -07004#define Py_BUILD_CORE_BUILTIN
Eric Snow2ebc5ce2017-09-07 23:51:28 -06005#undef Py_BUILD_CORE
6#endif
7
Christian Heimes90540002008-05-08 14:29:10 +00008#include "Python.h"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00009#include "structmember.h"
Antoine Pitroud0acb412012-03-22 14:42:18 +010010#include "accu.h"
11
/* UNUSED tags a declaration as intentionally unused.  On GCC-compatible
   compilers it expands to the "unused" attribute; elsewhere it expands
   to nothing. */
#if defined(__GNUC__)
#  define UNUSED __attribute__((__unused__))
#else
#  define UNUSED
#endif
17
18#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
19#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
20#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
21#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
22
23static PyTypeObject PyScannerType;
24static PyTypeObject PyEncoderType;
25
26typedef struct _PyScannerObject {
27 PyObject_HEAD
Segev Finer541bd282017-07-13 08:52:08 +030028 signed char strict;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000029 PyObject *object_hook;
30 PyObject *object_pairs_hook;
31 PyObject *parse_float;
32 PyObject *parse_int;
33 PyObject *parse_constant;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +000034 PyObject *memo;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000035} PyScannerObject;
36
37static PyMemberDef scanner_members[] = {
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030038 {"strict", T_BOOL, offsetof(PyScannerObject, strict), READONLY, "strict"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000039 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
40 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
41 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
42 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
43 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
44 {NULL}
45};
46
47typedef struct _PyEncoderObject {
48 PyObject_HEAD
49 PyObject *markers;
50 PyObject *defaultfn;
51 PyObject *encoder;
52 PyObject *indent;
53 PyObject *key_separator;
54 PyObject *item_separator;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030055 char sort_keys;
56 char skipkeys;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000057 int allow_nan;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030058 PyCFunction fast_encode;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000059} PyEncoderObject;
60
61static PyMemberDef encoder_members[] = {
62 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
63 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
64 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
65 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
66 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
67 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030068 {"sort_keys", T_BOOL, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
69 {"skipkeys", T_BOOL, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000070 {NULL}
71};
72
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020073static PyObject *
74join_list_unicode(PyObject *lst)
75{
76 /* return u''.join(lst) */
77 static PyObject *sep = NULL;
78 if (sep == NULL) {
79 sep = PyUnicode_FromStringAndSize("", 0);
80 if (sep == NULL)
81 return NULL;
82 }
83 return PyUnicode_Join(sep, lst);
84}
85
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020086/* Forward decls */
87
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000088static PyObject *
89ascii_escape_unicode(PyObject *pystr);
90static PyObject *
91py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
92void init_json(void);
93static PyObject *
94scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
95static PyObject *
96_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
97static PyObject *
98scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000099static void
100scanner_dealloc(PyObject *self);
101static int
102scanner_clear(PyObject *self);
103static PyObject *
104encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000105static void
106encoder_dealloc(PyObject *self);
107static int
108encoder_clear(PyObject *self);
109static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200110encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000111static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200112encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000113static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200114encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000115static PyObject *
Hirokazu Yamamotofecf5d12009-05-02 15:55:19 +0000116_encoded_const(PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000117static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200118raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000119static PyObject *
120encoder_encode_string(PyEncoderObject *s, PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000121static PyObject *
122encoder_encode_float(PyEncoderObject *s, PyObject *obj);
123
/* S_CHAR(c): true when c is a printable ASCII character (' ' .. '~') that
   can be copied into a JSON string without escaping, i.e. it is neither a
   backslash nor a double quote.  The argument is parenthesised so that an
   expression argument is parsed as a single operand; note that it is still
   evaluated more than once. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')

/* IS_WHITESPACE(c): true for the four characters treated as whitespace
   here: space, tab, line feed and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
Christian Heimes90540002008-05-08 14:29:10 +0000126
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000127static Py_ssize_t
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200128ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000129{
130 /* Escape unicode code point c to ASCII escape sequences
131 in char *output. output must have at least 12 bytes unused to
132 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000133 output[chars++] = '\\';
134 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000135 case '\\': output[chars++] = c; break;
136 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000137 case '\b': output[chars++] = 'b'; break;
138 case '\f': output[chars++] = 'f'; break;
139 case '\n': output[chars++] = 'n'; break;
140 case '\r': output[chars++] = 'r'; break;
141 case '\t': output[chars++] = 't'; break;
142 default:
Christian Heimes90540002008-05-08 14:29:10 +0000143 if (c >= 0x10000) {
144 /* UTF-16 surrogate pair */
Victor Stinner76df43d2012-10-30 01:42:39 +0100145 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000146 output[chars++] = 'u';
Victor Stinner76df43d2012-10-30 01:42:39 +0100147 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
148 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
149 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
150 output[chars++] = Py_hexdigits[(v ) & 0xf];
151 c = Py_UNICODE_LOW_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000152 output[chars++] = '\\';
153 }
Christian Heimes90540002008-05-08 14:29:10 +0000154 output[chars++] = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200155 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
156 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
157 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
158 output[chars++] = Py_hexdigits[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000159 }
160 return chars;
161}
162
163static PyObject *
164ascii_escape_unicode(PyObject *pystr)
165{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000166 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000167 Py_ssize_t i;
168 Py_ssize_t input_chars;
169 Py_ssize_t output_size;
170 Py_ssize_t chars;
171 PyObject *rval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200172 void *input;
173 unsigned char *output;
174 int kind;
Christian Heimes90540002008-05-08 14:29:10 +0000175
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200176 if (PyUnicode_READY(pystr) == -1)
177 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000178
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200179 input_chars = PyUnicode_GET_LENGTH(pystr);
180 input = PyUnicode_DATA(pystr);
181 kind = PyUnicode_KIND(pystr);
182
183 /* Compute the output size */
184 for (i = 0, output_size = 2; i < input_chars; i++) {
185 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500186 Py_ssize_t d;
187 if (S_CHAR(c)) {
188 d = 1;
189 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200190 else {
191 switch(c) {
Victor Stinnerd9c06312011-10-11 21:56:19 +0200192 case '\\': case '"': case '\b': case '\f':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200193 case '\n': case '\r': case '\t':
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500194 d = 2; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200195 default:
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500196 d = c >= 0x10000 ? 12 : 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200197 }
198 }
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500199 if (output_size > PY_SSIZE_T_MAX - d) {
200 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
201 return NULL;
202 }
203 output_size += d;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200204 }
205
206 rval = PyUnicode_New(output_size, 127);
Christian Heimes90540002008-05-08 14:29:10 +0000207 if (rval == NULL) {
208 return NULL;
209 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200210 output = PyUnicode_1BYTE_DATA(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000211 chars = 0;
212 output[chars++] = '"';
213 for (i = 0; i < input_chars; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200214 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Christian Heimes90540002008-05-08 14:29:10 +0000215 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000216 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000217 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000218 else {
219 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000220 }
Christian Heimes90540002008-05-08 14:29:10 +0000221 }
222 output[chars++] = '"';
Christian Heimesf402e922013-01-03 09:21:55 +0100223#ifdef Py_DEBUG
Victor Stinner8f825062012-04-27 13:55:39 +0200224 assert(_PyUnicode_CheckConsistency(rval, 1));
Christian Heimesf402e922013-01-03 09:21:55 +0100225#endif
Christian Heimes90540002008-05-08 14:29:10 +0000226 return rval;
227}
228
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100229static PyObject *
230escape_unicode(PyObject *pystr)
231{
232 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
233 Py_ssize_t i;
234 Py_ssize_t input_chars;
235 Py_ssize_t output_size;
236 Py_ssize_t chars;
237 PyObject *rval;
238 void *input;
239 int kind;
240 Py_UCS4 maxchar;
241
242 if (PyUnicode_READY(pystr) == -1)
243 return NULL;
244
245 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
246 input_chars = PyUnicode_GET_LENGTH(pystr);
247 input = PyUnicode_DATA(pystr);
248 kind = PyUnicode_KIND(pystr);
249
250 /* Compute the output size */
251 for (i = 0, output_size = 2; i < input_chars; i++) {
252 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500253 Py_ssize_t d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100254 switch (c) {
255 case '\\': case '"': case '\b': case '\f':
256 case '\n': case '\r': case '\t':
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500257 d = 2;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100258 break;
259 default:
260 if (c <= 0x1f)
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500261 d = 6;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100262 else
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500263 d = 1;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100264 }
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500265 if (output_size > PY_SSIZE_T_MAX - d) {
266 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
267 return NULL;
268 }
269 output_size += d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100270 }
271
272 rval = PyUnicode_New(output_size, maxchar);
273 if (rval == NULL)
274 return NULL;
275
276 kind = PyUnicode_KIND(rval);
277
278#define ENCODE_OUTPUT do { \
279 chars = 0; \
280 output[chars++] = '"'; \
281 for (i = 0; i < input_chars; i++) { \
282 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
283 switch (c) { \
284 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
285 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
286 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
287 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
288 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
289 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
290 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
291 default: \
292 if (c <= 0x1f) { \
293 output[chars++] = '\\'; \
294 output[chars++] = 'u'; \
295 output[chars++] = '0'; \
296 output[chars++] = '0'; \
297 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
298 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
299 } else { \
300 output[chars++] = c; \
301 } \
302 } \
303 } \
304 output[chars++] = '"'; \
305 } while (0)
306
307 if (kind == PyUnicode_1BYTE_KIND) {
308 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
309 ENCODE_OUTPUT;
310 } else if (kind == PyUnicode_2BYTE_KIND) {
311 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
312 ENCODE_OUTPUT;
313 } else {
314 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
315 assert(kind == PyUnicode_4BYTE_KIND);
316 ENCODE_OUTPUT;
317 }
318#undef ENCODE_OUTPUT
319
320#ifdef Py_DEBUG
321 assert(_PyUnicode_CheckConsistency(rval, 1));
322#endif
323 return rval;
324}
325
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000326static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200327raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
Christian Heimes90540002008-05-08 14:29:10 +0000328{
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200329 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
330 static PyObject *JSONDecodeError = NULL;
331 PyObject *exc;
332 if (JSONDecodeError == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000333 PyObject *decoder = PyImport_ImportModule("json.decoder");
334 if (decoder == NULL)
335 return;
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200336 JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000337 Py_DECREF(decoder);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200338 if (JSONDecodeError == NULL)
Christian Heimes90540002008-05-08 14:29:10 +0000339 return;
Christian Heimes90540002008-05-08 14:29:10 +0000340 }
Victor Stinner4c381542016-12-09 00:33:39 +0100341 exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200342 if (exc) {
343 PyErr_SetObject(JSONDecodeError, exc);
344 Py_DECREF(exc);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000345 }
Christian Heimes90540002008-05-08 14:29:10 +0000346}
347
Ezio Melotti37623ab2013-01-03 08:44:15 +0200348static void
349raise_stop_iteration(Py_ssize_t idx)
350{
351 PyObject *value = PyLong_FromSsize_t(idx);
352 if (value != NULL) {
353 PyErr_SetObject(PyExc_StopIteration, value);
354 Py_DECREF(value);
355 }
356}
357
Christian Heimes90540002008-05-08 14:29:10 +0000358static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000359_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
360 /* return (rval, idx) tuple, stealing reference to rval */
361 PyObject *tpl;
362 PyObject *pyidx;
363 /*
364 steal a reference to rval, returns (rval, idx)
365 */
366 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000367 return NULL;
368 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000369 pyidx = PyLong_FromSsize_t(idx);
370 if (pyidx == NULL) {
371 Py_DECREF(rval);
372 return NULL;
373 }
374 tpl = PyTuple_New(2);
375 if (tpl == NULL) {
376 Py_DECREF(pyidx);
377 Py_DECREF(rval);
378 return NULL;
379 }
380 PyTuple_SET_ITEM(tpl, 0, rval);
381 PyTuple_SET_ITEM(tpl, 1, pyidx);
382 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000383}
384
/* Move a pending `chunk` onto the `chunks` list, creating the list on
   first use, and leave `chunk` NULL afterwards.  Jumps to `bail` if the
   list cannot be created or the append fails.  Expects `chunk`, `chunks`
   and a `bail` label in the enclosing function, and is written to be
   used as a statement without a trailing semicolon. */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        int append_failed_; \
        if (chunks == NULL) { \
            chunks = PyList_New(0); \
            if (chunks == NULL) { \
                goto bail; \
            } \
        } \
        append_failed_ = PyList_Append(chunks, chunk); \
        Py_CLEAR(chunk); \
        if (append_failed_) { \
            goto bail; \
        } \
    }
399
Christian Heimes90540002008-05-08 14:29:10 +0000400static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000401scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000402{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000403 /* Read the JSON string from PyUnicode pystr.
404 end is the index of the first character after the quote.
405 if strict is zero then literal control characters are allowed
406 *next_end_ptr is a return-by-reference index of the character
407 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000408
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000409 Return value is a new PyUnicode
410 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000411 PyObject *rval = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200412 Py_ssize_t len;
Christian Heimes90540002008-05-08 14:29:10 +0000413 Py_ssize_t begin = end - 1;
Brett Cannonb94767f2011-02-22 20:15:44 +0000414 Py_ssize_t next /* = begin */;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200415 const void *buf;
416 int kind;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000417 PyObject *chunks = NULL;
418 PyObject *chunk = NULL;
419
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200420 if (PyUnicode_READY(pystr) == -1)
421 return 0;
422
423 len = PyUnicode_GET_LENGTH(pystr);
424 buf = PyUnicode_DATA(pystr);
425 kind = PyUnicode_KIND(pystr);
426
Ezio Melotti37623ab2013-01-03 08:44:15 +0200427 if (end < 0 || len < end) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000428 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
429 goto bail;
430 }
Christian Heimes90540002008-05-08 14:29:10 +0000431 while (1) {
432 /* Find the end of the string or the next escape */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200433 Py_UCS4 c = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000434 for (next = end; next < len; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200435 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000436 if (c == '"' || c == '\\') {
437 break;
438 }
439 else if (strict && c <= 0x1f) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000440 raise_errmsg("Invalid control character at", pystr, next);
Christian Heimes90540002008-05-08 14:29:10 +0000441 goto bail;
442 }
443 }
444 if (!(c == '"' || c == '\\')) {
445 raise_errmsg("Unterminated string starting at", pystr, begin);
446 goto bail;
447 }
448 /* Pick up this chunk if it's not zero length */
449 if (next != end) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000450 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200451 chunk = PyUnicode_FromKindAndData(
452 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200453 (char*)buf + kind * end,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200454 next - end);
Christian Heimes90540002008-05-08 14:29:10 +0000455 if (chunk == NULL) {
456 goto bail;
457 }
Christian Heimes90540002008-05-08 14:29:10 +0000458 }
459 next++;
460 if (c == '"') {
461 end = next;
462 break;
463 }
464 if (next == len) {
465 raise_errmsg("Unterminated string starting at", pystr, begin);
466 goto bail;
467 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200468 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000469 if (c != 'u') {
470 /* Non-unicode backslash escapes */
471 end = next + 1;
472 switch (c) {
473 case '"': break;
474 case '\\': break;
475 case '/': break;
476 case 'b': c = '\b'; break;
477 case 'f': c = '\f'; break;
478 case 'n': c = '\n'; break;
479 case 'r': c = '\r'; break;
480 case 't': c = '\t'; break;
481 default: c = 0;
482 }
483 if (c == 0) {
484 raise_errmsg("Invalid \\escape", pystr, end - 2);
485 goto bail;
486 }
487 }
488 else {
489 c = 0;
490 next++;
491 end = next + 4;
492 if (end >= len) {
493 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
494 goto bail;
495 }
496 /* Decode 4 hex digits */
497 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200498 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000499 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000500 switch (digit) {
501 case '0': case '1': case '2': case '3': case '4':
502 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000503 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000504 case 'a': case 'b': case 'c': case 'd': case 'e':
505 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000506 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000507 case 'A': case 'B': case 'C': case 'D': case 'E':
508 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000509 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000510 default:
511 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
512 goto bail;
513 }
514 }
Christian Heimes90540002008-05-08 14:29:10 +0000515 /* Surrogate pair */
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200516 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
517 PyUnicode_READ(kind, buf, next++) == '\\' &&
518 PyUnicode_READ(kind, buf, next++) == 'u') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200519 Py_UCS4 c2 = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000520 end += 6;
521 /* Decode 4 hex digits */
522 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200523 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000524 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000525 switch (digit) {
526 case '0': case '1': case '2': case '3': case '4':
527 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000528 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000529 case 'a': case 'b': case 'c': case 'd': case 'e':
530 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000531 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000532 case 'A': case 'B': case 'C': case 'D': case 'E':
533 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000534 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000535 default:
536 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
537 goto bail;
538 }
539 }
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200540 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
541 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
542 else
543 end -= 6;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000544 }
Christian Heimes90540002008-05-08 14:29:10 +0000545 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000546 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200547 chunk = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &c, 1);
Christian Heimes90540002008-05-08 14:29:10 +0000548 if (chunk == NULL) {
549 goto bail;
550 }
Christian Heimes90540002008-05-08 14:29:10 +0000551 }
552
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000553 if (chunks == NULL) {
554 if (chunk != NULL)
555 rval = chunk;
556 else
557 rval = PyUnicode_FromStringAndSize("", 0);
Christian Heimes90540002008-05-08 14:29:10 +0000558 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000559 else {
560 APPEND_OLD_CHUNK
561 rval = join_list_unicode(chunks);
562 if (rval == NULL) {
563 goto bail;
564 }
565 Py_CLEAR(chunks);
566 }
567
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000568 *next_end_ptr = end;
569 return rval;
Christian Heimes90540002008-05-08 14:29:10 +0000570bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000571 *next_end_ptr = -1;
Christian Heimes90540002008-05-08 14:29:10 +0000572 Py_XDECREF(chunks);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000573 Py_XDECREF(chunk);
Christian Heimes90540002008-05-08 14:29:10 +0000574 return NULL;
575}
576
577PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000578 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000579 "\n"
580 "Scan the string s for a JSON string. End is the index of the\n"
581 "character in s after the quote that started the JSON string.\n"
582 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
583 "on attempt to decode an invalid string. If strict is False then literal\n"
584 "control characters are allowed in the string.\n"
585 "\n"
586 "Returns a tuple of the decoded string and the index of the character in s\n"
587 "after the end quote."
588);
Christian Heimes90540002008-05-08 14:29:10 +0000589
590static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000591py_scanstring(PyObject* self UNUSED, PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000592{
593 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000594 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000595 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000596 Py_ssize_t next_end = -1;
597 int strict = 1;
Antoine Pitroucbb02842012-12-01 19:34:16 +0100598 if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000599 return NULL;
600 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000601 if (PyUnicode_Check(pystr)) {
602 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000603 }
604 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000605 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000606 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000607 Py_TYPE(pystr)->tp_name);
608 return NULL;
609 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000610 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000611}
612
613PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000614 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000615 "\n"
616 "Return an ASCII-only JSON representation of a Python string"
617);
Christian Heimes90540002008-05-08 14:29:10 +0000618
619static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000620py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000621{
622 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000623 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000624 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000625 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000626 rval = ascii_escape_unicode(pystr);
627 }
628 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000629 PyErr_Format(PyExc_TypeError,
630 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000631 Py_TYPE(pystr)->tp_name);
632 return NULL;
633 }
Christian Heimes90540002008-05-08 14:29:10 +0000634 return rval;
635}
636
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100637
638PyDoc_STRVAR(pydoc_encode_basestring,
639 "encode_basestring(string) -> string\n"
640 "\n"
641 "Return a JSON representation of a Python string"
642);
643
644static PyObject *
645py_encode_basestring(PyObject* self UNUSED, PyObject *pystr)
646{
647 PyObject *rval;
648 /* Return a JSON representation of a Python string */
649 /* METH_O */
650 if (PyUnicode_Check(pystr)) {
651 rval = escape_unicode(pystr);
652 }
653 else {
654 PyErr_Format(PyExc_TypeError,
655 "first argument must be a string, not %.80s",
656 Py_TYPE(pystr)->tp_name);
657 return NULL;
658 }
659 return rval;
660}
661
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000662static void
663scanner_dealloc(PyObject *self)
664{
INADA Naokia6296d32017-08-24 14:55:17 +0900665 /* bpo-31095: UnTrack is needed before calling any callbacks */
666 PyObject_GC_UnTrack(self);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000667 scanner_clear(self);
668 Py_TYPE(self)->tp_free(self);
669}
670
671static int
672scanner_traverse(PyObject *self, visitproc visit, void *arg)
673{
674 PyScannerObject *s;
675 assert(PyScanner_Check(self));
676 s = (PyScannerObject *)self;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000677 Py_VISIT(s->object_hook);
678 Py_VISIT(s->object_pairs_hook);
679 Py_VISIT(s->parse_float);
680 Py_VISIT(s->parse_int);
681 Py_VISIT(s->parse_constant);
682 return 0;
683}
684
685static int
686scanner_clear(PyObject *self)
687{
688 PyScannerObject *s;
689 assert(PyScanner_Check(self));
690 s = (PyScannerObject *)self;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000691 Py_CLEAR(s->object_hook);
692 Py_CLEAR(s->object_pairs_hook);
693 Py_CLEAR(s->parse_float);
694 Py_CLEAR(s->parse_int);
695 Py_CLEAR(s->parse_constant);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000696 Py_CLEAR(s->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000697 return 0;
698}
699
700static PyObject *
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300701_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
702{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000703 /* Read a JSON object from PyUnicode pystr.
704 idx is the index of the first character after the opening curly brace.
705 *next_idx_ptr is a return-by-reference index to the first character after
706 the closing curly brace.
707
708 Returns a new PyObject (usually a dict, but object_hook can change that)
709 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200710 void *str;
711 int kind;
712 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000713 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000714 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000715 PyObject *key = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000716 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000717 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000718
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200719 if (PyUnicode_READY(pystr) == -1)
720 return NULL;
721
722 str = PyUnicode_DATA(pystr);
723 kind = PyUnicode_KIND(pystr);
724 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
725
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000726 if (has_pairs_hook)
727 rval = PyList_New(0);
728 else
729 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000730 if (rval == NULL)
731 return NULL;
732
733 /* skip whitespace after { */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200734 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000735
736 /* only loop if the object is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200737 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
738 while (1) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000739 PyObject *memokey;
740
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000741 /* read key */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200742 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200743 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000744 goto bail;
745 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300746 key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000747 if (key == NULL)
748 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000749 memokey = PyDict_GetItem(s->memo, key);
750 if (memokey != NULL) {
751 Py_INCREF(memokey);
752 Py_DECREF(key);
753 key = memokey;
754 }
755 else {
756 if (PyDict_SetItem(s->memo, key, key) < 0)
757 goto bail;
758 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000759 idx = next_idx;
760
761 /* skip whitespace between key and : delimiter, read :, skip whitespace */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200762 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
763 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200764 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000765 goto bail;
766 }
767 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200768 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000769
770 /* read any JSON term */
771 val = scan_once_unicode(s, pystr, idx, &next_idx);
772 if (val == NULL)
773 goto bail;
774
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000775 if (has_pairs_hook) {
776 PyObject *item = PyTuple_Pack(2, key, val);
777 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000778 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000779 Py_CLEAR(key);
780 Py_CLEAR(val);
781 if (PyList_Append(rval, item) == -1) {
782 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000783 goto bail;
784 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000785 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000786 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000787 else {
788 if (PyDict_SetItem(rval, key, val) < 0)
789 goto bail;
790 Py_CLEAR(key);
791 Py_CLEAR(val);
792 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000793 idx = next_idx;
794
795 /* skip whitespace before } or , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200796 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000797
798 /* bail if the object is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200799 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000800 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200801 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200802 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000803 goto bail;
804 }
805 idx++;
806
807 /* skip whitespace after , delimiter */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200808 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000809 }
810 }
811
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000812 *next_idx_ptr = idx + 1;
813
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000814 if (has_pairs_hook) {
Victor Stinnerde4ae3d2016-12-04 22:59:09 +0100815 val = PyObject_CallFunctionObjArgs(s->object_pairs_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000816 Py_DECREF(rval);
817 return val;
818 }
819
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000820 /* if object_hook is not None: rval = object_hook(rval) */
821 if (s->object_hook != Py_None) {
Victor Stinnerde4ae3d2016-12-04 22:59:09 +0100822 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000823 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000824 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000825 }
826 return rval;
827bail:
828 Py_XDECREF(key);
829 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000830 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000831 return NULL;
832}
833
834static PyObject *
835_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200836 /* Read a JSON array from PyUnicode pystr.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000837 idx is the index of the first character after the opening brace.
838 *next_idx_ptr is a return-by-reference index to the first character after
839 the closing brace.
840
841 Returns a new PyList
842 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200843 void *str;
844 int kind;
845 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000846 PyObject *val = NULL;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200847 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000848 Py_ssize_t next_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000849
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200850 if (PyUnicode_READY(pystr) == -1)
851 return NULL;
852
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200853 rval = PyList_New(0);
854 if (rval == NULL)
855 return NULL;
856
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200857 str = PyUnicode_DATA(pystr);
858 kind = PyUnicode_KIND(pystr);
859 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
860
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000861 /* skip whitespace after [ */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200862 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000863
864 /* only loop if the array is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200865 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
866 while (1) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000867
868 /* read any JSON term */
869 val = scan_once_unicode(s, pystr, idx, &next_idx);
870 if (val == NULL)
871 goto bail;
872
873 if (PyList_Append(rval, val) == -1)
874 goto bail;
875
876 Py_CLEAR(val);
877 idx = next_idx;
878
879 /* skip whitespace between term and , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200880 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000881
882 /* bail if the array is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200883 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000884 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200885 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200886 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000887 goto bail;
888 }
889 idx++;
890
891 /* skip whitespace after , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200892 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000893 }
894 }
895
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200896 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
897 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200898 raise_errmsg("Expecting value", pystr, end_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000899 goto bail;
900 }
901 *next_idx_ptr = idx + 1;
902 return rval;
903bail:
904 Py_XDECREF(val);
905 Py_DECREF(rval);
906 return NULL;
907}
908
909static PyObject *
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200910_parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
911 /* Read a JSON constant.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000912 constant is the constant string that was found
913 ("NaN", "Infinity", "-Infinity").
914 idx is the index of the first character of the constant
915 *next_idx_ptr is a return-by-reference index to the first character after
916 the constant.
917
918 Returns the result of parse_constant
919 */
920 PyObject *cstr;
921 PyObject *rval;
922 /* constant is "NaN", "Infinity", or "-Infinity" */
923 cstr = PyUnicode_InternFromString(constant);
924 if (cstr == NULL)
925 return NULL;
926
927 /* rval = parse_constant(constant) */
Victor Stinnerde4ae3d2016-12-04 22:59:09 +0100928 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200929 idx += PyUnicode_GET_LENGTH(cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000930 Py_DECREF(cstr);
931 *next_idx_ptr = idx;
932 return rval;
933}
934
935static PyObject *
936_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
937 /* Read a JSON number from PyUnicode pystr.
938 idx is the index of the first character of the number
939 *next_idx_ptr is a return-by-reference index to the first character after
940 the number.
941
942 Returns a new PyObject representation of that number:
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200943 PyLong, or PyFloat.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000944 May return other types if parse_int or parse_float are set
945 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200946 void *str;
947 int kind;
948 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000949 Py_ssize_t idx = start;
950 int is_float = 0;
951 PyObject *rval;
Antoine Pitrouf6454512011-04-25 19:16:06 +0200952 PyObject *numstr = NULL;
953 PyObject *custom_func;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000954
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200955 if (PyUnicode_READY(pystr) == -1)
956 return NULL;
957
958 str = PyUnicode_DATA(pystr);
959 kind = PyUnicode_KIND(pystr);
960 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
961
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000962 /* read a sign if it's there, make sure it's not the end of the string */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200963 if (PyUnicode_READ(kind, str, idx) == '-') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000964 idx++;
965 if (idx > end_idx) {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200966 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000967 return NULL;
968 }
969 }
970
971 /* read as many integer digits as we find as long as it doesn't start with 0 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200972 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000973 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200974 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000975 }
976 /* if it starts with 0 we only expect one integer digit */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200977 else if (PyUnicode_READ(kind, str, idx) == '0') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000978 idx++;
979 }
980 /* no integer digits, error */
981 else {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200982 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000983 return NULL;
984 }
985
986 /* if the next char is '.' followed by a digit then read all float digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200987 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000988 is_float = 1;
989 idx += 2;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200990 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000991 }
992
993 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200994 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000995 Py_ssize_t e_start = idx;
996 idx++;
997
998 /* read an exponent sign if present */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200999 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001000
1001 /* read all digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001002 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001003
1004 /* if we got a digit, then parse as float. if not, backtrack */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001005 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001006 is_float = 1;
1007 }
1008 else {
1009 idx = e_start;
1010 }
1011 }
1012
Antoine Pitrouf6454512011-04-25 19:16:06 +02001013 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
1014 custom_func = s->parse_float;
1015 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
1016 custom_func = s->parse_int;
1017 else
1018 custom_func = NULL;
1019
1020 if (custom_func) {
1021 /* copy the section we determined to be a number */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001022 numstr = PyUnicode_FromKindAndData(kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001023 (char*)str + kind * start,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001024 idx - start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001025 if (numstr == NULL)
1026 return NULL;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001027 rval = PyObject_CallFunctionObjArgs(custom_func, numstr, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001028 }
1029 else {
Antoine Pitrouf6454512011-04-25 19:16:06 +02001030 Py_ssize_t i, n;
1031 char *buf;
1032 /* Straight conversion to ASCII, to avoid costly conversion of
1033 decimal unicode digits (which cannot appear here) */
1034 n = idx - start;
1035 numstr = PyBytes_FromStringAndSize(NULL, n);
1036 if (numstr == NULL)
1037 return NULL;
1038 buf = PyBytes_AS_STRING(numstr);
1039 for (i = 0; i < n; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001040 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001041 }
1042 if (is_float)
1043 rval = PyFloat_FromString(numstr);
1044 else
1045 rval = PyLong_FromString(buf, NULL, 10);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001046 }
1047 Py_DECREF(numstr);
1048 *next_idx_ptr = idx;
1049 return rval;
1050}
1051
1052static PyObject *
1053scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1054{
1055 /* Read one JSON term (of any kind) from PyUnicode pystr.
1056 idx is the index of the first character of the term
1057 *next_idx_ptr is a return-by-reference index to the first character after
1058 the number.
1059
1060 Returns a new PyObject representation of the term.
1061 */
Ezio Melotti362b9512011-05-07 17:58:09 +03001062 PyObject *res;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001063 void *str;
1064 int kind;
1065 Py_ssize_t length;
1066
1067 if (PyUnicode_READY(pystr) == -1)
1068 return NULL;
1069
1070 str = PyUnicode_DATA(pystr);
1071 kind = PyUnicode_KIND(pystr);
1072 length = PyUnicode_GET_LENGTH(pystr);
1073
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001074 if (idx < 0) {
Benjamin Peterson9beee042014-04-14 11:46:51 -04001075 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001076 return NULL;
1077 }
1078 if (idx >= length) {
Ezio Melotti37623ab2013-01-03 08:44:15 +02001079 raise_stop_iteration(idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001080 return NULL;
1081 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001082
1083 switch (PyUnicode_READ(kind, str, idx)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001084 case '"':
1085 /* string */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001086 return scanstring_unicode(pystr, idx + 1, s->strict, next_idx_ptr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001087 case '{':
1088 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +03001089 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1090 "from a unicode string"))
1091 return NULL;
1092 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1093 Py_LeaveRecursiveCall();
1094 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001095 case '[':
1096 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +03001097 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1098 "from a unicode string"))
1099 return NULL;
1100 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1101 Py_LeaveRecursiveCall();
1102 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001103 case 'n':
1104 /* null */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001105 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001106 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001107 Py_RETURN_NONE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001108 }
1109 break;
1110 case 't':
1111 /* true */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001112 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001113 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001114 Py_RETURN_TRUE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001115 }
1116 break;
1117 case 'f':
1118 /* false */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001119 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1120 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1121 PyUnicode_READ(kind, str, idx + 3) == 's' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001122 PyUnicode_READ(kind, str, idx + 4) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001123 *next_idx_ptr = idx + 5;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001124 Py_RETURN_FALSE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001125 }
1126 break;
1127 case 'N':
1128 /* NaN */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001129 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001130 PyUnicode_READ(kind, str, idx + 2) == 'N') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001131 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1132 }
1133 break;
1134 case 'I':
1135 /* Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001136 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1137 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1138 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001139 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001140 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1141 PyUnicode_READ(kind, str, idx + 6) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001142 PyUnicode_READ(kind, str, idx + 7) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001143 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1144 }
1145 break;
1146 case '-':
1147 /* -Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001148 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001149 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1150 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001151 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001152 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001153 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1154 PyUnicode_READ(kind, str, idx + 7) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001155 PyUnicode_READ(kind, str, idx + 8) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001156 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1157 }
1158 break;
1159 }
1160 /* Didn't find a string, object, array, or named constant. Look for a number. */
1161 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1162}
1163
1164static PyObject *
1165scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1166{
1167 /* Python callable interface to scan_once_{str,unicode} */
1168 PyObject *pystr;
1169 PyObject *rval;
1170 Py_ssize_t idx;
1171 Py_ssize_t next_idx = -1;
1172 static char *kwlist[] = {"string", "idx", NULL};
1173 PyScannerObject *s;
1174 assert(PyScanner_Check(self));
1175 s = (PyScannerObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001176 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001177 return NULL;
1178
1179 if (PyUnicode_Check(pystr)) {
1180 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1181 }
1182 else {
1183 PyErr_Format(PyExc_TypeError,
1184 "first argument must be a string, not %.80s",
1185 Py_TYPE(pystr)->tp_name);
1186 return NULL;
1187 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001188 PyDict_Clear(s->memo);
1189 if (rval == NULL)
1190 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001191 return _build_rval_index_tuple(rval, next_idx);
1192}
1193
1194static PyObject *
1195scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1196{
1197 PyScannerObject *s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001198 PyObject *ctx;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001199 PyObject *strict;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001200 static char *kwlist[] = {"context", NULL};
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001201
1202 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001203 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001204
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001205 s = (PyScannerObject *)type->tp_alloc(type, 0);
1206 if (s == NULL) {
1207 return NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001208 }
1209
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001210 s->memo = PyDict_New();
1211 if (s->memo == NULL)
1212 goto bail;
1213
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001214 /* All of these will fail "gracefully" so we don't need to verify them */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001215 strict = PyObject_GetAttrString(ctx, "strict");
1216 if (strict == NULL)
1217 goto bail;
1218 s->strict = PyObject_IsTrue(strict);
1219 Py_DECREF(strict);
1220 if (s->strict < 0)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001221 goto bail;
1222 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1223 if (s->object_hook == NULL)
1224 goto bail;
1225 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1226 if (s->object_pairs_hook == NULL)
1227 goto bail;
1228 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1229 if (s->parse_float == NULL)
1230 goto bail;
1231 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1232 if (s->parse_int == NULL)
1233 goto bail;
1234 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1235 if (s->parse_constant == NULL)
1236 goto bail;
1237
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001238 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001239
1240bail:
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001241 Py_DECREF(s);
1242 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001243}
1244
1245PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1246
1247static
1248PyTypeObject PyScannerType = {
1249 PyVarObject_HEAD_INIT(NULL, 0)
1250 "_json.Scanner", /* tp_name */
1251 sizeof(PyScannerObject), /* tp_basicsize */
1252 0, /* tp_itemsize */
1253 scanner_dealloc, /* tp_dealloc */
1254 0, /* tp_print */
1255 0, /* tp_getattr */
1256 0, /* tp_setattr */
1257 0, /* tp_compare */
1258 0, /* tp_repr */
1259 0, /* tp_as_number */
1260 0, /* tp_as_sequence */
1261 0, /* tp_as_mapping */
1262 0, /* tp_hash */
1263 scanner_call, /* tp_call */
1264 0, /* tp_str */
1265 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1266 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1267 0, /* tp_as_buffer */
1268 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1269 scanner_doc, /* tp_doc */
1270 scanner_traverse, /* tp_traverse */
1271 scanner_clear, /* tp_clear */
1272 0, /* tp_richcompare */
1273 0, /* tp_weaklistoffset */
1274 0, /* tp_iter */
1275 0, /* tp_iternext */
1276 0, /* tp_methods */
1277 scanner_members, /* tp_members */
1278 0, /* tp_getset */
1279 0, /* tp_base */
1280 0, /* tp_dict */
1281 0, /* tp_descr_get */
1282 0, /* tp_descr_set */
1283 0, /* tp_dictoffset */
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001284 0, /* tp_init */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001285 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1286 scanner_new, /* tp_new */
1287 0,/* PyObject_GC_Del, */ /* tp_free */
1288};
1289
1290static PyObject *
1291encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1292{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001293 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1294
1295 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001296 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001297 PyObject *item_separator;
1298 int sort_keys, skipkeys, allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001299
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001300 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001301 &markers, &defaultfn, &encoder, &indent,
1302 &key_separator, &item_separator,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001303 &sort_keys, &skipkeys, &allow_nan))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001304 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001305
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001306 if (markers != Py_None && !PyDict_Check(markers)) {
1307 PyErr_Format(PyExc_TypeError,
1308 "make_encoder() argument 1 must be dict or None, "
1309 "not %.200s", Py_TYPE(markers)->tp_name);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001310 return NULL;
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001311 }
1312
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001313 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1314 if (s == NULL)
1315 return NULL;
1316
Antoine Pitrou781eba72009-12-08 15:57:31 +00001317 s->markers = markers;
1318 s->defaultfn = defaultfn;
1319 s->encoder = encoder;
1320 s->indent = indent;
1321 s->key_separator = key_separator;
1322 s->item_separator = item_separator;
1323 s->sort_keys = sort_keys;
1324 s->skipkeys = skipkeys;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001325 s->allow_nan = allow_nan;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001326 s->fast_encode = NULL;
1327 if (PyCFunction_Check(s->encoder)) {
1328 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1329 if (f == (PyCFunction)py_encode_basestring_ascii ||
1330 f == (PyCFunction)py_encode_basestring) {
1331 s->fast_encode = f;
1332 }
1333 }
Antoine Pitrou781eba72009-12-08 15:57:31 +00001334
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001335 Py_INCREF(s->markers);
1336 Py_INCREF(s->defaultfn);
1337 Py_INCREF(s->encoder);
1338 Py_INCREF(s->indent);
1339 Py_INCREF(s->key_separator);
1340 Py_INCREF(s->item_separator);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001341 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001342}
1343
1344static PyObject *
1345encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1346{
1347 /* Python callable interface to encode_listencode_obj */
1348 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1349 PyObject *obj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001350 Py_ssize_t indent_level;
1351 PyEncoderObject *s;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001352 _PyAccu acc;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001353
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001354 assert(PyEncoder_Check(self));
1355 s = (PyEncoderObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001356 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1357 &obj, &indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001358 return NULL;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001359 if (_PyAccu_Init(&acc))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001360 return NULL;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001361 if (encoder_listencode_obj(s, &acc, obj, indent_level)) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001362 _PyAccu_Destroy(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001363 return NULL;
1364 }
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001365 return _PyAccu_FinishAsList(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001366}
1367
1368static PyObject *
1369_encoded_const(PyObject *obj)
1370{
1371 /* Return the JSON string representation of None, True, False */
1372 if (obj == Py_None) {
1373 static PyObject *s_null = NULL;
1374 if (s_null == NULL) {
1375 s_null = PyUnicode_InternFromString("null");
1376 }
1377 Py_INCREF(s_null);
1378 return s_null;
1379 }
1380 else if (obj == Py_True) {
1381 static PyObject *s_true = NULL;
1382 if (s_true == NULL) {
1383 s_true = PyUnicode_InternFromString("true");
1384 }
1385 Py_INCREF(s_true);
1386 return s_true;
1387 }
1388 else if (obj == Py_False) {
1389 static PyObject *s_false = NULL;
1390 if (s_false == NULL) {
1391 s_false = PyUnicode_InternFromString("false");
1392 }
1393 Py_INCREF(s_false);
1394 return s_false;
1395 }
1396 else {
1397 PyErr_SetString(PyExc_ValueError, "not a const");
1398 return NULL;
1399 }
1400}
1401
1402static PyObject *
1403encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1404{
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001405 /* Return the JSON representation of a PyFloat. */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001406 double i = PyFloat_AS_DOUBLE(obj);
1407 if (!Py_IS_FINITE(i)) {
1408 if (!s->allow_nan) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001409 PyErr_SetString(
1410 PyExc_ValueError,
1411 "Out of range float values are not JSON compliant"
1412 );
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001413 return NULL;
1414 }
1415 if (i > 0) {
1416 return PyUnicode_FromString("Infinity");
1417 }
1418 else if (i < 0) {
1419 return PyUnicode_FromString("-Infinity");
1420 }
1421 else {
1422 return PyUnicode_FromString("NaN");
1423 }
1424 }
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001425 return PyFloat_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001426}
1427
1428static PyObject *
1429encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1430{
1431 /* Return the JSON representation of a string */
1432 if (s->fast_encode)
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001433 return s->fast_encode(NULL, obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001434 else
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001435 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001436}
1437
1438static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001439_steal_accumulate(_PyAccu *acc, PyObject *stolen)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001440{
1441 /* Append stolen and then decrement its reference count */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001442 int rval = _PyAccu_Accumulate(acc, stolen);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001443 Py_DECREF(stolen);
1444 return rval;
1445}
1446
1447static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001448encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001449 PyObject *obj, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001450{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001451 /* Encode Python object obj to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001452 PyObject *newobj;
1453 int rv;
1454
1455 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1456 PyObject *cstr = _encoded_const(obj);
1457 if (cstr == NULL)
1458 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001459 return _steal_accumulate(acc, cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001460 }
1461 else if (PyUnicode_Check(obj))
1462 {
1463 PyObject *encoded = encoder_encode_string(s, obj);
1464 if (encoded == NULL)
1465 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001466 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001467 }
1468 else if (PyLong_Check(obj)) {
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001469 PyObject *encoded = PyLong_Type.tp_str(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001470 if (encoded == NULL)
1471 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001472 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001473 }
1474 else if (PyFloat_Check(obj)) {
1475 PyObject *encoded = encoder_encode_float(s, obj);
1476 if (encoded == NULL)
1477 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001478 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001479 }
1480 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001481 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1482 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001483 rv = encoder_listencode_list(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001484 Py_LeaveRecursiveCall();
1485 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001486 }
1487 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001488 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1489 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001490 rv = encoder_listencode_dict(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001491 Py_LeaveRecursiveCall();
1492 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001493 }
1494 else {
1495 PyObject *ident = NULL;
1496 if (s->markers != Py_None) {
1497 int has_key;
1498 ident = PyLong_FromVoidPtr(obj);
1499 if (ident == NULL)
1500 return -1;
1501 has_key = PyDict_Contains(s->markers, ident);
1502 if (has_key) {
1503 if (has_key != -1)
1504 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1505 Py_DECREF(ident);
1506 return -1;
1507 }
1508 if (PyDict_SetItem(s->markers, ident, obj)) {
1509 Py_DECREF(ident);
1510 return -1;
1511 }
1512 }
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001513 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001514 if (newobj == NULL) {
1515 Py_XDECREF(ident);
1516 return -1;
1517 }
Ezio Melotti13672652011-05-11 01:02:56 +03001518
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001519 if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
1520 Py_DECREF(newobj);
1521 Py_XDECREF(ident);
Ezio Melotti13672652011-05-11 01:02:56 +03001522 return -1;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001523 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001524 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001525 Py_LeaveRecursiveCall();
1526
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001527 Py_DECREF(newobj);
1528 if (rv) {
1529 Py_XDECREF(ident);
1530 return -1;
1531 }
1532 if (ident != NULL) {
1533 if (PyDict_DelItem(s->markers, ident)) {
1534 Py_XDECREF(ident);
1535 return -1;
1536 }
1537 Py_XDECREF(ident);
1538 }
1539 return rv;
1540 }
1541}
1542
1543static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001544encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001545 PyObject *dct, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001546{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001547 /* Encode Python dict dct a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001548 static PyObject *open_dict = NULL;
1549 static PyObject *close_dict = NULL;
1550 static PyObject *empty_dict = NULL;
1551 PyObject *kstr = NULL;
1552 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001553 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001554 PyObject *items;
1555 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001556 Py_ssize_t idx;
1557
1558 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1559 open_dict = PyUnicode_InternFromString("{");
1560 close_dict = PyUnicode_InternFromString("}");
1561 empty_dict = PyUnicode_InternFromString("{}");
1562 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1563 return -1;
1564 }
Serhiy Storchaka8cbc51a2017-01-13 08:38:15 +02001565 if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001566 return _PyAccu_Accumulate(acc, empty_dict);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001567
1568 if (s->markers != Py_None) {
1569 int has_key;
1570 ident = PyLong_FromVoidPtr(dct);
1571 if (ident == NULL)
1572 goto bail;
1573 has_key = PyDict_Contains(s->markers, ident);
1574 if (has_key) {
1575 if (has_key != -1)
1576 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1577 goto bail;
1578 }
1579 if (PyDict_SetItem(s->markers, ident, dct)) {
1580 goto bail;
1581 }
1582 }
1583
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001584 if (_PyAccu_Accumulate(acc, open_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001585 goto bail;
1586
1587 if (s->indent != Py_None) {
1588 /* TODO: DOES NOT RUN */
1589 indent_level += 1;
1590 /*
1591 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1592 separator = _item_separator + newline_indent
1593 buf += newline_indent
1594 */
1595 }
1596
Benjamin Peterson501182a2015-05-02 22:28:04 -04001597 items = PyMapping_Items(dct);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001598 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001599 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001600 if (s->sort_keys && PyList_Sort(items) < 0) {
1601 Py_DECREF(items);
Benjamin Peterson501182a2015-05-02 22:28:04 -04001602 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001603 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001604 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001605 Py_DECREF(items);
1606 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001607 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001608 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001609 while ((item = PyIter_Next(it)) != NULL) {
1610 PyObject *encoded, *key, *value;
Serhiy Storchakafff9a312017-03-21 08:53:25 +02001611 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001612 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1613 goto bail;
1614 }
1615 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001616 if (PyUnicode_Check(key)) {
1617 Py_INCREF(key);
1618 kstr = key;
1619 }
1620 else if (PyFloat_Check(key)) {
1621 kstr = encoder_encode_float(s, key);
1622 if (kstr == NULL)
1623 goto bail;
1624 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001625 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001626 /* This must come before the PyLong_Check because
1627 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001628 kstr = _encoded_const(key);
1629 if (kstr == NULL)
1630 goto bail;
1631 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001632 else if (PyLong_Check(key)) {
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001633 kstr = PyLong_Type.tp_str(key);
Ethan Furmana4998a72013-08-10 13:01:45 -07001634 if (kstr == NULL) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001635 goto bail;
Ethan Furmana4998a72013-08-10 13:01:45 -07001636 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001637 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001638 else if (s->skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001639 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001640 continue;
1641 }
1642 else {
1643 /* TODO: include repr of key */
Doug Hellmann1c524752010-07-21 12:29:04 +00001644 PyErr_SetString(PyExc_TypeError, "keys must be a string");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001645 goto bail;
1646 }
1647
1648 if (idx) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001649 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001650 goto bail;
1651 }
1652
1653 encoded = encoder_encode_string(s, kstr);
1654 Py_CLEAR(kstr);
1655 if (encoded == NULL)
1656 goto bail;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001657 if (_PyAccu_Accumulate(acc, encoded)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001658 Py_DECREF(encoded);
1659 goto bail;
1660 }
1661 Py_DECREF(encoded);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001662 if (_PyAccu_Accumulate(acc, s->key_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001663 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001664
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001665 value = PyTuple_GET_ITEM(item, 1);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001666 if (encoder_listencode_obj(s, acc, value, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001667 goto bail;
1668 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001669 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001670 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001671 if (PyErr_Occurred())
1672 goto bail;
1673 Py_CLEAR(it);
1674
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001675 if (ident != NULL) {
1676 if (PyDict_DelItem(s->markers, ident))
1677 goto bail;
1678 Py_CLEAR(ident);
1679 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001680 /* TODO DOES NOT RUN; dead code
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001681 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001682 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001683
1684 yield '\n' + (' ' * (_indent * _current_indent_level))
1685 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001686 if (_PyAccu_Accumulate(acc, close_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001687 goto bail;
1688 return 0;
1689
1690bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001691 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001692 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001693 Py_XDECREF(kstr);
1694 Py_XDECREF(ident);
1695 return -1;
1696}
1697
1698
1699static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001700encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001701 PyObject *seq, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001702{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001703 /* Encode Python list seq to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001704 static PyObject *open_array = NULL;
1705 static PyObject *close_array = NULL;
1706 static PyObject *empty_array = NULL;
1707 PyObject *ident = NULL;
1708 PyObject *s_fast = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001709 Py_ssize_t i;
1710
1711 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1712 open_array = PyUnicode_InternFromString("[");
1713 close_array = PyUnicode_InternFromString("]");
1714 empty_array = PyUnicode_InternFromString("[]");
1715 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1716 return -1;
1717 }
1718 ident = NULL;
1719 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1720 if (s_fast == NULL)
1721 return -1;
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001722 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001723 Py_DECREF(s_fast);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001724 return _PyAccu_Accumulate(acc, empty_array);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001725 }
1726
1727 if (s->markers != Py_None) {
1728 int has_key;
1729 ident = PyLong_FromVoidPtr(seq);
1730 if (ident == NULL)
1731 goto bail;
1732 has_key = PyDict_Contains(s->markers, ident);
1733 if (has_key) {
1734 if (has_key != -1)
1735 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1736 goto bail;
1737 }
1738 if (PyDict_SetItem(s->markers, ident, seq)) {
1739 goto bail;
1740 }
1741 }
1742
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001743 if (_PyAccu_Accumulate(acc, open_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001744 goto bail;
1745 if (s->indent != Py_None) {
1746 /* TODO: DOES NOT RUN */
1747 indent_level += 1;
1748 /*
1749 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1750 separator = _item_separator + newline_indent
1751 buf += newline_indent
1752 */
1753 }
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001754 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1755 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001756 if (i) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001757 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001758 goto bail;
1759 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001760 if (encoder_listencode_obj(s, acc, obj, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001761 goto bail;
1762 }
1763 if (ident != NULL) {
1764 if (PyDict_DelItem(s->markers, ident))
1765 goto bail;
1766 Py_CLEAR(ident);
1767 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001768
1769 /* TODO: DOES NOT RUN
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001770 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001771 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001772
1773 yield '\n' + (' ' * (_indent * _current_indent_level))
1774 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001775 if (_PyAccu_Accumulate(acc, close_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001776 goto bail;
1777 Py_DECREF(s_fast);
1778 return 0;
1779
1780bail:
1781 Py_XDECREF(ident);
1782 Py_DECREF(s_fast);
1783 return -1;
1784}
1785
1786static void
1787encoder_dealloc(PyObject *self)
1788{
INADA Naokia6296d32017-08-24 14:55:17 +09001789 /* bpo-31095: UnTrack is needed before calling any callbacks */
1790 PyObject_GC_UnTrack(self);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001791 encoder_clear(self);
1792 Py_TYPE(self)->tp_free(self);
1793}
1794
1795static int
1796encoder_traverse(PyObject *self, visitproc visit, void *arg)
1797{
1798 PyEncoderObject *s;
1799 assert(PyEncoder_Check(self));
1800 s = (PyEncoderObject *)self;
1801 Py_VISIT(s->markers);
1802 Py_VISIT(s->defaultfn);
1803 Py_VISIT(s->encoder);
1804 Py_VISIT(s->indent);
1805 Py_VISIT(s->key_separator);
1806 Py_VISIT(s->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001807 return 0;
1808}
1809
1810static int
1811encoder_clear(PyObject *self)
1812{
1813 /* Deallocate Encoder */
1814 PyEncoderObject *s;
1815 assert(PyEncoder_Check(self));
1816 s = (PyEncoderObject *)self;
1817 Py_CLEAR(s->markers);
1818 Py_CLEAR(s->defaultfn);
1819 Py_CLEAR(s->encoder);
1820 Py_CLEAR(s->indent);
1821 Py_CLEAR(s->key_separator);
1822 Py_CLEAR(s->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001823 return 0;
1824}
1825
1826PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1827
1828static
1829PyTypeObject PyEncoderType = {
1830 PyVarObject_HEAD_INIT(NULL, 0)
1831 "_json.Encoder", /* tp_name */
1832 sizeof(PyEncoderObject), /* tp_basicsize */
1833 0, /* tp_itemsize */
1834 encoder_dealloc, /* tp_dealloc */
1835 0, /* tp_print */
1836 0, /* tp_getattr */
1837 0, /* tp_setattr */
1838 0, /* tp_compare */
1839 0, /* tp_repr */
1840 0, /* tp_as_number */
1841 0, /* tp_as_sequence */
1842 0, /* tp_as_mapping */
1843 0, /* tp_hash */
1844 encoder_call, /* tp_call */
1845 0, /* tp_str */
1846 0, /* tp_getattro */
1847 0, /* tp_setattro */
1848 0, /* tp_as_buffer */
1849 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1850 encoder_doc, /* tp_doc */
1851 encoder_traverse, /* tp_traverse */
1852 encoder_clear, /* tp_clear */
1853 0, /* tp_richcompare */
1854 0, /* tp_weaklistoffset */
1855 0, /* tp_iter */
1856 0, /* tp_iternext */
1857 0, /* tp_methods */
1858 encoder_members, /* tp_members */
1859 0, /* tp_getset */
1860 0, /* tp_base */
1861 0, /* tp_dict */
1862 0, /* tp_descr_get */
1863 0, /* tp_descr_set */
1864 0, /* tp_dictoffset */
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001865 0, /* tp_init */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001866 0, /* tp_alloc */
1867 encoder_new, /* tp_new */
1868 0, /* tp_free */
1869};
1870
1871static PyMethodDef speedups_methods[] = {
1872 {"encode_basestring_ascii",
1873 (PyCFunction)py_encode_basestring_ascii,
1874 METH_O,
1875 pydoc_encode_basestring_ascii},
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001876 {"encode_basestring",
1877 (PyCFunction)py_encode_basestring,
1878 METH_O,
1879 pydoc_encode_basestring},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001880 {"scanstring",
1881 (PyCFunction)py_scanstring,
1882 METH_VARARGS,
1883 pydoc_scanstring},
Christian Heimes90540002008-05-08 14:29:10 +00001884 {NULL, NULL, 0, NULL}
1885};
1886
1887PyDoc_STRVAR(module_doc,
1888"json speedups\n");
1889
Martin v. Löwis1a214512008-06-11 05:26:20 +00001890static struct PyModuleDef jsonmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001891 PyModuleDef_HEAD_INIT,
1892 "_json",
1893 module_doc,
1894 -1,
1895 speedups_methods,
1896 NULL,
1897 NULL,
1898 NULL,
1899 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001900};
1901
Victor Stinnerf024d262015-03-17 17:48:27 +01001902PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001903PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00001904{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001905 PyObject *m = PyModule_Create(&jsonmodule);
1906 if (!m)
1907 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001908 if (PyType_Ready(&PyScannerType) < 0)
1909 goto fail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001910 if (PyType_Ready(&PyEncoderType) < 0)
1911 goto fail;
1912 Py_INCREF((PyObject*)&PyScannerType);
1913 if (PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType) < 0) {
1914 Py_DECREF((PyObject*)&PyScannerType);
1915 goto fail;
1916 }
1917 Py_INCREF((PyObject*)&PyEncoderType);
1918 if (PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType) < 0) {
1919 Py_DECREF((PyObject*)&PyEncoderType);
1920 goto fail;
1921 }
1922 return m;
1923 fail:
1924 Py_DECREF(m);
1925 return NULL;
Christian Heimes90540002008-05-08 14:29:10 +00001926}