blob: 54fc90cfece82e5ccee0b854bff5bc40491c224c [file] [log] [blame]
Christian Heimes90540002008-05-08 14:29:10 +00001#include "Python.h"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00002#include "structmember.h"
Antoine Pitroud0acb412012-03-22 14:42:18 +01003#include "accu.h"
4
/* UNUSED marks a declaration (typically a parameter) as deliberately
   unused.  With GCC-compatible compilers it expands to the __unused__
   attribute; with any other compiler it expands to nothing. */
#if defined(__GNUC__)
#  define UNUSED __attribute__((__unused__))
#else
#  define UNUSED
#endif
10
11#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
12#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
13#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
14#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
15
16static PyTypeObject PyScannerType;
17static PyTypeObject PyEncoderType;
18
19typedef struct _PyScannerObject {
20 PyObject_HEAD
Segev Finer541bd282017-07-13 08:52:08 +030021 signed char strict;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000022 PyObject *object_hook;
23 PyObject *object_pairs_hook;
24 PyObject *parse_float;
25 PyObject *parse_int;
26 PyObject *parse_constant;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +000027 PyObject *memo;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000028} PyScannerObject;
29
30static PyMemberDef scanner_members[] = {
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030031 {"strict", T_BOOL, offsetof(PyScannerObject, strict), READONLY, "strict"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000032 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
33 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
34 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
35 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
36 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
37 {NULL}
38};
39
40typedef struct _PyEncoderObject {
41 PyObject_HEAD
42 PyObject *markers;
43 PyObject *defaultfn;
44 PyObject *encoder;
45 PyObject *indent;
46 PyObject *key_separator;
47 PyObject *item_separator;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030048 char sort_keys;
49 char skipkeys;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000050 int allow_nan;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030051 PyCFunction fast_encode;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000052} PyEncoderObject;
53
54static PyMemberDef encoder_members[] = {
55 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
56 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
57 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
58 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
59 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
60 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030061 {"sort_keys", T_BOOL, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
62 {"skipkeys", T_BOOL, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000063 {NULL}
64};
65
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020066static PyObject *
67join_list_unicode(PyObject *lst)
68{
69 /* return u''.join(lst) */
70 static PyObject *sep = NULL;
71 if (sep == NULL) {
72 sep = PyUnicode_FromStringAndSize("", 0);
73 if (sep == NULL)
74 return NULL;
75 }
76 return PyUnicode_Join(sep, lst);
77}
78
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020079/* Forward decls */
80
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000081static PyObject *
82ascii_escape_unicode(PyObject *pystr);
83static PyObject *
84py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
85void init_json(void);
86static PyObject *
87scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
88static PyObject *
89_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
90static PyObject *
91scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000092static void
93scanner_dealloc(PyObject *self);
94static int
95scanner_clear(PyObject *self);
96static PyObject *
97encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000098static void
99encoder_dealloc(PyObject *self);
100static int
101encoder_clear(PyObject *self);
102static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200103encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000104static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200105encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000106static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200107encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000108static PyObject *
Hirokazu Yamamotofecf5d12009-05-02 15:55:19 +0000109_encoded_const(PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000110static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200111raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000112static PyObject *
113encoder_encode_string(PyEncoderObject *s, PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000114static PyObject *
115encoder_encode_float(PyEncoderObject *s, PyObject *obj);
116
/* S_CHAR(c): non-zero when c is a printable ASCII character (' ' through
   '~') other than backslash and double quote, i.e. a character that can
   be copied into a JSON string unescaped.  The argument is parenthesized
   so that compound expressions are accepted, but it is still evaluated
   several times: do not pass expressions with side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* IS_WHITESPACE(c): non-zero when c is space, tab, newline or carriage
   return.  The argument is evaluated several times. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
Christian Heimes90540002008-05-08 14:29:10 +0000119
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000120static Py_ssize_t
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200121ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000122{
123 /* Escape unicode code point c to ASCII escape sequences
124 in char *output. output must have at least 12 bytes unused to
125 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000126 output[chars++] = '\\';
127 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000128 case '\\': output[chars++] = c; break;
129 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000130 case '\b': output[chars++] = 'b'; break;
131 case '\f': output[chars++] = 'f'; break;
132 case '\n': output[chars++] = 'n'; break;
133 case '\r': output[chars++] = 'r'; break;
134 case '\t': output[chars++] = 't'; break;
135 default:
Christian Heimes90540002008-05-08 14:29:10 +0000136 if (c >= 0x10000) {
137 /* UTF-16 surrogate pair */
Victor Stinner76df43d2012-10-30 01:42:39 +0100138 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000139 output[chars++] = 'u';
Victor Stinner76df43d2012-10-30 01:42:39 +0100140 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
141 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
142 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
143 output[chars++] = Py_hexdigits[(v ) & 0xf];
144 c = Py_UNICODE_LOW_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000145 output[chars++] = '\\';
146 }
Christian Heimes90540002008-05-08 14:29:10 +0000147 output[chars++] = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200148 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
149 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
150 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
151 output[chars++] = Py_hexdigits[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000152 }
153 return chars;
154}
155
156static PyObject *
157ascii_escape_unicode(PyObject *pystr)
158{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000159 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000160 Py_ssize_t i;
161 Py_ssize_t input_chars;
162 Py_ssize_t output_size;
163 Py_ssize_t chars;
164 PyObject *rval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200165 void *input;
166 unsigned char *output;
167 int kind;
Christian Heimes90540002008-05-08 14:29:10 +0000168
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200169 if (PyUnicode_READY(pystr) == -1)
170 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000171
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200172 input_chars = PyUnicode_GET_LENGTH(pystr);
173 input = PyUnicode_DATA(pystr);
174 kind = PyUnicode_KIND(pystr);
175
176 /* Compute the output size */
177 for (i = 0, output_size = 2; i < input_chars; i++) {
178 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500179 Py_ssize_t d;
180 if (S_CHAR(c)) {
181 d = 1;
182 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200183 else {
184 switch(c) {
Victor Stinnerd9c06312011-10-11 21:56:19 +0200185 case '\\': case '"': case '\b': case '\f':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200186 case '\n': case '\r': case '\t':
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500187 d = 2; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200188 default:
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500189 d = c >= 0x10000 ? 12 : 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200190 }
191 }
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500192 if (output_size > PY_SSIZE_T_MAX - d) {
193 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
194 return NULL;
195 }
196 output_size += d;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200197 }
198
199 rval = PyUnicode_New(output_size, 127);
Christian Heimes90540002008-05-08 14:29:10 +0000200 if (rval == NULL) {
201 return NULL;
202 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200203 output = PyUnicode_1BYTE_DATA(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000204 chars = 0;
205 output[chars++] = '"';
206 for (i = 0; i < input_chars; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200207 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Christian Heimes90540002008-05-08 14:29:10 +0000208 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000209 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000210 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000211 else {
212 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000213 }
Christian Heimes90540002008-05-08 14:29:10 +0000214 }
215 output[chars++] = '"';
Christian Heimesf402e922013-01-03 09:21:55 +0100216#ifdef Py_DEBUG
Victor Stinner8f825062012-04-27 13:55:39 +0200217 assert(_PyUnicode_CheckConsistency(rval, 1));
Christian Heimesf402e922013-01-03 09:21:55 +0100218#endif
Christian Heimes90540002008-05-08 14:29:10 +0000219 return rval;
220}
221
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100222static PyObject *
223escape_unicode(PyObject *pystr)
224{
225 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
226 Py_ssize_t i;
227 Py_ssize_t input_chars;
228 Py_ssize_t output_size;
229 Py_ssize_t chars;
230 PyObject *rval;
231 void *input;
232 int kind;
233 Py_UCS4 maxchar;
234
235 if (PyUnicode_READY(pystr) == -1)
236 return NULL;
237
238 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
239 input_chars = PyUnicode_GET_LENGTH(pystr);
240 input = PyUnicode_DATA(pystr);
241 kind = PyUnicode_KIND(pystr);
242
243 /* Compute the output size */
244 for (i = 0, output_size = 2; i < input_chars; i++) {
245 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500246 Py_ssize_t d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100247 switch (c) {
248 case '\\': case '"': case '\b': case '\f':
249 case '\n': case '\r': case '\t':
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500250 d = 2;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100251 break;
252 default:
253 if (c <= 0x1f)
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500254 d = 6;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100255 else
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500256 d = 1;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100257 }
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500258 if (output_size > PY_SSIZE_T_MAX - d) {
259 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
260 return NULL;
261 }
262 output_size += d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100263 }
264
265 rval = PyUnicode_New(output_size, maxchar);
266 if (rval == NULL)
267 return NULL;
268
269 kind = PyUnicode_KIND(rval);
270
271#define ENCODE_OUTPUT do { \
272 chars = 0; \
273 output[chars++] = '"'; \
274 for (i = 0; i < input_chars; i++) { \
275 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
276 switch (c) { \
277 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
278 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
279 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
280 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
281 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
282 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
283 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
284 default: \
285 if (c <= 0x1f) { \
286 output[chars++] = '\\'; \
287 output[chars++] = 'u'; \
288 output[chars++] = '0'; \
289 output[chars++] = '0'; \
290 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
291 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
292 } else { \
293 output[chars++] = c; \
294 } \
295 } \
296 } \
297 output[chars++] = '"'; \
298 } while (0)
299
300 if (kind == PyUnicode_1BYTE_KIND) {
301 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
302 ENCODE_OUTPUT;
303 } else if (kind == PyUnicode_2BYTE_KIND) {
304 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
305 ENCODE_OUTPUT;
306 } else {
307 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
308 assert(kind == PyUnicode_4BYTE_KIND);
309 ENCODE_OUTPUT;
310 }
311#undef ENCODE_OUTPUT
312
313#ifdef Py_DEBUG
314 assert(_PyUnicode_CheckConsistency(rval, 1));
315#endif
316 return rval;
317}
318
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000319static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200320raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
Christian Heimes90540002008-05-08 14:29:10 +0000321{
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200322 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
323 static PyObject *JSONDecodeError = NULL;
324 PyObject *exc;
325 if (JSONDecodeError == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000326 PyObject *decoder = PyImport_ImportModule("json.decoder");
327 if (decoder == NULL)
328 return;
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200329 JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000330 Py_DECREF(decoder);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200331 if (JSONDecodeError == NULL)
Christian Heimes90540002008-05-08 14:29:10 +0000332 return;
Christian Heimes90540002008-05-08 14:29:10 +0000333 }
Victor Stinner4c381542016-12-09 00:33:39 +0100334 exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200335 if (exc) {
336 PyErr_SetObject(JSONDecodeError, exc);
337 Py_DECREF(exc);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000338 }
Christian Heimes90540002008-05-08 14:29:10 +0000339}
340
Ezio Melotti37623ab2013-01-03 08:44:15 +0200341static void
342raise_stop_iteration(Py_ssize_t idx)
343{
344 PyObject *value = PyLong_FromSsize_t(idx);
345 if (value != NULL) {
346 PyErr_SetObject(PyExc_StopIteration, value);
347 Py_DECREF(value);
348 }
349}
350
Christian Heimes90540002008-05-08 14:29:10 +0000351static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000352_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
353 /* return (rval, idx) tuple, stealing reference to rval */
354 PyObject *tpl;
355 PyObject *pyidx;
356 /*
357 steal a reference to rval, returns (rval, idx)
358 */
359 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000360 return NULL;
361 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000362 pyidx = PyLong_FromSsize_t(idx);
363 if (pyidx == NULL) {
364 Py_DECREF(rval);
365 return NULL;
366 }
367 tpl = PyTuple_New(2);
368 if (tpl == NULL) {
369 Py_DECREF(pyidx);
370 Py_DECREF(rval);
371 return NULL;
372 }
373 PyTuple_SET_ITEM(tpl, 0, rval);
374 PyTuple_SET_ITEM(tpl, 1, pyidx);
375 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000376}
377
/* Helper for scanstring_unicode(): if a pending `chunk` exists, append it
   to the `chunks` list (creating the list on first use) and reset `chunk`
   to NULL.  Jumps to the enclosing function's `bail` label on failure.
   Relies on the local names chunk, chunks and bail.  It is written as a
   bare if-block because it is used without a trailing semicolon. */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        int append_failed; \
        if (chunks == NULL) { \
            chunks = PyList_New(0); \
        } \
        if (chunks == NULL) { \
            goto bail; \
        } \
        append_failed = PyList_Append(chunks, chunk); \
        Py_CLEAR(chunk); \
        if (append_failed) { \
            goto bail; \
        } \
    }
392
Christian Heimes90540002008-05-08 14:29:10 +0000393static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000394scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000395{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000396 /* Read the JSON string from PyUnicode pystr.
397 end is the index of the first character after the quote.
398 if strict is zero then literal control characters are allowed
399 *next_end_ptr is a return-by-reference index of the character
400 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000401
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000402 Return value is a new PyUnicode
403 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000404 PyObject *rval = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200405 Py_ssize_t len;
Christian Heimes90540002008-05-08 14:29:10 +0000406 Py_ssize_t begin = end - 1;
Brett Cannonb94767f2011-02-22 20:15:44 +0000407 Py_ssize_t next /* = begin */;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200408 const void *buf;
409 int kind;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000410 PyObject *chunks = NULL;
411 PyObject *chunk = NULL;
412
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200413 if (PyUnicode_READY(pystr) == -1)
414 return 0;
415
416 len = PyUnicode_GET_LENGTH(pystr);
417 buf = PyUnicode_DATA(pystr);
418 kind = PyUnicode_KIND(pystr);
419
Ezio Melotti37623ab2013-01-03 08:44:15 +0200420 if (end < 0 || len < end) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000421 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
422 goto bail;
423 }
Christian Heimes90540002008-05-08 14:29:10 +0000424 while (1) {
425 /* Find the end of the string or the next escape */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200426 Py_UCS4 c = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000427 for (next = end; next < len; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200428 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000429 if (c == '"' || c == '\\') {
430 break;
431 }
432 else if (strict && c <= 0x1f) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000433 raise_errmsg("Invalid control character at", pystr, next);
Christian Heimes90540002008-05-08 14:29:10 +0000434 goto bail;
435 }
436 }
437 if (!(c == '"' || c == '\\')) {
438 raise_errmsg("Unterminated string starting at", pystr, begin);
439 goto bail;
440 }
441 /* Pick up this chunk if it's not zero length */
442 if (next != end) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000443 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200444 chunk = PyUnicode_FromKindAndData(
445 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200446 (char*)buf + kind * end,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200447 next - end);
Christian Heimes90540002008-05-08 14:29:10 +0000448 if (chunk == NULL) {
449 goto bail;
450 }
Christian Heimes90540002008-05-08 14:29:10 +0000451 }
452 next++;
453 if (c == '"') {
454 end = next;
455 break;
456 }
457 if (next == len) {
458 raise_errmsg("Unterminated string starting at", pystr, begin);
459 goto bail;
460 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200461 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000462 if (c != 'u') {
463 /* Non-unicode backslash escapes */
464 end = next + 1;
465 switch (c) {
466 case '"': break;
467 case '\\': break;
468 case '/': break;
469 case 'b': c = '\b'; break;
470 case 'f': c = '\f'; break;
471 case 'n': c = '\n'; break;
472 case 'r': c = '\r'; break;
473 case 't': c = '\t'; break;
474 default: c = 0;
475 }
476 if (c == 0) {
477 raise_errmsg("Invalid \\escape", pystr, end - 2);
478 goto bail;
479 }
480 }
481 else {
482 c = 0;
483 next++;
484 end = next + 4;
485 if (end >= len) {
486 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
487 goto bail;
488 }
489 /* Decode 4 hex digits */
490 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200491 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000492 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000493 switch (digit) {
494 case '0': case '1': case '2': case '3': case '4':
495 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000496 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000497 case 'a': case 'b': case 'c': case 'd': case 'e':
498 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000499 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000500 case 'A': case 'B': case 'C': case 'D': case 'E':
501 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000502 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000503 default:
504 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
505 goto bail;
506 }
507 }
Christian Heimes90540002008-05-08 14:29:10 +0000508 /* Surrogate pair */
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200509 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
510 PyUnicode_READ(kind, buf, next++) == '\\' &&
511 PyUnicode_READ(kind, buf, next++) == 'u') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200512 Py_UCS4 c2 = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000513 end += 6;
514 /* Decode 4 hex digits */
515 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200516 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000517 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000518 switch (digit) {
519 case '0': case '1': case '2': case '3': case '4':
520 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000521 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000522 case 'a': case 'b': case 'c': case 'd': case 'e':
523 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000524 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000525 case 'A': case 'B': case 'C': case 'D': case 'E':
526 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000527 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000528 default:
529 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
530 goto bail;
531 }
532 }
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200533 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
534 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
535 else
536 end -= 6;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000537 }
Christian Heimes90540002008-05-08 14:29:10 +0000538 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000539 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200540 chunk = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &c, 1);
Christian Heimes90540002008-05-08 14:29:10 +0000541 if (chunk == NULL) {
542 goto bail;
543 }
Christian Heimes90540002008-05-08 14:29:10 +0000544 }
545
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000546 if (chunks == NULL) {
547 if (chunk != NULL)
548 rval = chunk;
549 else
550 rval = PyUnicode_FromStringAndSize("", 0);
Christian Heimes90540002008-05-08 14:29:10 +0000551 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000552 else {
553 APPEND_OLD_CHUNK
554 rval = join_list_unicode(chunks);
555 if (rval == NULL) {
556 goto bail;
557 }
558 Py_CLEAR(chunks);
559 }
560
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000561 *next_end_ptr = end;
562 return rval;
Christian Heimes90540002008-05-08 14:29:10 +0000563bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000564 *next_end_ptr = -1;
Christian Heimes90540002008-05-08 14:29:10 +0000565 Py_XDECREF(chunks);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000566 Py_XDECREF(chunk);
Christian Heimes90540002008-05-08 14:29:10 +0000567 return NULL;
568}
569
570PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000571 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000572 "\n"
573 "Scan the string s for a JSON string. End is the index of the\n"
574 "character in s after the quote that started the JSON string.\n"
575 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
576 "on attempt to decode an invalid string. If strict is False then literal\n"
577 "control characters are allowed in the string.\n"
578 "\n"
579 "Returns a tuple of the decoded string and the index of the character in s\n"
580 "after the end quote."
581);
Christian Heimes90540002008-05-08 14:29:10 +0000582
583static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000584py_scanstring(PyObject* self UNUSED, PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000585{
586 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000587 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000588 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000589 Py_ssize_t next_end = -1;
590 int strict = 1;
Antoine Pitroucbb02842012-12-01 19:34:16 +0100591 if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000592 return NULL;
593 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000594 if (PyUnicode_Check(pystr)) {
595 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000596 }
597 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000598 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000599 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000600 Py_TYPE(pystr)->tp_name);
601 return NULL;
602 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000603 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000604}
605
606PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000607 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000608 "\n"
609 "Return an ASCII-only JSON representation of a Python string"
610);
Christian Heimes90540002008-05-08 14:29:10 +0000611
612static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000613py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000614{
615 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000616 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000617 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000618 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000619 rval = ascii_escape_unicode(pystr);
620 }
621 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000622 PyErr_Format(PyExc_TypeError,
623 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000624 Py_TYPE(pystr)->tp_name);
625 return NULL;
626 }
Christian Heimes90540002008-05-08 14:29:10 +0000627 return rval;
628}
629
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100630
631PyDoc_STRVAR(pydoc_encode_basestring,
632 "encode_basestring(string) -> string\n"
633 "\n"
634 "Return a JSON representation of a Python string"
635);
636
637static PyObject *
638py_encode_basestring(PyObject* self UNUSED, PyObject *pystr)
639{
640 PyObject *rval;
641 /* Return a JSON representation of a Python string */
642 /* METH_O */
643 if (PyUnicode_Check(pystr)) {
644 rval = escape_unicode(pystr);
645 }
646 else {
647 PyErr_Format(PyExc_TypeError,
648 "first argument must be a string, not %.80s",
649 Py_TYPE(pystr)->tp_name);
650 return NULL;
651 }
652 return rval;
653}
654
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000655static void
656scanner_dealloc(PyObject *self)
657{
INADA Naokia6296d32017-08-24 14:55:17 +0900658 /* bpo-31095: UnTrack is needed before calling any callbacks */
659 PyObject_GC_UnTrack(self);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000660 scanner_clear(self);
661 Py_TYPE(self)->tp_free(self);
662}
663
664static int
665scanner_traverse(PyObject *self, visitproc visit, void *arg)
666{
667 PyScannerObject *s;
668 assert(PyScanner_Check(self));
669 s = (PyScannerObject *)self;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000670 Py_VISIT(s->object_hook);
671 Py_VISIT(s->object_pairs_hook);
672 Py_VISIT(s->parse_float);
673 Py_VISIT(s->parse_int);
674 Py_VISIT(s->parse_constant);
675 return 0;
676}
677
678static int
679scanner_clear(PyObject *self)
680{
681 PyScannerObject *s;
682 assert(PyScanner_Check(self));
683 s = (PyScannerObject *)self;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000684 Py_CLEAR(s->object_hook);
685 Py_CLEAR(s->object_pairs_hook);
686 Py_CLEAR(s->parse_float);
687 Py_CLEAR(s->parse_int);
688 Py_CLEAR(s->parse_constant);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000689 Py_CLEAR(s->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000690 return 0;
691}
692
693static PyObject *
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300694_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
695{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000696 /* Read a JSON object from PyUnicode pystr.
697 idx is the index of the first character after the opening curly brace.
698 *next_idx_ptr is a return-by-reference index to the first character after
699 the closing curly brace.
700
701 Returns a new PyObject (usually a dict, but object_hook can change that)
702 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200703 void *str;
704 int kind;
705 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000706 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000707 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000708 PyObject *key = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000709 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000710 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000711
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200712 if (PyUnicode_READY(pystr) == -1)
713 return NULL;
714
715 str = PyUnicode_DATA(pystr);
716 kind = PyUnicode_KIND(pystr);
717 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
718
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000719 if (has_pairs_hook)
720 rval = PyList_New(0);
721 else
722 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000723 if (rval == NULL)
724 return NULL;
725
726 /* skip whitespace after { */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200727 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000728
729 /* only loop if the object is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200730 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
731 while (1) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000732 PyObject *memokey;
733
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000734 /* read key */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200735 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200736 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000737 goto bail;
738 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300739 key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000740 if (key == NULL)
741 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000742 memokey = PyDict_GetItem(s->memo, key);
743 if (memokey != NULL) {
744 Py_INCREF(memokey);
745 Py_DECREF(key);
746 key = memokey;
747 }
748 else {
749 if (PyDict_SetItem(s->memo, key, key) < 0)
750 goto bail;
751 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000752 idx = next_idx;
753
754 /* skip whitespace between key and : delimiter, read :, skip whitespace */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200755 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
756 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200757 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000758 goto bail;
759 }
760 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200761 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000762
763 /* read any JSON term */
764 val = scan_once_unicode(s, pystr, idx, &next_idx);
765 if (val == NULL)
766 goto bail;
767
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000768 if (has_pairs_hook) {
769 PyObject *item = PyTuple_Pack(2, key, val);
770 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000771 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000772 Py_CLEAR(key);
773 Py_CLEAR(val);
774 if (PyList_Append(rval, item) == -1) {
775 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000776 goto bail;
777 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000778 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000779 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000780 else {
781 if (PyDict_SetItem(rval, key, val) < 0)
782 goto bail;
783 Py_CLEAR(key);
784 Py_CLEAR(val);
785 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000786 idx = next_idx;
787
788 /* skip whitespace before } or , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200789 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000790
791 /* bail if the object is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200792 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000793 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200794 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200795 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000796 goto bail;
797 }
798 idx++;
799
800 /* skip whitespace after , delimiter */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200801 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000802 }
803 }
804
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000805 *next_idx_ptr = idx + 1;
806
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000807 if (has_pairs_hook) {
Victor Stinnerde4ae3d2016-12-04 22:59:09 +0100808 val = PyObject_CallFunctionObjArgs(s->object_pairs_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000809 Py_DECREF(rval);
810 return val;
811 }
812
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000813 /* if object_hook is not None: rval = object_hook(rval) */
814 if (s->object_hook != Py_None) {
Victor Stinnerde4ae3d2016-12-04 22:59:09 +0100815 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000816 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000817 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000818 }
819 return rval;
820bail:
821 Py_XDECREF(key);
822 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000823 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000824 return NULL;
825}
826
827static PyObject *
828_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200829 /* Read a JSON array from PyUnicode pystr.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000830 idx is the index of the first character after the opening brace.
831 *next_idx_ptr is a return-by-reference index to the first character after
832 the closing brace.
833
834 Returns a new PyList
835 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200836 void *str;
837 int kind;
838 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000839 PyObject *val = NULL;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200840 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000841 Py_ssize_t next_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000842
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200843 if (PyUnicode_READY(pystr) == -1)
844 return NULL;
845
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200846 rval = PyList_New(0);
847 if (rval == NULL)
848 return NULL;
849
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200850 str = PyUnicode_DATA(pystr);
851 kind = PyUnicode_KIND(pystr);
852 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
853
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000854 /* skip whitespace after [ */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200855 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000856
857 /* only loop if the array is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200858 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
859 while (1) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000860
861 /* read any JSON term */
862 val = scan_once_unicode(s, pystr, idx, &next_idx);
863 if (val == NULL)
864 goto bail;
865
866 if (PyList_Append(rval, val) == -1)
867 goto bail;
868
869 Py_CLEAR(val);
870 idx = next_idx;
871
872 /* skip whitespace between term and , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200873 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000874
875 /* bail if the array is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200876 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000877 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200878 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200879 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000880 goto bail;
881 }
882 idx++;
883
884 /* skip whitespace after , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200885 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000886 }
887 }
888
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200889 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
890 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200891 raise_errmsg("Expecting value", pystr, end_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000892 goto bail;
893 }
894 *next_idx_ptr = idx + 1;
895 return rval;
896bail:
897 Py_XDECREF(val);
898 Py_DECREF(rval);
899 return NULL;
900}
901
902static PyObject *
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200903_parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
904 /* Read a JSON constant.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000905 constant is the constant string that was found
906 ("NaN", "Infinity", "-Infinity").
907 idx is the index of the first character of the constant
908 *next_idx_ptr is a return-by-reference index to the first character after
909 the constant.
910
911 Returns the result of parse_constant
912 */
913 PyObject *cstr;
914 PyObject *rval;
915 /* constant is "NaN", "Infinity", or "-Infinity" */
916 cstr = PyUnicode_InternFromString(constant);
917 if (cstr == NULL)
918 return NULL;
919
920 /* rval = parse_constant(constant) */
Victor Stinnerde4ae3d2016-12-04 22:59:09 +0100921 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200922 idx += PyUnicode_GET_LENGTH(cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000923 Py_DECREF(cstr);
924 *next_idx_ptr = idx;
925 return rval;
926}
927
928static PyObject *
929_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
930 /* Read a JSON number from PyUnicode pystr.
931 idx is the index of the first character of the number
932 *next_idx_ptr is a return-by-reference index to the first character after
933 the number.
934
935 Returns a new PyObject representation of that number:
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200936 PyLong, or PyFloat.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000937 May return other types if parse_int or parse_float are set
938 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200939 void *str;
940 int kind;
941 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000942 Py_ssize_t idx = start;
943 int is_float = 0;
944 PyObject *rval;
Antoine Pitrouf6454512011-04-25 19:16:06 +0200945 PyObject *numstr = NULL;
946 PyObject *custom_func;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000947
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200948 if (PyUnicode_READY(pystr) == -1)
949 return NULL;
950
951 str = PyUnicode_DATA(pystr);
952 kind = PyUnicode_KIND(pystr);
953 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
954
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000955 /* read a sign if it's there, make sure it's not the end of the string */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200956 if (PyUnicode_READ(kind, str, idx) == '-') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000957 idx++;
958 if (idx > end_idx) {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200959 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000960 return NULL;
961 }
962 }
963
964 /* read as many integer digits as we find as long as it doesn't start with 0 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200965 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000966 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200967 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000968 }
969 /* if it starts with 0 we only expect one integer digit */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200970 else if (PyUnicode_READ(kind, str, idx) == '0') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000971 idx++;
972 }
973 /* no integer digits, error */
974 else {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200975 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000976 return NULL;
977 }
978
979 /* if the next char is '.' followed by a digit then read all float digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200980 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000981 is_float = 1;
982 idx += 2;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200983 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000984 }
985
986 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200987 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000988 Py_ssize_t e_start = idx;
989 idx++;
990
991 /* read an exponent sign if present */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200992 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000993
994 /* read all digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200995 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000996
997 /* if we got a digit, then parse as float. if not, backtrack */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200998 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000999 is_float = 1;
1000 }
1001 else {
1002 idx = e_start;
1003 }
1004 }
1005
Antoine Pitrouf6454512011-04-25 19:16:06 +02001006 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
1007 custom_func = s->parse_float;
1008 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
1009 custom_func = s->parse_int;
1010 else
1011 custom_func = NULL;
1012
1013 if (custom_func) {
1014 /* copy the section we determined to be a number */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001015 numstr = PyUnicode_FromKindAndData(kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001016 (char*)str + kind * start,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001017 idx - start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001018 if (numstr == NULL)
1019 return NULL;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001020 rval = PyObject_CallFunctionObjArgs(custom_func, numstr, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001021 }
1022 else {
Antoine Pitrouf6454512011-04-25 19:16:06 +02001023 Py_ssize_t i, n;
1024 char *buf;
1025 /* Straight conversion to ASCII, to avoid costly conversion of
1026 decimal unicode digits (which cannot appear here) */
1027 n = idx - start;
1028 numstr = PyBytes_FromStringAndSize(NULL, n);
1029 if (numstr == NULL)
1030 return NULL;
1031 buf = PyBytes_AS_STRING(numstr);
1032 for (i = 0; i < n; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001033 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001034 }
1035 if (is_float)
1036 rval = PyFloat_FromString(numstr);
1037 else
1038 rval = PyLong_FromString(buf, NULL, 10);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001039 }
1040 Py_DECREF(numstr);
1041 *next_idx_ptr = idx;
1042 return rval;
1043}
1044
1045static PyObject *
1046scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1047{
1048 /* Read one JSON term (of any kind) from PyUnicode pystr.
1049 idx is the index of the first character of the term
1050 *next_idx_ptr is a return-by-reference index to the first character after
1051 the number.
1052
1053 Returns a new PyObject representation of the term.
1054 */
Ezio Melotti362b9512011-05-07 17:58:09 +03001055 PyObject *res;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001056 void *str;
1057 int kind;
1058 Py_ssize_t length;
1059
1060 if (PyUnicode_READY(pystr) == -1)
1061 return NULL;
1062
1063 str = PyUnicode_DATA(pystr);
1064 kind = PyUnicode_KIND(pystr);
1065 length = PyUnicode_GET_LENGTH(pystr);
1066
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001067 if (idx < 0) {
Benjamin Peterson9beee042014-04-14 11:46:51 -04001068 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001069 return NULL;
1070 }
1071 if (idx >= length) {
Ezio Melotti37623ab2013-01-03 08:44:15 +02001072 raise_stop_iteration(idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001073 return NULL;
1074 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001075
1076 switch (PyUnicode_READ(kind, str, idx)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001077 case '"':
1078 /* string */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001079 return scanstring_unicode(pystr, idx + 1, s->strict, next_idx_ptr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001080 case '{':
1081 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +03001082 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1083 "from a unicode string"))
1084 return NULL;
1085 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1086 Py_LeaveRecursiveCall();
1087 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001088 case '[':
1089 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +03001090 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1091 "from a unicode string"))
1092 return NULL;
1093 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1094 Py_LeaveRecursiveCall();
1095 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001096 case 'n':
1097 /* null */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001098 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001099 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001100 Py_RETURN_NONE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001101 }
1102 break;
1103 case 't':
1104 /* true */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001105 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001106 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001107 Py_RETURN_TRUE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001108 }
1109 break;
1110 case 'f':
1111 /* false */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001112 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1113 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1114 PyUnicode_READ(kind, str, idx + 3) == 's' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001115 PyUnicode_READ(kind, str, idx + 4) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001116 *next_idx_ptr = idx + 5;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001117 Py_RETURN_FALSE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001118 }
1119 break;
1120 case 'N':
1121 /* NaN */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001122 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001123 PyUnicode_READ(kind, str, idx + 2) == 'N') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001124 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1125 }
1126 break;
1127 case 'I':
1128 /* Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001129 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1130 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1131 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001132 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001133 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1134 PyUnicode_READ(kind, str, idx + 6) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001135 PyUnicode_READ(kind, str, idx + 7) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001136 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1137 }
1138 break;
1139 case '-':
1140 /* -Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001141 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001142 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1143 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001144 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001145 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001146 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1147 PyUnicode_READ(kind, str, idx + 7) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001148 PyUnicode_READ(kind, str, idx + 8) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001149 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1150 }
1151 break;
1152 }
1153 /* Didn't find a string, object, array, or named constant. Look for a number. */
1154 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1155}
1156
1157static PyObject *
1158scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1159{
1160 /* Python callable interface to scan_once_{str,unicode} */
1161 PyObject *pystr;
1162 PyObject *rval;
1163 Py_ssize_t idx;
1164 Py_ssize_t next_idx = -1;
1165 static char *kwlist[] = {"string", "idx", NULL};
1166 PyScannerObject *s;
1167 assert(PyScanner_Check(self));
1168 s = (PyScannerObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001169 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001170 return NULL;
1171
1172 if (PyUnicode_Check(pystr)) {
1173 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1174 }
1175 else {
1176 PyErr_Format(PyExc_TypeError,
1177 "first argument must be a string, not %.80s",
1178 Py_TYPE(pystr)->tp_name);
1179 return NULL;
1180 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001181 PyDict_Clear(s->memo);
1182 if (rval == NULL)
1183 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001184 return _build_rval_index_tuple(rval, next_idx);
1185}
1186
1187static PyObject *
1188scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1189{
1190 PyScannerObject *s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001191 PyObject *ctx;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001192 PyObject *strict;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001193 static char *kwlist[] = {"context", NULL};
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001194
1195 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001196 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001197
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001198 s = (PyScannerObject *)type->tp_alloc(type, 0);
1199 if (s == NULL) {
1200 return NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001201 }
1202
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001203 s->memo = PyDict_New();
1204 if (s->memo == NULL)
1205 goto bail;
1206
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001207 /* All of these will fail "gracefully" so we don't need to verify them */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001208 strict = PyObject_GetAttrString(ctx, "strict");
1209 if (strict == NULL)
1210 goto bail;
1211 s->strict = PyObject_IsTrue(strict);
1212 Py_DECREF(strict);
1213 if (s->strict < 0)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001214 goto bail;
1215 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1216 if (s->object_hook == NULL)
1217 goto bail;
1218 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1219 if (s->object_pairs_hook == NULL)
1220 goto bail;
1221 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1222 if (s->parse_float == NULL)
1223 goto bail;
1224 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1225 if (s->parse_int == NULL)
1226 goto bail;
1227 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1228 if (s->parse_constant == NULL)
1229 goto bail;
1230
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001231 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001232
1233bail:
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001234 Py_DECREF(s);
1235 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001236}
1237
1238PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1239
1240static
1241PyTypeObject PyScannerType = {
1242 PyVarObject_HEAD_INIT(NULL, 0)
1243 "_json.Scanner", /* tp_name */
1244 sizeof(PyScannerObject), /* tp_basicsize */
1245 0, /* tp_itemsize */
1246 scanner_dealloc, /* tp_dealloc */
1247 0, /* tp_print */
1248 0, /* tp_getattr */
1249 0, /* tp_setattr */
1250 0, /* tp_compare */
1251 0, /* tp_repr */
1252 0, /* tp_as_number */
1253 0, /* tp_as_sequence */
1254 0, /* tp_as_mapping */
1255 0, /* tp_hash */
1256 scanner_call, /* tp_call */
1257 0, /* tp_str */
1258 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1259 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1260 0, /* tp_as_buffer */
1261 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1262 scanner_doc, /* tp_doc */
1263 scanner_traverse, /* tp_traverse */
1264 scanner_clear, /* tp_clear */
1265 0, /* tp_richcompare */
1266 0, /* tp_weaklistoffset */
1267 0, /* tp_iter */
1268 0, /* tp_iternext */
1269 0, /* tp_methods */
1270 scanner_members, /* tp_members */
1271 0, /* tp_getset */
1272 0, /* tp_base */
1273 0, /* tp_dict */
1274 0, /* tp_descr_get */
1275 0, /* tp_descr_set */
1276 0, /* tp_dictoffset */
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001277 0, /* tp_init */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001278 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1279 scanner_new, /* tp_new */
1280 0,/* PyObject_GC_Del, */ /* tp_free */
1281};
1282
1283static PyObject *
1284encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1285{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001286 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1287
1288 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001289 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001290 PyObject *item_separator;
1291 int sort_keys, skipkeys, allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001292
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001293 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001294 &markers, &defaultfn, &encoder, &indent,
1295 &key_separator, &item_separator,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001296 &sort_keys, &skipkeys, &allow_nan))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001297 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001298
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001299 if (markers != Py_None && !PyDict_Check(markers)) {
1300 PyErr_Format(PyExc_TypeError,
1301 "make_encoder() argument 1 must be dict or None, "
1302 "not %.200s", Py_TYPE(markers)->tp_name);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001303 return NULL;
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001304 }
1305
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001306 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1307 if (s == NULL)
1308 return NULL;
1309
Antoine Pitrou781eba72009-12-08 15:57:31 +00001310 s->markers = markers;
1311 s->defaultfn = defaultfn;
1312 s->encoder = encoder;
1313 s->indent = indent;
1314 s->key_separator = key_separator;
1315 s->item_separator = item_separator;
1316 s->sort_keys = sort_keys;
1317 s->skipkeys = skipkeys;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001318 s->allow_nan = allow_nan;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001319 s->fast_encode = NULL;
1320 if (PyCFunction_Check(s->encoder)) {
1321 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1322 if (f == (PyCFunction)py_encode_basestring_ascii ||
1323 f == (PyCFunction)py_encode_basestring) {
1324 s->fast_encode = f;
1325 }
1326 }
Antoine Pitrou781eba72009-12-08 15:57:31 +00001327
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001328 Py_INCREF(s->markers);
1329 Py_INCREF(s->defaultfn);
1330 Py_INCREF(s->encoder);
1331 Py_INCREF(s->indent);
1332 Py_INCREF(s->key_separator);
1333 Py_INCREF(s->item_separator);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001334 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001335}
1336
1337static PyObject *
1338encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1339{
1340 /* Python callable interface to encode_listencode_obj */
1341 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1342 PyObject *obj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001343 Py_ssize_t indent_level;
1344 PyEncoderObject *s;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001345 _PyAccu acc;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001346
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001347 assert(PyEncoder_Check(self));
1348 s = (PyEncoderObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001349 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1350 &obj, &indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001351 return NULL;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001352 if (_PyAccu_Init(&acc))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001353 return NULL;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001354 if (encoder_listencode_obj(s, &acc, obj, indent_level)) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001355 _PyAccu_Destroy(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001356 return NULL;
1357 }
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001358 return _PyAccu_FinishAsList(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001359}
1360
1361static PyObject *
1362_encoded_const(PyObject *obj)
1363{
1364 /* Return the JSON string representation of None, True, False */
1365 if (obj == Py_None) {
1366 static PyObject *s_null = NULL;
1367 if (s_null == NULL) {
1368 s_null = PyUnicode_InternFromString("null");
1369 }
1370 Py_INCREF(s_null);
1371 return s_null;
1372 }
1373 else if (obj == Py_True) {
1374 static PyObject *s_true = NULL;
1375 if (s_true == NULL) {
1376 s_true = PyUnicode_InternFromString("true");
1377 }
1378 Py_INCREF(s_true);
1379 return s_true;
1380 }
1381 else if (obj == Py_False) {
1382 static PyObject *s_false = NULL;
1383 if (s_false == NULL) {
1384 s_false = PyUnicode_InternFromString("false");
1385 }
1386 Py_INCREF(s_false);
1387 return s_false;
1388 }
1389 else {
1390 PyErr_SetString(PyExc_ValueError, "not a const");
1391 return NULL;
1392 }
1393}
1394
1395static PyObject *
1396encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1397{
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001398 /* Return the JSON representation of a PyFloat. */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001399 double i = PyFloat_AS_DOUBLE(obj);
1400 if (!Py_IS_FINITE(i)) {
1401 if (!s->allow_nan) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001402 PyErr_SetString(
1403 PyExc_ValueError,
1404 "Out of range float values are not JSON compliant"
1405 );
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001406 return NULL;
1407 }
1408 if (i > 0) {
1409 return PyUnicode_FromString("Infinity");
1410 }
1411 else if (i < 0) {
1412 return PyUnicode_FromString("-Infinity");
1413 }
1414 else {
1415 return PyUnicode_FromString("NaN");
1416 }
1417 }
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001418 return PyFloat_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001419}
1420
1421static PyObject *
1422encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1423{
1424 /* Return the JSON representation of a string */
1425 if (s->fast_encode)
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001426 return s->fast_encode(NULL, obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001427 else
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001428 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001429}
1430
1431static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001432_steal_accumulate(_PyAccu *acc, PyObject *stolen)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001433{
1434 /* Append stolen and then decrement its reference count */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001435 int rval = _PyAccu_Accumulate(acc, stolen);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001436 Py_DECREF(stolen);
1437 return rval;
1438}
1439
1440static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001441encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001442 PyObject *obj, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001443{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001444 /* Encode Python object obj to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001445 PyObject *newobj;
1446 int rv;
1447
1448 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1449 PyObject *cstr = _encoded_const(obj);
1450 if (cstr == NULL)
1451 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001452 return _steal_accumulate(acc, cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001453 }
1454 else if (PyUnicode_Check(obj))
1455 {
1456 PyObject *encoded = encoder_encode_string(s, obj);
1457 if (encoded == NULL)
1458 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001459 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001460 }
1461 else if (PyLong_Check(obj)) {
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001462 PyObject *encoded = PyLong_Type.tp_str(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001463 if (encoded == NULL)
1464 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001465 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001466 }
1467 else if (PyFloat_Check(obj)) {
1468 PyObject *encoded = encoder_encode_float(s, obj);
1469 if (encoded == NULL)
1470 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001471 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001472 }
1473 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001474 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1475 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001476 rv = encoder_listencode_list(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001477 Py_LeaveRecursiveCall();
1478 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001479 }
1480 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001481 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1482 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001483 rv = encoder_listencode_dict(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001484 Py_LeaveRecursiveCall();
1485 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001486 }
1487 else {
1488 PyObject *ident = NULL;
1489 if (s->markers != Py_None) {
1490 int has_key;
1491 ident = PyLong_FromVoidPtr(obj);
1492 if (ident == NULL)
1493 return -1;
1494 has_key = PyDict_Contains(s->markers, ident);
1495 if (has_key) {
1496 if (has_key != -1)
1497 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1498 Py_DECREF(ident);
1499 return -1;
1500 }
1501 if (PyDict_SetItem(s->markers, ident, obj)) {
1502 Py_DECREF(ident);
1503 return -1;
1504 }
1505 }
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001506 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001507 if (newobj == NULL) {
1508 Py_XDECREF(ident);
1509 return -1;
1510 }
Ezio Melotti13672652011-05-11 01:02:56 +03001511
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001512 if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
1513 Py_DECREF(newobj);
1514 Py_XDECREF(ident);
Ezio Melotti13672652011-05-11 01:02:56 +03001515 return -1;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001516 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001517 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001518 Py_LeaveRecursiveCall();
1519
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001520 Py_DECREF(newobj);
1521 if (rv) {
1522 Py_XDECREF(ident);
1523 return -1;
1524 }
1525 if (ident != NULL) {
1526 if (PyDict_DelItem(s->markers, ident)) {
1527 Py_XDECREF(ident);
1528 return -1;
1529 }
1530 Py_XDECREF(ident);
1531 }
1532 return rv;
1533 }
1534}
1535
1536static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001537encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001538 PyObject *dct, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001539{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001540 /* Encode Python dict dct a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001541 static PyObject *open_dict = NULL;
1542 static PyObject *close_dict = NULL;
1543 static PyObject *empty_dict = NULL;
1544 PyObject *kstr = NULL;
1545 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001546 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001547 PyObject *items;
1548 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001549 Py_ssize_t idx;
1550
1551 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1552 open_dict = PyUnicode_InternFromString("{");
1553 close_dict = PyUnicode_InternFromString("}");
1554 empty_dict = PyUnicode_InternFromString("{}");
1555 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1556 return -1;
1557 }
Serhiy Storchaka8cbc51a2017-01-13 08:38:15 +02001558 if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001559 return _PyAccu_Accumulate(acc, empty_dict);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001560
1561 if (s->markers != Py_None) {
1562 int has_key;
1563 ident = PyLong_FromVoidPtr(dct);
1564 if (ident == NULL)
1565 goto bail;
1566 has_key = PyDict_Contains(s->markers, ident);
1567 if (has_key) {
1568 if (has_key != -1)
1569 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1570 goto bail;
1571 }
1572 if (PyDict_SetItem(s->markers, ident, dct)) {
1573 goto bail;
1574 }
1575 }
1576
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001577 if (_PyAccu_Accumulate(acc, open_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001578 goto bail;
1579
1580 if (s->indent != Py_None) {
1581 /* TODO: DOES NOT RUN */
1582 indent_level += 1;
1583 /*
1584 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1585 separator = _item_separator + newline_indent
1586 buf += newline_indent
1587 */
1588 }
1589
Benjamin Peterson501182a2015-05-02 22:28:04 -04001590 items = PyMapping_Items(dct);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001591 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001592 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001593 if (s->sort_keys && PyList_Sort(items) < 0) {
1594 Py_DECREF(items);
Benjamin Peterson501182a2015-05-02 22:28:04 -04001595 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001596 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001597 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001598 Py_DECREF(items);
1599 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001600 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001601 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001602 while ((item = PyIter_Next(it)) != NULL) {
1603 PyObject *encoded, *key, *value;
Serhiy Storchakafff9a312017-03-21 08:53:25 +02001604 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001605 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1606 goto bail;
1607 }
1608 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001609 if (PyUnicode_Check(key)) {
1610 Py_INCREF(key);
1611 kstr = key;
1612 }
1613 else if (PyFloat_Check(key)) {
1614 kstr = encoder_encode_float(s, key);
1615 if (kstr == NULL)
1616 goto bail;
1617 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001618 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001619 /* This must come before the PyLong_Check because
1620 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001621 kstr = _encoded_const(key);
1622 if (kstr == NULL)
1623 goto bail;
1624 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001625 else if (PyLong_Check(key)) {
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001626 kstr = PyLong_Type.tp_str(key);
Ethan Furmana4998a72013-08-10 13:01:45 -07001627 if (kstr == NULL) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001628 goto bail;
Ethan Furmana4998a72013-08-10 13:01:45 -07001629 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001630 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001631 else if (s->skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001632 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001633 continue;
1634 }
1635 else {
1636 /* TODO: include repr of key */
Doug Hellmann1c524752010-07-21 12:29:04 +00001637 PyErr_SetString(PyExc_TypeError, "keys must be a string");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001638 goto bail;
1639 }
1640
1641 if (idx) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001642 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001643 goto bail;
1644 }
1645
1646 encoded = encoder_encode_string(s, kstr);
1647 Py_CLEAR(kstr);
1648 if (encoded == NULL)
1649 goto bail;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001650 if (_PyAccu_Accumulate(acc, encoded)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001651 Py_DECREF(encoded);
1652 goto bail;
1653 }
1654 Py_DECREF(encoded);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001655 if (_PyAccu_Accumulate(acc, s->key_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001656 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001657
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001658 value = PyTuple_GET_ITEM(item, 1);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001659 if (encoder_listencode_obj(s, acc, value, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001660 goto bail;
1661 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001662 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001663 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001664 if (PyErr_Occurred())
1665 goto bail;
1666 Py_CLEAR(it);
1667
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001668 if (ident != NULL) {
1669 if (PyDict_DelItem(s->markers, ident))
1670 goto bail;
1671 Py_CLEAR(ident);
1672 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001673 /* TODO DOES NOT RUN; dead code
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001674 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001675 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001676
1677 yield '\n' + (' ' * (_indent * _current_indent_level))
1678 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001679 if (_PyAccu_Accumulate(acc, close_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001680 goto bail;
1681 return 0;
1682
1683bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001684 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001685 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001686 Py_XDECREF(kstr);
1687 Py_XDECREF(ident);
1688 return -1;
1689}
1690
1691
1692static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001693encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001694 PyObject *seq, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001695{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001696 /* Encode Python list seq to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001697 static PyObject *open_array = NULL;
1698 static PyObject *close_array = NULL;
1699 static PyObject *empty_array = NULL;
1700 PyObject *ident = NULL;
1701 PyObject *s_fast = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001702 Py_ssize_t i;
1703
1704 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1705 open_array = PyUnicode_InternFromString("[");
1706 close_array = PyUnicode_InternFromString("]");
1707 empty_array = PyUnicode_InternFromString("[]");
1708 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1709 return -1;
1710 }
1711 ident = NULL;
1712 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1713 if (s_fast == NULL)
1714 return -1;
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001715 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001716 Py_DECREF(s_fast);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001717 return _PyAccu_Accumulate(acc, empty_array);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001718 }
1719
1720 if (s->markers != Py_None) {
1721 int has_key;
1722 ident = PyLong_FromVoidPtr(seq);
1723 if (ident == NULL)
1724 goto bail;
1725 has_key = PyDict_Contains(s->markers, ident);
1726 if (has_key) {
1727 if (has_key != -1)
1728 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1729 goto bail;
1730 }
1731 if (PyDict_SetItem(s->markers, ident, seq)) {
1732 goto bail;
1733 }
1734 }
1735
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001736 if (_PyAccu_Accumulate(acc, open_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001737 goto bail;
1738 if (s->indent != Py_None) {
1739 /* TODO: DOES NOT RUN */
1740 indent_level += 1;
1741 /*
1742 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1743 separator = _item_separator + newline_indent
1744 buf += newline_indent
1745 */
1746 }
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001747 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1748 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001749 if (i) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001750 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001751 goto bail;
1752 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001753 if (encoder_listencode_obj(s, acc, obj, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001754 goto bail;
1755 }
1756 if (ident != NULL) {
1757 if (PyDict_DelItem(s->markers, ident))
1758 goto bail;
1759 Py_CLEAR(ident);
1760 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001761
1762 /* TODO: DOES NOT RUN
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001763 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001764 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001765
1766 yield '\n' + (' ' * (_indent * _current_indent_level))
1767 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001768 if (_PyAccu_Accumulate(acc, close_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001769 goto bail;
1770 Py_DECREF(s_fast);
1771 return 0;
1772
1773bail:
1774 Py_XDECREF(ident);
1775 Py_DECREF(s_fast);
1776 return -1;
1777}
1778
1779static void
1780encoder_dealloc(PyObject *self)
1781{
INADA Naokia6296d32017-08-24 14:55:17 +09001782 /* bpo-31095: UnTrack is needed before calling any callbacks */
1783 PyObject_GC_UnTrack(self);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001784 encoder_clear(self);
1785 Py_TYPE(self)->tp_free(self);
1786}
1787
1788static int
1789encoder_traverse(PyObject *self, visitproc visit, void *arg)
1790{
1791 PyEncoderObject *s;
1792 assert(PyEncoder_Check(self));
1793 s = (PyEncoderObject *)self;
1794 Py_VISIT(s->markers);
1795 Py_VISIT(s->defaultfn);
1796 Py_VISIT(s->encoder);
1797 Py_VISIT(s->indent);
1798 Py_VISIT(s->key_separator);
1799 Py_VISIT(s->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001800 return 0;
1801}
1802
1803static int
1804encoder_clear(PyObject *self)
1805{
1806 /* Deallocate Encoder */
1807 PyEncoderObject *s;
1808 assert(PyEncoder_Check(self));
1809 s = (PyEncoderObject *)self;
1810 Py_CLEAR(s->markers);
1811 Py_CLEAR(s->defaultfn);
1812 Py_CLEAR(s->encoder);
1813 Py_CLEAR(s->indent);
1814 Py_CLEAR(s->key_separator);
1815 Py_CLEAR(s->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001816 return 0;
1817}
1818
1819PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1820
1821static
1822PyTypeObject PyEncoderType = {
1823 PyVarObject_HEAD_INIT(NULL, 0)
1824 "_json.Encoder", /* tp_name */
1825 sizeof(PyEncoderObject), /* tp_basicsize */
1826 0, /* tp_itemsize */
1827 encoder_dealloc, /* tp_dealloc */
1828 0, /* tp_print */
1829 0, /* tp_getattr */
1830 0, /* tp_setattr */
1831 0, /* tp_compare */
1832 0, /* tp_repr */
1833 0, /* tp_as_number */
1834 0, /* tp_as_sequence */
1835 0, /* tp_as_mapping */
1836 0, /* tp_hash */
1837 encoder_call, /* tp_call */
1838 0, /* tp_str */
1839 0, /* tp_getattro */
1840 0, /* tp_setattro */
1841 0, /* tp_as_buffer */
1842 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1843 encoder_doc, /* tp_doc */
1844 encoder_traverse, /* tp_traverse */
1845 encoder_clear, /* tp_clear */
1846 0, /* tp_richcompare */
1847 0, /* tp_weaklistoffset */
1848 0, /* tp_iter */
1849 0, /* tp_iternext */
1850 0, /* tp_methods */
1851 encoder_members, /* tp_members */
1852 0, /* tp_getset */
1853 0, /* tp_base */
1854 0, /* tp_dict */
1855 0, /* tp_descr_get */
1856 0, /* tp_descr_set */
1857 0, /* tp_dictoffset */
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001858 0, /* tp_init */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001859 0, /* tp_alloc */
1860 encoder_new, /* tp_new */
1861 0, /* tp_free */
1862};
1863
1864static PyMethodDef speedups_methods[] = {
1865 {"encode_basestring_ascii",
1866 (PyCFunction)py_encode_basestring_ascii,
1867 METH_O,
1868 pydoc_encode_basestring_ascii},
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001869 {"encode_basestring",
1870 (PyCFunction)py_encode_basestring,
1871 METH_O,
1872 pydoc_encode_basestring},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001873 {"scanstring",
1874 (PyCFunction)py_scanstring,
1875 METH_VARARGS,
1876 pydoc_scanstring},
Christian Heimes90540002008-05-08 14:29:10 +00001877 {NULL, NULL, 0, NULL}
1878};
1879
1880PyDoc_STRVAR(module_doc,
1881"json speedups\n");
1882
Martin v. Löwis1a214512008-06-11 05:26:20 +00001883static struct PyModuleDef jsonmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001884 PyModuleDef_HEAD_INIT,
1885 "_json",
1886 module_doc,
1887 -1,
1888 speedups_methods,
1889 NULL,
1890 NULL,
1891 NULL,
1892 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001893};
1894
Victor Stinnerf024d262015-03-17 17:48:27 +01001895PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001896PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00001897{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001898 PyObject *m = PyModule_Create(&jsonmodule);
1899 if (!m)
1900 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001901 if (PyType_Ready(&PyScannerType) < 0)
1902 goto fail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001903 if (PyType_Ready(&PyEncoderType) < 0)
1904 goto fail;
1905 Py_INCREF((PyObject*)&PyScannerType);
1906 if (PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType) < 0) {
1907 Py_DECREF((PyObject*)&PyScannerType);
1908 goto fail;
1909 }
1910 Py_INCREF((PyObject*)&PyEncoderType);
1911 if (PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType) < 0) {
1912 Py_DECREF((PyObject*)&PyEncoderType);
1913 goto fail;
1914 }
1915 return m;
1916 fail:
1917 Py_DECREF(m);
1918 return NULL;
Christian Heimes90540002008-05-08 14:29:10 +00001919}