blob: 6cc31c6e3712245b51e2e36c11c6e75187d2395c [file] [log] [blame]
Christian Heimes90540002008-05-08 14:29:10 +00001#include "Python.h"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00002#include "structmember.h"
Antoine Pitroud0acb412012-03-22 14:42:18 +01003#include "accu.h"
4
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00005#ifdef __GNUC__
6#define UNUSED __attribute__((__unused__))
7#else
8#define UNUSED
9#endif
10
11#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
12#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
13#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
14#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
15
16static PyTypeObject PyScannerType;
17static PyTypeObject PyEncoderType;
18
19typedef struct _PyScannerObject {
20 PyObject_HEAD
Segev Finer541bd282017-07-13 08:52:08 +030021 signed char strict;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000022 PyObject *object_hook;
23 PyObject *object_pairs_hook;
24 PyObject *parse_float;
25 PyObject *parse_int;
26 PyObject *parse_constant;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +000027 PyObject *memo;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000028} PyScannerObject;
29
30static PyMemberDef scanner_members[] = {
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030031 {"strict", T_BOOL, offsetof(PyScannerObject, strict), READONLY, "strict"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000032 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
33 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
34 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
35 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
36 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
37 {NULL}
38};
39
40typedef struct _PyEncoderObject {
41 PyObject_HEAD
42 PyObject *markers;
43 PyObject *defaultfn;
44 PyObject *encoder;
45 PyObject *indent;
46 PyObject *key_separator;
47 PyObject *item_separator;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030048 char sort_keys;
49 char skipkeys;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000050 int allow_nan;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030051 PyCFunction fast_encode;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000052} PyEncoderObject;
53
54static PyMemberDef encoder_members[] = {
55 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
56 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
57 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
58 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
59 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
60 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +030061 {"sort_keys", T_BOOL, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
62 {"skipkeys", T_BOOL, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000063 {NULL}
64};
65
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020066static PyObject *
67join_list_unicode(PyObject *lst)
68{
69 /* return u''.join(lst) */
70 static PyObject *sep = NULL;
71 if (sep == NULL) {
72 sep = PyUnicode_FromStringAndSize("", 0);
73 if (sep == NULL)
74 return NULL;
75 }
76 return PyUnicode_Join(sep, lst);
77}
78
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020079/* Forward decls */
80
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000081static PyObject *
82ascii_escape_unicode(PyObject *pystr);
83static PyObject *
84py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
85void init_json(void);
86static PyObject *
87scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
88static PyObject *
89_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
90static PyObject *
91scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000092static void
93scanner_dealloc(PyObject *self);
94static int
95scanner_clear(PyObject *self);
96static PyObject *
97encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000098static void
99encoder_dealloc(PyObject *self);
100static int
101encoder_clear(PyObject *self);
102static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200103encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000104static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200105encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000106static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200107encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000108static PyObject *
Hirokazu Yamamotofecf5d12009-05-02 15:55:19 +0000109_encoded_const(PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000110static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200111raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000112static PyObject *
113encoder_encode_string(PyEncoderObject *s, PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000114static PyObject *
115encoder_encode_float(PyEncoderObject *s, PyObject *obj);
116
/* S_CHAR(c): true when c is a printable ASCII character (' ' .. '~') that
   can be copied into a JSON string unescaped, i.e. anything in that range
   except backslash and double quote.
   IS_WHITESPACE(c): true for the four JSON whitespace characters.
   Both macros evaluate their argument more than once, so do not pass an
   expression with side effects.  The argument is fully parenthesized so
   that operator expressions such as S_CHAR(x & 0x7f) parse as intended. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
Christian Heimes90540002008-05-08 14:29:10 +0000119
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000120static Py_ssize_t
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200121ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000122{
123 /* Escape unicode code point c to ASCII escape sequences
124 in char *output. output must have at least 12 bytes unused to
125 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000126 output[chars++] = '\\';
127 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000128 case '\\': output[chars++] = c; break;
129 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000130 case '\b': output[chars++] = 'b'; break;
131 case '\f': output[chars++] = 'f'; break;
132 case '\n': output[chars++] = 'n'; break;
133 case '\r': output[chars++] = 'r'; break;
134 case '\t': output[chars++] = 't'; break;
135 default:
Christian Heimes90540002008-05-08 14:29:10 +0000136 if (c >= 0x10000) {
137 /* UTF-16 surrogate pair */
Victor Stinner76df43d2012-10-30 01:42:39 +0100138 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000139 output[chars++] = 'u';
Victor Stinner76df43d2012-10-30 01:42:39 +0100140 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
141 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
142 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
143 output[chars++] = Py_hexdigits[(v ) & 0xf];
144 c = Py_UNICODE_LOW_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000145 output[chars++] = '\\';
146 }
Christian Heimes90540002008-05-08 14:29:10 +0000147 output[chars++] = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200148 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
149 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
150 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
151 output[chars++] = Py_hexdigits[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000152 }
153 return chars;
154}
155
156static PyObject *
157ascii_escape_unicode(PyObject *pystr)
158{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000159 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000160 Py_ssize_t i;
161 Py_ssize_t input_chars;
162 Py_ssize_t output_size;
163 Py_ssize_t chars;
164 PyObject *rval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200165 void *input;
166 unsigned char *output;
167 int kind;
Christian Heimes90540002008-05-08 14:29:10 +0000168
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200169 if (PyUnicode_READY(pystr) == -1)
170 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000171
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200172 input_chars = PyUnicode_GET_LENGTH(pystr);
173 input = PyUnicode_DATA(pystr);
174 kind = PyUnicode_KIND(pystr);
175
176 /* Compute the output size */
177 for (i = 0, output_size = 2; i < input_chars; i++) {
178 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500179 Py_ssize_t d;
180 if (S_CHAR(c)) {
181 d = 1;
182 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200183 else {
184 switch(c) {
Victor Stinnerd9c06312011-10-11 21:56:19 +0200185 case '\\': case '"': case '\b': case '\f':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200186 case '\n': case '\r': case '\t':
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500187 d = 2; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200188 default:
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500189 d = c >= 0x10000 ? 12 : 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200190 }
191 }
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500192 if (output_size > PY_SSIZE_T_MAX - d) {
193 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
194 return NULL;
195 }
196 output_size += d;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200197 }
198
199 rval = PyUnicode_New(output_size, 127);
Christian Heimes90540002008-05-08 14:29:10 +0000200 if (rval == NULL) {
201 return NULL;
202 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200203 output = PyUnicode_1BYTE_DATA(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000204 chars = 0;
205 output[chars++] = '"';
206 for (i = 0; i < input_chars; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200207 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Christian Heimes90540002008-05-08 14:29:10 +0000208 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000209 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000210 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000211 else {
212 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000213 }
Christian Heimes90540002008-05-08 14:29:10 +0000214 }
215 output[chars++] = '"';
Christian Heimesf402e922013-01-03 09:21:55 +0100216#ifdef Py_DEBUG
Victor Stinner8f825062012-04-27 13:55:39 +0200217 assert(_PyUnicode_CheckConsistency(rval, 1));
Christian Heimesf402e922013-01-03 09:21:55 +0100218#endif
Christian Heimes90540002008-05-08 14:29:10 +0000219 return rval;
220}
221
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100222static PyObject *
223escape_unicode(PyObject *pystr)
224{
225 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
226 Py_ssize_t i;
227 Py_ssize_t input_chars;
228 Py_ssize_t output_size;
229 Py_ssize_t chars;
230 PyObject *rval;
231 void *input;
232 int kind;
233 Py_UCS4 maxchar;
234
235 if (PyUnicode_READY(pystr) == -1)
236 return NULL;
237
238 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
239 input_chars = PyUnicode_GET_LENGTH(pystr);
240 input = PyUnicode_DATA(pystr);
241 kind = PyUnicode_KIND(pystr);
242
243 /* Compute the output size */
244 for (i = 0, output_size = 2; i < input_chars; i++) {
245 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500246 Py_ssize_t d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100247 switch (c) {
248 case '\\': case '"': case '\b': case '\f':
249 case '\n': case '\r': case '\t':
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500250 d = 2;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100251 break;
252 default:
253 if (c <= 0x1f)
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500254 d = 6;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100255 else
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500256 d = 1;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100257 }
Benjamin Peterson7b78d432015-06-27 15:01:51 -0500258 if (output_size > PY_SSIZE_T_MAX - d) {
259 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
260 return NULL;
261 }
262 output_size += d;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100263 }
264
265 rval = PyUnicode_New(output_size, maxchar);
266 if (rval == NULL)
267 return NULL;
268
269 kind = PyUnicode_KIND(rval);
270
271#define ENCODE_OUTPUT do { \
272 chars = 0; \
273 output[chars++] = '"'; \
274 for (i = 0; i < input_chars; i++) { \
275 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
276 switch (c) { \
277 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
278 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
279 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
280 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
281 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
282 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
283 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
284 default: \
285 if (c <= 0x1f) { \
286 output[chars++] = '\\'; \
287 output[chars++] = 'u'; \
288 output[chars++] = '0'; \
289 output[chars++] = '0'; \
290 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
291 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
292 } else { \
293 output[chars++] = c; \
294 } \
295 } \
296 } \
297 output[chars++] = '"'; \
298 } while (0)
299
300 if (kind == PyUnicode_1BYTE_KIND) {
301 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
302 ENCODE_OUTPUT;
303 } else if (kind == PyUnicode_2BYTE_KIND) {
304 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
305 ENCODE_OUTPUT;
306 } else {
307 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
308 assert(kind == PyUnicode_4BYTE_KIND);
309 ENCODE_OUTPUT;
310 }
311#undef ENCODE_OUTPUT
312
313#ifdef Py_DEBUG
314 assert(_PyUnicode_CheckConsistency(rval, 1));
315#endif
316 return rval;
317}
318
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000319static void
Serhiy Storchakaef1585e2015-12-25 20:01:53 +0200320raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
Christian Heimes90540002008-05-08 14:29:10 +0000321{
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200322 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
323 static PyObject *JSONDecodeError = NULL;
324 PyObject *exc;
325 if (JSONDecodeError == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000326 PyObject *decoder = PyImport_ImportModule("json.decoder");
327 if (decoder == NULL)
328 return;
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200329 JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000330 Py_DECREF(decoder);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200331 if (JSONDecodeError == NULL)
Christian Heimes90540002008-05-08 14:29:10 +0000332 return;
Christian Heimes90540002008-05-08 14:29:10 +0000333 }
Victor Stinner4c381542016-12-09 00:33:39 +0100334 exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200335 if (exc) {
336 PyErr_SetObject(JSONDecodeError, exc);
337 Py_DECREF(exc);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000338 }
Christian Heimes90540002008-05-08 14:29:10 +0000339}
340
Ezio Melotti37623ab2013-01-03 08:44:15 +0200341static void
342raise_stop_iteration(Py_ssize_t idx)
343{
344 PyObject *value = PyLong_FromSsize_t(idx);
345 if (value != NULL) {
346 PyErr_SetObject(PyExc_StopIteration, value);
347 Py_DECREF(value);
348 }
349}
350
Christian Heimes90540002008-05-08 14:29:10 +0000351static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000352_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
353 /* return (rval, idx) tuple, stealing reference to rval */
354 PyObject *tpl;
355 PyObject *pyidx;
356 /*
357 steal a reference to rval, returns (rval, idx)
358 */
359 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000360 return NULL;
361 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000362 pyidx = PyLong_FromSsize_t(idx);
363 if (pyidx == NULL) {
364 Py_DECREF(rval);
365 return NULL;
366 }
367 tpl = PyTuple_New(2);
368 if (tpl == NULL) {
369 Py_DECREF(pyidx);
370 Py_DECREF(rval);
371 return NULL;
372 }
373 PyTuple_SET_ITEM(tpl, 0, rval);
374 PyTuple_SET_ITEM(tpl, 1, pyidx);
375 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000376}
377
/* Flush the pending `chunk` (if any) into the `chunks` list, creating the
   list on first use, and reset `chunk` to NULL.  Expects locals named
   `chunk` and `chunks` and a `bail` label in the enclosing function; jumps
   to `bail` on failure (with `chunk` already cleared if the append itself
   failed).  Expands to a bare if-statement, so it is used without a
   trailing semicolon. */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        int append_status; \
        if (chunks == NULL) { \
            chunks = PyList_New(0); \
            if (chunks == NULL) { \
                goto bail; \
            } \
        } \
        append_status = PyList_Append(chunks, chunk); \
        Py_CLEAR(chunk); \
        if (append_status) { \
            goto bail; \
        } \
    }
392
Christian Heimes90540002008-05-08 14:29:10 +0000393static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000394scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000395{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000396 /* Read the JSON string from PyUnicode pystr.
397 end is the index of the first character after the quote.
398 if strict is zero then literal control characters are allowed
399 *next_end_ptr is a return-by-reference index of the character
400 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000401
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000402 Return value is a new PyUnicode
403 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000404 PyObject *rval = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200405 Py_ssize_t len;
Christian Heimes90540002008-05-08 14:29:10 +0000406 Py_ssize_t begin = end - 1;
Brett Cannonb94767f2011-02-22 20:15:44 +0000407 Py_ssize_t next /* = begin */;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200408 const void *buf;
409 int kind;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000410 PyObject *chunks = NULL;
411 PyObject *chunk = NULL;
412
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200413 if (PyUnicode_READY(pystr) == -1)
414 return 0;
415
416 len = PyUnicode_GET_LENGTH(pystr);
417 buf = PyUnicode_DATA(pystr);
418 kind = PyUnicode_KIND(pystr);
419
Ezio Melotti37623ab2013-01-03 08:44:15 +0200420 if (end < 0 || len < end) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000421 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
422 goto bail;
423 }
Christian Heimes90540002008-05-08 14:29:10 +0000424 while (1) {
425 /* Find the end of the string or the next escape */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200426 Py_UCS4 c = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000427 for (next = end; next < len; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200428 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000429 if (c == '"' || c == '\\') {
430 break;
431 }
432 else if (strict && c <= 0x1f) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000433 raise_errmsg("Invalid control character at", pystr, next);
Christian Heimes90540002008-05-08 14:29:10 +0000434 goto bail;
435 }
436 }
437 if (!(c == '"' || c == '\\')) {
438 raise_errmsg("Unterminated string starting at", pystr, begin);
439 goto bail;
440 }
441 /* Pick up this chunk if it's not zero length */
442 if (next != end) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000443 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200444 chunk = PyUnicode_FromKindAndData(
445 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200446 (char*)buf + kind * end,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200447 next - end);
Christian Heimes90540002008-05-08 14:29:10 +0000448 if (chunk == NULL) {
449 goto bail;
450 }
Christian Heimes90540002008-05-08 14:29:10 +0000451 }
452 next++;
453 if (c == '"') {
454 end = next;
455 break;
456 }
457 if (next == len) {
458 raise_errmsg("Unterminated string starting at", pystr, begin);
459 goto bail;
460 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200461 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000462 if (c != 'u') {
463 /* Non-unicode backslash escapes */
464 end = next + 1;
465 switch (c) {
466 case '"': break;
467 case '\\': break;
468 case '/': break;
469 case 'b': c = '\b'; break;
470 case 'f': c = '\f'; break;
471 case 'n': c = '\n'; break;
472 case 'r': c = '\r'; break;
473 case 't': c = '\t'; break;
474 default: c = 0;
475 }
476 if (c == 0) {
477 raise_errmsg("Invalid \\escape", pystr, end - 2);
478 goto bail;
479 }
480 }
481 else {
482 c = 0;
483 next++;
484 end = next + 4;
485 if (end >= len) {
486 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
487 goto bail;
488 }
489 /* Decode 4 hex digits */
490 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200491 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000492 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000493 switch (digit) {
494 case '0': case '1': case '2': case '3': case '4':
495 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000496 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000497 case 'a': case 'b': case 'c': case 'd': case 'e':
498 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000499 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000500 case 'A': case 'B': case 'C': case 'D': case 'E':
501 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000502 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000503 default:
504 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
505 goto bail;
506 }
507 }
Christian Heimes90540002008-05-08 14:29:10 +0000508 /* Surrogate pair */
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200509 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
510 PyUnicode_READ(kind, buf, next++) == '\\' &&
511 PyUnicode_READ(kind, buf, next++) == 'u') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200512 Py_UCS4 c2 = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000513 end += 6;
514 /* Decode 4 hex digits */
515 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200516 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000517 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000518 switch (digit) {
519 case '0': case '1': case '2': case '3': case '4':
520 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000521 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000522 case 'a': case 'b': case 'c': case 'd': case 'e':
523 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000524 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000525 case 'A': case 'B': case 'C': case 'D': case 'E':
526 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000527 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000528 default:
529 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
530 goto bail;
531 }
532 }
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200533 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
534 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
535 else
536 end -= 6;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000537 }
Christian Heimes90540002008-05-08 14:29:10 +0000538 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000539 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200540 chunk = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &c, 1);
Christian Heimes90540002008-05-08 14:29:10 +0000541 if (chunk == NULL) {
542 goto bail;
543 }
Christian Heimes90540002008-05-08 14:29:10 +0000544 }
545
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000546 if (chunks == NULL) {
547 if (chunk != NULL)
548 rval = chunk;
549 else
550 rval = PyUnicode_FromStringAndSize("", 0);
Christian Heimes90540002008-05-08 14:29:10 +0000551 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000552 else {
553 APPEND_OLD_CHUNK
554 rval = join_list_unicode(chunks);
555 if (rval == NULL) {
556 goto bail;
557 }
558 Py_CLEAR(chunks);
559 }
560
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000561 *next_end_ptr = end;
562 return rval;
Christian Heimes90540002008-05-08 14:29:10 +0000563bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000564 *next_end_ptr = -1;
Christian Heimes90540002008-05-08 14:29:10 +0000565 Py_XDECREF(chunks);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000566 Py_XDECREF(chunk);
Christian Heimes90540002008-05-08 14:29:10 +0000567 return NULL;
568}
569
570PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000571 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000572 "\n"
573 "Scan the string s for a JSON string. End is the index of the\n"
574 "character in s after the quote that started the JSON string.\n"
575 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
576 "on attempt to decode an invalid string. If strict is False then literal\n"
577 "control characters are allowed in the string.\n"
578 "\n"
579 "Returns a tuple of the decoded string and the index of the character in s\n"
580 "after the end quote."
581);
Christian Heimes90540002008-05-08 14:29:10 +0000582
583static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000584py_scanstring(PyObject* self UNUSED, PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000585{
586 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000587 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000588 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000589 Py_ssize_t next_end = -1;
590 int strict = 1;
Antoine Pitroucbb02842012-12-01 19:34:16 +0100591 if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000592 return NULL;
593 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000594 if (PyUnicode_Check(pystr)) {
595 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000596 }
597 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000598 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000599 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000600 Py_TYPE(pystr)->tp_name);
601 return NULL;
602 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000603 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000604}
605
606PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000607 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000608 "\n"
609 "Return an ASCII-only JSON representation of a Python string"
610);
Christian Heimes90540002008-05-08 14:29:10 +0000611
612static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000613py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000614{
615 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000616 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000617 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000618 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000619 rval = ascii_escape_unicode(pystr);
620 }
621 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000622 PyErr_Format(PyExc_TypeError,
623 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000624 Py_TYPE(pystr)->tp_name);
625 return NULL;
626 }
Christian Heimes90540002008-05-08 14:29:10 +0000627 return rval;
628}
629
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100630
631PyDoc_STRVAR(pydoc_encode_basestring,
632 "encode_basestring(string) -> string\n"
633 "\n"
634 "Return a JSON representation of a Python string"
635);
636
637static PyObject *
638py_encode_basestring(PyObject* self UNUSED, PyObject *pystr)
639{
640 PyObject *rval;
641 /* Return a JSON representation of a Python string */
642 /* METH_O */
643 if (PyUnicode_Check(pystr)) {
644 rval = escape_unicode(pystr);
645 }
646 else {
647 PyErr_Format(PyExc_TypeError,
648 "first argument must be a string, not %.80s",
649 Py_TYPE(pystr)->tp_name);
650 return NULL;
651 }
652 return rval;
653}
654
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000655static void
656scanner_dealloc(PyObject *self)
657{
658 /* Deallocate scanner object */
659 scanner_clear(self);
660 Py_TYPE(self)->tp_free(self);
661}
662
663static int
664scanner_traverse(PyObject *self, visitproc visit, void *arg)
665{
666 PyScannerObject *s;
667 assert(PyScanner_Check(self));
668 s = (PyScannerObject *)self;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000669 Py_VISIT(s->object_hook);
670 Py_VISIT(s->object_pairs_hook);
671 Py_VISIT(s->parse_float);
672 Py_VISIT(s->parse_int);
673 Py_VISIT(s->parse_constant);
674 return 0;
675}
676
677static int
678scanner_clear(PyObject *self)
679{
680 PyScannerObject *s;
681 assert(PyScanner_Check(self));
682 s = (PyScannerObject *)self;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000683 Py_CLEAR(s->object_hook);
684 Py_CLEAR(s->object_pairs_hook);
685 Py_CLEAR(s->parse_float);
686 Py_CLEAR(s->parse_int);
687 Py_CLEAR(s->parse_constant);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000688 Py_CLEAR(s->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000689 return 0;
690}
691
692static PyObject *
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300693_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
694{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000695 /* Read a JSON object from PyUnicode pystr.
696 idx is the index of the first character after the opening curly brace.
697 *next_idx_ptr is a return-by-reference index to the first character after
698 the closing curly brace.
699
700 Returns a new PyObject (usually a dict, but object_hook can change that)
701 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200702 void *str;
703 int kind;
704 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000705 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000706 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000707 PyObject *key = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000708 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000709 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000710
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200711 if (PyUnicode_READY(pystr) == -1)
712 return NULL;
713
714 str = PyUnicode_DATA(pystr);
715 kind = PyUnicode_KIND(pystr);
716 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
717
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000718 if (has_pairs_hook)
719 rval = PyList_New(0);
720 else
721 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000722 if (rval == NULL)
723 return NULL;
724
725 /* skip whitespace after { */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200726 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000727
728 /* only loop if the object is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200729 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
730 while (1) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000731 PyObject *memokey;
732
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000733 /* read key */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200734 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200735 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000736 goto bail;
737 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +0300738 key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000739 if (key == NULL)
740 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000741 memokey = PyDict_GetItem(s->memo, key);
742 if (memokey != NULL) {
743 Py_INCREF(memokey);
744 Py_DECREF(key);
745 key = memokey;
746 }
747 else {
748 if (PyDict_SetItem(s->memo, key, key) < 0)
749 goto bail;
750 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000751 idx = next_idx;
752
753 /* skip whitespace between key and : delimiter, read :, skip whitespace */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200754 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
755 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200756 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000757 goto bail;
758 }
759 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200760 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000761
762 /* read any JSON term */
763 val = scan_once_unicode(s, pystr, idx, &next_idx);
764 if (val == NULL)
765 goto bail;
766
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000767 if (has_pairs_hook) {
768 PyObject *item = PyTuple_Pack(2, key, val);
769 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000770 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000771 Py_CLEAR(key);
772 Py_CLEAR(val);
773 if (PyList_Append(rval, item) == -1) {
774 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000775 goto bail;
776 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000777 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000778 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000779 else {
780 if (PyDict_SetItem(rval, key, val) < 0)
781 goto bail;
782 Py_CLEAR(key);
783 Py_CLEAR(val);
784 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000785 idx = next_idx;
786
787 /* skip whitespace before } or , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200788 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000789
790 /* bail if the object is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200791 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000792 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200793 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200794 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000795 goto bail;
796 }
797 idx++;
798
799 /* skip whitespace after , delimiter */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200800 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000801 }
802 }
803
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000804 *next_idx_ptr = idx + 1;
805
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000806 if (has_pairs_hook) {
Victor Stinnerde4ae3d2016-12-04 22:59:09 +0100807 val = PyObject_CallFunctionObjArgs(s->object_pairs_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000808 Py_DECREF(rval);
809 return val;
810 }
811
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000812 /* if object_hook is not None: rval = object_hook(rval) */
813 if (s->object_hook != Py_None) {
Victor Stinnerde4ae3d2016-12-04 22:59:09 +0100814 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000815 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000816 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000817 }
818 return rval;
819bail:
820 Py_XDECREF(key);
821 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000822 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000823 return NULL;
824}
825
826static PyObject *
827_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200828 /* Read a JSON array from PyUnicode pystr.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000829 idx is the index of the first character after the opening brace.
830 *next_idx_ptr is a return-by-reference index to the first character after
831 the closing brace.
832
833 Returns a new PyList
834 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200835 void *str;
836 int kind;
837 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000838 PyObject *val = NULL;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200839 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000840 Py_ssize_t next_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000841
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200842 if (PyUnicode_READY(pystr) == -1)
843 return NULL;
844
Serhiy Storchaka21fe7212017-01-03 11:17:44 +0200845 rval = PyList_New(0);
846 if (rval == NULL)
847 return NULL;
848
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200849 str = PyUnicode_DATA(pystr);
850 kind = PyUnicode_KIND(pystr);
851 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
852
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000853 /* skip whitespace after [ */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200854 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000855
856 /* only loop if the array is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200857 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
858 while (1) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000859
860 /* read any JSON term */
861 val = scan_once_unicode(s, pystr, idx, &next_idx);
862 if (val == NULL)
863 goto bail;
864
865 if (PyList_Append(rval, val) == -1)
866 goto bail;
867
868 Py_CLEAR(val);
869 idx = next_idx;
870
871 /* skip whitespace between term and , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200872 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000873
874 /* bail if the array is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200875 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000876 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200877 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200878 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000879 goto bail;
880 }
881 idx++;
882
883 /* skip whitespace after , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200884 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000885 }
886 }
887
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200888 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
889 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200890 raise_errmsg("Expecting value", pystr, end_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000891 goto bail;
892 }
893 *next_idx_ptr = idx + 1;
894 return rval;
895bail:
896 Py_XDECREF(val);
897 Py_DECREF(rval);
898 return NULL;
899}
900
901static PyObject *
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200902_parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
903 /* Read a JSON constant.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000904 constant is the constant string that was found
905 ("NaN", "Infinity", "-Infinity").
906 idx is the index of the first character of the constant
907 *next_idx_ptr is a return-by-reference index to the first character after
908 the constant.
909
910 Returns the result of parse_constant
911 */
912 PyObject *cstr;
913 PyObject *rval;
914 /* constant is "NaN", "Infinity", or "-Infinity" */
915 cstr = PyUnicode_InternFromString(constant);
916 if (cstr == NULL)
917 return NULL;
918
919 /* rval = parse_constant(constant) */
Victor Stinnerde4ae3d2016-12-04 22:59:09 +0100920 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200921 idx += PyUnicode_GET_LENGTH(cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000922 Py_DECREF(cstr);
923 *next_idx_ptr = idx;
924 return rval;
925}
926
927static PyObject *
928_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
929 /* Read a JSON number from PyUnicode pystr.
930 idx is the index of the first character of the number
931 *next_idx_ptr is a return-by-reference index to the first character after
932 the number.
933
934 Returns a new PyObject representation of that number:
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200935 PyLong, or PyFloat.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000936 May return other types if parse_int or parse_float are set
937 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200938 void *str;
939 int kind;
940 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000941 Py_ssize_t idx = start;
942 int is_float = 0;
943 PyObject *rval;
Antoine Pitrouf6454512011-04-25 19:16:06 +0200944 PyObject *numstr = NULL;
945 PyObject *custom_func;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000946
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200947 if (PyUnicode_READY(pystr) == -1)
948 return NULL;
949
950 str = PyUnicode_DATA(pystr);
951 kind = PyUnicode_KIND(pystr);
952 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
953
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000954 /* read a sign if it's there, make sure it's not the end of the string */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200955 if (PyUnicode_READ(kind, str, idx) == '-') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000956 idx++;
957 if (idx > end_idx) {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200958 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000959 return NULL;
960 }
961 }
962
963 /* read as many integer digits as we find as long as it doesn't start with 0 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200964 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000965 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200966 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000967 }
968 /* if it starts with 0 we only expect one integer digit */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200969 else if (PyUnicode_READ(kind, str, idx) == '0') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000970 idx++;
971 }
972 /* no integer digits, error */
973 else {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200974 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000975 return NULL;
976 }
977
978 /* if the next char is '.' followed by a digit then read all float digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200979 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000980 is_float = 1;
981 idx += 2;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200982 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000983 }
984
985 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200986 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000987 Py_ssize_t e_start = idx;
988 idx++;
989
990 /* read an exponent sign if present */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200991 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000992
993 /* read all digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200994 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000995
996 /* if we got a digit, then parse as float. if not, backtrack */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200997 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000998 is_float = 1;
999 }
1000 else {
1001 idx = e_start;
1002 }
1003 }
1004
Antoine Pitrouf6454512011-04-25 19:16:06 +02001005 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
1006 custom_func = s->parse_float;
1007 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
1008 custom_func = s->parse_int;
1009 else
1010 custom_func = NULL;
1011
1012 if (custom_func) {
1013 /* copy the section we determined to be a number */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001014 numstr = PyUnicode_FromKindAndData(kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001015 (char*)str + kind * start,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001016 idx - start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001017 if (numstr == NULL)
1018 return NULL;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001019 rval = PyObject_CallFunctionObjArgs(custom_func, numstr, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001020 }
1021 else {
Antoine Pitrouf6454512011-04-25 19:16:06 +02001022 Py_ssize_t i, n;
1023 char *buf;
1024 /* Straight conversion to ASCII, to avoid costly conversion of
1025 decimal unicode digits (which cannot appear here) */
1026 n = idx - start;
1027 numstr = PyBytes_FromStringAndSize(NULL, n);
1028 if (numstr == NULL)
1029 return NULL;
1030 buf = PyBytes_AS_STRING(numstr);
1031 for (i = 0; i < n; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001032 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001033 }
1034 if (is_float)
1035 rval = PyFloat_FromString(numstr);
1036 else
1037 rval = PyLong_FromString(buf, NULL, 10);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001038 }
1039 Py_DECREF(numstr);
1040 *next_idx_ptr = idx;
1041 return rval;
1042}
1043
1044static PyObject *
1045scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1046{
1047 /* Read one JSON term (of any kind) from PyUnicode pystr.
1048 idx is the index of the first character of the term
1049 *next_idx_ptr is a return-by-reference index to the first character after
1050 the number.
1051
1052 Returns a new PyObject representation of the term.
1053 */
Ezio Melotti362b9512011-05-07 17:58:09 +03001054 PyObject *res;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001055 void *str;
1056 int kind;
1057 Py_ssize_t length;
1058
1059 if (PyUnicode_READY(pystr) == -1)
1060 return NULL;
1061
1062 str = PyUnicode_DATA(pystr);
1063 kind = PyUnicode_KIND(pystr);
1064 length = PyUnicode_GET_LENGTH(pystr);
1065
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001066 if (idx < 0) {
Benjamin Peterson9beee042014-04-14 11:46:51 -04001067 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001068 return NULL;
1069 }
1070 if (idx >= length) {
Ezio Melotti37623ab2013-01-03 08:44:15 +02001071 raise_stop_iteration(idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001072 return NULL;
1073 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001074
1075 switch (PyUnicode_READ(kind, str, idx)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001076 case '"':
1077 /* string */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001078 return scanstring_unicode(pystr, idx + 1, s->strict, next_idx_ptr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001079 case '{':
1080 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +03001081 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1082 "from a unicode string"))
1083 return NULL;
1084 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1085 Py_LeaveRecursiveCall();
1086 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001087 case '[':
1088 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +03001089 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1090 "from a unicode string"))
1091 return NULL;
1092 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1093 Py_LeaveRecursiveCall();
1094 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001095 case 'n':
1096 /* null */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001097 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001098 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001099 Py_RETURN_NONE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001100 }
1101 break;
1102 case 't':
1103 /* true */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001104 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001105 *next_idx_ptr = idx + 4;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001106 Py_RETURN_TRUE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001107 }
1108 break;
1109 case 'f':
1110 /* false */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001111 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1112 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1113 PyUnicode_READ(kind, str, idx + 3) == 's' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001114 PyUnicode_READ(kind, str, idx + 4) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001115 *next_idx_ptr = idx + 5;
Serhiy Storchakad1302c02017-01-23 10:23:58 +02001116 Py_RETURN_FALSE;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001117 }
1118 break;
1119 case 'N':
1120 /* NaN */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001121 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001122 PyUnicode_READ(kind, str, idx + 2) == 'N') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001123 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1124 }
1125 break;
1126 case 'I':
1127 /* Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001128 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1129 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1130 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001131 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001132 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1133 PyUnicode_READ(kind, str, idx + 6) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001134 PyUnicode_READ(kind, str, idx + 7) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001135 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1136 }
1137 break;
1138 case '-':
1139 /* -Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001140 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001141 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1142 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001143 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001144 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001145 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1146 PyUnicode_READ(kind, str, idx + 7) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001147 PyUnicode_READ(kind, str, idx + 8) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001148 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1149 }
1150 break;
1151 }
1152 /* Didn't find a string, object, array, or named constant. Look for a number. */
1153 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1154}
1155
1156static PyObject *
1157scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1158{
1159 /* Python callable interface to scan_once_{str,unicode} */
1160 PyObject *pystr;
1161 PyObject *rval;
1162 Py_ssize_t idx;
1163 Py_ssize_t next_idx = -1;
1164 static char *kwlist[] = {"string", "idx", NULL};
1165 PyScannerObject *s;
1166 assert(PyScanner_Check(self));
1167 s = (PyScannerObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001168 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001169 return NULL;
1170
1171 if (PyUnicode_Check(pystr)) {
1172 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1173 }
1174 else {
1175 PyErr_Format(PyExc_TypeError,
1176 "first argument must be a string, not %.80s",
1177 Py_TYPE(pystr)->tp_name);
1178 return NULL;
1179 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001180 PyDict_Clear(s->memo);
1181 if (rval == NULL)
1182 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001183 return _build_rval_index_tuple(rval, next_idx);
1184}
1185
1186static PyObject *
1187scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1188{
1189 PyScannerObject *s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001190 PyObject *ctx;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001191 PyObject *strict;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001192 static char *kwlist[] = {"context", NULL};
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001193
1194 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001195 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001196
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001197 s = (PyScannerObject *)type->tp_alloc(type, 0);
1198 if (s == NULL) {
1199 return NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001200 }
1201
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001202 s->memo = PyDict_New();
1203 if (s->memo == NULL)
1204 goto bail;
1205
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001206 /* All of these will fail "gracefully" so we don't need to verify them */
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001207 strict = PyObject_GetAttrString(ctx, "strict");
1208 if (strict == NULL)
1209 goto bail;
1210 s->strict = PyObject_IsTrue(strict);
1211 Py_DECREF(strict);
1212 if (s->strict < 0)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001213 goto bail;
1214 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1215 if (s->object_hook == NULL)
1216 goto bail;
1217 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1218 if (s->object_pairs_hook == NULL)
1219 goto bail;
1220 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1221 if (s->parse_float == NULL)
1222 goto bail;
1223 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1224 if (s->parse_int == NULL)
1225 goto bail;
1226 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1227 if (s->parse_constant == NULL)
1228 goto bail;
1229
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001230 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001231
1232bail:
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001233 Py_DECREF(s);
1234 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001235}
1236
1237PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1238
1239static
1240PyTypeObject PyScannerType = {
1241 PyVarObject_HEAD_INIT(NULL, 0)
1242 "_json.Scanner", /* tp_name */
1243 sizeof(PyScannerObject), /* tp_basicsize */
1244 0, /* tp_itemsize */
1245 scanner_dealloc, /* tp_dealloc */
1246 0, /* tp_print */
1247 0, /* tp_getattr */
1248 0, /* tp_setattr */
1249 0, /* tp_compare */
1250 0, /* tp_repr */
1251 0, /* tp_as_number */
1252 0, /* tp_as_sequence */
1253 0, /* tp_as_mapping */
1254 0, /* tp_hash */
1255 scanner_call, /* tp_call */
1256 0, /* tp_str */
1257 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1258 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1259 0, /* tp_as_buffer */
1260 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1261 scanner_doc, /* tp_doc */
1262 scanner_traverse, /* tp_traverse */
1263 scanner_clear, /* tp_clear */
1264 0, /* tp_richcompare */
1265 0, /* tp_weaklistoffset */
1266 0, /* tp_iter */
1267 0, /* tp_iternext */
1268 0, /* tp_methods */
1269 scanner_members, /* tp_members */
1270 0, /* tp_getset */
1271 0, /* tp_base */
1272 0, /* tp_dict */
1273 0, /* tp_descr_get */
1274 0, /* tp_descr_set */
1275 0, /* tp_dictoffset */
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001276 0, /* tp_init */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001277 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1278 scanner_new, /* tp_new */
1279 0,/* PyObject_GC_Del, */ /* tp_free */
1280};
1281
1282static PyObject *
1283encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1284{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001285 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1286
1287 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001288 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001289 PyObject *item_separator;
1290 int sort_keys, skipkeys, allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001291
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001292 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001293 &markers, &defaultfn, &encoder, &indent,
1294 &key_separator, &item_separator,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001295 &sort_keys, &skipkeys, &allow_nan))
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001296 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001297
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001298 if (markers != Py_None && !PyDict_Check(markers)) {
1299 PyErr_Format(PyExc_TypeError,
1300 "make_encoder() argument 1 must be dict or None, "
1301 "not %.200s", Py_TYPE(markers)->tp_name);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001302 return NULL;
Serhiy Storchaka83236f72015-07-26 09:01:22 +03001303 }
1304
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001305 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1306 if (s == NULL)
1307 return NULL;
1308
Antoine Pitrou781eba72009-12-08 15:57:31 +00001309 s->markers = markers;
1310 s->defaultfn = defaultfn;
1311 s->encoder = encoder;
1312 s->indent = indent;
1313 s->key_separator = key_separator;
1314 s->item_separator = item_separator;
1315 s->sort_keys = sort_keys;
1316 s->skipkeys = skipkeys;
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001317 s->allow_nan = allow_nan;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001318 s->fast_encode = NULL;
1319 if (PyCFunction_Check(s->encoder)) {
1320 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1321 if (f == (PyCFunction)py_encode_basestring_ascii ||
1322 f == (PyCFunction)py_encode_basestring) {
1323 s->fast_encode = f;
1324 }
1325 }
Antoine Pitrou781eba72009-12-08 15:57:31 +00001326
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001327 Py_INCREF(s->markers);
1328 Py_INCREF(s->defaultfn);
1329 Py_INCREF(s->encoder);
1330 Py_INCREF(s->indent);
1331 Py_INCREF(s->key_separator);
1332 Py_INCREF(s->item_separator);
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001333 return (PyObject *)s;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001334}
1335
1336static PyObject *
1337encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1338{
1339 /* Python callable interface to encode_listencode_obj */
1340 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1341 PyObject *obj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001342 Py_ssize_t indent_level;
1343 PyEncoderObject *s;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001344 _PyAccu acc;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001345
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001346 assert(PyEncoder_Check(self));
1347 s = (PyEncoderObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001348 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1349 &obj, &indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001350 return NULL;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001351 if (_PyAccu_Init(&acc))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001352 return NULL;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001353 if (encoder_listencode_obj(s, &acc, obj, indent_level)) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001354 _PyAccu_Destroy(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001355 return NULL;
1356 }
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001357 return _PyAccu_FinishAsList(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001358}
1359
1360static PyObject *
1361_encoded_const(PyObject *obj)
1362{
1363 /* Return the JSON string representation of None, True, False */
1364 if (obj == Py_None) {
1365 static PyObject *s_null = NULL;
1366 if (s_null == NULL) {
1367 s_null = PyUnicode_InternFromString("null");
1368 }
1369 Py_INCREF(s_null);
1370 return s_null;
1371 }
1372 else if (obj == Py_True) {
1373 static PyObject *s_true = NULL;
1374 if (s_true == NULL) {
1375 s_true = PyUnicode_InternFromString("true");
1376 }
1377 Py_INCREF(s_true);
1378 return s_true;
1379 }
1380 else if (obj == Py_False) {
1381 static PyObject *s_false = NULL;
1382 if (s_false == NULL) {
1383 s_false = PyUnicode_InternFromString("false");
1384 }
1385 Py_INCREF(s_false);
1386 return s_false;
1387 }
1388 else {
1389 PyErr_SetString(PyExc_ValueError, "not a const");
1390 return NULL;
1391 }
1392}
1393
1394static PyObject *
1395encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1396{
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001397 /* Return the JSON representation of a PyFloat. */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001398 double i = PyFloat_AS_DOUBLE(obj);
1399 if (!Py_IS_FINITE(i)) {
1400 if (!s->allow_nan) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001401 PyErr_SetString(
1402 PyExc_ValueError,
1403 "Out of range float values are not JSON compliant"
1404 );
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001405 return NULL;
1406 }
1407 if (i > 0) {
1408 return PyUnicode_FromString("Infinity");
1409 }
1410 else if (i < 0) {
1411 return PyUnicode_FromString("-Infinity");
1412 }
1413 else {
1414 return PyUnicode_FromString("NaN");
1415 }
1416 }
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001417 return PyFloat_Type.tp_repr(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001418}
1419
1420static PyObject *
1421encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1422{
1423 /* Return the JSON representation of a string */
1424 if (s->fast_encode)
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001425 return s->fast_encode(NULL, obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001426 else
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001427 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001428}
1429
1430static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001431_steal_accumulate(_PyAccu *acc, PyObject *stolen)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001432{
1433 /* Append stolen and then decrement its reference count */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001434 int rval = _PyAccu_Accumulate(acc, stolen);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001435 Py_DECREF(stolen);
1436 return rval;
1437}
1438
1439static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001440encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001441 PyObject *obj, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001442{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001443 /* Encode Python object obj to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001444 PyObject *newobj;
1445 int rv;
1446
1447 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1448 PyObject *cstr = _encoded_const(obj);
1449 if (cstr == NULL)
1450 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001451 return _steal_accumulate(acc, cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001452 }
1453 else if (PyUnicode_Check(obj))
1454 {
1455 PyObject *encoded = encoder_encode_string(s, obj);
1456 if (encoded == NULL)
1457 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001458 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001459 }
1460 else if (PyLong_Check(obj)) {
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001461 PyObject *encoded = PyLong_Type.tp_str(obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001462 if (encoded == NULL)
1463 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001464 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001465 }
1466 else if (PyFloat_Check(obj)) {
1467 PyObject *encoded = encoder_encode_float(s, obj);
1468 if (encoded == NULL)
1469 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001470 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001471 }
1472 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001473 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1474 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001475 rv = encoder_listencode_list(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001476 Py_LeaveRecursiveCall();
1477 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001478 }
1479 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001480 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1481 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001482 rv = encoder_listencode_dict(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001483 Py_LeaveRecursiveCall();
1484 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001485 }
1486 else {
1487 PyObject *ident = NULL;
1488 if (s->markers != Py_None) {
1489 int has_key;
1490 ident = PyLong_FromVoidPtr(obj);
1491 if (ident == NULL)
1492 return -1;
1493 has_key = PyDict_Contains(s->markers, ident);
1494 if (has_key) {
1495 if (has_key != -1)
1496 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1497 Py_DECREF(ident);
1498 return -1;
1499 }
1500 if (PyDict_SetItem(s->markers, ident, obj)) {
1501 Py_DECREF(ident);
1502 return -1;
1503 }
1504 }
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001505 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001506 if (newobj == NULL) {
1507 Py_XDECREF(ident);
1508 return -1;
1509 }
Ezio Melotti13672652011-05-11 01:02:56 +03001510
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001511 if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
1512 Py_DECREF(newobj);
1513 Py_XDECREF(ident);
Ezio Melotti13672652011-05-11 01:02:56 +03001514 return -1;
Serhiy Storchaka21fe7212017-01-03 11:17:44 +02001515 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001516 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001517 Py_LeaveRecursiveCall();
1518
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001519 Py_DECREF(newobj);
1520 if (rv) {
1521 Py_XDECREF(ident);
1522 return -1;
1523 }
1524 if (ident != NULL) {
1525 if (PyDict_DelItem(s->markers, ident)) {
1526 Py_XDECREF(ident);
1527 return -1;
1528 }
1529 Py_XDECREF(ident);
1530 }
1531 return rv;
1532 }
1533}
1534
1535static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001536encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001537 PyObject *dct, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001538{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001539 /* Encode Python dict dct a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001540 static PyObject *open_dict = NULL;
1541 static PyObject *close_dict = NULL;
1542 static PyObject *empty_dict = NULL;
1543 PyObject *kstr = NULL;
1544 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001545 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001546 PyObject *items;
1547 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001548 Py_ssize_t idx;
1549
1550 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1551 open_dict = PyUnicode_InternFromString("{");
1552 close_dict = PyUnicode_InternFromString("}");
1553 empty_dict = PyUnicode_InternFromString("{}");
1554 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1555 return -1;
1556 }
Serhiy Storchaka8cbc51a2017-01-13 08:38:15 +02001557 if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001558 return _PyAccu_Accumulate(acc, empty_dict);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001559
1560 if (s->markers != Py_None) {
1561 int has_key;
1562 ident = PyLong_FromVoidPtr(dct);
1563 if (ident == NULL)
1564 goto bail;
1565 has_key = PyDict_Contains(s->markers, ident);
1566 if (has_key) {
1567 if (has_key != -1)
1568 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1569 goto bail;
1570 }
1571 if (PyDict_SetItem(s->markers, ident, dct)) {
1572 goto bail;
1573 }
1574 }
1575
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001576 if (_PyAccu_Accumulate(acc, open_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001577 goto bail;
1578
1579 if (s->indent != Py_None) {
1580 /* TODO: DOES NOT RUN */
1581 indent_level += 1;
1582 /*
1583 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1584 separator = _item_separator + newline_indent
1585 buf += newline_indent
1586 */
1587 }
1588
Benjamin Peterson501182a2015-05-02 22:28:04 -04001589 items = PyMapping_Items(dct);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001590 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001591 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001592 if (s->sort_keys && PyList_Sort(items) < 0) {
1593 Py_DECREF(items);
Benjamin Peterson501182a2015-05-02 22:28:04 -04001594 goto bail;
Serhiy Storchaka49f64492017-07-16 07:29:16 +03001595 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001596 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001597 Py_DECREF(items);
1598 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001599 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001600 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001601 while ((item = PyIter_Next(it)) != NULL) {
1602 PyObject *encoded, *key, *value;
Serhiy Storchakafff9a312017-03-21 08:53:25 +02001603 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001604 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1605 goto bail;
1606 }
1607 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001608 if (PyUnicode_Check(key)) {
1609 Py_INCREF(key);
1610 kstr = key;
1611 }
1612 else if (PyFloat_Check(key)) {
1613 kstr = encoder_encode_float(s, key);
1614 if (kstr == NULL)
1615 goto bail;
1616 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001617 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001618 /* This must come before the PyLong_Check because
1619 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001620 kstr = _encoded_const(key);
1621 if (kstr == NULL)
1622 goto bail;
1623 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001624 else if (PyLong_Check(key)) {
Serhiy Storchakae0805cf2016-04-10 14:41:19 +03001625 kstr = PyLong_Type.tp_str(key);
Ethan Furmana4998a72013-08-10 13:01:45 -07001626 if (kstr == NULL) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001627 goto bail;
Ethan Furmana4998a72013-08-10 13:01:45 -07001628 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001629 }
Serhiy Storchakaac5bbd42017-05-28 15:31:49 +03001630 else if (s->skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001631 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001632 continue;
1633 }
1634 else {
1635 /* TODO: include repr of key */
Doug Hellmann1c524752010-07-21 12:29:04 +00001636 PyErr_SetString(PyExc_TypeError, "keys must be a string");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001637 goto bail;
1638 }
1639
1640 if (idx) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001641 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001642 goto bail;
1643 }
1644
1645 encoded = encoder_encode_string(s, kstr);
1646 Py_CLEAR(kstr);
1647 if (encoded == NULL)
1648 goto bail;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001649 if (_PyAccu_Accumulate(acc, encoded)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001650 Py_DECREF(encoded);
1651 goto bail;
1652 }
1653 Py_DECREF(encoded);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001654 if (_PyAccu_Accumulate(acc, s->key_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001655 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001656
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001657 value = PyTuple_GET_ITEM(item, 1);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001658 if (encoder_listencode_obj(s, acc, value, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001659 goto bail;
1660 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001661 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001662 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001663 if (PyErr_Occurred())
1664 goto bail;
1665 Py_CLEAR(it);
1666
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001667 if (ident != NULL) {
1668 if (PyDict_DelItem(s->markers, ident))
1669 goto bail;
1670 Py_CLEAR(ident);
1671 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001672 /* TODO DOES NOT RUN; dead code
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001673 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001674 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001675
1676 yield '\n' + (' ' * (_indent * _current_indent_level))
1677 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001678 if (_PyAccu_Accumulate(acc, close_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001679 goto bail;
1680 return 0;
1681
1682bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001683 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001684 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001685 Py_XDECREF(kstr);
1686 Py_XDECREF(ident);
1687 return -1;
1688}
1689
1690
1691static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001692encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001693 PyObject *seq, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001694{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001695 /* Encode Python list seq to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001696 static PyObject *open_array = NULL;
1697 static PyObject *close_array = NULL;
1698 static PyObject *empty_array = NULL;
1699 PyObject *ident = NULL;
1700 PyObject *s_fast = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001701 Py_ssize_t i;
1702
1703 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1704 open_array = PyUnicode_InternFromString("[");
1705 close_array = PyUnicode_InternFromString("]");
1706 empty_array = PyUnicode_InternFromString("[]");
1707 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1708 return -1;
1709 }
1710 ident = NULL;
1711 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1712 if (s_fast == NULL)
1713 return -1;
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001714 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001715 Py_DECREF(s_fast);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001716 return _PyAccu_Accumulate(acc, empty_array);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001717 }
1718
1719 if (s->markers != Py_None) {
1720 int has_key;
1721 ident = PyLong_FromVoidPtr(seq);
1722 if (ident == NULL)
1723 goto bail;
1724 has_key = PyDict_Contains(s->markers, ident);
1725 if (has_key) {
1726 if (has_key != -1)
1727 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1728 goto bail;
1729 }
1730 if (PyDict_SetItem(s->markers, ident, seq)) {
1731 goto bail;
1732 }
1733 }
1734
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001735 if (_PyAccu_Accumulate(acc, open_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001736 goto bail;
1737 if (s->indent != Py_None) {
1738 /* TODO: DOES NOT RUN */
1739 indent_level += 1;
1740 /*
1741 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1742 separator = _item_separator + newline_indent
1743 buf += newline_indent
1744 */
1745 }
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001746 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1747 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001748 if (i) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001749 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001750 goto bail;
1751 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001752 if (encoder_listencode_obj(s, acc, obj, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001753 goto bail;
1754 }
1755 if (ident != NULL) {
1756 if (PyDict_DelItem(s->markers, ident))
1757 goto bail;
1758 Py_CLEAR(ident);
1759 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001760
1761 /* TODO: DOES NOT RUN
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001762 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001763 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001764
1765 yield '\n' + (' ' * (_indent * _current_indent_level))
1766 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001767 if (_PyAccu_Accumulate(acc, close_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001768 goto bail;
1769 Py_DECREF(s_fast);
1770 return 0;
1771
1772bail:
1773 Py_XDECREF(ident);
1774 Py_DECREF(s_fast);
1775 return -1;
1776}
1777
1778static void
1779encoder_dealloc(PyObject *self)
1780{
1781 /* Deallocate Encoder */
1782 encoder_clear(self);
1783 Py_TYPE(self)->tp_free(self);
1784}
1785
1786static int
1787encoder_traverse(PyObject *self, visitproc visit, void *arg)
1788{
1789 PyEncoderObject *s;
1790 assert(PyEncoder_Check(self));
1791 s = (PyEncoderObject *)self;
1792 Py_VISIT(s->markers);
1793 Py_VISIT(s->defaultfn);
1794 Py_VISIT(s->encoder);
1795 Py_VISIT(s->indent);
1796 Py_VISIT(s->key_separator);
1797 Py_VISIT(s->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001798 return 0;
1799}
1800
1801static int
1802encoder_clear(PyObject *self)
1803{
1804 /* Deallocate Encoder */
1805 PyEncoderObject *s;
1806 assert(PyEncoder_Check(self));
1807 s = (PyEncoderObject *)self;
1808 Py_CLEAR(s->markers);
1809 Py_CLEAR(s->defaultfn);
1810 Py_CLEAR(s->encoder);
1811 Py_CLEAR(s->indent);
1812 Py_CLEAR(s->key_separator);
1813 Py_CLEAR(s->item_separator);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001814 return 0;
1815}
1816
1817PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1818
1819static
1820PyTypeObject PyEncoderType = {
1821 PyVarObject_HEAD_INIT(NULL, 0)
1822 "_json.Encoder", /* tp_name */
1823 sizeof(PyEncoderObject), /* tp_basicsize */
1824 0, /* tp_itemsize */
1825 encoder_dealloc, /* tp_dealloc */
1826 0, /* tp_print */
1827 0, /* tp_getattr */
1828 0, /* tp_setattr */
1829 0, /* tp_compare */
1830 0, /* tp_repr */
1831 0, /* tp_as_number */
1832 0, /* tp_as_sequence */
1833 0, /* tp_as_mapping */
1834 0, /* tp_hash */
1835 encoder_call, /* tp_call */
1836 0, /* tp_str */
1837 0, /* tp_getattro */
1838 0, /* tp_setattro */
1839 0, /* tp_as_buffer */
1840 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1841 encoder_doc, /* tp_doc */
1842 encoder_traverse, /* tp_traverse */
1843 encoder_clear, /* tp_clear */
1844 0, /* tp_richcompare */
1845 0, /* tp_weaklistoffset */
1846 0, /* tp_iter */
1847 0, /* tp_iternext */
1848 0, /* tp_methods */
1849 encoder_members, /* tp_members */
1850 0, /* tp_getset */
1851 0, /* tp_base */
1852 0, /* tp_dict */
1853 0, /* tp_descr_get */
1854 0, /* tp_descr_set */
1855 0, /* tp_dictoffset */
Serhiy Storchaka76a3e512017-05-05 10:08:49 +03001856 0, /* tp_init */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001857 0, /* tp_alloc */
1858 encoder_new, /* tp_new */
1859 0, /* tp_free */
1860};
1861
1862static PyMethodDef speedups_methods[] = {
1863 {"encode_basestring_ascii",
1864 (PyCFunction)py_encode_basestring_ascii,
1865 METH_O,
1866 pydoc_encode_basestring_ascii},
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001867 {"encode_basestring",
1868 (PyCFunction)py_encode_basestring,
1869 METH_O,
1870 pydoc_encode_basestring},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001871 {"scanstring",
1872 (PyCFunction)py_scanstring,
1873 METH_VARARGS,
1874 pydoc_scanstring},
Christian Heimes90540002008-05-08 14:29:10 +00001875 {NULL, NULL, 0, NULL}
1876};
1877
1878PyDoc_STRVAR(module_doc,
1879"json speedups\n");
1880
Martin v. Löwis1a214512008-06-11 05:26:20 +00001881static struct PyModuleDef jsonmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001882 PyModuleDef_HEAD_INIT,
1883 "_json",
1884 module_doc,
1885 -1,
1886 speedups_methods,
1887 NULL,
1888 NULL,
1889 NULL,
1890 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001891};
1892
Victor Stinnerf024d262015-03-17 17:48:27 +01001893PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001894PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00001895{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001896 PyObject *m = PyModule_Create(&jsonmodule);
1897 if (!m)
1898 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001899 if (PyType_Ready(&PyScannerType) < 0)
1900 goto fail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001901 if (PyType_Ready(&PyEncoderType) < 0)
1902 goto fail;
1903 Py_INCREF((PyObject*)&PyScannerType);
1904 if (PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType) < 0) {
1905 Py_DECREF((PyObject*)&PyScannerType);
1906 goto fail;
1907 }
1908 Py_INCREF((PyObject*)&PyEncoderType);
1909 if (PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType) < 0) {
1910 Py_DECREF((PyObject*)&PyEncoderType);
1911 goto fail;
1912 }
1913 return m;
1914 fail:
1915 Py_DECREF(m);
1916 return NULL;
Christian Heimes90540002008-05-08 14:29:10 +00001917}