blob: 3a60243256288f55230c6cecb1f4fdac6c675dc1 [file] [log] [blame]
Christian Heimes90540002008-05-08 14:29:10 +00001#include "Python.h"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00002#include "structmember.h"
Antoine Pitroud0acb412012-03-22 14:42:18 +01003#include "accu.h"
4
/* UNUSED marks a parameter as intentionally unused. With GCC-compatible
   compilers it expands to the `unused` attribute so no warning is issued;
   with any other compiler it expands to nothing. */
#ifdef __GNUC__
#define UNUSED __attribute__((__unused__))
#else
#define UNUSED
#endif

/* Type checks for the two extension types declared below.
   The *_Check forms go through PyObject_TypeCheck(); the *_CheckExact forms
   compare the object's type pointer against the type object directly. */
#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)

/* Declared early so the macros above and the functions below can refer to
   the type objects. NOTE(review): the initialized definitions are not in
   this part of the file -- presumably they follow later. */
static PyTypeObject PyScannerType;
static PyTypeObject PyEncoderType;
18
/* Instance layout of the scanner type.
   Every PyObject* field is released by scanner_clear(). All fields except
   `memo` are exposed read-only to Python through scanner_members. */
typedef struct _PyScannerObject {
    PyObject_HEAD
    /* Truth-tested with PyObject_IsTrue() when parsing an object; the result
       is passed on to scanstring_unicode() as its `strict` flag. */
    PyObject *strict;
    /* NOTE(review): presumably a user callback applied to decoded objects;
       its use is not visible in this part of the file. */
    PyObject *object_hook;
    /* When this is not Py_None, object parsing collects (key, value) tuples
       in a list instead of filling a dict. */
    PyObject *object_pairs_hook;
    /* NOTE(review): the three parse_* fields are presumably user-supplied
       converters; their use is not visible in this part of the file. */
    PyObject *parse_float;
    PyObject *parse_int;
    PyObject *parse_constant;
    /* Dict consulted while parsing object keys so that equal key strings
       share a single object (looked up with PyDict_GetItem, filled with
       PyDict_SetItem(memo, key, key)). */
    PyObject *memo;
} PyScannerObject;
29
30static PyMemberDef scanner_members[] = {
31 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
32 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
33 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
34 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
35 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
36 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
37 {NULL}
38};
39
/* Instance layout of the encoder type.
   The eight PyObject* fields are exposed read-only to Python through
   encoder_members (note that `defaultfn` is published under the name
   "default"). `fast_encode` and `allow_nan` are plain C members and are not
   exposed there.
   NOTE(review): the code that fills and uses these fields is not in this
   part of the file; the meaning of each field should be confirmed against
   encoder_new/encoder_init and the encoder_listencode_* functions. */
typedef struct _PyEncoderObject {
    PyObject_HEAD
    PyObject *markers;
    PyObject *defaultfn;
    PyObject *encoder;
    PyObject *indent;
    PyObject *key_separator;
    PyObject *item_separator;
    PyObject *sort_keys;
    PyObject *skipkeys;
    PyCFunction fast_encode;
    int allow_nan;
} PyEncoderObject;
53
/* Read-only attributes exposed on encoder instances. Each entry maps a Python
   attribute name to a PyEncoderObject field; the only name that differs from
   its field is "default", which is backed by `defaultfn`. */
static PyMemberDef encoder_members[] = {
    {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
    {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
    {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
    {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
    {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
    {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
    {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
    {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
    {NULL}  /* sentinel */
};
65
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020066static PyObject *
67join_list_unicode(PyObject *lst)
68{
69 /* return u''.join(lst) */
70 static PyObject *sep = NULL;
71 if (sep == NULL) {
72 sep = PyUnicode_FromStringAndSize("", 0);
73 if (sep == NULL)
74 return NULL;
75 }
76 return PyUnicode_Join(sep, lst);
77}
78
/* Forward decls */

/* String escaping helpers and their Python-level wrapper. */
static PyObject *
ascii_escape_unicode(PyObject *pystr);
static PyObject *
py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
/* The only declaration in this list without internal linkage. */
void init_json(void);
/* Scanner (decoding) side. */
static PyObject *
scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
static PyObject *
_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
static PyObject *
scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
static int
scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
static void
scanner_dealloc(PyObject *self);
static int
scanner_clear(PyObject *self);
/* Encoder side. */
static PyObject *
encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
static int
encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
static void
encoder_dealloc(PyObject *self);
static int
encoder_clear(PyObject *self);
static int
encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
static int
encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
static int
encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
static PyObject *
_encoded_const(PyObject *obj);
/* Error reporting helper used by the scanner functions. */
static void
raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
static PyObject *
encoder_encode_string(PyEncoderObject *s, PyObject *obj);
static PyObject *
encoder_encode_long(PyEncoderObject* s UNUSED, PyObject *obj);
static PyObject *
encoder_encode_float(PyEncoderObject *s, PyObject *obj);
122
/* S_CHAR(c): non-zero when c is a printable ASCII character (' ' .. '~') that
   can be copied into a JSON string without escaping, i.e. anything in that
   range except backslash and double quote.
   The argument is parenthesized at every use so that compound expressions
   (for example a conditional expression) are evaluated as a unit. Note that
   the argument is still evaluated more than once, so it must be free of side
   effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* IS_WHITESPACE(c): non-zero for the four JSON whitespace characters
   (space, tab, line feed, carriage return). */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
Christian Heimes90540002008-05-08 14:29:10 +0000125
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000126static Py_ssize_t
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200127ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000128{
129 /* Escape unicode code point c to ASCII escape sequences
130 in char *output. output must have at least 12 bytes unused to
131 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000132 output[chars++] = '\\';
133 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000134 case '\\': output[chars++] = c; break;
135 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000136 case '\b': output[chars++] = 'b'; break;
137 case '\f': output[chars++] = 'f'; break;
138 case '\n': output[chars++] = 'n'; break;
139 case '\r': output[chars++] = 'r'; break;
140 case '\t': output[chars++] = 't'; break;
141 default:
Christian Heimes90540002008-05-08 14:29:10 +0000142 if (c >= 0x10000) {
143 /* UTF-16 surrogate pair */
Victor Stinner76df43d2012-10-30 01:42:39 +0100144 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000145 output[chars++] = 'u';
Victor Stinner76df43d2012-10-30 01:42:39 +0100146 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
147 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
148 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
149 output[chars++] = Py_hexdigits[(v ) & 0xf];
150 c = Py_UNICODE_LOW_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000151 output[chars++] = '\\';
152 }
Christian Heimes90540002008-05-08 14:29:10 +0000153 output[chars++] = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200154 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
155 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
156 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
157 output[chars++] = Py_hexdigits[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000158 }
159 return chars;
160}
161
162static PyObject *
163ascii_escape_unicode(PyObject *pystr)
164{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000165 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000166 Py_ssize_t i;
167 Py_ssize_t input_chars;
168 Py_ssize_t output_size;
169 Py_ssize_t chars;
170 PyObject *rval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200171 void *input;
172 unsigned char *output;
173 int kind;
Christian Heimes90540002008-05-08 14:29:10 +0000174
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200175 if (PyUnicode_READY(pystr) == -1)
176 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000177
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200178 input_chars = PyUnicode_GET_LENGTH(pystr);
179 input = PyUnicode_DATA(pystr);
180 kind = PyUnicode_KIND(pystr);
181
182 /* Compute the output size */
183 for (i = 0, output_size = 2; i < input_chars; i++) {
184 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500185 Py_ssize_t d;
186 if (S_CHAR(c)) {
187 d = 1;
188 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200189 else {
190 switch(c) {
Victor Stinnerd9c06312011-10-11 21:56:19 +0200191 case '\\': case '"': case '\b': case '\f':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200192 case '\n': case '\r': case '\t':
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500193 d = 2; break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200194 default:
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500195 d = c >= 0x10000 ? 12 : 6;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200196 }
197 }
Benjamin Petersone3bfe192015-02-01 17:53:53 -0500198 if (output_size > PY_SSIZE_T_MAX - d) {
199 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
200 return NULL;
201 }
202 output_size += d;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200203 }
204
205 rval = PyUnicode_New(output_size, 127);
Christian Heimes90540002008-05-08 14:29:10 +0000206 if (rval == NULL) {
207 return NULL;
208 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200209 output = PyUnicode_1BYTE_DATA(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000210 chars = 0;
211 output[chars++] = '"';
212 for (i = 0; i < input_chars; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200213 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Christian Heimes90540002008-05-08 14:29:10 +0000214 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000215 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000216 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000217 else {
218 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000219 }
Christian Heimes90540002008-05-08 14:29:10 +0000220 }
221 output[chars++] = '"';
Christian Heimesf402e922013-01-03 09:21:55 +0100222#ifdef Py_DEBUG
Victor Stinner8f825062012-04-27 13:55:39 +0200223 assert(_PyUnicode_CheckConsistency(rval, 1));
Christian Heimesf402e922013-01-03 09:21:55 +0100224#endif
Christian Heimes90540002008-05-08 14:29:10 +0000225 return rval;
226}
227
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100228static PyObject *
229escape_unicode(PyObject *pystr)
230{
231 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
232 Py_ssize_t i;
233 Py_ssize_t input_chars;
234 Py_ssize_t output_size;
235 Py_ssize_t chars;
236 PyObject *rval;
237 void *input;
238 int kind;
239 Py_UCS4 maxchar;
240
241 if (PyUnicode_READY(pystr) == -1)
242 return NULL;
243
244 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
245 input_chars = PyUnicode_GET_LENGTH(pystr);
246 input = PyUnicode_DATA(pystr);
247 kind = PyUnicode_KIND(pystr);
248
249 /* Compute the output size */
250 for (i = 0, output_size = 2; i < input_chars; i++) {
251 Py_UCS4 c = PyUnicode_READ(kind, input, i);
252 switch (c) {
253 case '\\': case '"': case '\b': case '\f':
254 case '\n': case '\r': case '\t':
255 output_size += 2;
256 break;
257 default:
258 if (c <= 0x1f)
259 output_size += 6;
260 else
261 output_size++;
262 }
263 }
264
265 rval = PyUnicode_New(output_size, maxchar);
266 if (rval == NULL)
267 return NULL;
268
269 kind = PyUnicode_KIND(rval);
270
271#define ENCODE_OUTPUT do { \
272 chars = 0; \
273 output[chars++] = '"'; \
274 for (i = 0; i < input_chars; i++) { \
275 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
276 switch (c) { \
277 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
278 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
279 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
280 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
281 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
282 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
283 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
284 default: \
285 if (c <= 0x1f) { \
286 output[chars++] = '\\'; \
287 output[chars++] = 'u'; \
288 output[chars++] = '0'; \
289 output[chars++] = '0'; \
290 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
291 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
292 } else { \
293 output[chars++] = c; \
294 } \
295 } \
296 } \
297 output[chars++] = '"'; \
298 } while (0)
299
300 if (kind == PyUnicode_1BYTE_KIND) {
301 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
302 ENCODE_OUTPUT;
303 } else if (kind == PyUnicode_2BYTE_KIND) {
304 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
305 ENCODE_OUTPUT;
306 } else {
307 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
308 assert(kind == PyUnicode_4BYTE_KIND);
309 ENCODE_OUTPUT;
310 }
311#undef ENCODE_OUTPUT
312
313#ifdef Py_DEBUG
314 assert(_PyUnicode_CheckConsistency(rval, 1));
315#endif
316 return rval;
317}
318
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000319static void
Christian Heimes90540002008-05-08 14:29:10 +0000320raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
321{
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200322 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
323 static PyObject *JSONDecodeError = NULL;
324 PyObject *exc;
325 if (JSONDecodeError == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000326 PyObject *decoder = PyImport_ImportModule("json.decoder");
327 if (decoder == NULL)
328 return;
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200329 JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000330 Py_DECREF(decoder);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200331 if (JSONDecodeError == NULL)
Christian Heimes90540002008-05-08 14:29:10 +0000332 return;
Christian Heimes90540002008-05-08 14:29:10 +0000333 }
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200334 exc = PyObject_CallFunction(JSONDecodeError, "(zOn)", msg, s, end);
335 if (exc) {
336 PyErr_SetObject(JSONDecodeError, exc);
337 Py_DECREF(exc);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000338 }
Christian Heimes90540002008-05-08 14:29:10 +0000339}
340
Ezio Melotti37623ab2013-01-03 08:44:15 +0200341static void
342raise_stop_iteration(Py_ssize_t idx)
343{
344 PyObject *value = PyLong_FromSsize_t(idx);
345 if (value != NULL) {
346 PyErr_SetObject(PyExc_StopIteration, value);
347 Py_DECREF(value);
348 }
349}
350
Christian Heimes90540002008-05-08 14:29:10 +0000351static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000352_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
353 /* return (rval, idx) tuple, stealing reference to rval */
354 PyObject *tpl;
355 PyObject *pyidx;
356 /*
357 steal a reference to rval, returns (rval, idx)
358 */
359 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000360 return NULL;
361 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000362 pyidx = PyLong_FromSsize_t(idx);
363 if (pyidx == NULL) {
364 Py_DECREF(rval);
365 return NULL;
366 }
367 tpl = PyTuple_New(2);
368 if (tpl == NULL) {
369 Py_DECREF(pyidx);
370 Py_DECREF(rval);
371 return NULL;
372 }
373 PyTuple_SET_ITEM(tpl, 0, rval);
374 PyTuple_SET_ITEM(tpl, 1, pyidx);
375 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000376}
377
/* APPEND_OLD_CHUNK: flush the pending `chunk` into the `chunks` list.

   Relies on names from the enclosing function: the PyObject* locals `chunk`
   and `chunks`, and a `bail` label used for error exits. If `chunk` is
   non-NULL, `chunks` is created on demand, `chunk` is appended to it and
   then cleared (the list holds its own reference). On any failure control
   jumps to `bail`; `chunk` has already been cleared when the append itself
   fails. Does nothing when `chunk` is NULL.

   The expansion is a bare `if` block and is written without a trailing
   semicolon at its use sites. */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        if (chunks == NULL) { \
            chunks = PyList_New(0); \
            if (chunks == NULL) { \
                goto bail; \
            } \
        } \
        if (PyList_Append(chunks, chunk)) { \
            Py_CLEAR(chunk); \
            goto bail; \
        } \
        Py_CLEAR(chunk); \
    }
392
Christian Heimes90540002008-05-08 14:29:10 +0000393static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000394scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000395{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000396 /* Read the JSON string from PyUnicode pystr.
397 end is the index of the first character after the quote.
398 if strict is zero then literal control characters are allowed
399 *next_end_ptr is a return-by-reference index of the character
400 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000401
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000402 Return value is a new PyUnicode
403 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000404 PyObject *rval = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200405 Py_ssize_t len;
Christian Heimes90540002008-05-08 14:29:10 +0000406 Py_ssize_t begin = end - 1;
Brett Cannonb94767f2011-02-22 20:15:44 +0000407 Py_ssize_t next /* = begin */;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200408 const void *buf;
409 int kind;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000410 PyObject *chunks = NULL;
411 PyObject *chunk = NULL;
412
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200413 if (PyUnicode_READY(pystr) == -1)
414 return 0;
415
416 len = PyUnicode_GET_LENGTH(pystr);
417 buf = PyUnicode_DATA(pystr);
418 kind = PyUnicode_KIND(pystr);
419
Ezio Melotti37623ab2013-01-03 08:44:15 +0200420 if (end < 0 || len < end) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000421 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
422 goto bail;
423 }
Christian Heimes90540002008-05-08 14:29:10 +0000424 while (1) {
425 /* Find the end of the string or the next escape */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200426 Py_UCS4 c = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000427 for (next = end; next < len; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200428 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000429 if (c == '"' || c == '\\') {
430 break;
431 }
432 else if (strict && c <= 0x1f) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000433 raise_errmsg("Invalid control character at", pystr, next);
Christian Heimes90540002008-05-08 14:29:10 +0000434 goto bail;
435 }
436 }
437 if (!(c == '"' || c == '\\')) {
438 raise_errmsg("Unterminated string starting at", pystr, begin);
439 goto bail;
440 }
441 /* Pick up this chunk if it's not zero length */
442 if (next != end) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000443 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200444 chunk = PyUnicode_FromKindAndData(
445 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200446 (char*)buf + kind * end,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200447 next - end);
Christian Heimes90540002008-05-08 14:29:10 +0000448 if (chunk == NULL) {
449 goto bail;
450 }
Christian Heimes90540002008-05-08 14:29:10 +0000451 }
452 next++;
453 if (c == '"') {
454 end = next;
455 break;
456 }
457 if (next == len) {
458 raise_errmsg("Unterminated string starting at", pystr, begin);
459 goto bail;
460 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200461 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000462 if (c != 'u') {
463 /* Non-unicode backslash escapes */
464 end = next + 1;
465 switch (c) {
466 case '"': break;
467 case '\\': break;
468 case '/': break;
469 case 'b': c = '\b'; break;
470 case 'f': c = '\f'; break;
471 case 'n': c = '\n'; break;
472 case 'r': c = '\r'; break;
473 case 't': c = '\t'; break;
474 default: c = 0;
475 }
476 if (c == 0) {
477 raise_errmsg("Invalid \\escape", pystr, end - 2);
478 goto bail;
479 }
480 }
481 else {
482 c = 0;
483 next++;
484 end = next + 4;
485 if (end >= len) {
486 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
487 goto bail;
488 }
489 /* Decode 4 hex digits */
490 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200491 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000492 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000493 switch (digit) {
494 case '0': case '1': case '2': case '3': case '4':
495 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000496 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000497 case 'a': case 'b': case 'c': case 'd': case 'e':
498 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000499 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000500 case 'A': case 'B': case 'C': case 'D': case 'E':
501 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000502 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000503 default:
504 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
505 goto bail;
506 }
507 }
Christian Heimes90540002008-05-08 14:29:10 +0000508 /* Surrogate pair */
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200509 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
510 PyUnicode_READ(kind, buf, next++) == '\\' &&
511 PyUnicode_READ(kind, buf, next++) == 'u') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200512 Py_UCS4 c2 = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000513 end += 6;
514 /* Decode 4 hex digits */
515 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200516 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000517 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000518 switch (digit) {
519 case '0': case '1': case '2': case '3': case '4':
520 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000521 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000522 case 'a': case 'b': case 'c': case 'd': case 'e':
523 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000524 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000525 case 'A': case 'B': case 'C': case 'D': case 'E':
526 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000527 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000528 default:
529 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
530 goto bail;
531 }
532 }
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200533 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
534 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
535 else
536 end -= 6;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000537 }
Christian Heimes90540002008-05-08 14:29:10 +0000538 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000539 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200540 chunk = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &c, 1);
Christian Heimes90540002008-05-08 14:29:10 +0000541 if (chunk == NULL) {
542 goto bail;
543 }
Christian Heimes90540002008-05-08 14:29:10 +0000544 }
545
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000546 if (chunks == NULL) {
547 if (chunk != NULL)
548 rval = chunk;
549 else
550 rval = PyUnicode_FromStringAndSize("", 0);
Christian Heimes90540002008-05-08 14:29:10 +0000551 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000552 else {
553 APPEND_OLD_CHUNK
554 rval = join_list_unicode(chunks);
555 if (rval == NULL) {
556 goto bail;
557 }
558 Py_CLEAR(chunks);
559 }
560
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000561 *next_end_ptr = end;
562 return rval;
Christian Heimes90540002008-05-08 14:29:10 +0000563bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000564 *next_end_ptr = -1;
Christian Heimes90540002008-05-08 14:29:10 +0000565 Py_XDECREF(chunks);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000566 Py_XDECREF(chunk);
Christian Heimes90540002008-05-08 14:29:10 +0000567 return NULL;
568}
569
570PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000571 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000572 "\n"
573 "Scan the string s for a JSON string. End is the index of the\n"
574 "character in s after the quote that started the JSON string.\n"
575 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
576 "on attempt to decode an invalid string. If strict is False then literal\n"
577 "control characters are allowed in the string.\n"
578 "\n"
579 "Returns a tuple of the decoded string and the index of the character in s\n"
580 "after the end quote."
581);
Christian Heimes90540002008-05-08 14:29:10 +0000582
583static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000584py_scanstring(PyObject* self UNUSED, PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000585{
586 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000587 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000588 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000589 Py_ssize_t next_end = -1;
590 int strict = 1;
Antoine Pitroucbb02842012-12-01 19:34:16 +0100591 if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000592 return NULL;
593 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000594 if (PyUnicode_Check(pystr)) {
595 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000596 }
597 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000598 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000599 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000600 Py_TYPE(pystr)->tp_name);
601 return NULL;
602 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000603 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000604}
605
606PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000607 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000608 "\n"
609 "Return an ASCII-only JSON representation of a Python string"
610);
Christian Heimes90540002008-05-08 14:29:10 +0000611
612static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000613py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000614{
615 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000616 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000617 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000618 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000619 rval = ascii_escape_unicode(pystr);
620 }
621 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000622 PyErr_Format(PyExc_TypeError,
623 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000624 Py_TYPE(pystr)->tp_name);
625 return NULL;
626 }
Christian Heimes90540002008-05-08 14:29:10 +0000627 return rval;
628}
629
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100630
631PyDoc_STRVAR(pydoc_encode_basestring,
632 "encode_basestring(string) -> string\n"
633 "\n"
634 "Return a JSON representation of a Python string"
635);
636
637static PyObject *
638py_encode_basestring(PyObject* self UNUSED, PyObject *pystr)
639{
640 PyObject *rval;
641 /* Return a JSON representation of a Python string */
642 /* METH_O */
643 if (PyUnicode_Check(pystr)) {
644 rval = escape_unicode(pystr);
645 }
646 else {
647 PyErr_Format(PyExc_TypeError,
648 "first argument must be a string, not %.80s",
649 Py_TYPE(pystr)->tp_name);
650 return NULL;
651 }
652 return rval;
653}
654
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000655static void
656scanner_dealloc(PyObject *self)
657{
658 /* Deallocate scanner object */
659 scanner_clear(self);
660 Py_TYPE(self)->tp_free(self);
661}
662
663static int
664scanner_traverse(PyObject *self, visitproc visit, void *arg)
665{
666 PyScannerObject *s;
667 assert(PyScanner_Check(self));
668 s = (PyScannerObject *)self;
669 Py_VISIT(s->strict);
670 Py_VISIT(s->object_hook);
671 Py_VISIT(s->object_pairs_hook);
672 Py_VISIT(s->parse_float);
673 Py_VISIT(s->parse_int);
674 Py_VISIT(s->parse_constant);
675 return 0;
676}
677
678static int
679scanner_clear(PyObject *self)
680{
681 PyScannerObject *s;
682 assert(PyScanner_Check(self));
683 s = (PyScannerObject *)self;
684 Py_CLEAR(s->strict);
685 Py_CLEAR(s->object_hook);
686 Py_CLEAR(s->object_pairs_hook);
687 Py_CLEAR(s->parse_float);
688 Py_CLEAR(s->parse_int);
689 Py_CLEAR(s->parse_constant);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000690 Py_CLEAR(s->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000691 return 0;
692}
693
694static PyObject *
695_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
696 /* Read a JSON object from PyUnicode pystr.
697 idx is the index of the first character after the opening curly brace.
698 *next_idx_ptr is a return-by-reference index to the first character after
699 the closing curly brace.
700
701 Returns a new PyObject (usually a dict, but object_hook can change that)
702 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200703 void *str;
704 int kind;
705 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000706 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000707 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000708 PyObject *key = NULL;
709 int strict = PyObject_IsTrue(s->strict);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000710 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000711 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000712
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200713 if (PyUnicode_READY(pystr) == -1)
714 return NULL;
715
716 str = PyUnicode_DATA(pystr);
717 kind = PyUnicode_KIND(pystr);
718 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
719
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000720 if (has_pairs_hook)
721 rval = PyList_New(0);
722 else
723 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000724 if (rval == NULL)
725 return NULL;
726
727 /* skip whitespace after { */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200728 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000729
730 /* only loop if the object is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200731 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
732 while (1) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000733 PyObject *memokey;
734
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000735 /* read key */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200736 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200737 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000738 goto bail;
739 }
740 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
741 if (key == NULL)
742 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000743 memokey = PyDict_GetItem(s->memo, key);
744 if (memokey != NULL) {
745 Py_INCREF(memokey);
746 Py_DECREF(key);
747 key = memokey;
748 }
749 else {
750 if (PyDict_SetItem(s->memo, key, key) < 0)
751 goto bail;
752 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000753 idx = next_idx;
754
755 /* skip whitespace between key and : delimiter, read :, skip whitespace */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200756 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
757 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200758 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000759 goto bail;
760 }
761 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200762 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000763
764 /* read any JSON term */
765 val = scan_once_unicode(s, pystr, idx, &next_idx);
766 if (val == NULL)
767 goto bail;
768
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000769 if (has_pairs_hook) {
770 PyObject *item = PyTuple_Pack(2, key, val);
771 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000772 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000773 Py_CLEAR(key);
774 Py_CLEAR(val);
775 if (PyList_Append(rval, item) == -1) {
776 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000777 goto bail;
778 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000779 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000780 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000781 else {
782 if (PyDict_SetItem(rval, key, val) < 0)
783 goto bail;
784 Py_CLEAR(key);
785 Py_CLEAR(val);
786 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000787 idx = next_idx;
788
789 /* skip whitespace before } or , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200790 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000791
792 /* bail if the object is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200793 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000794 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200795 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200796 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000797 goto bail;
798 }
799 idx++;
800
801 /* skip whitespace after , delimiter */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200802 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000803 }
804 }
805
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000806 *next_idx_ptr = idx + 1;
807
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000808 if (has_pairs_hook) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000809 val = PyObject_CallFunctionObjArgs(s->object_pairs_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000810 Py_DECREF(rval);
811 return val;
812 }
813
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000814 /* if object_hook is not None: rval = object_hook(rval) */
815 if (s->object_hook != Py_None) {
816 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000817 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000818 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000819 }
820 return rval;
821bail:
822 Py_XDECREF(key);
823 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000824 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000825 return NULL;
826}
827
828static PyObject *
829_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200830 /* Read a JSON array from PyUnicode pystr.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000831 idx is the index of the first character after the opening brace.
832 *next_idx_ptr is a return-by-reference index to the first character after
833 the closing brace.
834
835 Returns a new PyList
836 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200837 void *str;
838 int kind;
839 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000840 PyObject *val = NULL;
841 PyObject *rval = PyList_New(0);
842 Py_ssize_t next_idx;
843 if (rval == NULL)
844 return NULL;
845
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200846 if (PyUnicode_READY(pystr) == -1)
847 return NULL;
848
849 str = PyUnicode_DATA(pystr);
850 kind = PyUnicode_KIND(pystr);
851 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
852
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000853 /* skip whitespace after [ */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200854 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000855
856 /* only loop if the array is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200857 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
858 while (1) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000859
860 /* read any JSON term */
861 val = scan_once_unicode(s, pystr, idx, &next_idx);
862 if (val == NULL)
863 goto bail;
864
865 if (PyList_Append(rval, val) == -1)
866 goto bail;
867
868 Py_CLEAR(val);
869 idx = next_idx;
870
871 /* skip whitespace between term and , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200872 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000873
874 /* bail if the array is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200875 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000876 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200877 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200878 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000879 goto bail;
880 }
881 idx++;
882
883 /* skip whitespace after , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200884 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000885 }
886 }
887
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200888 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
889 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200890 raise_errmsg("Expecting value", pystr, end_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000891 goto bail;
892 }
893 *next_idx_ptr = idx + 1;
894 return rval;
895bail:
896 Py_XDECREF(val);
897 Py_DECREF(rval);
898 return NULL;
899}
900
901static PyObject *
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200902_parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
903 /* Read a JSON constant.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000904 constant is the constant string that was found
905 ("NaN", "Infinity", "-Infinity").
906 idx is the index of the first character of the constant
907 *next_idx_ptr is a return-by-reference index to the first character after
908 the constant.
909
910 Returns the result of parse_constant
911 */
912 PyObject *cstr;
913 PyObject *rval;
914 /* constant is "NaN", "Infinity", or "-Infinity" */
915 cstr = PyUnicode_InternFromString(constant);
916 if (cstr == NULL)
917 return NULL;
918
919 /* rval = parse_constant(constant) */
920 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200921 idx += PyUnicode_GET_LENGTH(cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000922 Py_DECREF(cstr);
923 *next_idx_ptr = idx;
924 return rval;
925}
926
927static PyObject *
928_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
929 /* Read a JSON number from PyUnicode pystr.
930 idx is the index of the first character of the number
931 *next_idx_ptr is a return-by-reference index to the first character after
932 the number.
933
934 Returns a new PyObject representation of that number:
Serhiy Storchaka483405b2015-02-17 10:14:30 +0200935 PyLong, or PyFloat.
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000936 May return other types if parse_int or parse_float are set
937 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200938 void *str;
939 int kind;
940 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000941 Py_ssize_t idx = start;
942 int is_float = 0;
943 PyObject *rval;
Antoine Pitrouf6454512011-04-25 19:16:06 +0200944 PyObject *numstr = NULL;
945 PyObject *custom_func;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000946
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200947 if (PyUnicode_READY(pystr) == -1)
948 return NULL;
949
950 str = PyUnicode_DATA(pystr);
951 kind = PyUnicode_KIND(pystr);
952 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
953
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000954 /* read a sign if it's there, make sure it's not the end of the string */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200955 if (PyUnicode_READ(kind, str, idx) == '-') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000956 idx++;
957 if (idx > end_idx) {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200958 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000959 return NULL;
960 }
961 }
962
963 /* read as many integer digits as we find as long as it doesn't start with 0 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200964 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000965 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200966 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000967 }
968 /* if it starts with 0 we only expect one integer digit */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200969 else if (PyUnicode_READ(kind, str, idx) == '0') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000970 idx++;
971 }
972 /* no integer digits, error */
973 else {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200974 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000975 return NULL;
976 }
977
978 /* if the next char is '.' followed by a digit then read all float digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200979 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000980 is_float = 1;
981 idx += 2;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200982 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000983 }
984
985 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200986 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000987 Py_ssize_t e_start = idx;
988 idx++;
989
990 /* read an exponent sign if present */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200991 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000992
993 /* read all digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200994 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000995
996 /* if we got a digit, then parse as float. if not, backtrack */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200997 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000998 is_float = 1;
999 }
1000 else {
1001 idx = e_start;
1002 }
1003 }
1004
Antoine Pitrouf6454512011-04-25 19:16:06 +02001005 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
1006 custom_func = s->parse_float;
1007 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
1008 custom_func = s->parse_int;
1009 else
1010 custom_func = NULL;
1011
1012 if (custom_func) {
1013 /* copy the section we determined to be a number */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001014 numstr = PyUnicode_FromKindAndData(kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001015 (char*)str + kind * start,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001016 idx - start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001017 if (numstr == NULL)
1018 return NULL;
1019 rval = PyObject_CallFunctionObjArgs(custom_func, numstr, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001020 }
1021 else {
Antoine Pitrouf6454512011-04-25 19:16:06 +02001022 Py_ssize_t i, n;
1023 char *buf;
1024 /* Straight conversion to ASCII, to avoid costly conversion of
1025 decimal unicode digits (which cannot appear here) */
1026 n = idx - start;
1027 numstr = PyBytes_FromStringAndSize(NULL, n);
1028 if (numstr == NULL)
1029 return NULL;
1030 buf = PyBytes_AS_STRING(numstr);
1031 for (i = 0; i < n; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001032 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001033 }
1034 if (is_float)
1035 rval = PyFloat_FromString(numstr);
1036 else
1037 rval = PyLong_FromString(buf, NULL, 10);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001038 }
1039 Py_DECREF(numstr);
1040 *next_idx_ptr = idx;
1041 return rval;
1042}
1043
1044static PyObject *
1045scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1046{
1047 /* Read one JSON term (of any kind) from PyUnicode pystr.
1048 idx is the index of the first character of the term
1049 *next_idx_ptr is a return-by-reference index to the first character after
1050 the number.
1051
1052 Returns a new PyObject representation of the term.
1053 */
Ezio Melotti362b9512011-05-07 17:58:09 +03001054 PyObject *res;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001055 void *str;
1056 int kind;
1057 Py_ssize_t length;
1058
1059 if (PyUnicode_READY(pystr) == -1)
1060 return NULL;
1061
1062 str = PyUnicode_DATA(pystr);
1063 kind = PyUnicode_KIND(pystr);
1064 length = PyUnicode_GET_LENGTH(pystr);
1065
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001066 if (idx < 0) {
Benjamin Peterson9beee042014-04-14 11:46:51 -04001067 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001068 return NULL;
1069 }
1070 if (idx >= length) {
Ezio Melotti37623ab2013-01-03 08:44:15 +02001071 raise_stop_iteration(idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001072 return NULL;
1073 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001074
1075 switch (PyUnicode_READ(kind, str, idx)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001076 case '"':
1077 /* string */
1078 return scanstring_unicode(pystr, idx + 1,
1079 PyObject_IsTrue(s->strict),
1080 next_idx_ptr);
1081 case '{':
1082 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +03001083 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1084 "from a unicode string"))
1085 return NULL;
1086 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1087 Py_LeaveRecursiveCall();
1088 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001089 case '[':
1090 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +03001091 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1092 "from a unicode string"))
1093 return NULL;
1094 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1095 Py_LeaveRecursiveCall();
1096 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001097 case 'n':
1098 /* null */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001099 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001100 Py_INCREF(Py_None);
1101 *next_idx_ptr = idx + 4;
1102 return Py_None;
1103 }
1104 break;
1105 case 't':
1106 /* true */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001107 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001108 Py_INCREF(Py_True);
1109 *next_idx_ptr = idx + 4;
1110 return Py_True;
1111 }
1112 break;
1113 case 'f':
1114 /* false */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001115 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1116 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1117 PyUnicode_READ(kind, str, idx + 3) == 's' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001118 PyUnicode_READ(kind, str, idx + 4) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001119 Py_INCREF(Py_False);
1120 *next_idx_ptr = idx + 5;
1121 return Py_False;
1122 }
1123 break;
1124 case 'N':
1125 /* NaN */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001126 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001127 PyUnicode_READ(kind, str, idx + 2) == 'N') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001128 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1129 }
1130 break;
1131 case 'I':
1132 /* Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001133 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1134 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1135 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001136 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001137 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1138 PyUnicode_READ(kind, str, idx + 6) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001139 PyUnicode_READ(kind, str, idx + 7) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001140 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1141 }
1142 break;
1143 case '-':
1144 /* -Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001145 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001146 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1147 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001148 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001149 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001150 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1151 PyUnicode_READ(kind, str, idx + 7) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001152 PyUnicode_READ(kind, str, idx + 8) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001153 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1154 }
1155 break;
1156 }
1157 /* Didn't find a string, object, array, or named constant. Look for a number. */
1158 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1159}
1160
1161static PyObject *
1162scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1163{
1164 /* Python callable interface to scan_once_{str,unicode} */
1165 PyObject *pystr;
1166 PyObject *rval;
1167 Py_ssize_t idx;
1168 Py_ssize_t next_idx = -1;
1169 static char *kwlist[] = {"string", "idx", NULL};
1170 PyScannerObject *s;
1171 assert(PyScanner_Check(self));
1172 s = (PyScannerObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001173 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001174 return NULL;
1175
1176 if (PyUnicode_Check(pystr)) {
1177 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1178 }
1179 else {
1180 PyErr_Format(PyExc_TypeError,
1181 "first argument must be a string, not %.80s",
1182 Py_TYPE(pystr)->tp_name);
1183 return NULL;
1184 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001185 PyDict_Clear(s->memo);
1186 if (rval == NULL)
1187 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001188 return _build_rval_index_tuple(rval, next_idx);
1189}
1190
1191static PyObject *
1192scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1193{
1194 PyScannerObject *s;
1195 s = (PyScannerObject *)type->tp_alloc(type, 0);
1196 if (s != NULL) {
1197 s->strict = NULL;
1198 s->object_hook = NULL;
1199 s->object_pairs_hook = NULL;
1200 s->parse_float = NULL;
1201 s->parse_int = NULL;
1202 s->parse_constant = NULL;
1203 }
1204 return (PyObject *)s;
1205}
1206
1207static int
1208scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1209{
1210 /* Initialize Scanner object */
1211 PyObject *ctx;
1212 static char *kwlist[] = {"context", NULL};
1213 PyScannerObject *s;
1214
1215 assert(PyScanner_Check(self));
1216 s = (PyScannerObject *)self;
1217
1218 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1219 return -1;
1220
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001221 if (s->memo == NULL) {
1222 s->memo = PyDict_New();
1223 if (s->memo == NULL)
1224 goto bail;
1225 }
1226
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001227 /* All of these will fail "gracefully" so we don't need to verify them */
1228 s->strict = PyObject_GetAttrString(ctx, "strict");
1229 if (s->strict == NULL)
1230 goto bail;
1231 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1232 if (s->object_hook == NULL)
1233 goto bail;
1234 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1235 if (s->object_pairs_hook == NULL)
1236 goto bail;
1237 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1238 if (s->parse_float == NULL)
1239 goto bail;
1240 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1241 if (s->parse_int == NULL)
1242 goto bail;
1243 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1244 if (s->parse_constant == NULL)
1245 goto bail;
1246
1247 return 0;
1248
1249bail:
1250 Py_CLEAR(s->strict);
1251 Py_CLEAR(s->object_hook);
1252 Py_CLEAR(s->object_pairs_hook);
1253 Py_CLEAR(s->parse_float);
1254 Py_CLEAR(s->parse_int);
1255 Py_CLEAR(s->parse_constant);
1256 return -1;
1257}
1258
/* Docstring exposed as the tp_doc of the Scanner type below. */
PyDoc_STRVAR(scanner_doc, "JSON scanner object");

/* Type object for "_json.Scanner".
   The initializer is positional: each entry must stay in slot order, and the
   trailing comment names the slot it fills.  Slots left at 0 are not
   provided by this definition.  The commented-out names next to some zero
   entries (PyObject_GenericGetAttr, PyType_GenericAlloc, ...) record
   candidate values that are NOT assigned here. */
static
PyTypeObject PyScannerType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_json.Scanner",       /* tp_name */
    sizeof(PyScannerObject), /* tp_basicsize */
    0,                    /* tp_itemsize */
    scanner_dealloc, /* tp_dealloc */
    0,                    /* tp_print */
    0,                    /* tp_getattr */
    0,                    /* tp_setattr */
    0,                    /* tp_compare */
    0,                    /* tp_repr */
    0,                    /* tp_as_number */
    0,                    /* tp_as_sequence */
    0,                    /* tp_as_mapping */
    0,                    /* tp_hash */
    scanner_call,         /* tp_call */
    0,                    /* tp_str */
    0,/* PyObject_GenericGetAttr, */                    /* tp_getattro */
    0,/* PyObject_GenericSetAttr, */                    /* tp_setattro */
    0,                    /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
    scanner_doc,          /* tp_doc */
    scanner_traverse,                    /* tp_traverse */
    scanner_clear,                    /* tp_clear */
    0,                    /* tp_richcompare */
    0,                    /* tp_weaklistoffset */
    0,                    /* tp_iter */
    0,                    /* tp_iternext */
    0,                    /* tp_methods */
    scanner_members,                    /* tp_members */
    0,                    /* tp_getset */
    0,                    /* tp_base */
    0,                    /* tp_dict */
    0,                    /* tp_descr_get */
    0,                    /* tp_descr_set */
    0,                    /* tp_dictoffset */
    scanner_init,                    /* tp_init */
    0,/* PyType_GenericAlloc, */        /* tp_alloc */
    scanner_new,          /* tp_new */
    0,/* PyObject_GC_Del, */              /* tp_free */
};
1303
1304static PyObject *
1305encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1306{
1307 PyEncoderObject *s;
1308 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1309 if (s != NULL) {
1310 s->markers = NULL;
1311 s->defaultfn = NULL;
1312 s->encoder = NULL;
1313 s->indent = NULL;
1314 s->key_separator = NULL;
1315 s->item_separator = NULL;
1316 s->sort_keys = NULL;
1317 s->skipkeys = NULL;
1318 }
1319 return (PyObject *)s;
1320}
1321
1322static int
1323encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1324{
1325 /* initialize Encoder object */
1326 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1327
1328 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001329 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1330 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001331
1332 assert(PyEncoder_Check(self));
1333 s = (PyEncoderObject *)self;
1334
1335 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001336 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
1337 &sort_keys, &skipkeys, &allow_nan))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001338 return -1;
1339
Antoine Pitrou781eba72009-12-08 15:57:31 +00001340 s->markers = markers;
1341 s->defaultfn = defaultfn;
1342 s->encoder = encoder;
1343 s->indent = indent;
1344 s->key_separator = key_separator;
1345 s->item_separator = item_separator;
1346 s->sort_keys = sort_keys;
1347 s->skipkeys = skipkeys;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001348 s->fast_encode = NULL;
1349 if (PyCFunction_Check(s->encoder)) {
1350 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1351 if (f == (PyCFunction)py_encode_basestring_ascii ||
1352 f == (PyCFunction)py_encode_basestring) {
1353 s->fast_encode = f;
1354 }
1355 }
Antoine Pitrou781eba72009-12-08 15:57:31 +00001356 s->allow_nan = PyObject_IsTrue(allow_nan);
1357
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001358 Py_INCREF(s->markers);
1359 Py_INCREF(s->defaultfn);
1360 Py_INCREF(s->encoder);
1361 Py_INCREF(s->indent);
1362 Py_INCREF(s->key_separator);
1363 Py_INCREF(s->item_separator);
1364 Py_INCREF(s->sort_keys);
1365 Py_INCREF(s->skipkeys);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001366 return 0;
1367}
1368
1369static PyObject *
1370encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1371{
1372 /* Python callable interface to encode_listencode_obj */
1373 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1374 PyObject *obj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001375 Py_ssize_t indent_level;
1376 PyEncoderObject *s;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001377 _PyAccu acc;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001378
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001379 assert(PyEncoder_Check(self));
1380 s = (PyEncoderObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001381 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1382 &obj, &indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001383 return NULL;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001384 if (_PyAccu_Init(&acc))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001385 return NULL;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001386 if (encoder_listencode_obj(s, &acc, obj, indent_level)) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001387 _PyAccu_Destroy(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001388 return NULL;
1389 }
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001390 return _PyAccu_FinishAsList(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001391}
1392
1393static PyObject *
1394_encoded_const(PyObject *obj)
1395{
1396 /* Return the JSON string representation of None, True, False */
1397 if (obj == Py_None) {
1398 static PyObject *s_null = NULL;
1399 if (s_null == NULL) {
1400 s_null = PyUnicode_InternFromString("null");
1401 }
1402 Py_INCREF(s_null);
1403 return s_null;
1404 }
1405 else if (obj == Py_True) {
1406 static PyObject *s_true = NULL;
1407 if (s_true == NULL) {
1408 s_true = PyUnicode_InternFromString("true");
1409 }
1410 Py_INCREF(s_true);
1411 return s_true;
1412 }
1413 else if (obj == Py_False) {
1414 static PyObject *s_false = NULL;
1415 if (s_false == NULL) {
1416 s_false = PyUnicode_InternFromString("false");
1417 }
1418 Py_INCREF(s_false);
1419 return s_false;
1420 }
1421 else {
1422 PyErr_SetString(PyExc_ValueError, "not a const");
1423 return NULL;
1424 }
1425}
1426
1427static PyObject *
Ethan Furmana4998a72013-08-10 13:01:45 -07001428encoder_encode_long(PyEncoderObject* s UNUSED, PyObject *obj)
1429{
1430 /* Return the JSON representation of a PyLong and PyLong subclasses.
1431 Calls int() on PyLong subclasses in case the str() was changed.
1432 Added specifically to deal with IntEnum. See Issue18264. */
1433 PyObject *encoded, *longobj;
1434 if (PyLong_CheckExact(obj)) {
1435 encoded = PyObject_Str(obj);
1436 }
1437 else {
1438 longobj = PyNumber_Long(obj);
1439 if (longobj == NULL) {
1440 PyErr_SetString(
1441 PyExc_ValueError,
1442 "Unable to coerce int subclass to int"
1443 );
1444 return NULL;
1445 }
1446 encoded = PyObject_Str(longobj);
1447 Py_DECREF(longobj);
1448 }
1449 return encoded;
1450}
1451
1452
1453static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001454encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1455{
Ethan Furmana4998a72013-08-10 13:01:45 -07001456 /* Return the JSON representation of a PyFloat.
1457 Modified to call float() on float subclasses in case the subclass
1458 changes the repr. See Issue18264. */
1459 PyObject *encoded, *floatobj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001460 double i = PyFloat_AS_DOUBLE(obj);
1461 if (!Py_IS_FINITE(i)) {
1462 if (!s->allow_nan) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001463 PyErr_SetString(
1464 PyExc_ValueError,
1465 "Out of range float values are not JSON compliant"
1466 );
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001467 return NULL;
1468 }
1469 if (i > 0) {
1470 return PyUnicode_FromString("Infinity");
1471 }
1472 else if (i < 0) {
1473 return PyUnicode_FromString("-Infinity");
1474 }
1475 else {
1476 return PyUnicode_FromString("NaN");
1477 }
1478 }
Ethan Furmana4998a72013-08-10 13:01:45 -07001479 /* coerce float subclasses to float (primarily for Enum) */
1480 if (PyFloat_CheckExact(obj)) {
1481 /* Use a better float format here? */
1482 encoded = PyObject_Repr(obj);
1483 }
1484 else {
1485 floatobj = PyNumber_Float(obj);
1486 if (floatobj == NULL) {
1487 PyErr_SetString(
1488 PyExc_ValueError,
1489 "Unable to coerce float subclass to float"
1490 );
1491 return NULL;
1492 }
1493 encoded = PyObject_Repr(floatobj);
1494 Py_DECREF(floatobj);
1495 }
1496 return encoded;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001497}
1498
1499static PyObject *
1500encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1501{
1502 /* Return the JSON representation of a string */
1503 if (s->fast_encode)
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001504 return s->fast_encode(NULL, obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001505 else
1506 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1507}
1508
1509static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001510_steal_accumulate(_PyAccu *acc, PyObject *stolen)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001511{
1512 /* Append stolen and then decrement its reference count */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001513 int rval = _PyAccu_Accumulate(acc, stolen);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001514 Py_DECREF(stolen);
1515 return rval;
1516}
1517
1518static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001519encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001520 PyObject *obj, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001521{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001522 /* Encode Python object obj to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001523 PyObject *newobj;
1524 int rv;
1525
1526 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1527 PyObject *cstr = _encoded_const(obj);
1528 if (cstr == NULL)
1529 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001530 return _steal_accumulate(acc, cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001531 }
1532 else if (PyUnicode_Check(obj))
1533 {
1534 PyObject *encoded = encoder_encode_string(s, obj);
1535 if (encoded == NULL)
1536 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001537 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001538 }
1539 else if (PyLong_Check(obj)) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001540 PyObject *encoded = encoder_encode_long(s, obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001541 if (encoded == NULL)
1542 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001543 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001544 }
1545 else if (PyFloat_Check(obj)) {
1546 PyObject *encoded = encoder_encode_float(s, obj);
1547 if (encoded == NULL)
1548 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001549 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001550 }
1551 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001552 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1553 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001554 rv = encoder_listencode_list(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001555 Py_LeaveRecursiveCall();
1556 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001557 }
1558 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001559 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1560 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001561 rv = encoder_listencode_dict(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001562 Py_LeaveRecursiveCall();
1563 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001564 }
1565 else {
1566 PyObject *ident = NULL;
1567 if (s->markers != Py_None) {
1568 int has_key;
1569 ident = PyLong_FromVoidPtr(obj);
1570 if (ident == NULL)
1571 return -1;
1572 has_key = PyDict_Contains(s->markers, ident);
1573 if (has_key) {
1574 if (has_key != -1)
1575 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1576 Py_DECREF(ident);
1577 return -1;
1578 }
1579 if (PyDict_SetItem(s->markers, ident, obj)) {
1580 Py_DECREF(ident);
1581 return -1;
1582 }
1583 }
1584 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
1585 if (newobj == NULL) {
1586 Py_XDECREF(ident);
1587 return -1;
1588 }
Ezio Melotti13672652011-05-11 01:02:56 +03001589
1590 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1591 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001592 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001593 Py_LeaveRecursiveCall();
1594
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001595 Py_DECREF(newobj);
1596 if (rv) {
1597 Py_XDECREF(ident);
1598 return -1;
1599 }
1600 if (ident != NULL) {
1601 if (PyDict_DelItem(s->markers, ident)) {
1602 Py_XDECREF(ident);
1603 return -1;
1604 }
1605 Py_XDECREF(ident);
1606 }
1607 return rv;
1608 }
1609}
1610
1611static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001612encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001613 PyObject *dct, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001614{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001615 /* Encode Python dict dct a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001616 static PyObject *open_dict = NULL;
1617 static PyObject *close_dict = NULL;
1618 static PyObject *empty_dict = NULL;
1619 PyObject *kstr = NULL;
1620 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001621 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001622 PyObject *items;
1623 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001624 int skipkeys;
1625 Py_ssize_t idx;
1626
1627 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1628 open_dict = PyUnicode_InternFromString("{");
1629 close_dict = PyUnicode_InternFromString("}");
1630 empty_dict = PyUnicode_InternFromString("{}");
1631 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1632 return -1;
1633 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001634 if (Py_SIZE(dct) == 0)
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001635 return _PyAccu_Accumulate(acc, empty_dict);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001636
1637 if (s->markers != Py_None) {
1638 int has_key;
1639 ident = PyLong_FromVoidPtr(dct);
1640 if (ident == NULL)
1641 goto bail;
1642 has_key = PyDict_Contains(s->markers, ident);
1643 if (has_key) {
1644 if (has_key != -1)
1645 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1646 goto bail;
1647 }
1648 if (PyDict_SetItem(s->markers, ident, dct)) {
1649 goto bail;
1650 }
1651 }
1652
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001653 if (_PyAccu_Accumulate(acc, open_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001654 goto bail;
1655
1656 if (s->indent != Py_None) {
1657 /* TODO: DOES NOT RUN */
1658 indent_level += 1;
1659 /*
1660 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1661 separator = _item_separator + newline_indent
1662 buf += newline_indent
1663 */
1664 }
1665
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001666 if (PyObject_IsTrue(s->sort_keys)) {
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001667 /* First sort the keys then replace them with (key, value) tuples. */
1668 Py_ssize_t i, nitems;
1669 items = PyMapping_Keys(dct);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001670 if (items == NULL)
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001671 goto bail;
1672 if (!PyList_Check(items)) {
1673 PyErr_SetString(PyExc_ValueError, "keys must return list");
1674 goto bail;
1675 }
1676 if (PyList_Sort(items) < 0)
1677 goto bail;
1678 nitems = PyList_GET_SIZE(items);
1679 for (i = 0; i < nitems; i++) {
1680 PyObject *key, *value;
1681 key = PyList_GET_ITEM(items, i);
1682 value = PyDict_GetItem(dct, key);
1683 item = PyTuple_Pack(2, key, value);
1684 if (item == NULL)
1685 goto bail;
1686 PyList_SET_ITEM(items, i, item);
Victor Stinner31a3ec32014-09-10 23:31:42 +02001687 item = NULL;
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001688 Py_DECREF(key);
1689 }
1690 }
1691 else {
1692 items = PyMapping_Items(dct);
1693 }
1694 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001695 goto bail;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001696 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001697 Py_DECREF(items);
1698 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001699 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001700 skipkeys = PyObject_IsTrue(s->skipkeys);
1701 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001702 while ((item = PyIter_Next(it)) != NULL) {
1703 PyObject *encoded, *key, *value;
1704 if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
1705 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1706 goto bail;
1707 }
1708 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001709 if (PyUnicode_Check(key)) {
1710 Py_INCREF(key);
1711 kstr = key;
1712 }
1713 else if (PyFloat_Check(key)) {
1714 kstr = encoder_encode_float(s, key);
1715 if (kstr == NULL)
1716 goto bail;
1717 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001718 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001719 /* This must come before the PyLong_Check because
1720 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001721 kstr = _encoded_const(key);
1722 if (kstr == NULL)
1723 goto bail;
1724 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001725 else if (PyLong_Check(key)) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001726 kstr = encoder_encode_long(s, key);
1727 if (kstr == NULL) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001728 goto bail;
Ethan Furmana4998a72013-08-10 13:01:45 -07001729 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001730 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001731 else if (skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001732 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001733 continue;
1734 }
1735 else {
1736 /* TODO: include repr of key */
Doug Hellmann1c524752010-07-21 12:29:04 +00001737 PyErr_SetString(PyExc_TypeError, "keys must be a string");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001738 goto bail;
1739 }
1740
1741 if (idx) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001742 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001743 goto bail;
1744 }
1745
1746 encoded = encoder_encode_string(s, kstr);
1747 Py_CLEAR(kstr);
1748 if (encoded == NULL)
1749 goto bail;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001750 if (_PyAccu_Accumulate(acc, encoded)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001751 Py_DECREF(encoded);
1752 goto bail;
1753 }
1754 Py_DECREF(encoded);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001755 if (_PyAccu_Accumulate(acc, s->key_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001756 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001757
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001758 value = PyTuple_GET_ITEM(item, 1);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001759 if (encoder_listencode_obj(s, acc, value, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001760 goto bail;
1761 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001762 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001763 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001764 if (PyErr_Occurred())
1765 goto bail;
1766 Py_CLEAR(it);
1767
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001768 if (ident != NULL) {
1769 if (PyDict_DelItem(s->markers, ident))
1770 goto bail;
1771 Py_CLEAR(ident);
1772 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001773 /* TODO DOES NOT RUN; dead code
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001774 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001775 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001776
1777 yield '\n' + (' ' * (_indent * _current_indent_level))
1778 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001779 if (_PyAccu_Accumulate(acc, close_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001780 goto bail;
1781 return 0;
1782
1783bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001784 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001785 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001786 Py_XDECREF(kstr);
1787 Py_XDECREF(ident);
1788 return -1;
1789}
1790
1791
1792static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001793encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001794 PyObject *seq, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001795{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001796 /* Encode Python list seq to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001797 static PyObject *open_array = NULL;
1798 static PyObject *close_array = NULL;
1799 static PyObject *empty_array = NULL;
1800 PyObject *ident = NULL;
1801 PyObject *s_fast = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001802 Py_ssize_t i;
1803
1804 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1805 open_array = PyUnicode_InternFromString("[");
1806 close_array = PyUnicode_InternFromString("]");
1807 empty_array = PyUnicode_InternFromString("[]");
1808 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1809 return -1;
1810 }
1811 ident = NULL;
1812 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1813 if (s_fast == NULL)
1814 return -1;
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001815 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001816 Py_DECREF(s_fast);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001817 return _PyAccu_Accumulate(acc, empty_array);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001818 }
1819
1820 if (s->markers != Py_None) {
1821 int has_key;
1822 ident = PyLong_FromVoidPtr(seq);
1823 if (ident == NULL)
1824 goto bail;
1825 has_key = PyDict_Contains(s->markers, ident);
1826 if (has_key) {
1827 if (has_key != -1)
1828 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1829 goto bail;
1830 }
1831 if (PyDict_SetItem(s->markers, ident, seq)) {
1832 goto bail;
1833 }
1834 }
1835
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001836 if (_PyAccu_Accumulate(acc, open_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001837 goto bail;
1838 if (s->indent != Py_None) {
1839 /* TODO: DOES NOT RUN */
1840 indent_level += 1;
1841 /*
1842 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1843 separator = _item_separator + newline_indent
1844 buf += newline_indent
1845 */
1846 }
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001847 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1848 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001849 if (i) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001850 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001851 goto bail;
1852 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001853 if (encoder_listencode_obj(s, acc, obj, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001854 goto bail;
1855 }
1856 if (ident != NULL) {
1857 if (PyDict_DelItem(s->markers, ident))
1858 goto bail;
1859 Py_CLEAR(ident);
1860 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001861
1862 /* TODO: DOES NOT RUN
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001863 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001864 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001865
1866 yield '\n' + (' ' * (_indent * _current_indent_level))
1867 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001868 if (_PyAccu_Accumulate(acc, close_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001869 goto bail;
1870 Py_DECREF(s_fast);
1871 return 0;
1872
1873bail:
1874 Py_XDECREF(ident);
1875 Py_DECREF(s_fast);
1876 return -1;
1877}
1878
1879static void
1880encoder_dealloc(PyObject *self)
1881{
1882 /* Deallocate Encoder */
1883 encoder_clear(self);
1884 Py_TYPE(self)->tp_free(self);
1885}
1886
1887static int
1888encoder_traverse(PyObject *self, visitproc visit, void *arg)
1889{
1890 PyEncoderObject *s;
1891 assert(PyEncoder_Check(self));
1892 s = (PyEncoderObject *)self;
1893 Py_VISIT(s->markers);
1894 Py_VISIT(s->defaultfn);
1895 Py_VISIT(s->encoder);
1896 Py_VISIT(s->indent);
1897 Py_VISIT(s->key_separator);
1898 Py_VISIT(s->item_separator);
1899 Py_VISIT(s->sort_keys);
1900 Py_VISIT(s->skipkeys);
1901 return 0;
1902}
1903
1904static int
1905encoder_clear(PyObject *self)
1906{
1907 /* Deallocate Encoder */
1908 PyEncoderObject *s;
1909 assert(PyEncoder_Check(self));
1910 s = (PyEncoderObject *)self;
1911 Py_CLEAR(s->markers);
1912 Py_CLEAR(s->defaultfn);
1913 Py_CLEAR(s->encoder);
1914 Py_CLEAR(s->indent);
1915 Py_CLEAR(s->key_separator);
1916 Py_CLEAR(s->item_separator);
1917 Py_CLEAR(s->sort_keys);
1918 Py_CLEAR(s->skipkeys);
1919 return 0;
1920}
1921
1922PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1923
1924static
1925PyTypeObject PyEncoderType = {
1926 PyVarObject_HEAD_INIT(NULL, 0)
1927 "_json.Encoder", /* tp_name */
1928 sizeof(PyEncoderObject), /* tp_basicsize */
1929 0, /* tp_itemsize */
1930 encoder_dealloc, /* tp_dealloc */
1931 0, /* tp_print */
1932 0, /* tp_getattr */
1933 0, /* tp_setattr */
1934 0, /* tp_compare */
1935 0, /* tp_repr */
1936 0, /* tp_as_number */
1937 0, /* tp_as_sequence */
1938 0, /* tp_as_mapping */
1939 0, /* tp_hash */
1940 encoder_call, /* tp_call */
1941 0, /* tp_str */
1942 0, /* tp_getattro */
1943 0, /* tp_setattro */
1944 0, /* tp_as_buffer */
1945 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1946 encoder_doc, /* tp_doc */
1947 encoder_traverse, /* tp_traverse */
1948 encoder_clear, /* tp_clear */
1949 0, /* tp_richcompare */
1950 0, /* tp_weaklistoffset */
1951 0, /* tp_iter */
1952 0, /* tp_iternext */
1953 0, /* tp_methods */
1954 encoder_members, /* tp_members */
1955 0, /* tp_getset */
1956 0, /* tp_base */
1957 0, /* tp_dict */
1958 0, /* tp_descr_get */
1959 0, /* tp_descr_set */
1960 0, /* tp_dictoffset */
1961 encoder_init, /* tp_init */
1962 0, /* tp_alloc */
1963 encoder_new, /* tp_new */
1964 0, /* tp_free */
1965};
1966
1967static PyMethodDef speedups_methods[] = {
1968 {"encode_basestring_ascii",
1969 (PyCFunction)py_encode_basestring_ascii,
1970 METH_O,
1971 pydoc_encode_basestring_ascii},
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001972 {"encode_basestring",
1973 (PyCFunction)py_encode_basestring,
1974 METH_O,
1975 pydoc_encode_basestring},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001976 {"scanstring",
1977 (PyCFunction)py_scanstring,
1978 METH_VARARGS,
1979 pydoc_scanstring},
Christian Heimes90540002008-05-08 14:29:10 +00001980 {NULL, NULL, 0, NULL}
1981};
1982
1983PyDoc_STRVAR(module_doc,
1984"json speedups\n");
1985
Martin v. Löwis1a214512008-06-11 05:26:20 +00001986static struct PyModuleDef jsonmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001987 PyModuleDef_HEAD_INIT,
1988 "_json",
1989 module_doc,
1990 -1,
1991 speedups_methods,
1992 NULL,
1993 NULL,
1994 NULL,
1995 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001996};
1997
Victor Stinnerf024d262015-03-17 17:48:27 +01001998PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001999PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00002000{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00002001 PyObject *m = PyModule_Create(&jsonmodule);
2002 if (!m)
2003 return NULL;
2004 PyScannerType.tp_new = PyType_GenericNew;
2005 if (PyType_Ready(&PyScannerType) < 0)
2006 goto fail;
2007 PyEncoderType.tp_new = PyType_GenericNew;
2008 if (PyType_Ready(&PyEncoderType) < 0)
2009 goto fail;
2010 Py_INCREF((PyObject*)&PyScannerType);
2011 if (PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType) < 0) {
2012 Py_DECREF((PyObject*)&PyScannerType);
2013 goto fail;
2014 }
2015 Py_INCREF((PyObject*)&PyEncoderType);
2016 if (PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType) < 0) {
2017 Py_DECREF((PyObject*)&PyEncoderType);
2018 goto fail;
2019 }
2020 return m;
2021 fail:
2022 Py_DECREF(m);
2023 return NULL;
Christian Heimes90540002008-05-08 14:29:10 +00002024}