blob: 94309905e38a2bb835c6efdeb226e5a0a0528385 [file] [log] [blame]
Christian Heimes90540002008-05-08 14:29:10 +00001#include "Python.h"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00002#include "structmember.h"
Antoine Pitroud0acb412012-03-22 14:42:18 +01003#include "accu.h"
4
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00005#ifdef __GNUC__
6#define UNUSED __attribute__((__unused__))
7#else
8#define UNUSED
9#endif
10
11#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
12#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
13#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
14#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
15
16static PyTypeObject PyScannerType;
17static PyTypeObject PyEncoderType;
18
19typedef struct _PyScannerObject {
20 PyObject_HEAD
21 PyObject *strict;
22 PyObject *object_hook;
23 PyObject *object_pairs_hook;
24 PyObject *parse_float;
25 PyObject *parse_int;
26 PyObject *parse_constant;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +000027 PyObject *memo;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000028} PyScannerObject;
29
30static PyMemberDef scanner_members[] = {
31 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
32 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
33 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
34 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
35 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
36 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
37 {NULL}
38};
39
40typedef struct _PyEncoderObject {
41 PyObject_HEAD
42 PyObject *markers;
43 PyObject *defaultfn;
44 PyObject *encoder;
45 PyObject *indent;
46 PyObject *key_separator;
47 PyObject *item_separator;
48 PyObject *sort_keys;
49 PyObject *skipkeys;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +010050 PyCFunction fast_encode;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000051 int allow_nan;
52} PyEncoderObject;
53
54static PyMemberDef encoder_members[] = {
55 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
56 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
57 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
58 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
59 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
60 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
61 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
62 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
63 {NULL}
64};
65
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020066static PyObject *
67join_list_unicode(PyObject *lst)
68{
69 /* return u''.join(lst) */
70 static PyObject *sep = NULL;
71 if (sep == NULL) {
72 sep = PyUnicode_FromStringAndSize("", 0);
73 if (sep == NULL)
74 return NULL;
75 }
76 return PyUnicode_Join(sep, lst);
77}
78
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020079/* Forward decls */
80
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000081static PyObject *
82ascii_escape_unicode(PyObject *pystr);
83static PyObject *
84py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
85void init_json(void);
86static PyObject *
87scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
88static PyObject *
89_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
90static PyObject *
91scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
92static int
93scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
94static void
95scanner_dealloc(PyObject *self);
96static int
97scanner_clear(PyObject *self);
98static PyObject *
99encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
100static int
101encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
102static void
103encoder_dealloc(PyObject *self);
104static int
105encoder_clear(PyObject *self);
106static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200107encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000108static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200109encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000110static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200111encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000112static PyObject *
Hirokazu Yamamotofecf5d12009-05-02 15:55:19 +0000113_encoded_const(PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000114static void
115raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
116static PyObject *
117encoder_encode_string(PyEncoderObject *s, PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000118static PyObject *
Ethan Furmana4998a72013-08-10 13:01:45 -0700119encoder_encode_long(PyEncoderObject* s UNUSED, PyObject *obj);
120static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000121encoder_encode_float(PyEncoderObject *s, PyObject *obj);
122
/* True for a printable ASCII character (' ' through '~') that can be copied
   into a JSON string unescaped, i.e. anything but backslash and double quote.
   The argument is evaluated several times, so it must be free of side
   effects; it is parenthesized so that expression arguments group correctly. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* True for the four whitespace characters this scanner skips:
   space, tab, line feed and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
Christian Heimes90540002008-05-08 14:29:10 +0000125
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000126static Py_ssize_t
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200127ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000128{
129 /* Escape unicode code point c to ASCII escape sequences
130 in char *output. output must have at least 12 bytes unused to
131 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000132 output[chars++] = '\\';
133 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000134 case '\\': output[chars++] = c; break;
135 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000136 case '\b': output[chars++] = 'b'; break;
137 case '\f': output[chars++] = 'f'; break;
138 case '\n': output[chars++] = 'n'; break;
139 case '\r': output[chars++] = 'r'; break;
140 case '\t': output[chars++] = 't'; break;
141 default:
Christian Heimes90540002008-05-08 14:29:10 +0000142 if (c >= 0x10000) {
143 /* UTF-16 surrogate pair */
Victor Stinner76df43d2012-10-30 01:42:39 +0100144 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000145 output[chars++] = 'u';
Victor Stinner76df43d2012-10-30 01:42:39 +0100146 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
147 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
148 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
149 output[chars++] = Py_hexdigits[(v ) & 0xf];
150 c = Py_UNICODE_LOW_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000151 output[chars++] = '\\';
152 }
Christian Heimes90540002008-05-08 14:29:10 +0000153 output[chars++] = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200154 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
155 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
156 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
157 output[chars++] = Py_hexdigits[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000158 }
159 return chars;
160}
161
162static PyObject *
163ascii_escape_unicode(PyObject *pystr)
164{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000165 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000166 Py_ssize_t i;
167 Py_ssize_t input_chars;
168 Py_ssize_t output_size;
169 Py_ssize_t chars;
170 PyObject *rval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200171 void *input;
172 unsigned char *output;
173 int kind;
Christian Heimes90540002008-05-08 14:29:10 +0000174
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200175 if (PyUnicode_READY(pystr) == -1)
176 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000177
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200178 input_chars = PyUnicode_GET_LENGTH(pystr);
179 input = PyUnicode_DATA(pystr);
180 kind = PyUnicode_KIND(pystr);
181
182 /* Compute the output size */
183 for (i = 0, output_size = 2; i < input_chars; i++) {
184 Py_UCS4 c = PyUnicode_READ(kind, input, i);
185 if (S_CHAR(c))
186 output_size++;
187 else {
188 switch(c) {
Victor Stinnerd9c06312011-10-11 21:56:19 +0200189 case '\\': case '"': case '\b': case '\f':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200190 case '\n': case '\r': case '\t':
191 output_size += 2; break;
192 default:
193 output_size += c >= 0x10000 ? 12 : 6;
194 }
195 }
196 }
197
198 rval = PyUnicode_New(output_size, 127);
Christian Heimes90540002008-05-08 14:29:10 +0000199 if (rval == NULL) {
200 return NULL;
201 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200202 output = PyUnicode_1BYTE_DATA(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000203 chars = 0;
204 output[chars++] = '"';
205 for (i = 0; i < input_chars; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200206 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Christian Heimes90540002008-05-08 14:29:10 +0000207 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000208 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000209 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000210 else {
211 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000212 }
Christian Heimes90540002008-05-08 14:29:10 +0000213 }
214 output[chars++] = '"';
Christian Heimesf402e922013-01-03 09:21:55 +0100215#ifdef Py_DEBUG
Victor Stinner8f825062012-04-27 13:55:39 +0200216 assert(_PyUnicode_CheckConsistency(rval, 1));
Christian Heimesf402e922013-01-03 09:21:55 +0100217#endif
Christian Heimes90540002008-05-08 14:29:10 +0000218 return rval;
219}
220
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100221static PyObject *
222escape_unicode(PyObject *pystr)
223{
224 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
225 Py_ssize_t i;
226 Py_ssize_t input_chars;
227 Py_ssize_t output_size;
228 Py_ssize_t chars;
229 PyObject *rval;
230 void *input;
231 int kind;
232 Py_UCS4 maxchar;
233
234 if (PyUnicode_READY(pystr) == -1)
235 return NULL;
236
237 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
238 input_chars = PyUnicode_GET_LENGTH(pystr);
239 input = PyUnicode_DATA(pystr);
240 kind = PyUnicode_KIND(pystr);
241
242 /* Compute the output size */
243 for (i = 0, output_size = 2; i < input_chars; i++) {
244 Py_UCS4 c = PyUnicode_READ(kind, input, i);
245 switch (c) {
246 case '\\': case '"': case '\b': case '\f':
247 case '\n': case '\r': case '\t':
248 output_size += 2;
249 break;
250 default:
251 if (c <= 0x1f)
252 output_size += 6;
253 else
254 output_size++;
255 }
256 }
257
258 rval = PyUnicode_New(output_size, maxchar);
259 if (rval == NULL)
260 return NULL;
261
262 kind = PyUnicode_KIND(rval);
263
264#define ENCODE_OUTPUT do { \
265 chars = 0; \
266 output[chars++] = '"'; \
267 for (i = 0; i < input_chars; i++) { \
268 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
269 switch (c) { \
270 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
271 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
272 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
273 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
274 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
275 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
276 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
277 default: \
278 if (c <= 0x1f) { \
279 output[chars++] = '\\'; \
280 output[chars++] = 'u'; \
281 output[chars++] = '0'; \
282 output[chars++] = '0'; \
283 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
284 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
285 } else { \
286 output[chars++] = c; \
287 } \
288 } \
289 } \
290 output[chars++] = '"'; \
291 } while (0)
292
293 if (kind == PyUnicode_1BYTE_KIND) {
294 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
295 ENCODE_OUTPUT;
296 } else if (kind == PyUnicode_2BYTE_KIND) {
297 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
298 ENCODE_OUTPUT;
299 } else {
300 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
301 assert(kind == PyUnicode_4BYTE_KIND);
302 ENCODE_OUTPUT;
303 }
304#undef ENCODE_OUTPUT
305
306#ifdef Py_DEBUG
307 assert(_PyUnicode_CheckConsistency(rval, 1));
308#endif
309 return rval;
310}
311
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000312static void
Christian Heimes90540002008-05-08 14:29:10 +0000313raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
314{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000315 /* Use the Python function json.decoder.errmsg to raise a nice
316 looking ValueError exception */
Christian Heimes90540002008-05-08 14:29:10 +0000317 static PyObject *errmsg_fn = NULL;
318 PyObject *pymsg;
319 if (errmsg_fn == NULL) {
320 PyObject *decoder = PyImport_ImportModule("json.decoder");
321 if (decoder == NULL)
322 return;
323 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000324 Py_DECREF(decoder);
Christian Heimes90540002008-05-08 14:29:10 +0000325 if (errmsg_fn == NULL)
326 return;
Christian Heimes90540002008-05-08 14:29:10 +0000327 }
Antoine Pitroucbb02842012-12-01 19:34:16 +0100328 pymsg = PyObject_CallFunction(errmsg_fn, "(zOn)", msg, s, end);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000329 if (pymsg) {
330 PyErr_SetObject(PyExc_ValueError, pymsg);
331 Py_DECREF(pymsg);
332 }
Christian Heimes90540002008-05-08 14:29:10 +0000333}
334
Ezio Melotti37623ab2013-01-03 08:44:15 +0200335static void
336raise_stop_iteration(Py_ssize_t idx)
337{
338 PyObject *value = PyLong_FromSsize_t(idx);
339 if (value != NULL) {
340 PyErr_SetObject(PyExc_StopIteration, value);
341 Py_DECREF(value);
342 }
343}
344
Christian Heimes90540002008-05-08 14:29:10 +0000345static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000346_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
347 /* return (rval, idx) tuple, stealing reference to rval */
348 PyObject *tpl;
349 PyObject *pyidx;
350 /*
351 steal a reference to rval, returns (rval, idx)
352 */
353 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000354 return NULL;
355 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000356 pyidx = PyLong_FromSsize_t(idx);
357 if (pyidx == NULL) {
358 Py_DECREF(rval);
359 return NULL;
360 }
361 tpl = PyTuple_New(2);
362 if (tpl == NULL) {
363 Py_DECREF(pyidx);
364 Py_DECREF(rval);
365 return NULL;
366 }
367 PyTuple_SET_ITEM(tpl, 0, rval);
368 PyTuple_SET_ITEM(tpl, 1, pyidx);
369 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000370}
371
/* Flush the pending string piece: if chunk is non-NULL, append it to the
   chunks list (creating the list on first use) and reset chunk to NULL.
   On failure control jumps to the bail label of the enclosing function.

   Expects locals named chunk and chunks and a label named bail.  Call
   sites write it without a trailing semicolon, so it is a bare if
   statement rather than a do/while(0) wrapper. */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        if (chunks == NULL) { \
            chunks = PyList_New(0); \
            if (chunks == NULL) { \
                goto bail; \
            } \
        } \
        if (PyList_Append(chunks, chunk)) { \
            Py_CLEAR(chunk); \
            goto bail; \
        } \
        Py_CLEAR(chunk); \
    }
386
Christian Heimes90540002008-05-08 14:29:10 +0000387static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000388scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000389{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000390 /* Read the JSON string from PyUnicode pystr.
391 end is the index of the first character after the quote.
392 if strict is zero then literal control characters are allowed
393 *next_end_ptr is a return-by-reference index of the character
394 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000395
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000396 Return value is a new PyUnicode
397 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000398 PyObject *rval = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200399 Py_ssize_t len;
Christian Heimes90540002008-05-08 14:29:10 +0000400 Py_ssize_t begin = end - 1;
Brett Cannonb94767f2011-02-22 20:15:44 +0000401 Py_ssize_t next /* = begin */;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200402 const void *buf;
403 int kind;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000404 PyObject *chunks = NULL;
405 PyObject *chunk = NULL;
406
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200407 if (PyUnicode_READY(pystr) == -1)
408 return 0;
409
410 len = PyUnicode_GET_LENGTH(pystr);
411 buf = PyUnicode_DATA(pystr);
412 kind = PyUnicode_KIND(pystr);
413
Ezio Melotti37623ab2013-01-03 08:44:15 +0200414 if (end < 0 || len < end) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000415 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
416 goto bail;
417 }
Christian Heimes90540002008-05-08 14:29:10 +0000418 while (1) {
419 /* Find the end of the string or the next escape */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200420 Py_UCS4 c = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000421 for (next = end; next < len; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200422 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000423 if (c == '"' || c == '\\') {
424 break;
425 }
426 else if (strict && c <= 0x1f) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000427 raise_errmsg("Invalid control character at", pystr, next);
Christian Heimes90540002008-05-08 14:29:10 +0000428 goto bail;
429 }
430 }
431 if (!(c == '"' || c == '\\')) {
432 raise_errmsg("Unterminated string starting at", pystr, begin);
433 goto bail;
434 }
435 /* Pick up this chunk if it's not zero length */
436 if (next != end) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000437 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200438 chunk = PyUnicode_FromKindAndData(
439 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200440 (char*)buf + kind * end,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200441 next - end);
Christian Heimes90540002008-05-08 14:29:10 +0000442 if (chunk == NULL) {
443 goto bail;
444 }
Christian Heimes90540002008-05-08 14:29:10 +0000445 }
446 next++;
447 if (c == '"') {
448 end = next;
449 break;
450 }
451 if (next == len) {
452 raise_errmsg("Unterminated string starting at", pystr, begin);
453 goto bail;
454 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200455 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000456 if (c != 'u') {
457 /* Non-unicode backslash escapes */
458 end = next + 1;
459 switch (c) {
460 case '"': break;
461 case '\\': break;
462 case '/': break;
463 case 'b': c = '\b'; break;
464 case 'f': c = '\f'; break;
465 case 'n': c = '\n'; break;
466 case 'r': c = '\r'; break;
467 case 't': c = '\t'; break;
468 default: c = 0;
469 }
470 if (c == 0) {
471 raise_errmsg("Invalid \\escape", pystr, end - 2);
472 goto bail;
473 }
474 }
475 else {
476 c = 0;
477 next++;
478 end = next + 4;
479 if (end >= len) {
480 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
481 goto bail;
482 }
483 /* Decode 4 hex digits */
484 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200485 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000486 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000487 switch (digit) {
488 case '0': case '1': case '2': case '3': case '4':
489 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000490 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000491 case 'a': case 'b': case 'c': case 'd': case 'e':
492 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000493 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000494 case 'A': case 'B': case 'C': case 'D': case 'E':
495 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000496 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000497 default:
498 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
499 goto bail;
500 }
501 }
Christian Heimes90540002008-05-08 14:29:10 +0000502 /* Surrogate pair */
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200503 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
504 PyUnicode_READ(kind, buf, next++) == '\\' &&
505 PyUnicode_READ(kind, buf, next++) == 'u') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200506 Py_UCS4 c2 = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000507 end += 6;
508 /* Decode 4 hex digits */
509 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200510 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000511 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000512 switch (digit) {
513 case '0': case '1': case '2': case '3': case '4':
514 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000515 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000516 case 'a': case 'b': case 'c': case 'd': case 'e':
517 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000518 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000519 case 'A': case 'B': case 'C': case 'D': case 'E':
520 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000521 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000522 default:
523 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
524 goto bail;
525 }
526 }
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200527 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
528 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
529 else
530 end -= 6;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000531 }
Christian Heimes90540002008-05-08 14:29:10 +0000532 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000533 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200534 chunk = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &c, 1);
Christian Heimes90540002008-05-08 14:29:10 +0000535 if (chunk == NULL) {
536 goto bail;
537 }
Christian Heimes90540002008-05-08 14:29:10 +0000538 }
539
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000540 if (chunks == NULL) {
541 if (chunk != NULL)
542 rval = chunk;
543 else
544 rval = PyUnicode_FromStringAndSize("", 0);
Christian Heimes90540002008-05-08 14:29:10 +0000545 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000546 else {
547 APPEND_OLD_CHUNK
548 rval = join_list_unicode(chunks);
549 if (rval == NULL) {
550 goto bail;
551 }
552 Py_CLEAR(chunks);
553 }
554
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000555 *next_end_ptr = end;
556 return rval;
Christian Heimes90540002008-05-08 14:29:10 +0000557bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000558 *next_end_ptr = -1;
Christian Heimes90540002008-05-08 14:29:10 +0000559 Py_XDECREF(chunks);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000560 Py_XDECREF(chunk);
Christian Heimes90540002008-05-08 14:29:10 +0000561 return NULL;
562}
563
564PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000565 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000566 "\n"
567 "Scan the string s for a JSON string. End is the index of the\n"
568 "character in s after the quote that started the JSON string.\n"
569 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
570 "on attempt to decode an invalid string. If strict is False then literal\n"
571 "control characters are allowed in the string.\n"
572 "\n"
573 "Returns a tuple of the decoded string and the index of the character in s\n"
574 "after the end quote."
575);
Christian Heimes90540002008-05-08 14:29:10 +0000576
577static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000578py_scanstring(PyObject* self UNUSED, PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000579{
580 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000581 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000582 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000583 Py_ssize_t next_end = -1;
584 int strict = 1;
Antoine Pitroucbb02842012-12-01 19:34:16 +0100585 if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000586 return NULL;
587 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000588 if (PyUnicode_Check(pystr)) {
589 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000590 }
591 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000592 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000593 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000594 Py_TYPE(pystr)->tp_name);
595 return NULL;
596 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000597 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000598}
599
600PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000601 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000602 "\n"
603 "Return an ASCII-only JSON representation of a Python string"
604);
Christian Heimes90540002008-05-08 14:29:10 +0000605
606static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000607py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000608{
609 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000610 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000611 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000612 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000613 rval = ascii_escape_unicode(pystr);
614 }
615 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000616 PyErr_Format(PyExc_TypeError,
617 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000618 Py_TYPE(pystr)->tp_name);
619 return NULL;
620 }
Christian Heimes90540002008-05-08 14:29:10 +0000621 return rval;
622}
623
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100624
625PyDoc_STRVAR(pydoc_encode_basestring,
626 "encode_basestring(string) -> string\n"
627 "\n"
628 "Return a JSON representation of a Python string"
629);
630
631static PyObject *
632py_encode_basestring(PyObject* self UNUSED, PyObject *pystr)
633{
634 PyObject *rval;
635 /* Return a JSON representation of a Python string */
636 /* METH_O */
637 if (PyUnicode_Check(pystr)) {
638 rval = escape_unicode(pystr);
639 }
640 else {
641 PyErr_Format(PyExc_TypeError,
642 "first argument must be a string, not %.80s",
643 Py_TYPE(pystr)->tp_name);
644 return NULL;
645 }
646 return rval;
647}
648
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000649static void
650scanner_dealloc(PyObject *self)
651{
652 /* Deallocate scanner object */
653 scanner_clear(self);
654 Py_TYPE(self)->tp_free(self);
655}
656
657static int
658scanner_traverse(PyObject *self, visitproc visit, void *arg)
659{
660 PyScannerObject *s;
661 assert(PyScanner_Check(self));
662 s = (PyScannerObject *)self;
663 Py_VISIT(s->strict);
664 Py_VISIT(s->object_hook);
665 Py_VISIT(s->object_pairs_hook);
666 Py_VISIT(s->parse_float);
667 Py_VISIT(s->parse_int);
668 Py_VISIT(s->parse_constant);
669 return 0;
670}
671
672static int
673scanner_clear(PyObject *self)
674{
675 PyScannerObject *s;
676 assert(PyScanner_Check(self));
677 s = (PyScannerObject *)self;
678 Py_CLEAR(s->strict);
679 Py_CLEAR(s->object_hook);
680 Py_CLEAR(s->object_pairs_hook);
681 Py_CLEAR(s->parse_float);
682 Py_CLEAR(s->parse_int);
683 Py_CLEAR(s->parse_constant);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000684 Py_CLEAR(s->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000685 return 0;
686}
687
688static PyObject *
689_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
690 /* Read a JSON object from PyUnicode pystr.
691 idx is the index of the first character after the opening curly brace.
692 *next_idx_ptr is a return-by-reference index to the first character after
693 the closing curly brace.
694
695 Returns a new PyObject (usually a dict, but object_hook can change that)
696 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200697 void *str;
698 int kind;
699 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000700 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000701 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000702 PyObject *key = NULL;
703 int strict = PyObject_IsTrue(s->strict);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000704 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000705 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000706
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200707 if (PyUnicode_READY(pystr) == -1)
708 return NULL;
709
710 str = PyUnicode_DATA(pystr);
711 kind = PyUnicode_KIND(pystr);
712 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
713
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000714 if (has_pairs_hook)
715 rval = PyList_New(0);
716 else
717 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000718 if (rval == NULL)
719 return NULL;
720
721 /* skip whitespace after { */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200722 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000723
724 /* only loop if the object is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200725 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
726 while (1) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000727 PyObject *memokey;
728
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000729 /* read key */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200730 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200731 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000732 goto bail;
733 }
734 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
735 if (key == NULL)
736 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000737 memokey = PyDict_GetItem(s->memo, key);
738 if (memokey != NULL) {
739 Py_INCREF(memokey);
740 Py_DECREF(key);
741 key = memokey;
742 }
743 else {
744 if (PyDict_SetItem(s->memo, key, key) < 0)
745 goto bail;
746 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000747 idx = next_idx;
748
749 /* skip whitespace between key and : delimiter, read :, skip whitespace */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200750 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
751 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200752 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000753 goto bail;
754 }
755 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200756 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000757
758 /* read any JSON term */
759 val = scan_once_unicode(s, pystr, idx, &next_idx);
760 if (val == NULL)
761 goto bail;
762
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000763 if (has_pairs_hook) {
764 PyObject *item = PyTuple_Pack(2, key, val);
765 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000766 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000767 Py_CLEAR(key);
768 Py_CLEAR(val);
769 if (PyList_Append(rval, item) == -1) {
770 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000771 goto bail;
772 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000773 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000774 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000775 else {
776 if (PyDict_SetItem(rval, key, val) < 0)
777 goto bail;
778 Py_CLEAR(key);
779 Py_CLEAR(val);
780 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000781 idx = next_idx;
782
783 /* skip whitespace before } or , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200784 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000785
786 /* bail if the object is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200787 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000788 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200789 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200790 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000791 goto bail;
792 }
793 idx++;
794
795 /* skip whitespace after , delimiter */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200796 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000797 }
798 }
799
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000800 *next_idx_ptr = idx + 1;
801
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000802 if (has_pairs_hook) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000803 val = PyObject_CallFunctionObjArgs(s->object_pairs_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000804 Py_DECREF(rval);
805 return val;
806 }
807
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000808 /* if object_hook is not None: rval = object_hook(rval) */
809 if (s->object_hook != Py_None) {
810 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000811 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000812 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000813 }
814 return rval;
815bail:
816 Py_XDECREF(key);
817 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000818 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000819 return NULL;
820}
821
822static PyObject *
823_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
824 /* Read a JSON array from PyString pystr.
825 idx is the index of the first character after the opening brace.
826 *next_idx_ptr is a return-by-reference index to the first character after
827 the closing brace.
828
829 Returns a new PyList
830 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200831 void *str;
832 int kind;
833 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000834 PyObject *val = NULL;
835 PyObject *rval = PyList_New(0);
836 Py_ssize_t next_idx;
837 if (rval == NULL)
838 return NULL;
839
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200840 if (PyUnicode_READY(pystr) == -1)
841 return NULL;
842
843 str = PyUnicode_DATA(pystr);
844 kind = PyUnicode_KIND(pystr);
845 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
846
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000847 /* skip whitespace after [ */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200848 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000849
850 /* only loop if the array is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200851 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
852 while (1) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000853
854 /* read any JSON term */
855 val = scan_once_unicode(s, pystr, idx, &next_idx);
856 if (val == NULL)
857 goto bail;
858
859 if (PyList_Append(rval, val) == -1)
860 goto bail;
861
862 Py_CLEAR(val);
863 idx = next_idx;
864
865 /* skip whitespace between term and , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200866 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000867
868 /* bail if the array is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200869 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000870 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200871 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200872 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000873 goto bail;
874 }
875 idx++;
876
877 /* skip whitespace after , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200878 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000879 }
880 }
881
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200882 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
883 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200884 raise_errmsg("Expecting value", pystr, end_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000885 goto bail;
886 }
887 *next_idx_ptr = idx + 1;
888 return rval;
889bail:
890 Py_XDECREF(val);
891 Py_DECREF(rval);
892 return NULL;
893}
894
895static PyObject *
896_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
897 /* Read a JSON constant from PyString pystr.
898 constant is the constant string that was found
899 ("NaN", "Infinity", "-Infinity").
900 idx is the index of the first character of the constant
901 *next_idx_ptr is a return-by-reference index to the first character after
902 the constant.
903
904 Returns the result of parse_constant
905 */
906 PyObject *cstr;
907 PyObject *rval;
908 /* constant is "NaN", "Infinity", or "-Infinity" */
909 cstr = PyUnicode_InternFromString(constant);
910 if (cstr == NULL)
911 return NULL;
912
913 /* rval = parse_constant(constant) */
914 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200915 idx += PyUnicode_GET_LENGTH(cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000916 Py_DECREF(cstr);
917 *next_idx_ptr = idx;
918 return rval;
919}
920
921static PyObject *
922_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
923 /* Read a JSON number from PyUnicode pystr.
924 idx is the index of the first character of the number
925 *next_idx_ptr is a return-by-reference index to the first character after
926 the number.
927
928 Returns a new PyObject representation of that number:
929 PyInt, PyLong, or PyFloat.
930 May return other types if parse_int or parse_float are set
931 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200932 void *str;
933 int kind;
934 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000935 Py_ssize_t idx = start;
936 int is_float = 0;
937 PyObject *rval;
Antoine Pitrouf6454512011-04-25 19:16:06 +0200938 PyObject *numstr = NULL;
939 PyObject *custom_func;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000940
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200941 if (PyUnicode_READY(pystr) == -1)
942 return NULL;
943
944 str = PyUnicode_DATA(pystr);
945 kind = PyUnicode_KIND(pystr);
946 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
947
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000948 /* read a sign if it's there, make sure it's not the end of the string */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200949 if (PyUnicode_READ(kind, str, idx) == '-') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000950 idx++;
951 if (idx > end_idx) {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200952 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000953 return NULL;
954 }
955 }
956
957 /* read as many integer digits as we find as long as it doesn't start with 0 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200958 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000959 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200960 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000961 }
962 /* if it starts with 0 we only expect one integer digit */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200963 else if (PyUnicode_READ(kind, str, idx) == '0') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000964 idx++;
965 }
966 /* no integer digits, error */
967 else {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200968 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000969 return NULL;
970 }
971
972 /* if the next char is '.' followed by a digit then read all float digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200973 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000974 is_float = 1;
975 idx += 2;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200976 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000977 }
978
979 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200980 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000981 Py_ssize_t e_start = idx;
982 idx++;
983
984 /* read an exponent sign if present */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200985 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000986
987 /* read all digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200988 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000989
990 /* if we got a digit, then parse as float. if not, backtrack */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200991 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000992 is_float = 1;
993 }
994 else {
995 idx = e_start;
996 }
997 }
998
Antoine Pitrouf6454512011-04-25 19:16:06 +0200999 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
1000 custom_func = s->parse_float;
1001 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
1002 custom_func = s->parse_int;
1003 else
1004 custom_func = NULL;
1005
1006 if (custom_func) {
1007 /* copy the section we determined to be a number */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001008 numstr = PyUnicode_FromKindAndData(kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001009 (char*)str + kind * start,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001010 idx - start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001011 if (numstr == NULL)
1012 return NULL;
1013 rval = PyObject_CallFunctionObjArgs(custom_func, numstr, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001014 }
1015 else {
Antoine Pitrouf6454512011-04-25 19:16:06 +02001016 Py_ssize_t i, n;
1017 char *buf;
1018 /* Straight conversion to ASCII, to avoid costly conversion of
1019 decimal unicode digits (which cannot appear here) */
1020 n = idx - start;
1021 numstr = PyBytes_FromStringAndSize(NULL, n);
1022 if (numstr == NULL)
1023 return NULL;
1024 buf = PyBytes_AS_STRING(numstr);
1025 for (i = 0; i < n; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001026 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001027 }
1028 if (is_float)
1029 rval = PyFloat_FromString(numstr);
1030 else
1031 rval = PyLong_FromString(buf, NULL, 10);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001032 }
1033 Py_DECREF(numstr);
1034 *next_idx_ptr = idx;
1035 return rval;
1036}
1037
1038static PyObject *
1039scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1040{
1041 /* Read one JSON term (of any kind) from PyUnicode pystr.
1042 idx is the index of the first character of the term
1043 *next_idx_ptr is a return-by-reference index to the first character after
1044 the number.
1045
1046 Returns a new PyObject representation of the term.
1047 */
Ezio Melotti362b9512011-05-07 17:58:09 +03001048 PyObject *res;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001049 void *str;
1050 int kind;
1051 Py_ssize_t length;
1052
1053 if (PyUnicode_READY(pystr) == -1)
1054 return NULL;
1055
1056 str = PyUnicode_DATA(pystr);
1057 kind = PyUnicode_KIND(pystr);
1058 length = PyUnicode_GET_LENGTH(pystr);
1059
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001060 if (idx < 0) {
Benjamin Peterson9beee042014-04-14 11:46:51 -04001061 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001062 return NULL;
1063 }
1064 if (idx >= length) {
Ezio Melotti37623ab2013-01-03 08:44:15 +02001065 raise_stop_iteration(idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001066 return NULL;
1067 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001068
1069 switch (PyUnicode_READ(kind, str, idx)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001070 case '"':
1071 /* string */
1072 return scanstring_unicode(pystr, idx + 1,
1073 PyObject_IsTrue(s->strict),
1074 next_idx_ptr);
1075 case '{':
1076 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +03001077 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1078 "from a unicode string"))
1079 return NULL;
1080 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1081 Py_LeaveRecursiveCall();
1082 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001083 case '[':
1084 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +03001085 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1086 "from a unicode string"))
1087 return NULL;
1088 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1089 Py_LeaveRecursiveCall();
1090 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001091 case 'n':
1092 /* null */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001093 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001094 Py_INCREF(Py_None);
1095 *next_idx_ptr = idx + 4;
1096 return Py_None;
1097 }
1098 break;
1099 case 't':
1100 /* true */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001101 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001102 Py_INCREF(Py_True);
1103 *next_idx_ptr = idx + 4;
1104 return Py_True;
1105 }
1106 break;
1107 case 'f':
1108 /* false */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001109 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1110 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1111 PyUnicode_READ(kind, str, idx + 3) == 's' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001112 PyUnicode_READ(kind, str, idx + 4) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001113 Py_INCREF(Py_False);
1114 *next_idx_ptr = idx + 5;
1115 return Py_False;
1116 }
1117 break;
1118 case 'N':
1119 /* NaN */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001120 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001121 PyUnicode_READ(kind, str, idx + 2) == 'N') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001122 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1123 }
1124 break;
1125 case 'I':
1126 /* Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001127 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1128 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1129 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001130 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001131 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1132 PyUnicode_READ(kind, str, idx + 6) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001133 PyUnicode_READ(kind, str, idx + 7) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001134 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1135 }
1136 break;
1137 case '-':
1138 /* -Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001139 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001140 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1141 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001142 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001143 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001144 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1145 PyUnicode_READ(kind, str, idx + 7) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001146 PyUnicode_READ(kind, str, idx + 8) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001147 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1148 }
1149 break;
1150 }
1151 /* Didn't find a string, object, array, or named constant. Look for a number. */
1152 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1153}
1154
1155static PyObject *
1156scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1157{
1158 /* Python callable interface to scan_once_{str,unicode} */
1159 PyObject *pystr;
1160 PyObject *rval;
1161 Py_ssize_t idx;
1162 Py_ssize_t next_idx = -1;
1163 static char *kwlist[] = {"string", "idx", NULL};
1164 PyScannerObject *s;
1165 assert(PyScanner_Check(self));
1166 s = (PyScannerObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001167 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001168 return NULL;
1169
1170 if (PyUnicode_Check(pystr)) {
1171 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1172 }
1173 else {
1174 PyErr_Format(PyExc_TypeError,
1175 "first argument must be a string, not %.80s",
1176 Py_TYPE(pystr)->tp_name);
1177 return NULL;
1178 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001179 PyDict_Clear(s->memo);
1180 if (rval == NULL)
1181 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001182 return _build_rval_index_tuple(rval, next_idx);
1183}
1184
1185static PyObject *
1186scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1187{
1188 PyScannerObject *s;
1189 s = (PyScannerObject *)type->tp_alloc(type, 0);
1190 if (s != NULL) {
1191 s->strict = NULL;
1192 s->object_hook = NULL;
1193 s->object_pairs_hook = NULL;
1194 s->parse_float = NULL;
1195 s->parse_int = NULL;
1196 s->parse_constant = NULL;
1197 }
1198 return (PyObject *)s;
1199}
1200
1201static int
1202scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1203{
1204 /* Initialize Scanner object */
1205 PyObject *ctx;
1206 static char *kwlist[] = {"context", NULL};
1207 PyScannerObject *s;
1208
1209 assert(PyScanner_Check(self));
1210 s = (PyScannerObject *)self;
1211
1212 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1213 return -1;
1214
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001215 if (s->memo == NULL) {
1216 s->memo = PyDict_New();
1217 if (s->memo == NULL)
1218 goto bail;
1219 }
1220
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001221 /* All of these will fail "gracefully" so we don't need to verify them */
1222 s->strict = PyObject_GetAttrString(ctx, "strict");
1223 if (s->strict == NULL)
1224 goto bail;
1225 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1226 if (s->object_hook == NULL)
1227 goto bail;
1228 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1229 if (s->object_pairs_hook == NULL)
1230 goto bail;
1231 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1232 if (s->parse_float == NULL)
1233 goto bail;
1234 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1235 if (s->parse_int == NULL)
1236 goto bail;
1237 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1238 if (s->parse_constant == NULL)
1239 goto bail;
1240
1241 return 0;
1242
1243bail:
1244 Py_CLEAR(s->strict);
1245 Py_CLEAR(s->object_hook);
1246 Py_CLEAR(s->object_pairs_hook);
1247 Py_CLEAR(s->parse_float);
1248 Py_CLEAR(s->parse_int);
1249 Py_CLEAR(s->parse_constant);
1250 return -1;
1251}
1252
1253PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1254
1255static
1256PyTypeObject PyScannerType = {
1257 PyVarObject_HEAD_INIT(NULL, 0)
1258 "_json.Scanner", /* tp_name */
1259 sizeof(PyScannerObject), /* tp_basicsize */
1260 0, /* tp_itemsize */
1261 scanner_dealloc, /* tp_dealloc */
1262 0, /* tp_print */
1263 0, /* tp_getattr */
1264 0, /* tp_setattr */
1265 0, /* tp_compare */
1266 0, /* tp_repr */
1267 0, /* tp_as_number */
1268 0, /* tp_as_sequence */
1269 0, /* tp_as_mapping */
1270 0, /* tp_hash */
1271 scanner_call, /* tp_call */
1272 0, /* tp_str */
1273 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1274 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1275 0, /* tp_as_buffer */
1276 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1277 scanner_doc, /* tp_doc */
1278 scanner_traverse, /* tp_traverse */
1279 scanner_clear, /* tp_clear */
1280 0, /* tp_richcompare */
1281 0, /* tp_weaklistoffset */
1282 0, /* tp_iter */
1283 0, /* tp_iternext */
1284 0, /* tp_methods */
1285 scanner_members, /* tp_members */
1286 0, /* tp_getset */
1287 0, /* tp_base */
1288 0, /* tp_dict */
1289 0, /* tp_descr_get */
1290 0, /* tp_descr_set */
1291 0, /* tp_dictoffset */
1292 scanner_init, /* tp_init */
1293 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1294 scanner_new, /* tp_new */
1295 0,/* PyObject_GC_Del, */ /* tp_free */
1296};
1297
1298static PyObject *
1299encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1300{
1301 PyEncoderObject *s;
1302 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1303 if (s != NULL) {
1304 s->markers = NULL;
1305 s->defaultfn = NULL;
1306 s->encoder = NULL;
1307 s->indent = NULL;
1308 s->key_separator = NULL;
1309 s->item_separator = NULL;
1310 s->sort_keys = NULL;
1311 s->skipkeys = NULL;
1312 }
1313 return (PyObject *)s;
1314}
1315
1316static int
1317encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1318{
1319 /* initialize Encoder object */
1320 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1321
1322 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001323 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1324 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001325
1326 assert(PyEncoder_Check(self));
1327 s = (PyEncoderObject *)self;
1328
1329 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001330 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
1331 &sort_keys, &skipkeys, &allow_nan))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001332 return -1;
1333
Antoine Pitrou781eba72009-12-08 15:57:31 +00001334 s->markers = markers;
1335 s->defaultfn = defaultfn;
1336 s->encoder = encoder;
1337 s->indent = indent;
1338 s->key_separator = key_separator;
1339 s->item_separator = item_separator;
1340 s->sort_keys = sort_keys;
1341 s->skipkeys = skipkeys;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001342 s->fast_encode = NULL;
1343 if (PyCFunction_Check(s->encoder)) {
1344 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1345 if (f == (PyCFunction)py_encode_basestring_ascii ||
1346 f == (PyCFunction)py_encode_basestring) {
1347 s->fast_encode = f;
1348 }
1349 }
Antoine Pitrou781eba72009-12-08 15:57:31 +00001350 s->allow_nan = PyObject_IsTrue(allow_nan);
1351
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001352 Py_INCREF(s->markers);
1353 Py_INCREF(s->defaultfn);
1354 Py_INCREF(s->encoder);
1355 Py_INCREF(s->indent);
1356 Py_INCREF(s->key_separator);
1357 Py_INCREF(s->item_separator);
1358 Py_INCREF(s->sort_keys);
1359 Py_INCREF(s->skipkeys);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001360 return 0;
1361}
1362
1363static PyObject *
1364encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1365{
1366 /* Python callable interface to encode_listencode_obj */
1367 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1368 PyObject *obj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001369 Py_ssize_t indent_level;
1370 PyEncoderObject *s;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001371 _PyAccu acc;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001372
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001373 assert(PyEncoder_Check(self));
1374 s = (PyEncoderObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001375 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1376 &obj, &indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001377 return NULL;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001378 if (_PyAccu_Init(&acc))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001379 return NULL;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001380 if (encoder_listencode_obj(s, &acc, obj, indent_level)) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001381 _PyAccu_Destroy(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001382 return NULL;
1383 }
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001384 return _PyAccu_FinishAsList(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001385}
1386
1387static PyObject *
1388_encoded_const(PyObject *obj)
1389{
1390 /* Return the JSON string representation of None, True, False */
1391 if (obj == Py_None) {
1392 static PyObject *s_null = NULL;
1393 if (s_null == NULL) {
1394 s_null = PyUnicode_InternFromString("null");
1395 }
1396 Py_INCREF(s_null);
1397 return s_null;
1398 }
1399 else if (obj == Py_True) {
1400 static PyObject *s_true = NULL;
1401 if (s_true == NULL) {
1402 s_true = PyUnicode_InternFromString("true");
1403 }
1404 Py_INCREF(s_true);
1405 return s_true;
1406 }
1407 else if (obj == Py_False) {
1408 static PyObject *s_false = NULL;
1409 if (s_false == NULL) {
1410 s_false = PyUnicode_InternFromString("false");
1411 }
1412 Py_INCREF(s_false);
1413 return s_false;
1414 }
1415 else {
1416 PyErr_SetString(PyExc_ValueError, "not a const");
1417 return NULL;
1418 }
1419}
1420
1421static PyObject *
Ethan Furmana4998a72013-08-10 13:01:45 -07001422encoder_encode_long(PyEncoderObject* s UNUSED, PyObject *obj)
1423{
1424 /* Return the JSON representation of a PyLong and PyLong subclasses.
1425 Calls int() on PyLong subclasses in case the str() was changed.
1426 Added specifically to deal with IntEnum. See Issue18264. */
1427 PyObject *encoded, *longobj;
1428 if (PyLong_CheckExact(obj)) {
1429 encoded = PyObject_Str(obj);
1430 }
1431 else {
1432 longobj = PyNumber_Long(obj);
1433 if (longobj == NULL) {
1434 PyErr_SetString(
1435 PyExc_ValueError,
1436 "Unable to coerce int subclass to int"
1437 );
1438 return NULL;
1439 }
1440 encoded = PyObject_Str(longobj);
1441 Py_DECREF(longobj);
1442 }
1443 return encoded;
1444}
1445
1446
1447static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001448encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1449{
Ethan Furmana4998a72013-08-10 13:01:45 -07001450 /* Return the JSON representation of a PyFloat.
1451 Modified to call float() on float subclasses in case the subclass
1452 changes the repr. See Issue18264. */
1453 PyObject *encoded, *floatobj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001454 double i = PyFloat_AS_DOUBLE(obj);
1455 if (!Py_IS_FINITE(i)) {
1456 if (!s->allow_nan) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001457 PyErr_SetString(
1458 PyExc_ValueError,
1459 "Out of range float values are not JSON compliant"
1460 );
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001461 return NULL;
1462 }
1463 if (i > 0) {
1464 return PyUnicode_FromString("Infinity");
1465 }
1466 else if (i < 0) {
1467 return PyUnicode_FromString("-Infinity");
1468 }
1469 else {
1470 return PyUnicode_FromString("NaN");
1471 }
1472 }
Ethan Furmana4998a72013-08-10 13:01:45 -07001473 /* coerce float subclasses to float (primarily for Enum) */
1474 if (PyFloat_CheckExact(obj)) {
1475 /* Use a better float format here? */
1476 encoded = PyObject_Repr(obj);
1477 }
1478 else {
1479 floatobj = PyNumber_Float(obj);
1480 if (floatobj == NULL) {
1481 PyErr_SetString(
1482 PyExc_ValueError,
1483 "Unable to coerce float subclass to float"
1484 );
1485 return NULL;
1486 }
1487 encoded = PyObject_Repr(floatobj);
1488 Py_DECREF(floatobj);
1489 }
1490 return encoded;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001491}
1492
1493static PyObject *
1494encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1495{
1496 /* Return the JSON representation of a string */
1497 if (s->fast_encode)
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001498 return s->fast_encode(NULL, obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001499 else
1500 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1501}
1502
1503static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001504_steal_accumulate(_PyAccu *acc, PyObject *stolen)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001505{
1506 /* Append stolen and then decrement its reference count */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001507 int rval = _PyAccu_Accumulate(acc, stolen);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001508 Py_DECREF(stolen);
1509 return rval;
1510}
1511
1512static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001513encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001514 PyObject *obj, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001515{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001516 /* Encode Python object obj to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001517 PyObject *newobj;
1518 int rv;
1519
1520 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1521 PyObject *cstr = _encoded_const(obj);
1522 if (cstr == NULL)
1523 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001524 return _steal_accumulate(acc, cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001525 }
1526 else if (PyUnicode_Check(obj))
1527 {
1528 PyObject *encoded = encoder_encode_string(s, obj);
1529 if (encoded == NULL)
1530 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001531 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001532 }
1533 else if (PyLong_Check(obj)) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001534 PyObject *encoded = encoder_encode_long(s, obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001535 if (encoded == NULL)
1536 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001537 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001538 }
1539 else if (PyFloat_Check(obj)) {
1540 PyObject *encoded = encoder_encode_float(s, obj);
1541 if (encoded == NULL)
1542 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001543 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001544 }
1545 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001546 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1547 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001548 rv = encoder_listencode_list(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001549 Py_LeaveRecursiveCall();
1550 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001551 }
1552 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001553 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1554 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001555 rv = encoder_listencode_dict(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001556 Py_LeaveRecursiveCall();
1557 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001558 }
1559 else {
1560 PyObject *ident = NULL;
1561 if (s->markers != Py_None) {
1562 int has_key;
1563 ident = PyLong_FromVoidPtr(obj);
1564 if (ident == NULL)
1565 return -1;
1566 has_key = PyDict_Contains(s->markers, ident);
1567 if (has_key) {
1568 if (has_key != -1)
1569 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1570 Py_DECREF(ident);
1571 return -1;
1572 }
1573 if (PyDict_SetItem(s->markers, ident, obj)) {
1574 Py_DECREF(ident);
1575 return -1;
1576 }
1577 }
1578 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
1579 if (newobj == NULL) {
1580 Py_XDECREF(ident);
1581 return -1;
1582 }
Ezio Melotti13672652011-05-11 01:02:56 +03001583
1584 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1585 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001586 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001587 Py_LeaveRecursiveCall();
1588
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001589 Py_DECREF(newobj);
1590 if (rv) {
1591 Py_XDECREF(ident);
1592 return -1;
1593 }
1594 if (ident != NULL) {
1595 if (PyDict_DelItem(s->markers, ident)) {
1596 Py_XDECREF(ident);
1597 return -1;
1598 }
1599 Py_XDECREF(ident);
1600 }
1601 return rv;
1602 }
1603}
1604
1605static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001606encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001607 PyObject *dct, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001608{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001609 /* Encode Python dict dct a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001610 static PyObject *open_dict = NULL;
1611 static PyObject *close_dict = NULL;
1612 static PyObject *empty_dict = NULL;
1613 PyObject *kstr = NULL;
1614 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001615 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001616 PyObject *items;
1617 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001618 int skipkeys;
1619 Py_ssize_t idx;
1620
1621 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1622 open_dict = PyUnicode_InternFromString("{");
1623 close_dict = PyUnicode_InternFromString("}");
1624 empty_dict = PyUnicode_InternFromString("{}");
1625 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1626 return -1;
1627 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001628 if (Py_SIZE(dct) == 0)
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001629 return _PyAccu_Accumulate(acc, empty_dict);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001630
1631 if (s->markers != Py_None) {
1632 int has_key;
1633 ident = PyLong_FromVoidPtr(dct);
1634 if (ident == NULL)
1635 goto bail;
1636 has_key = PyDict_Contains(s->markers, ident);
1637 if (has_key) {
1638 if (has_key != -1)
1639 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1640 goto bail;
1641 }
1642 if (PyDict_SetItem(s->markers, ident, dct)) {
1643 goto bail;
1644 }
1645 }
1646
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001647 if (_PyAccu_Accumulate(acc, open_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001648 goto bail;
1649
1650 if (s->indent != Py_None) {
1651 /* TODO: DOES NOT RUN */
1652 indent_level += 1;
1653 /*
1654 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1655 separator = _item_separator + newline_indent
1656 buf += newline_indent
1657 */
1658 }
1659
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001660 if (PyObject_IsTrue(s->sort_keys)) {
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001661 /* First sort the keys then replace them with (key, value) tuples. */
1662 Py_ssize_t i, nitems;
1663 items = PyMapping_Keys(dct);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001664 if (items == NULL)
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001665 goto bail;
1666 if (!PyList_Check(items)) {
1667 PyErr_SetString(PyExc_ValueError, "keys must return list");
1668 goto bail;
1669 }
1670 if (PyList_Sort(items) < 0)
1671 goto bail;
1672 nitems = PyList_GET_SIZE(items);
1673 for (i = 0; i < nitems; i++) {
1674 PyObject *key, *value;
1675 key = PyList_GET_ITEM(items, i);
1676 value = PyDict_GetItem(dct, key);
1677 item = PyTuple_Pack(2, key, value);
1678 if (item == NULL)
1679 goto bail;
1680 PyList_SET_ITEM(items, i, item);
Victor Stinner31a3ec32014-09-10 23:31:42 +02001681 item = NULL;
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001682 Py_DECREF(key);
1683 }
1684 }
1685 else {
1686 items = PyMapping_Items(dct);
1687 }
1688 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001689 goto bail;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001690 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001691 Py_DECREF(items);
1692 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001693 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001694 skipkeys = PyObject_IsTrue(s->skipkeys);
1695 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001696 while ((item = PyIter_Next(it)) != NULL) {
1697 PyObject *encoded, *key, *value;
1698 if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
1699 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1700 goto bail;
1701 }
1702 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001703 if (PyUnicode_Check(key)) {
1704 Py_INCREF(key);
1705 kstr = key;
1706 }
1707 else if (PyFloat_Check(key)) {
1708 kstr = encoder_encode_float(s, key);
1709 if (kstr == NULL)
1710 goto bail;
1711 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001712 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001713 /* This must come before the PyLong_Check because
1714 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001715 kstr = _encoded_const(key);
1716 if (kstr == NULL)
1717 goto bail;
1718 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001719 else if (PyLong_Check(key)) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001720 kstr = encoder_encode_long(s, key);
1721 if (kstr == NULL) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001722 goto bail;
Ethan Furmana4998a72013-08-10 13:01:45 -07001723 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001724 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001725 else if (skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001726 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001727 continue;
1728 }
1729 else {
1730 /* TODO: include repr of key */
Doug Hellmann1c524752010-07-21 12:29:04 +00001731 PyErr_SetString(PyExc_TypeError, "keys must be a string");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001732 goto bail;
1733 }
1734
1735 if (idx) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001736 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001737 goto bail;
1738 }
1739
1740 encoded = encoder_encode_string(s, kstr);
1741 Py_CLEAR(kstr);
1742 if (encoded == NULL)
1743 goto bail;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001744 if (_PyAccu_Accumulate(acc, encoded)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001745 Py_DECREF(encoded);
1746 goto bail;
1747 }
1748 Py_DECREF(encoded);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001749 if (_PyAccu_Accumulate(acc, s->key_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001750 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001751
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001752 value = PyTuple_GET_ITEM(item, 1);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001753 if (encoder_listencode_obj(s, acc, value, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001754 goto bail;
1755 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001756 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001757 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001758 if (PyErr_Occurred())
1759 goto bail;
1760 Py_CLEAR(it);
1761
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001762 if (ident != NULL) {
1763 if (PyDict_DelItem(s->markers, ident))
1764 goto bail;
1765 Py_CLEAR(ident);
1766 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001767 /* TODO DOES NOT RUN; dead code
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001768 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001769 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001770
1771 yield '\n' + (' ' * (_indent * _current_indent_level))
1772 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001773 if (_PyAccu_Accumulate(acc, close_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001774 goto bail;
1775 return 0;
1776
1777bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001778 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001779 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001780 Py_XDECREF(kstr);
1781 Py_XDECREF(ident);
1782 return -1;
1783}
1784
1785
1786static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001787encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001788 PyObject *seq, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001789{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001790 /* Encode Python list seq to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001791 static PyObject *open_array = NULL;
1792 static PyObject *close_array = NULL;
1793 static PyObject *empty_array = NULL;
1794 PyObject *ident = NULL;
1795 PyObject *s_fast = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001796 Py_ssize_t i;
1797
1798 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1799 open_array = PyUnicode_InternFromString("[");
1800 close_array = PyUnicode_InternFromString("]");
1801 empty_array = PyUnicode_InternFromString("[]");
1802 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1803 return -1;
1804 }
1805 ident = NULL;
1806 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1807 if (s_fast == NULL)
1808 return -1;
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001809 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001810 Py_DECREF(s_fast);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001811 return _PyAccu_Accumulate(acc, empty_array);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001812 }
1813
1814 if (s->markers != Py_None) {
1815 int has_key;
1816 ident = PyLong_FromVoidPtr(seq);
1817 if (ident == NULL)
1818 goto bail;
1819 has_key = PyDict_Contains(s->markers, ident);
1820 if (has_key) {
1821 if (has_key != -1)
1822 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1823 goto bail;
1824 }
1825 if (PyDict_SetItem(s->markers, ident, seq)) {
1826 goto bail;
1827 }
1828 }
1829
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001830 if (_PyAccu_Accumulate(acc, open_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001831 goto bail;
1832 if (s->indent != Py_None) {
1833 /* TODO: DOES NOT RUN */
1834 indent_level += 1;
1835 /*
1836 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1837 separator = _item_separator + newline_indent
1838 buf += newline_indent
1839 */
1840 }
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001841 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1842 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001843 if (i) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001844 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001845 goto bail;
1846 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001847 if (encoder_listencode_obj(s, acc, obj, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001848 goto bail;
1849 }
1850 if (ident != NULL) {
1851 if (PyDict_DelItem(s->markers, ident))
1852 goto bail;
1853 Py_CLEAR(ident);
1854 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001855
1856 /* TODO: DOES NOT RUN
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001857 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001858 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001859
1860 yield '\n' + (' ' * (_indent * _current_indent_level))
1861 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001862 if (_PyAccu_Accumulate(acc, close_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001863 goto bail;
1864 Py_DECREF(s_fast);
1865 return 0;
1866
1867bail:
1868 Py_XDECREF(ident);
1869 Py_DECREF(s_fast);
1870 return -1;
1871}
1872
1873static void
1874encoder_dealloc(PyObject *self)
1875{
1876 /* Deallocate Encoder */
1877 encoder_clear(self);
1878 Py_TYPE(self)->tp_free(self);
1879}
1880
1881static int
1882encoder_traverse(PyObject *self, visitproc visit, void *arg)
1883{
1884 PyEncoderObject *s;
1885 assert(PyEncoder_Check(self));
1886 s = (PyEncoderObject *)self;
1887 Py_VISIT(s->markers);
1888 Py_VISIT(s->defaultfn);
1889 Py_VISIT(s->encoder);
1890 Py_VISIT(s->indent);
1891 Py_VISIT(s->key_separator);
1892 Py_VISIT(s->item_separator);
1893 Py_VISIT(s->sort_keys);
1894 Py_VISIT(s->skipkeys);
1895 return 0;
1896}
1897
1898static int
1899encoder_clear(PyObject *self)
1900{
1901 /* Deallocate Encoder */
1902 PyEncoderObject *s;
1903 assert(PyEncoder_Check(self));
1904 s = (PyEncoderObject *)self;
1905 Py_CLEAR(s->markers);
1906 Py_CLEAR(s->defaultfn);
1907 Py_CLEAR(s->encoder);
1908 Py_CLEAR(s->indent);
1909 Py_CLEAR(s->key_separator);
1910 Py_CLEAR(s->item_separator);
1911 Py_CLEAR(s->sort_keys);
1912 Py_CLEAR(s->skipkeys);
1913 return 0;
1914}
1915
1916PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1917
1918static
1919PyTypeObject PyEncoderType = {
1920 PyVarObject_HEAD_INIT(NULL, 0)
1921 "_json.Encoder", /* tp_name */
1922 sizeof(PyEncoderObject), /* tp_basicsize */
1923 0, /* tp_itemsize */
1924 encoder_dealloc, /* tp_dealloc */
1925 0, /* tp_print */
1926 0, /* tp_getattr */
1927 0, /* tp_setattr */
1928 0, /* tp_compare */
1929 0, /* tp_repr */
1930 0, /* tp_as_number */
1931 0, /* tp_as_sequence */
1932 0, /* tp_as_mapping */
1933 0, /* tp_hash */
1934 encoder_call, /* tp_call */
1935 0, /* tp_str */
1936 0, /* tp_getattro */
1937 0, /* tp_setattro */
1938 0, /* tp_as_buffer */
1939 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1940 encoder_doc, /* tp_doc */
1941 encoder_traverse, /* tp_traverse */
1942 encoder_clear, /* tp_clear */
1943 0, /* tp_richcompare */
1944 0, /* tp_weaklistoffset */
1945 0, /* tp_iter */
1946 0, /* tp_iternext */
1947 0, /* tp_methods */
1948 encoder_members, /* tp_members */
1949 0, /* tp_getset */
1950 0, /* tp_base */
1951 0, /* tp_dict */
1952 0, /* tp_descr_get */
1953 0, /* tp_descr_set */
1954 0, /* tp_dictoffset */
1955 encoder_init, /* tp_init */
1956 0, /* tp_alloc */
1957 encoder_new, /* tp_new */
1958 0, /* tp_free */
1959};
1960
1961static PyMethodDef speedups_methods[] = {
1962 {"encode_basestring_ascii",
1963 (PyCFunction)py_encode_basestring_ascii,
1964 METH_O,
1965 pydoc_encode_basestring_ascii},
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001966 {"encode_basestring",
1967 (PyCFunction)py_encode_basestring,
1968 METH_O,
1969 pydoc_encode_basestring},
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001970 {"scanstring",
1971 (PyCFunction)py_scanstring,
1972 METH_VARARGS,
1973 pydoc_scanstring},
Christian Heimes90540002008-05-08 14:29:10 +00001974 {NULL, NULL, 0, NULL}
1975};
1976
1977PyDoc_STRVAR(module_doc,
1978"json speedups\n");
1979
Martin v. Löwis1a214512008-06-11 05:26:20 +00001980static struct PyModuleDef jsonmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001981 PyModuleDef_HEAD_INIT,
1982 "_json",
1983 module_doc,
1984 -1,
1985 speedups_methods,
1986 NULL,
1987 NULL,
1988 NULL,
1989 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001990};
1991
1992PyObject*
1993PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00001994{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001995 PyObject *m = PyModule_Create(&jsonmodule);
1996 if (!m)
1997 return NULL;
1998 PyScannerType.tp_new = PyType_GenericNew;
1999 if (PyType_Ready(&PyScannerType) < 0)
2000 goto fail;
2001 PyEncoderType.tp_new = PyType_GenericNew;
2002 if (PyType_Ready(&PyEncoderType) < 0)
2003 goto fail;
2004 Py_INCREF((PyObject*)&PyScannerType);
2005 if (PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType) < 0) {
2006 Py_DECREF((PyObject*)&PyScannerType);
2007 goto fail;
2008 }
2009 Py_INCREF((PyObject*)&PyEncoderType);
2010 if (PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType) < 0) {
2011 Py_DECREF((PyObject*)&PyEncoderType);
2012 goto fail;
2013 }
2014 return m;
2015 fail:
2016 Py_DECREF(m);
2017 return NULL;
Christian Heimes90540002008-05-08 14:29:10 +00002018}