blob: 0bf475e1867aacdca95a5ce9a9c26c47d13163d4 [file] [log] [blame]
Christian Heimes90540002008-05-08 14:29:10 +00001#include "Python.h"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00002#include "structmember.h"
Antoine Pitroud0acb412012-03-22 14:42:18 +01003#include "accu.h"
4
/* UNUSED marks a parameter or variable as intentionally unused.
   With GCC-compatible compilers it expands to the unused attribute;
   elsewhere it expands to nothing. */
#if defined(__GNUC__)
#  define UNUSED __attribute__((__unused__))
#else
#  define UNUSED
#endif
10
11#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
12#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
13#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
14#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
15
16static PyTypeObject PyScannerType;
17static PyTypeObject PyEncoderType;
18
19typedef struct _PyScannerObject {
20 PyObject_HEAD
21 PyObject *strict;
22 PyObject *object_hook;
23 PyObject *object_pairs_hook;
24 PyObject *parse_float;
25 PyObject *parse_int;
26 PyObject *parse_constant;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +000027 PyObject *memo;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000028} PyScannerObject;
29
30static PyMemberDef scanner_members[] = {
31 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
32 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
33 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
34 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
35 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
36 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
37 {NULL}
38};
39
40typedef struct _PyEncoderObject {
41 PyObject_HEAD
42 PyObject *markers;
43 PyObject *defaultfn;
44 PyObject *encoder;
45 PyObject *indent;
46 PyObject *key_separator;
47 PyObject *item_separator;
48 PyObject *sort_keys;
49 PyObject *skipkeys;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +010050 PyCFunction fast_encode;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000051 int allow_nan;
52} PyEncoderObject;
53
54static PyMemberDef encoder_members[] = {
55 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
56 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
57 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
58 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
59 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
60 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
61 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
62 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
63 {NULL}
64};
65
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020066static PyObject *
67join_list_unicode(PyObject *lst)
68{
69 /* return u''.join(lst) */
70 static PyObject *sep = NULL;
71 if (sep == NULL) {
72 sep = PyUnicode_FromStringAndSize("", 0);
73 if (sep == NULL)
74 return NULL;
75 }
76 return PyUnicode_Join(sep, lst);
77}
78
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +020079/* Forward decls */
80
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000081static PyObject *
82ascii_escape_unicode(PyObject *pystr);
83static PyObject *
84py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
85void init_json(void);
86static PyObject *
87scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
88static PyObject *
89_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
90static PyObject *
91scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
92static int
93scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
94static void
95scanner_dealloc(PyObject *self);
96static int
97scanner_clear(PyObject *self);
98static PyObject *
99encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
100static int
101encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
102static void
103encoder_dealloc(PyObject *self);
104static int
105encoder_clear(PyObject *self);
106static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200107encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000108static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200109encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000110static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +0200111encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000112static PyObject *
Hirokazu Yamamotofecf5d12009-05-02 15:55:19 +0000113_encoded_const(PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000114static void
115raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
116static PyObject *
117encoder_encode_string(PyEncoderObject *s, PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000118static PyObject *
Ethan Furmana4998a72013-08-10 13:01:45 -0700119encoder_encode_long(PyEncoderObject* s UNUSED, PyObject *obj);
120static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000121encoder_encode_float(PyEncoderObject *s, PyObject *obj);
122
/* S_CHAR(c): true when c is printable ASCII (' ' .. '~') that can be copied
   into a JSON string verbatim, i.e. it is neither a backslash nor a double
   quote.  The argument is fully parenthesized so that expression arguments
   (for example a conditional expression) are evaluated as a unit.
   NOTE: c is evaluated several times; do not pass expressions with side
   effects. */
#define S_CHAR(c) (((c) >= ' ') && ((c) <= '~') && ((c) != '\\') && ((c) != '"'))
/* IS_WHITESPACE(c): true for space, tab, line feed and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
Christian Heimes90540002008-05-08 14:29:10 +0000125
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000126static Py_ssize_t
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200127ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000128{
129 /* Escape unicode code point c to ASCII escape sequences
130 in char *output. output must have at least 12 bytes unused to
131 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000132 output[chars++] = '\\';
133 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000134 case '\\': output[chars++] = c; break;
135 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000136 case '\b': output[chars++] = 'b'; break;
137 case '\f': output[chars++] = 'f'; break;
138 case '\n': output[chars++] = 'n'; break;
139 case '\r': output[chars++] = 'r'; break;
140 case '\t': output[chars++] = 't'; break;
141 default:
Christian Heimes90540002008-05-08 14:29:10 +0000142 if (c >= 0x10000) {
143 /* UTF-16 surrogate pair */
Victor Stinner76df43d2012-10-30 01:42:39 +0100144 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000145 output[chars++] = 'u';
Victor Stinner76df43d2012-10-30 01:42:39 +0100146 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
147 output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
148 output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
149 output[chars++] = Py_hexdigits[(v ) & 0xf];
150 c = Py_UNICODE_LOW_SURROGATE(c);
Christian Heimes90540002008-05-08 14:29:10 +0000151 output[chars++] = '\\';
152 }
Christian Heimes90540002008-05-08 14:29:10 +0000153 output[chars++] = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +0200154 output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
155 output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
156 output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
157 output[chars++] = Py_hexdigits[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000158 }
159 return chars;
160}
161
162static PyObject *
163ascii_escape_unicode(PyObject *pystr)
164{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000165 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000166 Py_ssize_t i;
167 Py_ssize_t input_chars;
168 Py_ssize_t output_size;
169 Py_ssize_t chars;
170 PyObject *rval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200171 void *input;
172 unsigned char *output;
173 int kind;
Christian Heimes90540002008-05-08 14:29:10 +0000174
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200175 if (PyUnicode_READY(pystr) == -1)
176 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000177
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200178 input_chars = PyUnicode_GET_LENGTH(pystr);
179 input = PyUnicode_DATA(pystr);
180 kind = PyUnicode_KIND(pystr);
181
182 /* Compute the output size */
183 for (i = 0, output_size = 2; i < input_chars; i++) {
184 Py_UCS4 c = PyUnicode_READ(kind, input, i);
185 if (S_CHAR(c))
186 output_size++;
187 else {
188 switch(c) {
Victor Stinnerd9c06312011-10-11 21:56:19 +0200189 case '\\': case '"': case '\b': case '\f':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200190 case '\n': case '\r': case '\t':
191 output_size += 2; break;
192 default:
193 output_size += c >= 0x10000 ? 12 : 6;
194 }
195 }
196 }
197
198 rval = PyUnicode_New(output_size, 127);
Christian Heimes90540002008-05-08 14:29:10 +0000199 if (rval == NULL) {
200 return NULL;
201 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200202 output = PyUnicode_1BYTE_DATA(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000203 chars = 0;
204 output[chars++] = '"';
205 for (i = 0; i < input_chars; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200206 Py_UCS4 c = PyUnicode_READ(kind, input, i);
Christian Heimes90540002008-05-08 14:29:10 +0000207 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000208 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000209 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000210 else {
211 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000212 }
Christian Heimes90540002008-05-08 14:29:10 +0000213 }
214 output[chars++] = '"';
Christian Heimesf402e922013-01-03 09:21:55 +0100215#ifdef Py_DEBUG
Victor Stinner8f825062012-04-27 13:55:39 +0200216 assert(_PyUnicode_CheckConsistency(rval, 1));
Christian Heimesf402e922013-01-03 09:21:55 +0100217#endif
Christian Heimes90540002008-05-08 14:29:10 +0000218 return rval;
219}
220
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100221static PyObject *
222escape_unicode(PyObject *pystr)
223{
224 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
225 Py_ssize_t i;
226 Py_ssize_t input_chars;
227 Py_ssize_t output_size;
228 Py_ssize_t chars;
229 PyObject *rval;
230 void *input;
231 int kind;
232 Py_UCS4 maxchar;
233
234 if (PyUnicode_READY(pystr) == -1)
235 return NULL;
236
237 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
238 input_chars = PyUnicode_GET_LENGTH(pystr);
239 input = PyUnicode_DATA(pystr);
240 kind = PyUnicode_KIND(pystr);
241
242 /* Compute the output size */
243 for (i = 0, output_size = 2; i < input_chars; i++) {
244 Py_UCS4 c = PyUnicode_READ(kind, input, i);
245 switch (c) {
246 case '\\': case '"': case '\b': case '\f':
247 case '\n': case '\r': case '\t':
248 output_size += 2;
249 break;
250 default:
251 if (c <= 0x1f)
252 output_size += 6;
253 else
254 output_size++;
255 }
256 }
257
258 rval = PyUnicode_New(output_size, maxchar);
259 if (rval == NULL)
260 return NULL;
261
262 kind = PyUnicode_KIND(rval);
263
264#define ENCODE_OUTPUT do { \
265 chars = 0; \
266 output[chars++] = '"'; \
267 for (i = 0; i < input_chars; i++) { \
268 Py_UCS4 c = PyUnicode_READ(kind, input, i); \
269 switch (c) { \
270 case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
271 case '"': output[chars++] = '\\'; output[chars++] = c; break; \
272 case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
273 case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
274 case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
275 case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
276 case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
277 default: \
278 if (c <= 0x1f) { \
279 output[chars++] = '\\'; \
280 output[chars++] = 'u'; \
281 output[chars++] = '0'; \
282 output[chars++] = '0'; \
283 output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
284 output[chars++] = Py_hexdigits[(c ) & 0xf]; \
285 } else { \
286 output[chars++] = c; \
287 } \
288 } \
289 } \
290 output[chars++] = '"'; \
291 } while (0)
292
293 if (kind == PyUnicode_1BYTE_KIND) {
294 Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
295 ENCODE_OUTPUT;
296 } else if (kind == PyUnicode_2BYTE_KIND) {
297 Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
298 ENCODE_OUTPUT;
299 } else {
300 Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
301 assert(kind == PyUnicode_4BYTE_KIND);
302 ENCODE_OUTPUT;
303 }
304#undef ENCODE_OUTPUT
305
306#ifdef Py_DEBUG
307 assert(_PyUnicode_CheckConsistency(rval, 1));
308#endif
309 return rval;
310}
311
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000312static void
Christian Heimes90540002008-05-08 14:29:10 +0000313raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
314{
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200315 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
316 static PyObject *JSONDecodeError = NULL;
317 PyObject *exc;
318 if (JSONDecodeError == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000319 PyObject *decoder = PyImport_ImportModule("json.decoder");
320 if (decoder == NULL)
321 return;
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200322 JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000323 Py_DECREF(decoder);
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200324 if (JSONDecodeError == NULL)
Christian Heimes90540002008-05-08 14:29:10 +0000325 return;
Christian Heimes90540002008-05-08 14:29:10 +0000326 }
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +0200327 exc = PyObject_CallFunction(JSONDecodeError, "(zOn)", msg, s, end);
328 if (exc) {
329 PyErr_SetObject(JSONDecodeError, exc);
330 Py_DECREF(exc);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000331 }
Christian Heimes90540002008-05-08 14:29:10 +0000332}
333
Ezio Melotti37623ab2013-01-03 08:44:15 +0200334static void
335raise_stop_iteration(Py_ssize_t idx)
336{
337 PyObject *value = PyLong_FromSsize_t(idx);
338 if (value != NULL) {
339 PyErr_SetObject(PyExc_StopIteration, value);
340 Py_DECREF(value);
341 }
342}
343
Christian Heimes90540002008-05-08 14:29:10 +0000344static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000345_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
346 /* return (rval, idx) tuple, stealing reference to rval */
347 PyObject *tpl;
348 PyObject *pyidx;
349 /*
350 steal a reference to rval, returns (rval, idx)
351 */
352 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000353 return NULL;
354 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000355 pyidx = PyLong_FromSsize_t(idx);
356 if (pyidx == NULL) {
357 Py_DECREF(rval);
358 return NULL;
359 }
360 tpl = PyTuple_New(2);
361 if (tpl == NULL) {
362 Py_DECREF(pyidx);
363 Py_DECREF(rval);
364 return NULL;
365 }
366 PyTuple_SET_ITEM(tpl, 0, rval);
367 PyTuple_SET_ITEM(tpl, 1, pyidx);
368 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000369}
370
/* APPEND_OLD_CHUNK: if a chunk is pending, append it to the `chunks` list
   (creating the list on first use) and reset `chunk` to NULL.
   Expects locals `chunk` and `chunks` and a `bail` label in the enclosing
   function; jumps to `bail` on any failure.  The macro is a bare `if`
   block and is written without a trailing semicolon at its use sites. */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        if (chunks == NULL && (chunks = PyList_New(0)) == NULL) { \
            goto bail; \
        } \
        if (PyList_Append(chunks, chunk) != 0) { \
            Py_CLEAR(chunk); \
            goto bail; \
        } \
        Py_CLEAR(chunk); \
    }
385
Christian Heimes90540002008-05-08 14:29:10 +0000386static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000387scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000388{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000389 /* Read the JSON string from PyUnicode pystr.
390 end is the index of the first character after the quote.
391 if strict is zero then literal control characters are allowed
392 *next_end_ptr is a return-by-reference index of the character
393 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000394
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000395 Return value is a new PyUnicode
396 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000397 PyObject *rval = NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200398 Py_ssize_t len;
Christian Heimes90540002008-05-08 14:29:10 +0000399 Py_ssize_t begin = end - 1;
Brett Cannonb94767f2011-02-22 20:15:44 +0000400 Py_ssize_t next /* = begin */;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200401 const void *buf;
402 int kind;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000403 PyObject *chunks = NULL;
404 PyObject *chunk = NULL;
405
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200406 if (PyUnicode_READY(pystr) == -1)
407 return 0;
408
409 len = PyUnicode_GET_LENGTH(pystr);
410 buf = PyUnicode_DATA(pystr);
411 kind = PyUnicode_KIND(pystr);
412
Ezio Melotti37623ab2013-01-03 08:44:15 +0200413 if (end < 0 || len < end) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000414 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
415 goto bail;
416 }
Christian Heimes90540002008-05-08 14:29:10 +0000417 while (1) {
418 /* Find the end of the string or the next escape */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200419 Py_UCS4 c = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000420 for (next = end; next < len; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200421 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000422 if (c == '"' || c == '\\') {
423 break;
424 }
425 else if (strict && c <= 0x1f) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000426 raise_errmsg("Invalid control character at", pystr, next);
Christian Heimes90540002008-05-08 14:29:10 +0000427 goto bail;
428 }
429 }
430 if (!(c == '"' || c == '\\')) {
431 raise_errmsg("Unterminated string starting at", pystr, begin);
432 goto bail;
433 }
434 /* Pick up this chunk if it's not zero length */
435 if (next != end) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000436 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200437 chunk = PyUnicode_FromKindAndData(
438 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200439 (char*)buf + kind * end,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200440 next - end);
Christian Heimes90540002008-05-08 14:29:10 +0000441 if (chunk == NULL) {
442 goto bail;
443 }
Christian Heimes90540002008-05-08 14:29:10 +0000444 }
445 next++;
446 if (c == '"') {
447 end = next;
448 break;
449 }
450 if (next == len) {
451 raise_errmsg("Unterminated string starting at", pystr, begin);
452 goto bail;
453 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200454 c = PyUnicode_READ(kind, buf, next);
Christian Heimes90540002008-05-08 14:29:10 +0000455 if (c != 'u') {
456 /* Non-unicode backslash escapes */
457 end = next + 1;
458 switch (c) {
459 case '"': break;
460 case '\\': break;
461 case '/': break;
462 case 'b': c = '\b'; break;
463 case 'f': c = '\f'; break;
464 case 'n': c = '\n'; break;
465 case 'r': c = '\r'; break;
466 case 't': c = '\t'; break;
467 default: c = 0;
468 }
469 if (c == 0) {
470 raise_errmsg("Invalid \\escape", pystr, end - 2);
471 goto bail;
472 }
473 }
474 else {
475 c = 0;
476 next++;
477 end = next + 4;
478 if (end >= len) {
479 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
480 goto bail;
481 }
482 /* Decode 4 hex digits */
483 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200484 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000485 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000486 switch (digit) {
487 case '0': case '1': case '2': case '3': case '4':
488 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000489 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000490 case 'a': case 'b': case 'c': case 'd': case 'e':
491 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000492 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000493 case 'A': case 'B': case 'C': case 'D': case 'E':
494 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000495 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000496 default:
497 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
498 goto bail;
499 }
500 }
Christian Heimes90540002008-05-08 14:29:10 +0000501 /* Surrogate pair */
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200502 if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
503 PyUnicode_READ(kind, buf, next++) == '\\' &&
504 PyUnicode_READ(kind, buf, next++) == 'u') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200505 Py_UCS4 c2 = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000506 end += 6;
507 /* Decode 4 hex digits */
508 for (; next < end; next++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200509 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000510 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000511 switch (digit) {
512 case '0': case '1': case '2': case '3': case '4':
513 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000514 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000515 case 'a': case 'b': case 'c': case 'd': case 'e':
516 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000517 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000518 case 'A': case 'B': case 'C': case 'D': case 'E':
519 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000520 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000521 default:
522 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
523 goto bail;
524 }
525 }
Serhiy Storchakac93329b2013-11-26 21:25:28 +0200526 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
527 c = Py_UNICODE_JOIN_SURROGATES(c, c2);
528 else
529 end -= 6;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000530 }
Christian Heimes90540002008-05-08 14:29:10 +0000531 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000532 APPEND_OLD_CHUNK
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200533 chunk = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &c, 1);
Christian Heimes90540002008-05-08 14:29:10 +0000534 if (chunk == NULL) {
535 goto bail;
536 }
Christian Heimes90540002008-05-08 14:29:10 +0000537 }
538
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000539 if (chunks == NULL) {
540 if (chunk != NULL)
541 rval = chunk;
542 else
543 rval = PyUnicode_FromStringAndSize("", 0);
Christian Heimes90540002008-05-08 14:29:10 +0000544 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000545 else {
546 APPEND_OLD_CHUNK
547 rval = join_list_unicode(chunks);
548 if (rval == NULL) {
549 goto bail;
550 }
551 Py_CLEAR(chunks);
552 }
553
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000554 *next_end_ptr = end;
555 return rval;
Christian Heimes90540002008-05-08 14:29:10 +0000556bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000557 *next_end_ptr = -1;
Christian Heimes90540002008-05-08 14:29:10 +0000558 Py_XDECREF(chunks);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000559 Py_XDECREF(chunk);
Christian Heimes90540002008-05-08 14:29:10 +0000560 return NULL;
561}
562
563PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000564 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000565 "\n"
566 "Scan the string s for a JSON string. End is the index of the\n"
567 "character in s after the quote that started the JSON string.\n"
568 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
569 "on attempt to decode an invalid string. If strict is False then literal\n"
570 "control characters are allowed in the string.\n"
571 "\n"
572 "Returns a tuple of the decoded string and the index of the character in s\n"
573 "after the end quote."
574);
Christian Heimes90540002008-05-08 14:29:10 +0000575
576static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000577py_scanstring(PyObject* self UNUSED, PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000578{
579 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000580 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000581 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000582 Py_ssize_t next_end = -1;
583 int strict = 1;
Antoine Pitroucbb02842012-12-01 19:34:16 +0100584 if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000585 return NULL;
586 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000587 if (PyUnicode_Check(pystr)) {
588 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000589 }
590 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000591 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000592 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000593 Py_TYPE(pystr)->tp_name);
594 return NULL;
595 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000596 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000597}
598
599PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000600 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000601 "\n"
602 "Return an ASCII-only JSON representation of a Python string"
603);
Christian Heimes90540002008-05-08 14:29:10 +0000604
605static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000606py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000607{
608 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000609 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000610 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000611 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000612 rval = ascii_escape_unicode(pystr);
613 }
614 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000615 PyErr_Format(PyExc_TypeError,
616 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000617 Py_TYPE(pystr)->tp_name);
618 return NULL;
619 }
Christian Heimes90540002008-05-08 14:29:10 +0000620 return rval;
621}
622
Antoine Pitroudc3eaa82015-01-11 16:41:01 +0100623
624PyDoc_STRVAR(pydoc_encode_basestring,
625 "encode_basestring(string) -> string\n"
626 "\n"
627 "Return a JSON representation of a Python string"
628);
629
630static PyObject *
631py_encode_basestring(PyObject* self UNUSED, PyObject *pystr)
632{
633 PyObject *rval;
634 /* Return a JSON representation of a Python string */
635 /* METH_O */
636 if (PyUnicode_Check(pystr)) {
637 rval = escape_unicode(pystr);
638 }
639 else {
640 PyErr_Format(PyExc_TypeError,
641 "first argument must be a string, not %.80s",
642 Py_TYPE(pystr)->tp_name);
643 return NULL;
644 }
645 return rval;
646}
647
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000648static void
649scanner_dealloc(PyObject *self)
650{
651 /* Deallocate scanner object */
652 scanner_clear(self);
653 Py_TYPE(self)->tp_free(self);
654}
655
656static int
657scanner_traverse(PyObject *self, visitproc visit, void *arg)
658{
659 PyScannerObject *s;
660 assert(PyScanner_Check(self));
661 s = (PyScannerObject *)self;
662 Py_VISIT(s->strict);
663 Py_VISIT(s->object_hook);
664 Py_VISIT(s->object_pairs_hook);
665 Py_VISIT(s->parse_float);
666 Py_VISIT(s->parse_int);
667 Py_VISIT(s->parse_constant);
668 return 0;
669}
670
671static int
672scanner_clear(PyObject *self)
673{
674 PyScannerObject *s;
675 assert(PyScanner_Check(self));
676 s = (PyScannerObject *)self;
677 Py_CLEAR(s->strict);
678 Py_CLEAR(s->object_hook);
679 Py_CLEAR(s->object_pairs_hook);
680 Py_CLEAR(s->parse_float);
681 Py_CLEAR(s->parse_int);
682 Py_CLEAR(s->parse_constant);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000683 Py_CLEAR(s->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000684 return 0;
685}
686
687static PyObject *
688_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
689 /* Read a JSON object from PyUnicode pystr.
690 idx is the index of the first character after the opening curly brace.
691 *next_idx_ptr is a return-by-reference index to the first character after
692 the closing curly brace.
693
694 Returns a new PyObject (usually a dict, but object_hook can change that)
695 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200696 void *str;
697 int kind;
698 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000699 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000700 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000701 PyObject *key = NULL;
702 int strict = PyObject_IsTrue(s->strict);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000703 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000704 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000705
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200706 if (PyUnicode_READY(pystr) == -1)
707 return NULL;
708
709 str = PyUnicode_DATA(pystr);
710 kind = PyUnicode_KIND(pystr);
711 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
712
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000713 if (has_pairs_hook)
714 rval = PyList_New(0);
715 else
716 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000717 if (rval == NULL)
718 return NULL;
719
720 /* skip whitespace after { */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200721 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000722
723 /* only loop if the object is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200724 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
725 while (1) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000726 PyObject *memokey;
727
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000728 /* read key */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200729 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200730 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000731 goto bail;
732 }
733 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
734 if (key == NULL)
735 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000736 memokey = PyDict_GetItem(s->memo, key);
737 if (memokey != NULL) {
738 Py_INCREF(memokey);
739 Py_DECREF(key);
740 key = memokey;
741 }
742 else {
743 if (PyDict_SetItem(s->memo, key, key) < 0)
744 goto bail;
745 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000746 idx = next_idx;
747
748 /* skip whitespace between key and : delimiter, read :, skip whitespace */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200749 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
750 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200751 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000752 goto bail;
753 }
754 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200755 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000756
757 /* read any JSON term */
758 val = scan_once_unicode(s, pystr, idx, &next_idx);
759 if (val == NULL)
760 goto bail;
761
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000762 if (has_pairs_hook) {
763 PyObject *item = PyTuple_Pack(2, key, val);
764 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000765 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000766 Py_CLEAR(key);
767 Py_CLEAR(val);
768 if (PyList_Append(rval, item) == -1) {
769 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000770 goto bail;
771 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000772 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000773 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000774 else {
775 if (PyDict_SetItem(rval, key, val) < 0)
776 goto bail;
777 Py_CLEAR(key);
778 Py_CLEAR(val);
779 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000780 idx = next_idx;
781
782 /* skip whitespace before } or , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200783 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000784
785 /* bail if the object is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200786 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000787 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200788 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200789 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000790 goto bail;
791 }
792 idx++;
793
794 /* skip whitespace after , delimiter */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200795 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000796 }
797 }
798
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000799 *next_idx_ptr = idx + 1;
800
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000801 if (has_pairs_hook) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000802 val = PyObject_CallFunctionObjArgs(s->object_pairs_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000803 Py_DECREF(rval);
804 return val;
805 }
806
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000807 /* if object_hook is not None: rval = object_hook(rval) */
808 if (s->object_hook != Py_None) {
809 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000810 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000811 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000812 }
813 return rval;
814bail:
815 Py_XDECREF(key);
816 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000817 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000818 return NULL;
819}
820
821static PyObject *
822_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
823 /* Read a JSON array from PyString pystr.
824 idx is the index of the first character after the opening brace.
825 *next_idx_ptr is a return-by-reference index to the first character after
826 the closing brace.
827
828 Returns a new PyList
829 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200830 void *str;
831 int kind;
832 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000833 PyObject *val = NULL;
834 PyObject *rval = PyList_New(0);
835 Py_ssize_t next_idx;
836 if (rval == NULL)
837 return NULL;
838
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200839 if (PyUnicode_READY(pystr) == -1)
840 return NULL;
841
842 str = PyUnicode_DATA(pystr);
843 kind = PyUnicode_KIND(pystr);
844 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
845
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000846 /* skip whitespace after [ */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200847 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000848
849 /* only loop if the array is non-empty */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200850 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
851 while (1) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000852
853 /* read any JSON term */
854 val = scan_once_unicode(s, pystr, idx, &next_idx);
855 if (val == NULL)
856 goto bail;
857
858 if (PyList_Append(rval, val) == -1)
859 goto bail;
860
861 Py_CLEAR(val);
862 idx = next_idx;
863
864 /* skip whitespace between term and , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200865 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000866
867 /* bail if the array is closed or we didn't get the , delimiter */
Ezio Melotti37623ab2013-01-03 08:44:15 +0200868 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000869 break;
Ezio Melotti37623ab2013-01-03 08:44:15 +0200870 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200871 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000872 goto bail;
873 }
874 idx++;
875
876 /* skip whitespace after , */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200877 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000878 }
879 }
880
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200881 /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
882 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200883 raise_errmsg("Expecting value", pystr, end_idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000884 goto bail;
885 }
886 *next_idx_ptr = idx + 1;
887 return rval;
888bail:
889 Py_XDECREF(val);
890 Py_DECREF(rval);
891 return NULL;
892}
893
894static PyObject *
895_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
896 /* Read a JSON constant from PyString pystr.
897 constant is the constant string that was found
898 ("NaN", "Infinity", "-Infinity").
899 idx is the index of the first character of the constant
900 *next_idx_ptr is a return-by-reference index to the first character after
901 the constant.
902
903 Returns the result of parse_constant
904 */
905 PyObject *cstr;
906 PyObject *rval;
907 /* constant is "NaN", "Infinity", or "-Infinity" */
908 cstr = PyUnicode_InternFromString(constant);
909 if (cstr == NULL)
910 return NULL;
911
912 /* rval = parse_constant(constant) */
913 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
Victor Stinnerc4f281e2011-10-11 22:11:42 +0200914 idx += PyUnicode_GET_LENGTH(cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000915 Py_DECREF(cstr);
916 *next_idx_ptr = idx;
917 return rval;
918}
919
920static PyObject *
921_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
922 /* Read a JSON number from PyUnicode pystr.
923 idx is the index of the first character of the number
924 *next_idx_ptr is a return-by-reference index to the first character after
925 the number.
926
927 Returns a new PyObject representation of that number:
928 PyInt, PyLong, or PyFloat.
929 May return other types if parse_int or parse_float are set
930 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200931 void *str;
932 int kind;
933 Py_ssize_t end_idx;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000934 Py_ssize_t idx = start;
935 int is_float = 0;
936 PyObject *rval;
Antoine Pitrouf6454512011-04-25 19:16:06 +0200937 PyObject *numstr = NULL;
938 PyObject *custom_func;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000939
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200940 if (PyUnicode_READY(pystr) == -1)
941 return NULL;
942
943 str = PyUnicode_DATA(pystr);
944 kind = PyUnicode_KIND(pystr);
945 end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
946
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000947 /* read a sign if it's there, make sure it's not the end of the string */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200948 if (PyUnicode_READ(kind, str, idx) == '-') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000949 idx++;
950 if (idx > end_idx) {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200951 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000952 return NULL;
953 }
954 }
955
956 /* read as many integer digits as we find as long as it doesn't start with 0 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200957 if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000958 idx++;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200959 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000960 }
961 /* if it starts with 0 we only expect one integer digit */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200962 else if (PyUnicode_READ(kind, str, idx) == '0') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000963 idx++;
964 }
965 /* no integer digits, error */
966 else {
Ezio Melotti37623ab2013-01-03 08:44:15 +0200967 raise_stop_iteration(start);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000968 return NULL;
969 }
970
971 /* if the next char is '.' followed by a digit then read all float digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200972 if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000973 is_float = 1;
974 idx += 2;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200975 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000976 }
977
978 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200979 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000980 Py_ssize_t e_start = idx;
981 idx++;
982
983 /* read an exponent sign if present */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200984 if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000985
986 /* read all digits */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200987 while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000988
989 /* if we got a digit, then parse as float. if not, backtrack */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200990 if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000991 is_float = 1;
992 }
993 else {
994 idx = e_start;
995 }
996 }
997
Antoine Pitrouf6454512011-04-25 19:16:06 +0200998 if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
999 custom_func = s->parse_float;
1000 else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
1001 custom_func = s->parse_int;
1002 else
1003 custom_func = NULL;
1004
1005 if (custom_func) {
1006 /* copy the section we determined to be a number */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001007 numstr = PyUnicode_FromKindAndData(kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001008 (char*)str + kind * start,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001009 idx - start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001010 if (numstr == NULL)
1011 return NULL;
1012 rval = PyObject_CallFunctionObjArgs(custom_func, numstr, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001013 }
1014 else {
Antoine Pitrouf6454512011-04-25 19:16:06 +02001015 Py_ssize_t i, n;
1016 char *buf;
1017 /* Straight conversion to ASCII, to avoid costly conversion of
1018 decimal unicode digits (which cannot appear here) */
1019 n = idx - start;
1020 numstr = PyBytes_FromStringAndSize(NULL, n);
1021 if (numstr == NULL)
1022 return NULL;
1023 buf = PyBytes_AS_STRING(numstr);
1024 for (i = 0; i < n; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001025 buf[i] = (char) PyUnicode_READ(kind, str, i + start);
Antoine Pitrouf6454512011-04-25 19:16:06 +02001026 }
1027 if (is_float)
1028 rval = PyFloat_FromString(numstr);
1029 else
1030 rval = PyLong_FromString(buf, NULL, 10);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001031 }
1032 Py_DECREF(numstr);
1033 *next_idx_ptr = idx;
1034 return rval;
1035}
1036
1037static PyObject *
1038scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1039{
1040 /* Read one JSON term (of any kind) from PyUnicode pystr.
1041 idx is the index of the first character of the term
1042 *next_idx_ptr is a return-by-reference index to the first character after
1043 the number.
1044
1045 Returns a new PyObject representation of the term.
1046 */
Ezio Melotti362b9512011-05-07 17:58:09 +03001047 PyObject *res;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001048 void *str;
1049 int kind;
1050 Py_ssize_t length;
1051
1052 if (PyUnicode_READY(pystr) == -1)
1053 return NULL;
1054
1055 str = PyUnicode_DATA(pystr);
1056 kind = PyUnicode_KIND(pystr);
1057 length = PyUnicode_GET_LENGTH(pystr);
1058
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001059 if (idx < 0) {
Benjamin Peterson9beee042014-04-14 11:46:51 -04001060 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
Benjamin Peterson6ef2b362014-04-14 11:45:21 -04001061 return NULL;
1062 }
1063 if (idx >= length) {
Ezio Melotti37623ab2013-01-03 08:44:15 +02001064 raise_stop_iteration(idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001065 return NULL;
1066 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001067
1068 switch (PyUnicode_READ(kind, str, idx)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001069 case '"':
1070 /* string */
1071 return scanstring_unicode(pystr, idx + 1,
1072 PyObject_IsTrue(s->strict),
1073 next_idx_ptr);
1074 case '{':
1075 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +03001076 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1077 "from a unicode string"))
1078 return NULL;
1079 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1080 Py_LeaveRecursiveCall();
1081 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001082 case '[':
1083 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +03001084 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1085 "from a unicode string"))
1086 return NULL;
1087 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1088 Py_LeaveRecursiveCall();
1089 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001090 case 'n':
1091 /* null */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001092 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001093 Py_INCREF(Py_None);
1094 *next_idx_ptr = idx + 4;
1095 return Py_None;
1096 }
1097 break;
1098 case 't':
1099 /* true */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001100 if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001101 Py_INCREF(Py_True);
1102 *next_idx_ptr = idx + 4;
1103 return Py_True;
1104 }
1105 break;
1106 case 'f':
1107 /* false */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001108 if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1109 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1110 PyUnicode_READ(kind, str, idx + 3) == 's' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001111 PyUnicode_READ(kind, str, idx + 4) == 'e') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001112 Py_INCREF(Py_False);
1113 *next_idx_ptr = idx + 5;
1114 return Py_False;
1115 }
1116 break;
1117 case 'N':
1118 /* NaN */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001119 if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001120 PyUnicode_READ(kind, str, idx + 2) == 'N') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001121 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1122 }
1123 break;
1124 case 'I':
1125 /* Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001126 if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1127 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1128 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001129 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001130 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1131 PyUnicode_READ(kind, str, idx + 6) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001132 PyUnicode_READ(kind, str, idx + 7) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001133 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1134 }
1135 break;
1136 case '-':
1137 /* -Infinity */
Victor Stinnerd9c06312011-10-11 21:56:19 +02001138 if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001139 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1140 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001141 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001142 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
Victor Stinnerd9c06312011-10-11 21:56:19 +02001143 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1144 PyUnicode_READ(kind, str, idx + 7) == 't' &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001145 PyUnicode_READ(kind, str, idx + 8) == 'y') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001146 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1147 }
1148 break;
1149 }
1150 /* Didn't find a string, object, array, or named constant. Look for a number. */
1151 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1152}
1153
1154static PyObject *
1155scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1156{
1157 /* Python callable interface to scan_once_{str,unicode} */
1158 PyObject *pystr;
1159 PyObject *rval;
1160 Py_ssize_t idx;
1161 Py_ssize_t next_idx = -1;
1162 static char *kwlist[] = {"string", "idx", NULL};
1163 PyScannerObject *s;
1164 assert(PyScanner_Check(self));
1165 s = (PyScannerObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001166 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001167 return NULL;
1168
1169 if (PyUnicode_Check(pystr)) {
1170 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1171 }
1172 else {
1173 PyErr_Format(PyExc_TypeError,
1174 "first argument must be a string, not %.80s",
1175 Py_TYPE(pystr)->tp_name);
1176 return NULL;
1177 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001178 PyDict_Clear(s->memo);
1179 if (rval == NULL)
1180 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001181 return _build_rval_index_tuple(rval, next_idx);
1182}
1183
1184static PyObject *
1185scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1186{
1187 PyScannerObject *s;
1188 s = (PyScannerObject *)type->tp_alloc(type, 0);
1189 if (s != NULL) {
1190 s->strict = NULL;
1191 s->object_hook = NULL;
1192 s->object_pairs_hook = NULL;
1193 s->parse_float = NULL;
1194 s->parse_int = NULL;
1195 s->parse_constant = NULL;
1196 }
1197 return (PyObject *)s;
1198}
1199
1200static int
1201scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1202{
1203 /* Initialize Scanner object */
1204 PyObject *ctx;
1205 static char *kwlist[] = {"context", NULL};
1206 PyScannerObject *s;
1207
1208 assert(PyScanner_Check(self));
1209 s = (PyScannerObject *)self;
1210
1211 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1212 return -1;
1213
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001214 if (s->memo == NULL) {
1215 s->memo = PyDict_New();
1216 if (s->memo == NULL)
1217 goto bail;
1218 }
1219
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001220 /* All of these will fail "gracefully" so we don't need to verify them */
1221 s->strict = PyObject_GetAttrString(ctx, "strict");
1222 if (s->strict == NULL)
1223 goto bail;
1224 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1225 if (s->object_hook == NULL)
1226 goto bail;
1227 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1228 if (s->object_pairs_hook == NULL)
1229 goto bail;
1230 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1231 if (s->parse_float == NULL)
1232 goto bail;
1233 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1234 if (s->parse_int == NULL)
1235 goto bail;
1236 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1237 if (s->parse_constant == NULL)
1238 goto bail;
1239
1240 return 0;
1241
1242bail:
1243 Py_CLEAR(s->strict);
1244 Py_CLEAR(s->object_hook);
1245 Py_CLEAR(s->object_pairs_hook);
1246 Py_CLEAR(s->parse_float);
1247 Py_CLEAR(s->parse_int);
1248 Py_CLEAR(s->parse_constant);
1249 return -1;
1250}
1251
1252PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1253
1254static
1255PyTypeObject PyScannerType = {
1256 PyVarObject_HEAD_INIT(NULL, 0)
1257 "_json.Scanner", /* tp_name */
1258 sizeof(PyScannerObject), /* tp_basicsize */
1259 0, /* tp_itemsize */
1260 scanner_dealloc, /* tp_dealloc */
1261 0, /* tp_print */
1262 0, /* tp_getattr */
1263 0, /* tp_setattr */
1264 0, /* tp_compare */
1265 0, /* tp_repr */
1266 0, /* tp_as_number */
1267 0, /* tp_as_sequence */
1268 0, /* tp_as_mapping */
1269 0, /* tp_hash */
1270 scanner_call, /* tp_call */
1271 0, /* tp_str */
1272 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1273 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1274 0, /* tp_as_buffer */
1275 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1276 scanner_doc, /* tp_doc */
1277 scanner_traverse, /* tp_traverse */
1278 scanner_clear, /* tp_clear */
1279 0, /* tp_richcompare */
1280 0, /* tp_weaklistoffset */
1281 0, /* tp_iter */
1282 0, /* tp_iternext */
1283 0, /* tp_methods */
1284 scanner_members, /* tp_members */
1285 0, /* tp_getset */
1286 0, /* tp_base */
1287 0, /* tp_dict */
1288 0, /* tp_descr_get */
1289 0, /* tp_descr_set */
1290 0, /* tp_dictoffset */
1291 scanner_init, /* tp_init */
1292 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1293 scanner_new, /* tp_new */
1294 0,/* PyObject_GC_Del, */ /* tp_free */
1295};
1296
1297static PyObject *
1298encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1299{
1300 PyEncoderObject *s;
1301 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1302 if (s != NULL) {
1303 s->markers = NULL;
1304 s->defaultfn = NULL;
1305 s->encoder = NULL;
1306 s->indent = NULL;
1307 s->key_separator = NULL;
1308 s->item_separator = NULL;
1309 s->sort_keys = NULL;
1310 s->skipkeys = NULL;
1311 }
1312 return (PyObject *)s;
1313}
1314
1315static int
1316encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1317{
1318 /* initialize Encoder object */
1319 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1320
1321 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001322 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1323 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001324
1325 assert(PyEncoder_Check(self));
1326 s = (PyEncoderObject *)self;
1327
1328 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001329 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
1330 &sort_keys, &skipkeys, &allow_nan))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001331 return -1;
1332
Antoine Pitrou781eba72009-12-08 15:57:31 +00001333 s->markers = markers;
1334 s->defaultfn = defaultfn;
1335 s->encoder = encoder;
1336 s->indent = indent;
1337 s->key_separator = key_separator;
1338 s->item_separator = item_separator;
1339 s->sort_keys = sort_keys;
1340 s->skipkeys = skipkeys;
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001341 s->fast_encode = NULL;
1342 if (PyCFunction_Check(s->encoder)) {
1343 PyCFunction f = PyCFunction_GetFunction(s->encoder);
1344 if (f == (PyCFunction)py_encode_basestring_ascii ||
1345 f == (PyCFunction)py_encode_basestring) {
1346 s->fast_encode = f;
1347 }
1348 }
Antoine Pitrou781eba72009-12-08 15:57:31 +00001349 s->allow_nan = PyObject_IsTrue(allow_nan);
1350
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001351 Py_INCREF(s->markers);
1352 Py_INCREF(s->defaultfn);
1353 Py_INCREF(s->encoder);
1354 Py_INCREF(s->indent);
1355 Py_INCREF(s->key_separator);
1356 Py_INCREF(s->item_separator);
1357 Py_INCREF(s->sort_keys);
1358 Py_INCREF(s->skipkeys);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001359 return 0;
1360}
1361
1362static PyObject *
1363encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1364{
1365 /* Python callable interface to encode_listencode_obj */
1366 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1367 PyObject *obj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001368 Py_ssize_t indent_level;
1369 PyEncoderObject *s;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001370 _PyAccu acc;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001371
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001372 assert(PyEncoder_Check(self));
1373 s = (PyEncoderObject *)self;
Antoine Pitroucbb02842012-12-01 19:34:16 +01001374 if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1375 &obj, &indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001376 return NULL;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001377 if (_PyAccu_Init(&acc))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001378 return NULL;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001379 if (encoder_listencode_obj(s, &acc, obj, indent_level)) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001380 _PyAccu_Destroy(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001381 return NULL;
1382 }
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001383 return _PyAccu_FinishAsList(&acc);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001384}
1385
1386static PyObject *
1387_encoded_const(PyObject *obj)
1388{
1389 /* Return the JSON string representation of None, True, False */
1390 if (obj == Py_None) {
1391 static PyObject *s_null = NULL;
1392 if (s_null == NULL) {
1393 s_null = PyUnicode_InternFromString("null");
1394 }
1395 Py_INCREF(s_null);
1396 return s_null;
1397 }
1398 else if (obj == Py_True) {
1399 static PyObject *s_true = NULL;
1400 if (s_true == NULL) {
1401 s_true = PyUnicode_InternFromString("true");
1402 }
1403 Py_INCREF(s_true);
1404 return s_true;
1405 }
1406 else if (obj == Py_False) {
1407 static PyObject *s_false = NULL;
1408 if (s_false == NULL) {
1409 s_false = PyUnicode_InternFromString("false");
1410 }
1411 Py_INCREF(s_false);
1412 return s_false;
1413 }
1414 else {
1415 PyErr_SetString(PyExc_ValueError, "not a const");
1416 return NULL;
1417 }
1418}
1419
1420static PyObject *
Ethan Furmana4998a72013-08-10 13:01:45 -07001421encoder_encode_long(PyEncoderObject* s UNUSED, PyObject *obj)
1422{
1423 /* Return the JSON representation of a PyLong and PyLong subclasses.
1424 Calls int() on PyLong subclasses in case the str() was changed.
1425 Added specifically to deal with IntEnum. See Issue18264. */
1426 PyObject *encoded, *longobj;
1427 if (PyLong_CheckExact(obj)) {
1428 encoded = PyObject_Str(obj);
1429 }
1430 else {
1431 longobj = PyNumber_Long(obj);
1432 if (longobj == NULL) {
1433 PyErr_SetString(
1434 PyExc_ValueError,
1435 "Unable to coerce int subclass to int"
1436 );
1437 return NULL;
1438 }
1439 encoded = PyObject_Str(longobj);
1440 Py_DECREF(longobj);
1441 }
1442 return encoded;
1443}
1444
1445
1446static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001447encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1448{
Ethan Furmana4998a72013-08-10 13:01:45 -07001449 /* Return the JSON representation of a PyFloat.
1450 Modified to call float() on float subclasses in case the subclass
1451 changes the repr. See Issue18264. */
1452 PyObject *encoded, *floatobj;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001453 double i = PyFloat_AS_DOUBLE(obj);
1454 if (!Py_IS_FINITE(i)) {
1455 if (!s->allow_nan) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001456 PyErr_SetString(
1457 PyExc_ValueError,
1458 "Out of range float values are not JSON compliant"
1459 );
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001460 return NULL;
1461 }
1462 if (i > 0) {
1463 return PyUnicode_FromString("Infinity");
1464 }
1465 else if (i < 0) {
1466 return PyUnicode_FromString("-Infinity");
1467 }
1468 else {
1469 return PyUnicode_FromString("NaN");
1470 }
1471 }
Ethan Furmana4998a72013-08-10 13:01:45 -07001472 /* coerce float subclasses to float (primarily for Enum) */
1473 if (PyFloat_CheckExact(obj)) {
1474 /* Use a better float format here? */
1475 encoded = PyObject_Repr(obj);
1476 }
1477 else {
1478 floatobj = PyNumber_Float(obj);
1479 if (floatobj == NULL) {
1480 PyErr_SetString(
1481 PyExc_ValueError,
1482 "Unable to coerce float subclass to float"
1483 );
1484 return NULL;
1485 }
1486 encoded = PyObject_Repr(floatobj);
1487 Py_DECREF(floatobj);
1488 }
1489 return encoded;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001490}
1491
1492static PyObject *
1493encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1494{
1495 /* Return the JSON representation of a string */
1496 if (s->fast_encode)
Antoine Pitroudc3eaa82015-01-11 16:41:01 +01001497 return s->fast_encode(NULL, obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001498 else
1499 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1500}
1501
1502static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001503_steal_accumulate(_PyAccu *acc, PyObject *stolen)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001504{
1505 /* Append stolen and then decrement its reference count */
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001506 int rval = _PyAccu_Accumulate(acc, stolen);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001507 Py_DECREF(stolen);
1508 return rval;
1509}
1510
1511static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001512encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001513 PyObject *obj, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001514{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001515 /* Encode Python object obj to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001516 PyObject *newobj;
1517 int rv;
1518
1519 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1520 PyObject *cstr = _encoded_const(obj);
1521 if (cstr == NULL)
1522 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001523 return _steal_accumulate(acc, cstr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001524 }
1525 else if (PyUnicode_Check(obj))
1526 {
1527 PyObject *encoded = encoder_encode_string(s, obj);
1528 if (encoded == NULL)
1529 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001530 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001531 }
1532 else if (PyLong_Check(obj)) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001533 PyObject *encoded = encoder_encode_long(s, obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001534 if (encoded == NULL)
1535 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001536 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001537 }
1538 else if (PyFloat_Check(obj)) {
1539 PyObject *encoded = encoder_encode_float(s, obj);
1540 if (encoded == NULL)
1541 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001542 return _steal_accumulate(acc, encoded);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001543 }
1544 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001545 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1546 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001547 rv = encoder_listencode_list(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001548 Py_LeaveRecursiveCall();
1549 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001550 }
1551 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001552 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1553 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001554 rv = encoder_listencode_dict(s, acc, obj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001555 Py_LeaveRecursiveCall();
1556 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001557 }
1558 else {
1559 PyObject *ident = NULL;
1560 if (s->markers != Py_None) {
1561 int has_key;
1562 ident = PyLong_FromVoidPtr(obj);
1563 if (ident == NULL)
1564 return -1;
1565 has_key = PyDict_Contains(s->markers, ident);
1566 if (has_key) {
1567 if (has_key != -1)
1568 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1569 Py_DECREF(ident);
1570 return -1;
1571 }
1572 if (PyDict_SetItem(s->markers, ident, obj)) {
1573 Py_DECREF(ident);
1574 return -1;
1575 }
1576 }
1577 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
1578 if (newobj == NULL) {
1579 Py_XDECREF(ident);
1580 return -1;
1581 }
Ezio Melotti13672652011-05-11 01:02:56 +03001582
1583 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1584 return -1;
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001585 rv = encoder_listencode_obj(s, acc, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001586 Py_LeaveRecursiveCall();
1587
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001588 Py_DECREF(newobj);
1589 if (rv) {
1590 Py_XDECREF(ident);
1591 return -1;
1592 }
1593 if (ident != NULL) {
1594 if (PyDict_DelItem(s->markers, ident)) {
1595 Py_XDECREF(ident);
1596 return -1;
1597 }
1598 Py_XDECREF(ident);
1599 }
1600 return rv;
1601 }
1602}
1603
1604static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001605encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001606 PyObject *dct, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001607{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001608 /* Encode Python dict dct a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001609 static PyObject *open_dict = NULL;
1610 static PyObject *close_dict = NULL;
1611 static PyObject *empty_dict = NULL;
1612 PyObject *kstr = NULL;
1613 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001614 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001615 PyObject *items;
1616 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001617 int skipkeys;
1618 Py_ssize_t idx;
1619
1620 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1621 open_dict = PyUnicode_InternFromString("{");
1622 close_dict = PyUnicode_InternFromString("}");
1623 empty_dict = PyUnicode_InternFromString("{}");
1624 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1625 return -1;
1626 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001627 if (Py_SIZE(dct) == 0)
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001628 return _PyAccu_Accumulate(acc, empty_dict);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001629
1630 if (s->markers != Py_None) {
1631 int has_key;
1632 ident = PyLong_FromVoidPtr(dct);
1633 if (ident == NULL)
1634 goto bail;
1635 has_key = PyDict_Contains(s->markers, ident);
1636 if (has_key) {
1637 if (has_key != -1)
1638 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1639 goto bail;
1640 }
1641 if (PyDict_SetItem(s->markers, ident, dct)) {
1642 goto bail;
1643 }
1644 }
1645
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001646 if (_PyAccu_Accumulate(acc, open_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001647 goto bail;
1648
1649 if (s->indent != Py_None) {
1650 /* TODO: DOES NOT RUN */
1651 indent_level += 1;
1652 /*
1653 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1654 separator = _item_separator + newline_indent
1655 buf += newline_indent
1656 */
1657 }
1658
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001659 if (PyObject_IsTrue(s->sort_keys)) {
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001660 /* First sort the keys then replace them with (key, value) tuples. */
1661 Py_ssize_t i, nitems;
1662 items = PyMapping_Keys(dct);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001663 if (items == NULL)
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001664 goto bail;
1665 if (!PyList_Check(items)) {
1666 PyErr_SetString(PyExc_ValueError, "keys must return list");
1667 goto bail;
1668 }
1669 if (PyList_Sort(items) < 0)
1670 goto bail;
1671 nitems = PyList_GET_SIZE(items);
1672 for (i = 0; i < nitems; i++) {
1673 PyObject *key, *value;
1674 key = PyList_GET_ITEM(items, i);
1675 value = PyDict_GetItem(dct, key);
1676 item = PyTuple_Pack(2, key, value);
1677 if (item == NULL)
1678 goto bail;
1679 PyList_SET_ITEM(items, i, item);
Victor Stinner31a3ec32014-09-10 23:31:42 +02001680 item = NULL;
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001681 Py_DECREF(key);
1682 }
1683 }
1684 else {
1685 items = PyMapping_Items(dct);
1686 }
1687 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001688 goto bail;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001689 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001690 Py_DECREF(items);
1691 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001692 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001693 skipkeys = PyObject_IsTrue(s->skipkeys);
1694 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001695 while ((item = PyIter_Next(it)) != NULL) {
1696 PyObject *encoded, *key, *value;
1697 if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
1698 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1699 goto bail;
1700 }
1701 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001702 if (PyUnicode_Check(key)) {
1703 Py_INCREF(key);
1704 kstr = key;
1705 }
1706 else if (PyFloat_Check(key)) {
1707 kstr = encoder_encode_float(s, key);
1708 if (kstr == NULL)
1709 goto bail;
1710 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001711 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001712 /* This must come before the PyLong_Check because
1713 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001714 kstr = _encoded_const(key);
1715 if (kstr == NULL)
1716 goto bail;
1717 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001718 else if (PyLong_Check(key)) {
Ethan Furmana4998a72013-08-10 13:01:45 -07001719 kstr = encoder_encode_long(s, key);
1720 if (kstr == NULL) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001721 goto bail;
Ethan Furmana4998a72013-08-10 13:01:45 -07001722 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001723 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001724 else if (skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001725 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001726 continue;
1727 }
1728 else {
1729 /* TODO: include repr of key */
Doug Hellmann1c524752010-07-21 12:29:04 +00001730 PyErr_SetString(PyExc_TypeError, "keys must be a string");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001731 goto bail;
1732 }
1733
1734 if (idx) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001735 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001736 goto bail;
1737 }
1738
1739 encoded = encoder_encode_string(s, kstr);
1740 Py_CLEAR(kstr);
1741 if (encoded == NULL)
1742 goto bail;
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001743 if (_PyAccu_Accumulate(acc, encoded)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001744 Py_DECREF(encoded);
1745 goto bail;
1746 }
1747 Py_DECREF(encoded);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001748 if (_PyAccu_Accumulate(acc, s->key_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001749 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001750
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001751 value = PyTuple_GET_ITEM(item, 1);
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001752 if (encoder_listencode_obj(s, acc, value, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001753 goto bail;
1754 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001755 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001756 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001757 if (PyErr_Occurred())
1758 goto bail;
1759 Py_CLEAR(it);
1760
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001761 if (ident != NULL) {
1762 if (PyDict_DelItem(s->markers, ident))
1763 goto bail;
1764 Py_CLEAR(ident);
1765 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001766 /* TODO DOES NOT RUN; dead code
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001767 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001768 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001769
1770 yield '\n' + (' ' * (_indent * _current_indent_level))
1771 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001772 if (_PyAccu_Accumulate(acc, close_dict))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001773 goto bail;
1774 return 0;
1775
1776bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001777 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001778 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001779 Py_XDECREF(kstr);
1780 Py_XDECREF(ident);
1781 return -1;
1782}
1783
1784
1785static int
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001786encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001787 PyObject *seq, Py_ssize_t indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001788{
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001789 /* Encode Python list seq to a JSON term */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001790 static PyObject *open_array = NULL;
1791 static PyObject *close_array = NULL;
1792 static PyObject *empty_array = NULL;
1793 PyObject *ident = NULL;
1794 PyObject *s_fast = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001795 Py_ssize_t i;
1796
1797 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1798 open_array = PyUnicode_InternFromString("[");
1799 close_array = PyUnicode_InternFromString("]");
1800 empty_array = PyUnicode_InternFromString("[]");
1801 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1802 return -1;
1803 }
1804 ident = NULL;
1805 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1806 if (s_fast == NULL)
1807 return -1;
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001808 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001809 Py_DECREF(s_fast);
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001810 return _PyAccu_Accumulate(acc, empty_array);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001811 }
1812
1813 if (s->markers != Py_None) {
1814 int has_key;
1815 ident = PyLong_FromVoidPtr(seq);
1816 if (ident == NULL)
1817 goto bail;
1818 has_key = PyDict_Contains(s->markers, ident);
1819 if (has_key) {
1820 if (has_key != -1)
1821 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1822 goto bail;
1823 }
1824 if (PyDict_SetItem(s->markers, ident, seq)) {
1825 goto bail;
1826 }
1827 }
1828
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001829 if (_PyAccu_Accumulate(acc, open_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001830 goto bail;
1831 if (s->indent != Py_None) {
1832 /* TODO: DOES NOT RUN */
1833 indent_level += 1;
1834 /*
1835 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1836 separator = _item_separator + newline_indent
1837 buf += newline_indent
1838 */
1839 }
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001840 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1841 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001842 if (i) {
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001843 if (_PyAccu_Accumulate(acc, s->item_separator))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001844 goto bail;
1845 }
Antoine Pitroudf7fc9d2011-08-19 18:03:14 +02001846 if (encoder_listencode_obj(s, acc, obj, indent_level))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001847 goto bail;
1848 }
1849 if (ident != NULL) {
1850 if (PyDict_DelItem(s->markers, ident))
1851 goto bail;
1852 Py_CLEAR(ident);
1853 }
Brett Cannonb94767f2011-02-22 20:15:44 +00001854
1855 /* TODO: DOES NOT RUN
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001856 if (s->indent != Py_None) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001857 indent_level -= 1;
Brett Cannonb94767f2011-02-22 20:15:44 +00001858
1859 yield '\n' + (' ' * (_indent * _current_indent_level))
1860 }*/
Antoine Pitrou90c30e82011-10-06 19:09:51 +02001861 if (_PyAccu_Accumulate(acc, close_array))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001862 goto bail;
1863 Py_DECREF(s_fast);
1864 return 0;
1865
1866bail:
1867 Py_XDECREF(ident);
1868 Py_DECREF(s_fast);
1869 return -1;
1870}
1871
1872static void
1873encoder_dealloc(PyObject *self)
1874{
1875 /* Deallocate Encoder */
1876 encoder_clear(self);
1877 Py_TYPE(self)->tp_free(self);
1878}
1879
1880static int
1881encoder_traverse(PyObject *self, visitproc visit, void *arg)
1882{
1883 PyEncoderObject *s;
1884 assert(PyEncoder_Check(self));
1885 s = (PyEncoderObject *)self;
1886 Py_VISIT(s->markers);
1887 Py_VISIT(s->defaultfn);
1888 Py_VISIT(s->encoder);
1889 Py_VISIT(s->indent);
1890 Py_VISIT(s->key_separator);
1891 Py_VISIT(s->item_separator);
1892 Py_VISIT(s->sort_keys);
1893 Py_VISIT(s->skipkeys);
1894 return 0;
1895}
1896
1897static int
1898encoder_clear(PyObject *self)
1899{
1900 /* Deallocate Encoder */
1901 PyEncoderObject *s;
1902 assert(PyEncoder_Check(self));
1903 s = (PyEncoderObject *)self;
1904 Py_CLEAR(s->markers);
1905 Py_CLEAR(s->defaultfn);
1906 Py_CLEAR(s->encoder);
1907 Py_CLEAR(s->indent);
1908 Py_CLEAR(s->key_separator);
1909 Py_CLEAR(s->item_separator);
1910 Py_CLEAR(s->sort_keys);
1911 Py_CLEAR(s->skipkeys);
1912 return 0;
1913}
1914
/* Docstring installed as the tp_doc of the encoder type below. */
PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");

/* Type object for the C encoder, published by PyInit__json() under the
   module attribute "make_encoder".  Instances are callable (tp_call) and
   take part in cyclic garbage collection (Py_TPFLAGS_HAVE_GC together with
   the traverse/clear hooks).

   The initializer is positional: every entry must stay in slot order, and
   the trailing comment names the slot each value fills.  Note that
   PyInit__json() assigns PyType_GenericNew to tp_new before calling
   PyType_Ready(), replacing the encoder_new entry given here. */
static
PyTypeObject PyEncoderType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_json.Encoder",       /* tp_name */
    sizeof(PyEncoderObject), /* tp_basicsize */
    0,                    /* tp_itemsize */
    encoder_dealloc, /* tp_dealloc */
    0,                    /* tp_print */
    0,                    /* tp_getattr */
    0,                    /* tp_setattr */
    0,                    /* tp_compare */
    0,                    /* tp_repr */
    0,                    /* tp_as_number */
    0,                    /* tp_as_sequence */
    0,                    /* tp_as_mapping */
    0,                    /* tp_hash */
    encoder_call,         /* tp_call */
    0,                    /* tp_str */
    0,                    /* tp_getattro */
    0,                    /* tp_setattro */
    0,                    /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
    encoder_doc,          /* tp_doc */
    encoder_traverse,     /* tp_traverse */
    encoder_clear,        /* tp_clear */
    0,                    /* tp_richcompare */
    0,                    /* tp_weaklistoffset */
    0,                    /* tp_iter */
    0,                    /* tp_iternext */
    0,                    /* tp_methods */
    encoder_members,      /* tp_members */
    0,                    /* tp_getset */
    0,                    /* tp_base */
    0,                    /* tp_dict */
    0,                    /* tp_descr_get */
    0,                    /* tp_descr_set */
    0,                    /* tp_dictoffset */
    encoder_init,         /* tp_init */
    0,                    /* tp_alloc */
    encoder_new,          /* tp_new */
    0,                    /* tp_free */
};
1959
/* Module-level functions exported by _json.  Each entry gives the Python
   name, the C implementation, the calling convention flag and the
   docstring; the table is referenced from the jsonmodule definition and is
   terminated by an all-NULL sentinel entry. */
static PyMethodDef speedups_methods[] = {
    /* METH_O: called with exactly one positional argument. */
    {"encode_basestring_ascii",
        (PyCFunction)py_encode_basestring_ascii,
        METH_O,
        pydoc_encode_basestring_ascii},
    {"encode_basestring",
        (PyCFunction)py_encode_basestring,
        METH_O,
        pydoc_encode_basestring},
    /* METH_VARARGS: arguments arrive packed in a tuple. */
    {"scanstring",
        (PyCFunction)py_scanstring,
        METH_VARARGS,
        pydoc_scanstring},
    {NULL, NULL, 0, NULL}   /* sentinel */
};
1975
/* Docstring of the _json module. */
PyDoc_STRVAR(module_doc,
"json speedups\n");

/* Module definition handed to PyModule_Create() in PyInit__json().
   The initializer is positional.  NOTE(review): the field names in the
   trailing comments follow the documented PyModuleDef layout; confirm them
   against the Python version being targeted. */
static struct PyModuleDef jsonmodule = {
        PyModuleDef_HEAD_INIT,
        "_json",            /* module name */
        module_doc,         /* module docstring */
        -1,                 /* per-module state size */
        speedups_methods,   /* module-level functions */
        NULL,               /* remaining optional slots are unused */
        NULL,
        NULL,
        NULL
};
1990
1991PyObject*
1992PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00001993{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001994 PyObject *m = PyModule_Create(&jsonmodule);
1995 if (!m)
1996 return NULL;
1997 PyScannerType.tp_new = PyType_GenericNew;
1998 if (PyType_Ready(&PyScannerType) < 0)
1999 goto fail;
2000 PyEncoderType.tp_new = PyType_GenericNew;
2001 if (PyType_Ready(&PyEncoderType) < 0)
2002 goto fail;
2003 Py_INCREF((PyObject*)&PyScannerType);
2004 if (PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType) < 0) {
2005 Py_DECREF((PyObject*)&PyScannerType);
2006 goto fail;
2007 }
2008 Py_INCREF((PyObject*)&PyEncoderType);
2009 if (PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType) < 0) {
2010 Py_DECREF((PyObject*)&PyEncoderType);
2011 goto fail;
2012 }
2013 return m;
2014 fail:
2015 Py_DECREF(m);
2016 return NULL;
Christian Heimes90540002008-05-08 14:29:10 +00002017}