blob: 2d660831867f81b2aee13ca42b6dedec43467c24 [file] [log] [blame]
Brett Cannon4b964f92008-05-05 20:21:38 +00001#include "Python.h"
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002#include "structmember.h"
/* Fallback for Py_TYPE when building against headers older than 2.6
   that do not already define it. */
#if !defined(Py_TYPE) && PY_VERSION_HEX < 0x02060000
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif

/* Fallback Py_ssize_t machinery when building against headers older
   than 2.5 that do not already define it: sizes are plain ints there. */
#if !defined(PY_SSIZE_T_MIN) && PY_VERSION_HEX < 0x02050000
typedef int Py_ssize_t;
#define PY_SSIZE_T_MAX INT_MAX
#define PY_SSIZE_T_MIN INT_MIN
#define PyInt_FromSsize_t PyInt_FromLong
#define PyInt_AsSsize_t PyInt_AsLong
#endif

/* Finite means neither infinite nor NaN. */
#if !defined(Py_IS_FINITE)
#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
#endif

/* UNUSED marks a deliberately ignored parameter; it expands to nothing
   on compilers other than GCC-compatible ones. */
#if defined(__GNUC__)
#define UNUSED __attribute__((__unused__))
#else
#define UNUSED
#endif
Brett Cannon4b964f92008-05-05 20:21:38 +000022
23#define DEFAULT_ENCODING "utf-8"
Brett Cannon4b964f92008-05-05 20:21:38 +000024
Bob Ippolitod914e3f2009-03-17 23:19:00 +000025#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
26#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
27#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
28#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
29
30static PyTypeObject PyScannerType;
31static PyTypeObject PyEncoderType;
32
33typedef struct _PyScannerObject {
34 PyObject_HEAD
35 PyObject *encoding;
36 PyObject *strict;
37 PyObject *object_hook;
Raymond Hettinger91852ca2009-03-19 19:19:03 +000038 PyObject *pairs_hook;
Bob Ippolitod914e3f2009-03-17 23:19:00 +000039 PyObject *parse_float;
40 PyObject *parse_int;
41 PyObject *parse_constant;
42} PyScannerObject;
43
44static PyMemberDef scanner_members[] = {
45 {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
46 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
47 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
Raymond Hettinger91852ca2009-03-19 19:19:03 +000048 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
Bob Ippolitod914e3f2009-03-17 23:19:00 +000049 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
50 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
51 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
52 {NULL}
53};
54
55typedef struct _PyEncoderObject {
56 PyObject_HEAD
57 PyObject *markers;
58 PyObject *defaultfn;
59 PyObject *encoder;
60 PyObject *indent;
61 PyObject *key_separator;
62 PyObject *item_separator;
63 PyObject *sort_keys;
64 PyObject *skipkeys;
65 int fast_encode;
66 int allow_nan;
67} PyEncoderObject;
68
69static PyMemberDef encoder_members[] = {
70 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
71 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
72 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
73 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
74 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
75 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
76 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
77 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
78 {NULL}
79};
80
81static Py_ssize_t
82ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
83static PyObject *
84ascii_escape_unicode(PyObject *pystr);
85static PyObject *
86ascii_escape_str(PyObject *pystr);
87static PyObject *
88py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
89void init_json(void);
90static PyObject *
91scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
92static PyObject *
93scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
94static PyObject *
95_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
96static PyObject *
97scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
98static int
99scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
100static void
101scanner_dealloc(PyObject *self);
102static int
103scanner_clear(PyObject *self);
104static PyObject *
105encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
106static int
107encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
108static void
109encoder_dealloc(PyObject *self);
110static int
111encoder_clear(PyObject *self);
112static int
113encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
114static int
115encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
116static int
117encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
118static PyObject *
Raymond Hettingera0b8d9a2009-03-19 19:24:43 +0000119_encoded_const(PyObject *obj);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000120static void
121raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
122static PyObject *
123encoder_encode_string(PyEncoderObject *s, PyObject *obj);
124static int
125_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
126static PyObject *
127_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
128static PyObject *
129encoder_encode_float(PyEncoderObject *s, PyObject *obj);
130
/* True when c may be copied verbatim into an ASCII JSON string: a
   printable ASCII character (' ' through '~') other than backslash and
   double quote.  The argument is parenthesized so that expression
   arguments group correctly; it is still evaluated more than once, so
   it must not have side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')

/* True for the four characters treated as whitespace here:
   space, tab, newline and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* Worst-case output bytes per input character: one "\uXXXX" escape is
   6 bytes; on wide-unicode builds a character may need a surrogate
   pair, i.e. two such escapes. */
#define MIN_EXPANSION 6
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
140
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000141static int
142_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
143{
144 /* PyObject to Py_ssize_t converter */
145 *size_ptr = PyInt_AsSsize_t(o);
Georg Brandlf71ba952009-05-05 07:48:12 +0000146 if (*size_ptr == -1 && PyErr_Occurred())
147 return 0;
148 return 1;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000149}
150
151static PyObject *
152_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
153{
154 /* Py_ssize_t to PyObject converter */
155 return PyInt_FromSsize_t(*size_ptr);
156}
157
Brett Cannon4b964f92008-05-05 20:21:38 +0000158static Py_ssize_t
159ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
160{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000161 /* Escape unicode code point c to ASCII escape sequences
162 in char *output. output must have at least 12 bytes unused to
163 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Brett Cannon4b964f92008-05-05 20:21:38 +0000164 output[chars++] = '\\';
165 switch (c) {
166 case '\\': output[chars++] = (char)c; break;
167 case '"': output[chars++] = (char)c; break;
168 case '\b': output[chars++] = 'b'; break;
169 case '\f': output[chars++] = 'f'; break;
170 case '\n': output[chars++] = 'n'; break;
171 case '\r': output[chars++] = 'r'; break;
172 case '\t': output[chars++] = 't'; break;
173 default:
174#ifdef Py_UNICODE_WIDE
175 if (c >= 0x10000) {
176 /* UTF-16 surrogate pair */
177 Py_UNICODE v = c - 0x10000;
178 c = 0xd800 | ((v >> 10) & 0x3ff);
179 output[chars++] = 'u';
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000180 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
181 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
182 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
183 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Brett Cannon4b964f92008-05-05 20:21:38 +0000184 c = 0xdc00 | (v & 0x3ff);
185 output[chars++] = '\\';
186 }
187#endif
188 output[chars++] = 'u';
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000189 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
190 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
191 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
192 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Brett Cannon4b964f92008-05-05 20:21:38 +0000193 }
194 return chars;
195}
196
197static PyObject *
198ascii_escape_unicode(PyObject *pystr)
199{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000200 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
Brett Cannon4b964f92008-05-05 20:21:38 +0000201 Py_ssize_t i;
202 Py_ssize_t input_chars;
203 Py_ssize_t output_size;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000204 Py_ssize_t max_output_size;
Brett Cannon4b964f92008-05-05 20:21:38 +0000205 Py_ssize_t chars;
206 PyObject *rval;
207 char *output;
208 Py_UNICODE *input_unicode;
209
210 input_chars = PyUnicode_GET_SIZE(pystr);
211 input_unicode = PyUnicode_AS_UNICODE(pystr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000212
Brett Cannon4b964f92008-05-05 20:21:38 +0000213 /* One char input can be up to 6 chars output, estimate 4 of these */
Benjamin Peterson04a53852016-08-13 16:47:25 -0700214 if (input_chars > (PY_SSIZE_T_MAX - 2)/ MAX_EXPANSION) {
215 PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
216 return NULL;
217 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000218 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000219 max_output_size = 2 + (input_chars * MAX_EXPANSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000220 rval = PyString_FromStringAndSize(NULL, output_size);
Brett Cannon4b964f92008-05-05 20:21:38 +0000221 if (rval == NULL) {
222 return NULL;
223 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000224 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000225 chars = 0;
226 output[chars++] = '"';
227 for (i = 0; i < input_chars; i++) {
228 Py_UNICODE c = input_unicode[i];
229 if (S_CHAR(c)) {
230 output[chars++] = (char)c;
231 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000232 else {
Brett Cannon4b964f92008-05-05 20:21:38 +0000233 chars = ascii_escape_char(c, output, chars);
234 }
235 if (output_size - chars < (1 + MAX_EXPANSION)) {
236 /* There's more than four, so let's resize by a lot */
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000237 Py_ssize_t new_output_size = output_size * 2;
Brett Cannon4b964f92008-05-05 20:21:38 +0000238 /* This is an upper bound */
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000239 if (new_output_size > max_output_size) {
240 new_output_size = max_output_size;
Brett Cannon4b964f92008-05-05 20:21:38 +0000241 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000242 /* Make sure that the output size changed before resizing */
243 if (new_output_size != output_size) {
244 output_size = new_output_size;
245 if (_PyString_Resize(&rval, output_size) == -1) {
246 return NULL;
247 }
248 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000249 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000250 }
251 }
252 output[chars++] = '"';
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000253 if (_PyString_Resize(&rval, chars) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000254 return NULL;
255 }
256 return rval;
257}
258
259static PyObject *
260ascii_escape_str(PyObject *pystr)
261{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000262 /* Take a PyString pystr and return a new ASCII-only escaped PyString */
Brett Cannon4b964f92008-05-05 20:21:38 +0000263 Py_ssize_t i;
264 Py_ssize_t input_chars;
265 Py_ssize_t output_size;
266 Py_ssize_t chars;
267 PyObject *rval;
268 char *output;
269 char *input_str;
270
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000271 input_chars = PyString_GET_SIZE(pystr);
272 input_str = PyString_AS_STRING(pystr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000273
274 /* Fast path for a string that's already ASCII */
275 for (i = 0; i < input_chars; i++) {
276 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
277 if (!S_CHAR(c)) {
278 /* If we have to escape something, scan the string for unicode */
279 Py_ssize_t j;
280 for (j = i; j < input_chars; j++) {
281 c = (Py_UNICODE)(unsigned char)input_str[j];
282 if (c > 0x7f) {
283 /* We hit a non-ASCII character, bail to unicode mode */
284 PyObject *uni;
285 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
286 if (uni == NULL) {
287 return NULL;
288 }
289 rval = ascii_escape_unicode(uni);
290 Py_DECREF(uni);
291 return rval;
292 }
293 }
294 break;
295 }
296 }
297
298 if (i == input_chars) {
299 /* Input is already ASCII */
300 output_size = 2 + input_chars;
301 }
302 else {
303 /* One char input can be up to 6 chars output, estimate 4 of these */
304 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
305 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000306 rval = PyString_FromStringAndSize(NULL, output_size);
Brett Cannon4b964f92008-05-05 20:21:38 +0000307 if (rval == NULL) {
308 return NULL;
309 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000310 output = PyString_AS_STRING(rval);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000311 output[0] = '"';
312
313 /* We know that everything up to i is ASCII already */
314 chars = i + 1;
315 memcpy(&output[1], input_str, i);
316
317 for (; i < input_chars; i++) {
318 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
Brett Cannon4b964f92008-05-05 20:21:38 +0000319 if (S_CHAR(c)) {
320 output[chars++] = (char)c;
321 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000322 else {
Brett Cannon4b964f92008-05-05 20:21:38 +0000323 chars = ascii_escape_char(c, output, chars);
324 }
325 /* An ASCII char can't possibly expand to a surrogate! */
326 if (output_size - chars < (1 + MIN_EXPANSION)) {
327 /* There's more than four, so let's resize by a lot */
328 output_size *= 2;
329 if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
330 output_size = 2 + (input_chars * MIN_EXPANSION);
331 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000332 if (_PyString_Resize(&rval, output_size) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000333 return NULL;
334 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000335 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000336 }
337 }
338 output[chars++] = '"';
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000339 if (_PyString_Resize(&rval, chars) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000340 return NULL;
341 }
342 return rval;
343}
344
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000345static void
Brett Cannon4b964f92008-05-05 20:21:38 +0000346raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
347{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000348 /* Use the Python function json.decoder.errmsg to raise a nice
349 looking ValueError exception */
Brett Cannon4b964f92008-05-05 20:21:38 +0000350 static PyObject *errmsg_fn = NULL;
351 PyObject *pymsg;
352 if (errmsg_fn == NULL) {
353 PyObject *decoder = PyImport_ImportModule("json.decoder");
354 if (decoder == NULL)
355 return;
356 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000357 Py_DECREF(decoder);
Brett Cannon4b964f92008-05-05 20:21:38 +0000358 if (errmsg_fn == NULL)
359 return;
Brett Cannon4b964f92008-05-05 20:21:38 +0000360 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000361 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
Benjamin Peterson595e3cb2008-10-16 21:09:28 +0000362 if (pymsg) {
363 PyErr_SetObject(PyExc_ValueError, pymsg);
364 Py_DECREF(pymsg);
365 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000366}
367
368static PyObject *
369join_list_unicode(PyObject *lst)
370{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000371 /* return u''.join(lst) */
372 static PyObject *joinfn = NULL;
373 if (joinfn == NULL) {
374 PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
375 if (ustr == NULL)
376 return NULL;
377
378 joinfn = PyObject_GetAttrString(ustr, "join");
379 Py_DECREF(ustr);
380 if (joinfn == NULL)
381 return NULL;
Brett Cannon4b964f92008-05-05 20:21:38 +0000382 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000383 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
Brett Cannon4b964f92008-05-05 20:21:38 +0000384}
385
386static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000387_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
388 /* return (rval, idx) tuple, stealing reference to rval */
389 PyObject *tpl;
390 PyObject *pyidx;
391 /*
392 steal a reference to rval, returns (rval, idx)
393 */
394 if (rval == NULL) {
395 return NULL;
396 }
397 pyidx = PyInt_FromSsize_t(idx);
398 if (pyidx == NULL) {
399 Py_DECREF(rval);
400 return NULL;
401 }
402 tpl = PyTuple_New(2);
403 if (tpl == NULL) {
404 Py_DECREF(pyidx);
405 Py_DECREF(rval);
406 return NULL;
407 }
408 PyTuple_SET_ITEM(tpl, 0, rval);
409 PyTuple_SET_ITEM(tpl, 1, pyidx);
410 return tpl;
411}
412
413static PyObject *
414scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
415{
416 /* Read the JSON string from PyString pystr.
417 end is the index of the first character after the quote.
418 encoding is the encoding of pystr (must be an ASCII superset)
419 if strict is zero then literal control characters are allowed
420 *next_end_ptr is a return-by-reference index of the character
421 after the end quote
422
423 Return value is a new PyString (if ASCII-only) or PyUnicode
424 */
Brett Cannon4b964f92008-05-05 20:21:38 +0000425 PyObject *rval;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000426 Py_ssize_t len = PyString_GET_SIZE(pystr);
Brett Cannon4b964f92008-05-05 20:21:38 +0000427 Py_ssize_t begin = end - 1;
Brett Cannon8e9757e2010-05-03 23:43:49 +0000428 Py_ssize_t next;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000429 char *buf = PyString_AS_STRING(pystr);
Brett Cannon4b964f92008-05-05 20:21:38 +0000430 PyObject *chunks = PyList_New(0);
431 if (chunks == NULL) {
432 goto bail;
433 }
Bob Ippolitod648f642008-07-19 21:59:50 +0000434 if (end < 0 || len <= end) {
435 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
436 goto bail;
437 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000438 while (1) {
439 /* Find the end of the string or the next escape */
440 Py_UNICODE c = 0;
441 PyObject *chunk = NULL;
442 for (next = end; next < len; next++) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000443 c = (unsigned char)buf[next];
Brett Cannon4b964f92008-05-05 20:21:38 +0000444 if (c == '"' || c == '\\') {
445 break;
446 }
447 else if (strict && c <= 0x1f) {
Bob Ippolitod648f642008-07-19 21:59:50 +0000448 raise_errmsg("Invalid control character at", pystr, next);
Brett Cannon4b964f92008-05-05 20:21:38 +0000449 goto bail;
450 }
451 }
452 if (!(c == '"' || c == '\\')) {
453 raise_errmsg("Unterminated string starting at", pystr, begin);
454 goto bail;
455 }
456 /* Pick up this chunk if it's not zero length */
457 if (next != end) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000458 PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000459 if (strchunk == NULL) {
460 goto bail;
461 }
Barry Warsawfa658272010-11-02 21:03:09 +0000462 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
463 Py_DECREF(strchunk);
464 if (chunk == NULL) {
465 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000466 }
467 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000468 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000469 goto bail;
470 }
471 Py_DECREF(chunk);
472 }
473 next++;
474 if (c == '"') {
475 end = next;
476 break;
477 }
478 if (next == len) {
479 raise_errmsg("Unterminated string starting at", pystr, begin);
480 goto bail;
481 }
482 c = buf[next];
483 if (c != 'u') {
484 /* Non-unicode backslash escapes */
485 end = next + 1;
486 switch (c) {
487 case '"': break;
488 case '\\': break;
489 case '/': break;
490 case 'b': c = '\b'; break;
491 case 'f': c = '\f'; break;
492 case 'n': c = '\n'; break;
493 case 'r': c = '\r'; break;
494 case 't': c = '\t'; break;
495 default: c = 0;
496 }
497 if (c == 0) {
498 raise_errmsg("Invalid \\escape", pystr, end - 2);
499 goto bail;
500 }
501 }
502 else {
503 c = 0;
504 next++;
505 end = next + 4;
506 if (end >= len) {
507 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
508 goto bail;
509 }
510 /* Decode 4 hex digits */
511 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000512 Py_UNICODE digit = buf[next];
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000513 c <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000514 switch (digit) {
515 case '0': case '1': case '2': case '3': case '4':
516 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000517 c |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000518 case 'a': case 'b': case 'c': case 'd': case 'e':
519 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000520 c |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000521 case 'A': case 'B': case 'C': case 'D': case 'E':
522 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000523 c |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000524 default:
525 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
526 goto bail;
527 }
528 }
529#ifdef Py_UNICODE_WIDE
530 /* Surrogate pair */
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200531 if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
532 buf[next++] == '\\' &&
533 buf[next++] == 'u') {
Brett Cannon4b964f92008-05-05 20:21:38 +0000534 Py_UNICODE c2 = 0;
Brett Cannon4b964f92008-05-05 20:21:38 +0000535 end += 6;
536 /* Decode 4 hex digits */
537 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000538 Py_UNICODE digit = buf[next];
Antoine Pitrou22ad2452010-10-09 15:28:59 +0000539 c2 <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000540 switch (digit) {
541 case '0': case '1': case '2': case '3': case '4':
542 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000543 c2 |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000544 case 'a': case 'b': case 'c': case 'd': case 'e':
545 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000546 c2 |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000547 case 'A': case 'B': case 'C': case 'D': case 'E':
548 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000549 c2 |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000550 default:
551 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
552 goto bail;
553 }
554 }
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200555 if ((c2 & 0xfc00) == 0xdc00)
556 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
557 else
558 end -= 6;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000559 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000560#endif
561 }
Barry Warsawfa658272010-11-02 21:03:09 +0000562 chunk = PyUnicode_FromUnicode(&c, 1);
563 if (chunk == NULL) {
564 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000565 }
566 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000567 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000568 goto bail;
569 }
570 Py_DECREF(chunk);
571 }
572
Ezio Melottidf8a8f72011-05-04 14:40:53 +0300573 rval = join_list_unicode(chunks);
Brett Cannon4b964f92008-05-05 20:21:38 +0000574 if (rval == NULL) {
575 goto bail;
576 }
Benjamin Peterson336680e2008-10-16 21:48:06 +0000577 Py_CLEAR(chunks);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000578 *next_end_ptr = end;
579 return rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000580bail:
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000581 *next_end_ptr = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000582 Py_XDECREF(chunks);
583 return NULL;
584}
585
586
587static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000588scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Brett Cannon4b964f92008-05-05 20:21:38 +0000589{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000590 /* Read the JSON string from PyUnicode pystr.
591 end is the index of the first character after the quote.
592 if strict is zero then literal control characters are allowed
593 *next_end_ptr is a return-by-reference index of the character
594 after the end quote
595
596 Return value is a new PyUnicode
597 */
Brett Cannon4b964f92008-05-05 20:21:38 +0000598 PyObject *rval;
599 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
600 Py_ssize_t begin = end - 1;
Brett Cannon8e9757e2010-05-03 23:43:49 +0000601 Py_ssize_t next;
Brett Cannon4b964f92008-05-05 20:21:38 +0000602 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
603 PyObject *chunks = PyList_New(0);
604 if (chunks == NULL) {
605 goto bail;
606 }
Bob Ippolitod648f642008-07-19 21:59:50 +0000607 if (end < 0 || len <= end) {
608 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
609 goto bail;
610 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000611 while (1) {
612 /* Find the end of the string or the next escape */
613 Py_UNICODE c = 0;
614 PyObject *chunk = NULL;
615 for (next = end; next < len; next++) {
616 c = buf[next];
617 if (c == '"' || c == '\\') {
618 break;
619 }
620 else if (strict && c <= 0x1f) {
Bob Ippolitod648f642008-07-19 21:59:50 +0000621 raise_errmsg("Invalid control character at", pystr, next);
Brett Cannon4b964f92008-05-05 20:21:38 +0000622 goto bail;
623 }
624 }
625 if (!(c == '"' || c == '\\')) {
626 raise_errmsg("Unterminated string starting at", pystr, begin);
627 goto bail;
628 }
629 /* Pick up this chunk if it's not zero length */
630 if (next != end) {
631 chunk = PyUnicode_FromUnicode(&buf[end], next - end);
632 if (chunk == NULL) {
633 goto bail;
634 }
635 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson87e6ad22008-10-16 21:27:54 +0000636 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000637 goto bail;
638 }
639 Py_DECREF(chunk);
640 }
641 next++;
642 if (c == '"') {
643 end = next;
644 break;
645 }
646 if (next == len) {
647 raise_errmsg("Unterminated string starting at", pystr, begin);
648 goto bail;
649 }
650 c = buf[next];
651 if (c != 'u') {
652 /* Non-unicode backslash escapes */
653 end = next + 1;
654 switch (c) {
655 case '"': break;
656 case '\\': break;
657 case '/': break;
658 case 'b': c = '\b'; break;
659 case 'f': c = '\f'; break;
660 case 'n': c = '\n'; break;
661 case 'r': c = '\r'; break;
662 case 't': c = '\t'; break;
663 default: c = 0;
664 }
665 if (c == 0) {
666 raise_errmsg("Invalid \\escape", pystr, end - 2);
667 goto bail;
668 }
669 }
670 else {
671 c = 0;
672 next++;
673 end = next + 4;
674 if (end >= len) {
675 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
676 goto bail;
677 }
678 /* Decode 4 hex digits */
679 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000680 Py_UNICODE digit = buf[next];
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000681 c <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000682 switch (digit) {
683 case '0': case '1': case '2': case '3': case '4':
684 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000685 c |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000686 case 'a': case 'b': case 'c': case 'd': case 'e':
687 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000688 c |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000689 case 'A': case 'B': case 'C': case 'D': case 'E':
690 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000691 c |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000692 default:
693 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
694 goto bail;
695 }
696 }
697#ifdef Py_UNICODE_WIDE
698 /* Surrogate pair */
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200699 if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
700 buf[next++] == '\\' && buf[next++] == 'u') {
Brett Cannon4b964f92008-05-05 20:21:38 +0000701 Py_UNICODE c2 = 0;
Brett Cannon4b964f92008-05-05 20:21:38 +0000702 end += 6;
703 /* Decode 4 hex digits */
704 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000705 Py_UNICODE digit = buf[next];
Antoine Pitrou22ad2452010-10-09 15:28:59 +0000706 c2 <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000707 switch (digit) {
708 case '0': case '1': case '2': case '3': case '4':
709 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000710 c2 |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000711 case 'a': case 'b': case 'c': case 'd': case 'e':
712 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000713 c2 |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000714 case 'A': case 'B': case 'C': case 'D': case 'E':
715 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000716 c2 |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000717 default:
718 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
719 goto bail;
720 }
721 }
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200722 if ((c2 & 0xfc00) == 0xdc00)
723 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
724 else
725 end -= 6;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000726 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000727#endif
728 }
729 chunk = PyUnicode_FromUnicode(&c, 1);
730 if (chunk == NULL) {
731 goto bail;
732 }
733 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000734 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000735 goto bail;
736 }
737 Py_DECREF(chunk);
738 }
739
740 rval = join_list_unicode(chunks);
741 if (rval == NULL) {
742 goto bail;
743 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000744 Py_DECREF(chunks);
745 *next_end_ptr = end;
746 return rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000747bail:
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000748 *next_end_ptr = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000749 Py_XDECREF(chunks);
750 return NULL;
751}
752
753PyDoc_STRVAR(pydoc_scanstring,
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000754 "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
755 "\n"
756 "Scan the string s for a JSON string. End is the index of the\n"
757 "character in s after the quote that started the JSON string.\n"
758 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
759 "on attempt to decode an invalid string. If strict is False then literal\n"
760 "control characters are allowed in the string.\n"
761 "\n"
762 "Returns a tuple of the decoded string and the index of the character in s\n"
763 "after the end quote."
764);
Brett Cannon4b964f92008-05-05 20:21:38 +0000765
766static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000767py_scanstring(PyObject* self UNUSED, PyObject *args)
Brett Cannon4b964f92008-05-05 20:21:38 +0000768{
769 PyObject *pystr;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000770 PyObject *rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000771 Py_ssize_t end;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000772 Py_ssize_t next_end = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000773 char *encoding = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000774 int strict = 1;
775 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000776 return NULL;
777 }
778 if (encoding == NULL) {
779 encoding = DEFAULT_ENCODING;
780 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000781 if (PyString_Check(pystr)) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000782 rval = scanstring_str(pystr, end, encoding, strict, &next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000783 }
784 else if (PyUnicode_Check(pystr)) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000785 rval = scanstring_unicode(pystr, end, strict, &next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000786 }
787 else {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000788 PyErr_Format(PyExc_TypeError,
789 "first argument must be a string, not %.80s",
Brett Cannon4b964f92008-05-05 20:21:38 +0000790 Py_TYPE(pystr)->tp_name);
791 return NULL;
792 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000793 return _build_rval_index_tuple(rval, next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000794}
795
796PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000797 "encode_basestring_ascii(basestring) -> str\n"
798 "\n"
799 "Return an ASCII-only JSON representation of a Python string"
800);
Brett Cannon4b964f92008-05-05 20:21:38 +0000801
802static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000803py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Brett Cannon4b964f92008-05-05 20:21:38 +0000804{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000805 /* Return an ASCII-only JSON representation of a Python string */
Brett Cannon4b964f92008-05-05 20:21:38 +0000806 /* METH_O */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000807 if (PyString_Check(pystr)) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000808 return ascii_escape_str(pystr);
809 }
810 else if (PyUnicode_Check(pystr)) {
811 return ascii_escape_unicode(pystr);
812 }
813 else {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000814 PyErr_Format(PyExc_TypeError,
815 "first argument must be a string, not %.80s",
Brett Cannon4b964f92008-05-05 20:21:38 +0000816 Py_TYPE(pystr)->tp_name);
817 return NULL;
818 }
819}
820
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000821static void
822scanner_dealloc(PyObject *self)
823{
824 /* Deallocate scanner object */
825 scanner_clear(self);
826 Py_TYPE(self)->tp_free(self);
827}
828
829static int
830scanner_traverse(PyObject *self, visitproc visit, void *arg)
831{
832 PyScannerObject *s;
833 assert(PyScanner_Check(self));
834 s = (PyScannerObject *)self;
835 Py_VISIT(s->encoding);
836 Py_VISIT(s->strict);
837 Py_VISIT(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000838 Py_VISIT(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000839 Py_VISIT(s->parse_float);
840 Py_VISIT(s->parse_int);
841 Py_VISIT(s->parse_constant);
842 return 0;
843}
844
845static int
846scanner_clear(PyObject *self)
847{
848 PyScannerObject *s;
849 assert(PyScanner_Check(self));
850 s = (PyScannerObject *)self;
851 Py_CLEAR(s->encoding);
852 Py_CLEAR(s->strict);
853 Py_CLEAR(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000854 Py_CLEAR(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000855 Py_CLEAR(s->parse_float);
856 Py_CLEAR(s->parse_int);
857 Py_CLEAR(s->parse_constant);
858 return 0;
859}
860
861static PyObject *
862_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
863 /* Read a JSON object from PyString pystr.
864 idx is the index of the first character after the opening curly brace.
865 *next_idx_ptr is a return-by-reference index to the first character after
866 the closing curly brace.
867
868 Returns a new PyObject (usually a dict, but object_hook can change that)
869 */
870 char *str = PyString_AS_STRING(pystr);
871 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000872 PyObject *rval;
873 PyObject *pairs;
874 PyObject *item;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000875 PyObject *key = NULL;
876 PyObject *val = NULL;
877 char *encoding = PyString_AS_STRING(s->encoding);
878 int strict = PyObject_IsTrue(s->strict);
879 Py_ssize_t next_idx;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000880
Serhiy Storchaka5127ed72015-05-30 17:45:12 +0300881 if (strict < 0)
882 return NULL;
883
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000884 pairs = PyList_New(0);
885 if (pairs == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000886 return NULL;
887
888 /* skip whitespace after { */
889 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
890
891 /* only loop if the object is non-empty */
892 if (idx <= end_idx && str[idx] != '}') {
893 while (idx <= end_idx) {
894 /* read key */
895 if (str[idx] != '"') {
896 raise_errmsg("Expecting property name", pystr, idx);
897 goto bail;
898 }
899 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
900 if (key == NULL)
901 goto bail;
902 idx = next_idx;
903
904 /* skip whitespace between key and : delimiter, read :, skip whitespace */
905 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
906 if (idx > end_idx || str[idx] != ':') {
907 raise_errmsg("Expecting : delimiter", pystr, idx);
908 goto bail;
909 }
910 idx++;
911 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
912
913 /* read any JSON data type */
914 val = scan_once_str(s, pystr, idx, &next_idx);
915 if (val == NULL)
916 goto bail;
917
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000918 item = PyTuple_Pack(2, key, val);
919 if (item == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000920 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000921 Py_CLEAR(key);
922 Py_CLEAR(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000923 if (PyList_Append(pairs, item) == -1) {
924 Py_DECREF(item);
925 goto bail;
926 }
927 Py_DECREF(item);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000928 idx = next_idx;
929
930 /* skip whitespace before } or , */
931 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
932
933 /* bail if the object is closed or we didn't get the , delimiter */
934 if (idx > end_idx) break;
935 if (str[idx] == '}') {
936 break;
937 }
938 else if (str[idx] != ',') {
939 raise_errmsg("Expecting , delimiter", pystr, idx);
940 goto bail;
941 }
942 idx++;
943
944 /* skip whitespace after , delimiter */
945 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
946 }
947 }
948 /* verify that idx < end_idx, str[idx] should be '}' */
949 if (idx > end_idx || str[idx] != '}') {
950 raise_errmsg("Expecting object", pystr, end_idx);
951 goto bail;
952 }
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000953
954 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
955 if (s->pairs_hook != Py_None) {
956 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
957 if (val == NULL)
958 goto bail;
959 Py_DECREF(pairs);
960 *next_idx_ptr = idx + 1;
961 return val;
962 }
963
964 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
965 pairs, NULL);
966 if (rval == NULL)
967 goto bail;
968 Py_CLEAR(pairs);
969
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000970 /* if object_hook is not None: rval = object_hook(rval) */
971 if (s->object_hook != Py_None) {
972 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
973 if (val == NULL)
974 goto bail;
975 Py_DECREF(rval);
976 rval = val;
977 val = NULL;
978 }
979 *next_idx_ptr = idx + 1;
980 return rval;
981bail:
982 Py_XDECREF(key);
983 Py_XDECREF(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000984 Py_XDECREF(pairs);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000985 return NULL;
986}
987
988static PyObject *
989_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
990 /* Read a JSON object from PyUnicode pystr.
991 idx is the index of the first character after the opening curly brace.
992 *next_idx_ptr is a return-by-reference index to the first character after
993 the closing curly brace.
994
995 Returns a new PyObject (usually a dict, but object_hook can change that)
996 */
997 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
998 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000999 PyObject *rval;
1000 PyObject *pairs;
1001 PyObject *item;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001002 PyObject *key = NULL;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001003 PyObject *val = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001004 int strict = PyObject_IsTrue(s->strict);
1005 Py_ssize_t next_idx;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001006
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001007 if (strict < 0)
1008 return NULL;
1009
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001010 pairs = PyList_New(0);
1011 if (pairs == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001012 return NULL;
1013
1014 /* skip whitespace after { */
1015 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1016
1017 /* only loop if the object is non-empty */
1018 if (idx <= end_idx && str[idx] != '}') {
1019 while (idx <= end_idx) {
1020 /* read key */
1021 if (str[idx] != '"') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001022 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001023 goto bail;
1024 }
1025 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1026 if (key == NULL)
1027 goto bail;
1028 idx = next_idx;
1029
1030 /* skip whitespace between key and : delimiter, read :, skip whitespace */
1031 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1032 if (idx > end_idx || str[idx] != ':') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001033 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001034 goto bail;
1035 }
1036 idx++;
1037 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1038
1039 /* read any JSON term */
1040 val = scan_once_unicode(s, pystr, idx, &next_idx);
1041 if (val == NULL)
1042 goto bail;
1043
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001044 item = PyTuple_Pack(2, key, val);
1045 if (item == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001046 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001047 Py_CLEAR(key);
1048 Py_CLEAR(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001049 if (PyList_Append(pairs, item) == -1) {
1050 Py_DECREF(item);
1051 goto bail;
1052 }
1053 Py_DECREF(item);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001054 idx = next_idx;
1055
1056 /* skip whitespace before } or , */
1057 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1058
1059 /* bail if the object is closed or we didn't get the , delimiter */
1060 if (idx > end_idx) break;
1061 if (str[idx] == '}') {
1062 break;
1063 }
1064 else if (str[idx] != ',') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001065 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001066 goto bail;
1067 }
1068 idx++;
1069
1070 /* skip whitespace after , delimiter */
1071 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1072 }
1073 }
1074
1075 /* verify that idx < end_idx, str[idx] should be '}' */
1076 if (idx > end_idx || str[idx] != '}') {
1077 raise_errmsg("Expecting object", pystr, end_idx);
1078 goto bail;
1079 }
1080
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001081 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1082 if (s->pairs_hook != Py_None) {
1083 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1084 if (val == NULL)
1085 goto bail;
1086 Py_DECREF(pairs);
1087 *next_idx_ptr = idx + 1;
1088 return val;
1089 }
1090
1091 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
1092 pairs, NULL);
1093 if (rval == NULL)
1094 goto bail;
1095 Py_CLEAR(pairs);
1096
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001097 /* if object_hook is not None: rval = object_hook(rval) */
1098 if (s->object_hook != Py_None) {
1099 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1100 if (val == NULL)
1101 goto bail;
1102 Py_DECREF(rval);
1103 rval = val;
1104 val = NULL;
1105 }
1106 *next_idx_ptr = idx + 1;
1107 return rval;
1108bail:
1109 Py_XDECREF(key);
1110 Py_XDECREF(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001111 Py_XDECREF(pairs);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001112 return NULL;
1113}
1114
1115static PyObject *
1116_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1117 /* Read a JSON array from PyString pystr.
1118 idx is the index of the first character after the opening brace.
1119 *next_idx_ptr is a return-by-reference index to the first character after
1120 the closing brace.
1121
1122 Returns a new PyList
1123 */
1124 char *str = PyString_AS_STRING(pystr);
1125 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1126 PyObject *val = NULL;
1127 PyObject *rval = PyList_New(0);
1128 Py_ssize_t next_idx;
1129 if (rval == NULL)
1130 return NULL;
1131
1132 /* skip whitespace after [ */
1133 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1134
1135 /* only loop if the array is non-empty */
1136 if (idx <= end_idx && str[idx] != ']') {
1137 while (idx <= end_idx) {
1138
1139 /* read any JSON term and de-tuplefy the (rval, idx) */
1140 val = scan_once_str(s, pystr, idx, &next_idx);
1141 if (val == NULL)
1142 goto bail;
1143
1144 if (PyList_Append(rval, val) == -1)
1145 goto bail;
1146
1147 Py_CLEAR(val);
1148 idx = next_idx;
1149
1150 /* skip whitespace between term and , */
1151 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1152
1153 /* bail if the array is closed or we didn't get the , delimiter */
1154 if (idx > end_idx) break;
1155 if (str[idx] == ']') {
1156 break;
1157 }
1158 else if (str[idx] != ',') {
1159 raise_errmsg("Expecting , delimiter", pystr, idx);
1160 goto bail;
1161 }
1162 idx++;
1163
1164 /* skip whitespace after , */
1165 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1166 }
1167 }
1168
1169 /* verify that idx < end_idx, str[idx] should be ']' */
1170 if (idx > end_idx || str[idx] != ']') {
1171 raise_errmsg("Expecting object", pystr, end_idx);
1172 goto bail;
1173 }
1174 *next_idx_ptr = idx + 1;
1175 return rval;
1176bail:
1177 Py_XDECREF(val);
1178 Py_DECREF(rval);
1179 return NULL;
1180}
1181
1182static PyObject *
1183_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1184 /* Read a JSON array from PyString pystr.
1185 idx is the index of the first character after the opening brace.
1186 *next_idx_ptr is a return-by-reference index to the first character after
1187 the closing brace.
1188
1189 Returns a new PyList
1190 */
1191 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1192 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1193 PyObject *val = NULL;
1194 PyObject *rval = PyList_New(0);
1195 Py_ssize_t next_idx;
1196 if (rval == NULL)
1197 return NULL;
1198
1199 /* skip whitespace after [ */
1200 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1201
1202 /* only loop if the array is non-empty */
1203 if (idx <= end_idx && str[idx] != ']') {
1204 while (idx <= end_idx) {
1205
1206 /* read any JSON term */
1207 val = scan_once_unicode(s, pystr, idx, &next_idx);
1208 if (val == NULL)
1209 goto bail;
1210
1211 if (PyList_Append(rval, val) == -1)
1212 goto bail;
1213
1214 Py_CLEAR(val);
1215 idx = next_idx;
1216
1217 /* skip whitespace between term and , */
1218 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1219
1220 /* bail if the array is closed or we didn't get the , delimiter */
1221 if (idx > end_idx) break;
1222 if (str[idx] == ']') {
1223 break;
1224 }
1225 else if (str[idx] != ',') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001226 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001227 goto bail;
1228 }
1229 idx++;
1230
1231 /* skip whitespace after , */
1232 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1233 }
1234 }
1235
1236 /* verify that idx < end_idx, str[idx] should be ']' */
1237 if (idx > end_idx || str[idx] != ']') {
1238 raise_errmsg("Expecting object", pystr, end_idx);
1239 goto bail;
1240 }
1241 *next_idx_ptr = idx + 1;
1242 return rval;
1243bail:
1244 Py_XDECREF(val);
1245 Py_DECREF(rval);
1246 return NULL;
1247}
1248
1249static PyObject *
1250_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1251 /* Read a JSON constant from PyString pystr.
1252 constant is the constant string that was found
1253 ("NaN", "Infinity", "-Infinity").
1254 idx is the index of the first character of the constant
1255 *next_idx_ptr is a return-by-reference index to the first character after
1256 the constant.
1257
1258 Returns the result of parse_constant
1259 */
1260 PyObject *cstr;
1261 PyObject *rval;
1262 /* constant is "NaN", "Infinity", or "-Infinity" */
1263 cstr = PyString_InternFromString(constant);
1264 if (cstr == NULL)
1265 return NULL;
1266
1267 /* rval = parse_constant(constant) */
1268 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1269 idx += PyString_GET_SIZE(cstr);
1270 Py_DECREF(cstr);
1271 *next_idx_ptr = idx;
1272 return rval;
1273}
1274
1275static PyObject *
1276_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1277 /* Read a JSON number from PyString pystr.
1278 idx is the index of the first character of the number
1279 *next_idx_ptr is a return-by-reference index to the first character after
1280 the number.
1281
1282 Returns a new PyObject representation of that number:
1283 PyInt, PyLong, or PyFloat.
1284 May return other types if parse_int or parse_float are set
1285 */
1286 char *str = PyString_AS_STRING(pystr);
1287 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1288 Py_ssize_t idx = start;
1289 int is_float = 0;
1290 PyObject *rval;
1291 PyObject *numstr;
1292
1293 /* read a sign if it's there, make sure it's not the end of the string */
1294 if (str[idx] == '-') {
1295 idx++;
1296 if (idx > end_idx) {
1297 PyErr_SetNone(PyExc_StopIteration);
1298 return NULL;
1299 }
1300 }
1301
1302 /* read as many integer digits as we find as long as it doesn't start with 0 */
1303 if (str[idx] >= '1' && str[idx] <= '9') {
1304 idx++;
1305 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1306 }
1307 /* if it starts with 0 we only expect one integer digit */
1308 else if (str[idx] == '0') {
1309 idx++;
1310 }
1311 /* no integer digits, error */
1312 else {
1313 PyErr_SetNone(PyExc_StopIteration);
1314 return NULL;
1315 }
1316
1317 /* if the next char is '.' followed by a digit then read all float digits */
1318 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1319 is_float = 1;
1320 idx += 2;
1321 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1322 }
1323
1324 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1325 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1326
1327 /* save the index of the 'e' or 'E' just in case we need to backtrack */
1328 Py_ssize_t e_start = idx;
1329 idx++;
1330
1331 /* read an exponent sign if present */
1332 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1333
1334 /* read all digits */
1335 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1336
1337 /* if we got a digit, then parse as float. if not, backtrack */
1338 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1339 is_float = 1;
1340 }
1341 else {
1342 idx = e_start;
1343 }
1344 }
1345
1346 /* copy the section we determined to be a number */
1347 numstr = PyString_FromStringAndSize(&str[start], idx - start);
1348 if (numstr == NULL)
1349 return NULL;
1350 if (is_float) {
1351 /* parse as a float using a fast path if available, otherwise call user defined method */
1352 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1353 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1354 }
1355 else {
Eric Smith129c97d2009-10-28 08:44:37 +00001356 double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1357 NULL, NULL);
1358 if (d == -1.0 && PyErr_Occurred())
1359 return NULL;
1360 rval = PyFloat_FromDouble(d);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001361 }
1362 }
1363 else {
1364 /* parse as an int using a fast path if available, otherwise call user defined method */
1365 if (s->parse_int != (PyObject *)&PyInt_Type) {
1366 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1367 }
1368 else {
1369 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1370 }
1371 }
1372 Py_DECREF(numstr);
1373 *next_idx_ptr = idx;
1374 return rval;
1375}
1376
1377static PyObject *
1378_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1379 /* Read a JSON number from PyUnicode pystr.
1380 idx is the index of the first character of the number
1381 *next_idx_ptr is a return-by-reference index to the first character after
1382 the number.
1383
1384 Returns a new PyObject representation of that number:
1385 PyInt, PyLong, or PyFloat.
1386 May return other types if parse_int or parse_float are set
1387 */
1388 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1389 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1390 Py_ssize_t idx = start;
1391 int is_float = 0;
1392 PyObject *rval;
1393 PyObject *numstr;
1394
1395 /* read a sign if it's there, make sure it's not the end of the string */
1396 if (str[idx] == '-') {
1397 idx++;
1398 if (idx > end_idx) {
1399 PyErr_SetNone(PyExc_StopIteration);
1400 return NULL;
1401 }
1402 }
1403
1404 /* read as many integer digits as we find as long as it doesn't start with 0 */
1405 if (str[idx] >= '1' && str[idx] <= '9') {
1406 idx++;
1407 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1408 }
1409 /* if it starts with 0 we only expect one integer digit */
1410 else if (str[idx] == '0') {
1411 idx++;
1412 }
1413 /* no integer digits, error */
1414 else {
1415 PyErr_SetNone(PyExc_StopIteration);
1416 return NULL;
1417 }
1418
1419 /* if the next char is '.' followed by a digit then read all float digits */
1420 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1421 is_float = 1;
1422 idx += 2;
Bob Ippolito76a982a2009-03-29 22:33:58 +00001423 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001424 }
1425
1426 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1427 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1428 Py_ssize_t e_start = idx;
1429 idx++;
1430
1431 /* read an exponent sign if present */
1432 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1433
1434 /* read all digits */
1435 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1436
1437 /* if we got a digit, then parse as float. if not, backtrack */
1438 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1439 is_float = 1;
1440 }
1441 else {
1442 idx = e_start;
1443 }
1444 }
1445
1446 /* copy the section we determined to be a number */
1447 numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1448 if (numstr == NULL)
1449 return NULL;
1450 if (is_float) {
1451 /* parse as a float using a fast path if available, otherwise call user defined method */
1452 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1453 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1454 }
1455 else {
1456 rval = PyFloat_FromString(numstr, NULL);
1457 }
1458 }
1459 else {
1460 /* no fast path for unicode -> int, just call */
1461 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1462 }
1463 Py_DECREF(numstr);
1464 *next_idx_ptr = idx;
1465 return rval;
1466}
1467
1468static PyObject *
1469scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1470{
1471 /* Read one JSON term (of any kind) from PyString pystr.
1472 idx is the index of the first character of the term
1473 *next_idx_ptr is a return-by-reference index to the first character after
1474 the number.
1475
1476 Returns a new PyObject representation of the term.
1477 */
Ezio Melotticec46492011-05-07 17:40:23 +03001478 PyObject *res;
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001479 int strict;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001480 char *str = PyString_AS_STRING(pystr);
1481 Py_ssize_t length = PyString_GET_SIZE(pystr);
Benjamin Peterson3e5d87b2014-04-14 11:43:09 -04001482 if (idx < 0) {
1483 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1484 return NULL;
1485 }
1486 if (idx >= length) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001487 PyErr_SetNone(PyExc_StopIteration);
1488 return NULL;
1489 }
1490 switch (str[idx]) {
1491 case '"':
1492 /* string */
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001493 strict = PyObject_IsTrue(s->strict);
1494 if (strict < 0)
1495 return NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001496 return scanstring_str(pystr, idx + 1,
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001497 PyString_AS_STRING(s->encoding), strict, next_idx_ptr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001498 case '{':
1499 /* object */
Ezio Melotticec46492011-05-07 17:40:23 +03001500 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1501 "from a byte string"))
1502 return NULL;
1503 res = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1504 Py_LeaveRecursiveCall();
1505 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001506 case '[':
1507 /* array */
Ezio Melotticec46492011-05-07 17:40:23 +03001508 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1509 "from a byte string"))
1510 return NULL;
1511 res = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1512 Py_LeaveRecursiveCall();
1513 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001514 case 'n':
1515 /* null */
1516 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1517 Py_INCREF(Py_None);
1518 *next_idx_ptr = idx + 4;
1519 return Py_None;
1520 }
1521 break;
1522 case 't':
1523 /* true */
1524 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1525 Py_INCREF(Py_True);
1526 *next_idx_ptr = idx + 4;
1527 return Py_True;
1528 }
1529 break;
1530 case 'f':
1531 /* false */
1532 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1533 Py_INCREF(Py_False);
1534 *next_idx_ptr = idx + 5;
1535 return Py_False;
1536 }
1537 break;
1538 case 'N':
1539 /* NaN */
1540 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1541 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1542 }
1543 break;
1544 case 'I':
1545 /* Infinity */
1546 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1547 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1548 }
1549 break;
1550 case '-':
1551 /* -Infinity */
1552 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1553 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1554 }
1555 break;
1556 }
1557 /* Didn't find a string, object, array, or named constant. Look for a number. */
1558 return _match_number_str(s, pystr, idx, next_idx_ptr);
1559}
1560
1561static PyObject *
1562scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1563{
1564 /* Read one JSON term (of any kind) from PyUnicode pystr.
1565 idx is the index of the first character of the term
1566 *next_idx_ptr is a return-by-reference index to the first character after
1567 the number.
1568
1569 Returns a new PyObject representation of the term.
1570 */
Ezio Melotticec46492011-05-07 17:40:23 +03001571 PyObject *res;
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001572 int strict;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001573 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1574 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
Benjamin Peterson3e5d87b2014-04-14 11:43:09 -04001575 if (idx < 0) {
1576 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1577 return NULL;
1578 }
1579 if (idx >= length) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001580 PyErr_SetNone(PyExc_StopIteration);
1581 return NULL;
1582 }
1583 switch (str[idx]) {
1584 case '"':
1585 /* string */
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001586 strict = PyObject_IsTrue(s->strict);
1587 if (strict < 0)
1588 return NULL;
1589 return scanstring_unicode(pystr, idx + 1, strict, next_idx_ptr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001590 case '{':
1591 /* object */
Ezio Melotticec46492011-05-07 17:40:23 +03001592 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1593 "from a unicode string"))
1594 return NULL;
1595 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1596 Py_LeaveRecursiveCall();
1597 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001598 case '[':
1599 /* array */
Ezio Melotticec46492011-05-07 17:40:23 +03001600 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1601 "from a unicode string"))
1602 return NULL;
1603 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1604 Py_LeaveRecursiveCall();
1605 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001606 case 'n':
1607 /* null */
1608 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1609 Py_INCREF(Py_None);
1610 *next_idx_ptr = idx + 4;
1611 return Py_None;
1612 }
1613 break;
1614 case 't':
1615 /* true */
1616 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1617 Py_INCREF(Py_True);
1618 *next_idx_ptr = idx + 4;
1619 return Py_True;
1620 }
1621 break;
1622 case 'f':
1623 /* false */
1624 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1625 Py_INCREF(Py_False);
1626 *next_idx_ptr = idx + 5;
1627 return Py_False;
1628 }
1629 break;
1630 case 'N':
1631 /* NaN */
1632 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1633 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1634 }
1635 break;
1636 case 'I':
1637 /* Infinity */
1638 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1639 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1640 }
1641 break;
1642 case '-':
1643 /* -Infinity */
1644 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1645 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1646 }
1647 break;
1648 }
1649 /* Didn't find a string, object, array, or named constant. Look for a number. */
1650 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1651}
1652
1653static PyObject *
1654scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1655{
1656 /* Python callable interface to scan_once_{str,unicode} */
1657 PyObject *pystr;
1658 PyObject *rval;
1659 Py_ssize_t idx;
1660 Py_ssize_t next_idx = -1;
1661 static char *kwlist[] = {"string", "idx", NULL};
1662 PyScannerObject *s;
1663 assert(PyScanner_Check(self));
1664 s = (PyScannerObject *)self;
1665 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1666 return NULL;
1667
1668 if (PyString_Check(pystr)) {
1669 rval = scan_once_str(s, pystr, idx, &next_idx);
1670 }
1671 else if (PyUnicode_Check(pystr)) {
1672 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1673 }
1674 else {
1675 PyErr_Format(PyExc_TypeError,
1676 "first argument must be a string, not %.80s",
1677 Py_TYPE(pystr)->tp_name);
1678 return NULL;
1679 }
1680 return _build_rval_index_tuple(rval, next_idx);
1681}
1682
1683static PyObject *
1684scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1685{
1686 PyScannerObject *s;
1687 s = (PyScannerObject *)type->tp_alloc(type, 0);
1688 if (s != NULL) {
1689 s->encoding = NULL;
1690 s->strict = NULL;
1691 s->object_hook = NULL;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001692 s->pairs_hook = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001693 s->parse_float = NULL;
1694 s->parse_int = NULL;
1695 s->parse_constant = NULL;
1696 }
1697 return (PyObject *)s;
1698}
1699
1700static int
1701scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1702{
1703 /* Initialize Scanner object */
1704 PyObject *ctx;
1705 static char *kwlist[] = {"context", NULL};
1706 PyScannerObject *s;
1707
1708 assert(PyScanner_Check(self));
1709 s = (PyScannerObject *)self;
1710
1711 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1712 return -1;
1713
1714 /* PyString_AS_STRING is used on encoding */
1715 s->encoding = PyObject_GetAttrString(ctx, "encoding");
Antoine Pitrou187177f2009-12-08 15:40:51 +00001716 if (s->encoding == NULL)
1717 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001718 if (s->encoding == Py_None) {
1719 Py_DECREF(Py_None);
1720 s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1721 }
1722 else if (PyUnicode_Check(s->encoding)) {
1723 PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001724 Py_SETREF(s->encoding, tmp);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001725 }
Amaury Forgeot d'Arcdee76e62012-01-13 22:53:25 +01001726 if (s->encoding == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001727 goto bail;
Amaury Forgeot d'Arcdee76e62012-01-13 22:53:25 +01001728 if (!PyString_Check(s->encoding)) {
1729 PyErr_Format(PyExc_TypeError,
1730 "encoding must be a string, not %.80s",
1731 Py_TYPE(s->encoding)->tp_name);
1732 goto bail;
1733 }
1734
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001735
1736 /* All of these will fail "gracefully" so we don't need to verify them */
1737 s->strict = PyObject_GetAttrString(ctx, "strict");
1738 if (s->strict == NULL)
1739 goto bail;
1740 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1741 if (s->object_hook == NULL)
1742 goto bail;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001743 s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
Georg Brandld823bdc2011-01-02 14:20:16 +00001744 if (s->pairs_hook == NULL)
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001745 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001746 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1747 if (s->parse_float == NULL)
1748 goto bail;
1749 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1750 if (s->parse_int == NULL)
1751 goto bail;
1752 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1753 if (s->parse_constant == NULL)
1754 goto bail;
1755
1756 return 0;
1757
1758bail:
1759 Py_CLEAR(s->encoding);
1760 Py_CLEAR(s->strict);
1761 Py_CLEAR(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001762 Py_CLEAR(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001763 Py_CLEAR(s->parse_float);
1764 Py_CLEAR(s->parse_int);
1765 Py_CLEAR(s->parse_constant);
1766 return -1;
1767}
1768
1769PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1770
1771static
1772PyTypeObject PyScannerType = {
1773 PyObject_HEAD_INIT(NULL)
1774 0, /* tp_internal */
1775 "_json.Scanner", /* tp_name */
1776 sizeof(PyScannerObject), /* tp_basicsize */
1777 0, /* tp_itemsize */
1778 scanner_dealloc, /* tp_dealloc */
1779 0, /* tp_print */
1780 0, /* tp_getattr */
1781 0, /* tp_setattr */
1782 0, /* tp_compare */
1783 0, /* tp_repr */
1784 0, /* tp_as_number */
1785 0, /* tp_as_sequence */
1786 0, /* tp_as_mapping */
1787 0, /* tp_hash */
1788 scanner_call, /* tp_call */
1789 0, /* tp_str */
1790 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1791 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1792 0, /* tp_as_buffer */
1793 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1794 scanner_doc, /* tp_doc */
1795 scanner_traverse, /* tp_traverse */
1796 scanner_clear, /* tp_clear */
1797 0, /* tp_richcompare */
1798 0, /* tp_weaklistoffset */
1799 0, /* tp_iter */
1800 0, /* tp_iternext */
1801 0, /* tp_methods */
1802 scanner_members, /* tp_members */
1803 0, /* tp_getset */
1804 0, /* tp_base */
1805 0, /* tp_dict */
1806 0, /* tp_descr_get */
1807 0, /* tp_descr_set */
1808 0, /* tp_dictoffset */
1809 scanner_init, /* tp_init */
1810 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1811 scanner_new, /* tp_new */
1812 0,/* PyObject_GC_Del, */ /* tp_free */
1813};
1814
1815static PyObject *
1816encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1817{
1818 PyEncoderObject *s;
1819 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1820 if (s != NULL) {
1821 s->markers = NULL;
1822 s->defaultfn = NULL;
1823 s->encoder = NULL;
1824 s->indent = NULL;
1825 s->key_separator = NULL;
1826 s->item_separator = NULL;
1827 s->sort_keys = NULL;
1828 s->skipkeys = NULL;
1829 }
1830 return (PyObject *)s;
1831}
1832
1833static int
1834encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1835{
1836 /* initialize Encoder object */
1837 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1838
1839 PyEncoderObject *s;
Antoine Pitrou187177f2009-12-08 15:40:51 +00001840 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001841 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan_obj;
1842 int allow_nan;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001843
1844 assert(PyEncoder_Check(self));
1845 s = (PyEncoderObject *)self;
1846
1847 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
Antoine Pitrou187177f2009-12-08 15:40:51 +00001848 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001849 &sort_keys, &skipkeys, &allow_nan_obj))
1850 return -1;
1851
1852 allow_nan = PyObject_IsTrue(allow_nan_obj);
1853 if (allow_nan < 0)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001854 return -1;
1855
Serhiy Storchaka76a64ca2015-07-26 09:07:53 +03001856 if (markers != Py_None && !PyDict_Check(markers)) {
1857 PyErr_Format(PyExc_TypeError,
1858 "make_encoder() argument 1 must be dict or None, "
1859 "not %.200s", Py_TYPE(markers)->tp_name);
1860 return -1;
1861 }
1862
Antoine Pitrou187177f2009-12-08 15:40:51 +00001863 s->markers = markers;
1864 s->defaultfn = defaultfn;
1865 s->encoder = encoder;
1866 s->indent = indent;
1867 s->key_separator = key_separator;
1868 s->item_separator = item_separator;
1869 s->sort_keys = sort_keys;
1870 s->skipkeys = skipkeys;
1871 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001872 s->allow_nan = allow_nan;
Antoine Pitrou187177f2009-12-08 15:40:51 +00001873
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001874 Py_INCREF(s->markers);
1875 Py_INCREF(s->defaultfn);
1876 Py_INCREF(s->encoder);
1877 Py_INCREF(s->indent);
1878 Py_INCREF(s->key_separator);
1879 Py_INCREF(s->item_separator);
1880 Py_INCREF(s->sort_keys);
1881 Py_INCREF(s->skipkeys);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001882 return 0;
1883}
1884
1885static PyObject *
1886encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1887{
1888 /* Python callable interface to encode_listencode_obj */
1889 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1890 PyObject *obj;
1891 PyObject *rval;
1892 Py_ssize_t indent_level;
1893 PyEncoderObject *s;
1894 assert(PyEncoder_Check(self));
1895 s = (PyEncoderObject *)self;
1896 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1897 &obj, _convertPyInt_AsSsize_t, &indent_level))
1898 return NULL;
1899 rval = PyList_New(0);
1900 if (rval == NULL)
1901 return NULL;
1902 if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1903 Py_DECREF(rval);
1904 return NULL;
1905 }
1906 return rval;
1907}
1908
1909static PyObject *
1910_encoded_const(PyObject *obj)
1911{
1912 /* Return the JSON string representation of None, True, False */
1913 if (obj == Py_None) {
1914 static PyObject *s_null = NULL;
1915 if (s_null == NULL) {
1916 s_null = PyString_InternFromString("null");
1917 }
1918 Py_INCREF(s_null);
1919 return s_null;
1920 }
1921 else if (obj == Py_True) {
1922 static PyObject *s_true = NULL;
1923 if (s_true == NULL) {
1924 s_true = PyString_InternFromString("true");
1925 }
1926 Py_INCREF(s_true);
1927 return s_true;
1928 }
1929 else if (obj == Py_False) {
1930 static PyObject *s_false = NULL;
1931 if (s_false == NULL) {
1932 s_false = PyString_InternFromString("false");
1933 }
1934 Py_INCREF(s_false);
1935 return s_false;
1936 }
1937 else {
1938 PyErr_SetString(PyExc_ValueError, "not a const");
1939 return NULL;
1940 }
1941}
1942
1943static PyObject *
1944encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1945{
1946 /* Return the JSON representation of a PyFloat */
1947 double i = PyFloat_AS_DOUBLE(obj);
1948 if (!Py_IS_FINITE(i)) {
1949 if (!s->allow_nan) {
1950 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1951 return NULL;
1952 }
1953 if (i > 0) {
1954 return PyString_FromString("Infinity");
1955 }
1956 else if (i < 0) {
1957 return PyString_FromString("-Infinity");
1958 }
1959 else {
1960 return PyString_FromString("NaN");
1961 }
1962 }
Mark Dickinsone6239a32016-09-03 17:45:00 +01001963 /* Make sure to use the base float class repr method */
1964 return PyFloat_Type.tp_repr(obj);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001965}
1966
1967static PyObject *
1968encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1969{
1970 /* Return the JSON representation of a string */
1971 if (s->fast_encode)
1972 return py_encode_basestring_ascii(NULL, obj);
1973 else
1974 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1975}
1976
1977static int
1978_steal_list_append(PyObject *lst, PyObject *stolen)
1979{
1980 /* Append stolen and then decrement its reference count */
1981 int rval = PyList_Append(lst, stolen);
1982 Py_DECREF(stolen);
1983 return rval;
1984}
1985
1986static int
1987encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
1988{
1989 /* Encode Python object obj to a JSON term, rval is a PyList */
1990 PyObject *newobj;
1991 int rv;
1992
1993 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1994 PyObject *cstr = _encoded_const(obj);
1995 if (cstr == NULL)
1996 return -1;
1997 return _steal_list_append(rval, cstr);
1998 }
1999 else if (PyString_Check(obj) || PyUnicode_Check(obj))
2000 {
2001 PyObject *encoded = encoder_encode_string(s, obj);
2002 if (encoded == NULL)
2003 return -1;
2004 return _steal_list_append(rval, encoded);
2005 }
2006 else if (PyInt_Check(obj) || PyLong_Check(obj)) {
2007 PyObject *encoded = PyObject_Str(obj);
2008 if (encoded == NULL)
2009 return -1;
2010 return _steal_list_append(rval, encoded);
2011 }
2012 else if (PyFloat_Check(obj)) {
2013 PyObject *encoded = encoder_encode_float(s, obj);
2014 if (encoded == NULL)
2015 return -1;
2016 return _steal_list_append(rval, encoded);
2017 }
2018 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002019 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2020 return -1;
2021 rv = encoder_listencode_list(s, rval, obj, indent_level);
2022 Py_LeaveRecursiveCall();
2023 return rv;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002024 }
2025 else if (PyDict_Check(obj)) {
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002026 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2027 return -1;
2028 rv = encoder_listencode_dict(s, rval, obj, indent_level);
2029 Py_LeaveRecursiveCall();
2030 return rv;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002031 }
2032 else {
2033 PyObject *ident = NULL;
2034 if (s->markers != Py_None) {
2035 int has_key;
2036 ident = PyLong_FromVoidPtr(obj);
2037 if (ident == NULL)
2038 return -1;
2039 has_key = PyDict_Contains(s->markers, ident);
2040 if (has_key) {
2041 if (has_key != -1)
2042 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2043 Py_DECREF(ident);
2044 return -1;
2045 }
2046 if (PyDict_SetItem(s->markers, ident, obj)) {
2047 Py_DECREF(ident);
2048 return -1;
2049 }
2050 }
2051 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2052 if (newobj == NULL) {
2053 Py_XDECREF(ident);
2054 return -1;
2055 }
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002056
2057 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2058 return -1;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002059 rv = encoder_listencode_obj(s, rval, newobj, indent_level);
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002060 Py_LeaveRecursiveCall();
2061
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002062 Py_DECREF(newobj);
2063 if (rv) {
2064 Py_XDECREF(ident);
2065 return -1;
2066 }
2067 if (ident != NULL) {
2068 if (PyDict_DelItem(s->markers, ident)) {
2069 Py_XDECREF(ident);
2070 return -1;
2071 }
2072 Py_XDECREF(ident);
2073 }
2074 return rv;
2075 }
2076}
2077
2078static int
2079encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2080{
2081 /* Encode Python dict dct a JSON term, rval is a PyList */
2082 static PyObject *open_dict = NULL;
2083 static PyObject *close_dict = NULL;
2084 static PyObject *empty_dict = NULL;
2085 PyObject *kstr = NULL;
2086 PyObject *ident = NULL;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002087 PyObject *key = NULL;
2088 PyObject *value = NULL;
2089 PyObject *it = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002090 int skipkeys;
2091 Py_ssize_t idx;
2092
2093 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
2094 open_dict = PyString_InternFromString("{");
2095 close_dict = PyString_InternFromString("}");
2096 empty_dict = PyString_InternFromString("{}");
2097 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
2098 return -1;
2099 }
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002100 if (Py_SIZE(dct) == 0)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002101 return PyList_Append(rval, empty_dict);
2102
2103 if (s->markers != Py_None) {
2104 int has_key;
2105 ident = PyLong_FromVoidPtr(dct);
2106 if (ident == NULL)
2107 goto bail;
2108 has_key = PyDict_Contains(s->markers, ident);
2109 if (has_key) {
2110 if (has_key != -1)
2111 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2112 goto bail;
2113 }
2114 if (PyDict_SetItem(s->markers, ident, dct)) {
2115 goto bail;
2116 }
2117 }
2118
2119 if (PyList_Append(rval, open_dict))
2120 goto bail;
2121
2122 if (s->indent != Py_None) {
2123 /* TODO: DOES NOT RUN */
2124 indent_level += 1;
2125 /*
2126 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2127 separator = _item_separator + newline_indent
2128 buf += newline_indent
2129 */
2130 }
2131
2132 /* TODO: C speedup not implemented for sort_keys */
2133
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002134 it = PyObject_GetIter(dct);
2135 if (it == NULL)
2136 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002137 skipkeys = PyObject_IsTrue(s->skipkeys);
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03002138 if (skipkeys < 0)
2139 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002140 idx = 0;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002141 while ((key = PyIter_Next(it)) != NULL) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002142 PyObject *encoded;
2143
2144 if (PyString_Check(key) || PyUnicode_Check(key)) {
2145 Py_INCREF(key);
2146 kstr = key;
2147 }
2148 else if (PyFloat_Check(key)) {
2149 kstr = encoder_encode_float(s, key);
2150 if (kstr == NULL)
2151 goto bail;
2152 }
2153 else if (PyInt_Check(key) || PyLong_Check(key)) {
2154 kstr = PyObject_Str(key);
2155 if (kstr == NULL)
2156 goto bail;
2157 }
2158 else if (key == Py_True || key == Py_False || key == Py_None) {
2159 kstr = _encoded_const(key);
2160 if (kstr == NULL)
2161 goto bail;
2162 }
2163 else if (skipkeys) {
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002164 Py_DECREF(key);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002165 continue;
2166 }
2167 else {
2168 /* TODO: include repr of key */
Doug Hellmannf31db932010-07-21 12:36:33 +00002169 PyErr_SetString(PyExc_TypeError, "keys must be a string");
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002170 goto bail;
2171 }
2172
2173 if (idx) {
2174 if (PyList_Append(rval, s->item_separator))
2175 goto bail;
2176 }
2177
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002178 value = PyObject_GetItem(dct, key);
2179 if (value == NULL)
2180 goto bail;
2181
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002182 encoded = encoder_encode_string(s, kstr);
2183 Py_CLEAR(kstr);
2184 if (encoded == NULL)
2185 goto bail;
2186 if (PyList_Append(rval, encoded)) {
2187 Py_DECREF(encoded);
2188 goto bail;
2189 }
2190 Py_DECREF(encoded);
2191 if (PyList_Append(rval, s->key_separator))
2192 goto bail;
2193 if (encoder_listencode_obj(s, rval, value, indent_level))
2194 goto bail;
2195 idx += 1;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002196 Py_CLEAR(value);
2197 Py_DECREF(key);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002198 }
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002199 if (PyErr_Occurred())
2200 goto bail;
2201 Py_CLEAR(it);
2202
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002203 if (ident != NULL) {
2204 if (PyDict_DelItem(s->markers, ident))
2205 goto bail;
2206 Py_CLEAR(ident);
2207 }
2208 if (s->indent != Py_None) {
2209 /* TODO: DOES NOT RUN */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002210 /*
Brett Cannon8e9757e2010-05-03 23:43:49 +00002211 indent_level -= 1;
2212
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002213 yield '\n' + (' ' * (_indent * _current_indent_level))
2214 */
2215 }
2216 if (PyList_Append(rval, close_dict))
2217 goto bail;
2218 return 0;
2219
2220bail:
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002221 Py_XDECREF(it);
2222 Py_XDECREF(key);
2223 Py_XDECREF(value);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002224 Py_XDECREF(kstr);
2225 Py_XDECREF(ident);
2226 return -1;
2227}
2228
2229
2230static int
2231encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2232{
2233 /* Encode Python list seq to a JSON term, rval is a PyList */
2234 static PyObject *open_array = NULL;
2235 static PyObject *close_array = NULL;
2236 static PyObject *empty_array = NULL;
2237 PyObject *ident = NULL;
2238 PyObject *s_fast = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002239 Py_ssize_t i;
2240
2241 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2242 open_array = PyString_InternFromString("[");
2243 close_array = PyString_InternFromString("]");
2244 empty_array = PyString_InternFromString("[]");
2245 if (open_array == NULL || close_array == NULL || empty_array == NULL)
2246 return -1;
2247 }
2248 ident = NULL;
2249 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
2250 if (s_fast == NULL)
2251 return -1;
Antoine Pitroue9e35c32012-11-01 20:07:40 +01002252 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002253 Py_DECREF(s_fast);
2254 return PyList_Append(rval, empty_array);
2255 }
2256
2257 if (s->markers != Py_None) {
2258 int has_key;
2259 ident = PyLong_FromVoidPtr(seq);
2260 if (ident == NULL)
2261 goto bail;
2262 has_key = PyDict_Contains(s->markers, ident);
2263 if (has_key) {
2264 if (has_key != -1)
2265 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2266 goto bail;
2267 }
2268 if (PyDict_SetItem(s->markers, ident, seq)) {
2269 goto bail;
2270 }
2271 }
2272
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002273 if (PyList_Append(rval, open_array))
2274 goto bail;
2275 if (s->indent != Py_None) {
2276 /* TODO: DOES NOT RUN */
2277 indent_level += 1;
2278 /*
2279 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2280 separator = _item_separator + newline_indent
2281 buf += newline_indent
2282 */
2283 }
Antoine Pitroue9e35c32012-11-01 20:07:40 +01002284 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
2285 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002286 if (i) {
2287 if (PyList_Append(rval, s->item_separator))
2288 goto bail;
2289 }
2290 if (encoder_listencode_obj(s, rval, obj, indent_level))
2291 goto bail;
2292 }
2293 if (ident != NULL) {
2294 if (PyDict_DelItem(s->markers, ident))
2295 goto bail;
2296 Py_CLEAR(ident);
2297 }
2298 if (s->indent != Py_None) {
2299 /* TODO: DOES NOT RUN */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002300 /*
Brett Cannon8e9757e2010-05-03 23:43:49 +00002301 indent_level -= 1;
2302
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002303 yield '\n' + (' ' * (_indent * _current_indent_level))
2304 */
2305 }
2306 if (PyList_Append(rval, close_array))
2307 goto bail;
2308 Py_DECREF(s_fast);
2309 return 0;
2310
2311bail:
2312 Py_XDECREF(ident);
2313 Py_DECREF(s_fast);
2314 return -1;
2315}
2316
2317static void
2318encoder_dealloc(PyObject *self)
2319{
2320 /* Deallocate Encoder */
2321 encoder_clear(self);
2322 Py_TYPE(self)->tp_free(self);
2323}
2324
2325static int
2326encoder_traverse(PyObject *self, visitproc visit, void *arg)
2327{
2328 PyEncoderObject *s;
2329 assert(PyEncoder_Check(self));
2330 s = (PyEncoderObject *)self;
2331 Py_VISIT(s->markers);
2332 Py_VISIT(s->defaultfn);
2333 Py_VISIT(s->encoder);
2334 Py_VISIT(s->indent);
2335 Py_VISIT(s->key_separator);
2336 Py_VISIT(s->item_separator);
2337 Py_VISIT(s->sort_keys);
2338 Py_VISIT(s->skipkeys);
2339 return 0;
2340}
2341
2342static int
2343encoder_clear(PyObject *self)
2344{
2345 /* Deallocate Encoder */
2346 PyEncoderObject *s;
2347 assert(PyEncoder_Check(self));
2348 s = (PyEncoderObject *)self;
2349 Py_CLEAR(s->markers);
2350 Py_CLEAR(s->defaultfn);
2351 Py_CLEAR(s->encoder);
2352 Py_CLEAR(s->indent);
2353 Py_CLEAR(s->key_separator);
2354 Py_CLEAR(s->item_separator);
2355 Py_CLEAR(s->sort_keys);
2356 Py_CLEAR(s->skipkeys);
2357 return 0;
2358}
2359
2360PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2361
2362static
2363PyTypeObject PyEncoderType = {
2364 PyObject_HEAD_INIT(NULL)
2365 0, /* tp_internal */
2366 "_json.Encoder", /* tp_name */
2367 sizeof(PyEncoderObject), /* tp_basicsize */
2368 0, /* tp_itemsize */
2369 encoder_dealloc, /* tp_dealloc */
2370 0, /* tp_print */
2371 0, /* tp_getattr */
2372 0, /* tp_setattr */
2373 0, /* tp_compare */
2374 0, /* tp_repr */
2375 0, /* tp_as_number */
2376 0, /* tp_as_sequence */
2377 0, /* tp_as_mapping */
2378 0, /* tp_hash */
2379 encoder_call, /* tp_call */
2380 0, /* tp_str */
2381 0, /* tp_getattro */
2382 0, /* tp_setattro */
2383 0, /* tp_as_buffer */
2384 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2385 encoder_doc, /* tp_doc */
2386 encoder_traverse, /* tp_traverse */
2387 encoder_clear, /* tp_clear */
2388 0, /* tp_richcompare */
2389 0, /* tp_weaklistoffset */
2390 0, /* tp_iter */
2391 0, /* tp_iternext */
2392 0, /* tp_methods */
2393 encoder_members, /* tp_members */
2394 0, /* tp_getset */
2395 0, /* tp_base */
2396 0, /* tp_dict */
2397 0, /* tp_descr_get */
2398 0, /* tp_descr_set */
2399 0, /* tp_dictoffset */
2400 encoder_init, /* tp_init */
2401 0, /* tp_alloc */
2402 encoder_new, /* tp_new */
2403 0, /* tp_free */
2404};
2405
2406static PyMethodDef speedups_methods[] = {
2407 {"encode_basestring_ascii",
2408 (PyCFunction)py_encode_basestring_ascii,
2409 METH_O,
2410 pydoc_encode_basestring_ascii},
2411 {"scanstring",
2412 (PyCFunction)py_scanstring,
2413 METH_VARARGS,
2414 pydoc_scanstring},
Brett Cannon4b964f92008-05-05 20:21:38 +00002415 {NULL, NULL, 0, NULL}
2416};
2417
2418PyDoc_STRVAR(module_doc,
2419"json speedups\n");
2420
2421void
2422init_json(void)
2423{
2424 PyObject *m;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002425 PyScannerType.tp_new = PyType_GenericNew;
2426 if (PyType_Ready(&PyScannerType) < 0)
2427 return;
2428 PyEncoderType.tp_new = PyType_GenericNew;
2429 if (PyType_Ready(&PyEncoderType) < 0)
2430 return;
2431 m = Py_InitModule3("_json", speedups_methods, module_doc);
Serhiy Storchaka045c4512015-07-24 12:58:25 +03002432 if (m == NULL)
2433 return;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002434 Py_INCREF((PyObject*)&PyScannerType);
2435 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2436 Py_INCREF((PyObject*)&PyEncoderType);
2437 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
Brett Cannon4b964f92008-05-05 20:21:38 +00002438}