blob: 7860e9a6eca4cd1eb1841db17d426732fe053cca [file] [log] [blame]
Brett Cannon4b964f92008-05-05 20:21:38 +00001#include "Python.h"
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002#include "structmember.h"
/* Compatibility shims so the module builds against older Python headers. */

/* Supply Py_TYPE() when the headers are older than 2.6 and lack it. */
#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif

/* Headers older than 2.5 have no Py_ssize_t: map it, its limits and the
   int <-> Py_ssize_t conversion helpers onto plain int / long. */
#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
typedef int Py_ssize_t;
#define PY_SSIZE_T_MAX INT_MAX
#define PY_SSIZE_T_MIN INT_MIN
#define PyInt_FromSsize_t PyInt_FromLong
#define PyInt_AsSsize_t PyInt_AsLong
#endif

/* Finite means "neither infinite nor NaN" when the headers do not say so. */
#ifndef Py_IS_FINITE
#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
#endif

/* UNUSED marks a deliberately unused parameter; it expands to nothing on
   compilers that do not define __GNUC__. */
#ifdef __GNUC__
#define UNUSED __attribute__((__unused__))
#else
#define UNUSED
#endif
Brett Cannon4b964f92008-05-05 20:21:38 +000022
23#define DEFAULT_ENCODING "utf-8"
Brett Cannon4b964f92008-05-05 20:21:38 +000024
Bob Ippolitod914e3f2009-03-17 23:19:00 +000025#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
26#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
27#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
28#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
29
30static PyTypeObject PyScannerType;
31static PyTypeObject PyEncoderType;
32
33typedef struct _PyScannerObject {
34 PyObject_HEAD
35 PyObject *encoding;
36 PyObject *strict;
37 PyObject *object_hook;
Raymond Hettinger91852ca2009-03-19 19:19:03 +000038 PyObject *pairs_hook;
Bob Ippolitod914e3f2009-03-17 23:19:00 +000039 PyObject *parse_float;
40 PyObject *parse_int;
41 PyObject *parse_constant;
42} PyScannerObject;
43
44static PyMemberDef scanner_members[] = {
45 {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
46 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
47 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
Raymond Hettinger91852ca2009-03-19 19:19:03 +000048 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
Bob Ippolitod914e3f2009-03-17 23:19:00 +000049 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
50 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
51 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
52 {NULL}
53};
54
55typedef struct _PyEncoderObject {
56 PyObject_HEAD
57 PyObject *markers;
58 PyObject *defaultfn;
59 PyObject *encoder;
60 PyObject *indent;
61 PyObject *key_separator;
62 PyObject *item_separator;
63 PyObject *sort_keys;
64 PyObject *skipkeys;
65 int fast_encode;
66 int allow_nan;
67} PyEncoderObject;
68
69static PyMemberDef encoder_members[] = {
70 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
71 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
72 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
73 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
74 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
75 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
76 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
77 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
78 {NULL}
79};
80
81static Py_ssize_t
82ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
83static PyObject *
84ascii_escape_unicode(PyObject *pystr);
85static PyObject *
86ascii_escape_str(PyObject *pystr);
87static PyObject *
88py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
89void init_json(void);
90static PyObject *
91scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
92static PyObject *
93scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
94static PyObject *
95_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
96static PyObject *
97scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
98static int
99scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
100static void
101scanner_dealloc(PyObject *self);
102static int
103scanner_clear(PyObject *self);
104static PyObject *
105encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
106static int
107encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
108static void
109encoder_dealloc(PyObject *self);
110static int
111encoder_clear(PyObject *self);
112static int
113encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
114static int
115encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
116static int
117encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
118static PyObject *
Raymond Hettingera0b8d9a2009-03-19 19:24:43 +0000119_encoded_const(PyObject *obj);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000120static void
121raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
122static PyObject *
123encoder_encode_string(PyEncoderObject *s, PyObject *obj);
124static int
125_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
126static PyObject *
127_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
128static PyObject *
129encoder_encode_float(PyEncoderObject *s, PyObject *obj);
130
/* True when c can be copied into an ASCII JSON string unescaped: printable
   ASCII (' ' through '~') other than backslash and double quote.  The
   argument is evaluated several times, so pass an expression without side
   effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* True for the four whitespace characters: space, tab, LF and CR. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* Output bytes produced by one "\uXXXX" escape. */
#define MIN_EXPANSION 6
/* Worst-case output bytes for one input code unit: wide builds may need a
   surrogate pair, i.e. two "\uXXXX" escapes. */
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
140
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000141static int
142_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
143{
144 /* PyObject to Py_ssize_t converter */
145 *size_ptr = PyInt_AsSsize_t(o);
Georg Brandlf71ba952009-05-05 07:48:12 +0000146 if (*size_ptr == -1 && PyErr_Occurred())
147 return 0;
148 return 1;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000149}
150
151static PyObject *
152_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
153{
154 /* Py_ssize_t to PyObject converter */
155 return PyInt_FromSsize_t(*size_ptr);
156}
157
Brett Cannon4b964f92008-05-05 20:21:38 +0000158static Py_ssize_t
159ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
160{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000161 /* Escape unicode code point c to ASCII escape sequences
162 in char *output. output must have at least 12 bytes unused to
163 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Brett Cannon4b964f92008-05-05 20:21:38 +0000164 output[chars++] = '\\';
165 switch (c) {
166 case '\\': output[chars++] = (char)c; break;
167 case '"': output[chars++] = (char)c; break;
168 case '\b': output[chars++] = 'b'; break;
169 case '\f': output[chars++] = 'f'; break;
170 case '\n': output[chars++] = 'n'; break;
171 case '\r': output[chars++] = 'r'; break;
172 case '\t': output[chars++] = 't'; break;
173 default:
174#ifdef Py_UNICODE_WIDE
175 if (c >= 0x10000) {
176 /* UTF-16 surrogate pair */
177 Py_UNICODE v = c - 0x10000;
178 c = 0xd800 | ((v >> 10) & 0x3ff);
179 output[chars++] = 'u';
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000180 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
181 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
182 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
183 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Brett Cannon4b964f92008-05-05 20:21:38 +0000184 c = 0xdc00 | (v & 0x3ff);
185 output[chars++] = '\\';
186 }
187#endif
188 output[chars++] = 'u';
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000189 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
190 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
191 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
192 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Brett Cannon4b964f92008-05-05 20:21:38 +0000193 }
194 return chars;
195}
196
197static PyObject *
198ascii_escape_unicode(PyObject *pystr)
199{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000200 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
Brett Cannon4b964f92008-05-05 20:21:38 +0000201 Py_ssize_t i;
202 Py_ssize_t input_chars;
203 Py_ssize_t output_size;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000204 Py_ssize_t max_output_size;
Brett Cannon4b964f92008-05-05 20:21:38 +0000205 Py_ssize_t chars;
Benjamin Petersonaa034202016-09-26 23:55:41 -0700206 Py_ssize_t incr;
Brett Cannon4b964f92008-05-05 20:21:38 +0000207 PyObject *rval;
208 char *output;
209 Py_UNICODE *input_unicode;
210
211 input_chars = PyUnicode_GET_SIZE(pystr);
212 input_unicode = PyUnicode_AS_UNICODE(pystr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000213
Benjamin Petersonaa034202016-09-26 23:55:41 -0700214 output_size = input_chars;
215 incr = 2; /* for quotes */
Brett Cannon4b964f92008-05-05 20:21:38 +0000216 /* One char input can be up to 6 chars output, estimate 4 of these */
Benjamin Petersonaa034202016-09-26 23:55:41 -0700217 incr += MIN_EXPANSION * 4;
218 if (PY_SSIZE_T_MAX - incr < output_size) {
219 PyErr_NoMemory();
Benjamin Peterson04a53852016-08-13 16:47:25 -0700220 return NULL;
221 }
Benjamin Petersonaa034202016-09-26 23:55:41 -0700222 output_size += incr;
223 if (PY_SSIZE_T_MAX / MAX_EXPANSION < input_chars ||
224 PY_SSIZE_T_MAX - 2 < input_chars * MAX_EXPANSION)
225 max_output_size = PY_SSIZE_T_MAX;
226 else
227 max_output_size = 2 + (input_chars * MAX_EXPANSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000228 rval = PyString_FromStringAndSize(NULL, output_size);
Brett Cannon4b964f92008-05-05 20:21:38 +0000229 if (rval == NULL) {
230 return NULL;
231 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000232 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000233 chars = 0;
234 output[chars++] = '"';
235 for (i = 0; i < input_chars; i++) {
236 Py_UNICODE c = input_unicode[i];
237 if (S_CHAR(c)) {
238 output[chars++] = (char)c;
239 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000240 else {
Brett Cannon4b964f92008-05-05 20:21:38 +0000241 chars = ascii_escape_char(c, output, chars);
242 }
243 if (output_size - chars < (1 + MAX_EXPANSION)) {
Benjamin Petersonaa034202016-09-26 23:55:41 -0700244 if (output_size == PY_SSIZE_T_MAX) {
245 Py_DECREF(rval);
246 PyErr_NoMemory();
247 return NULL;
248 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000249 /* There's more than four, so let's resize by a lot */
Benjamin Petersonaa034202016-09-26 23:55:41 -0700250 if (PY_SSIZE_T_MAX / 2 >= output_size && output_size * 2 < max_output_size)
251 output_size *= 2;
252 else
253 output_size = max_output_size;
254 if (_PyString_Resize(&rval, output_size) == -1) {
255 return NULL;
Brett Cannon4b964f92008-05-05 20:21:38 +0000256 }
Benjamin Petersonaa034202016-09-26 23:55:41 -0700257 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000258 }
259 }
260 output[chars++] = '"';
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000261 if (_PyString_Resize(&rval, chars) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000262 return NULL;
263 }
264 return rval;
265}
266
267static PyObject *
268ascii_escape_str(PyObject *pystr)
269{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000270 /* Take a PyString pystr and return a new ASCII-only escaped PyString */
Brett Cannon4b964f92008-05-05 20:21:38 +0000271 Py_ssize_t i;
272 Py_ssize_t input_chars;
273 Py_ssize_t output_size;
Benjamin Petersonaa034202016-09-26 23:55:41 -0700274 Py_ssize_t max_output_size;
Brett Cannon4b964f92008-05-05 20:21:38 +0000275 Py_ssize_t chars;
Benjamin Petersonaa034202016-09-26 23:55:41 -0700276 Py_ssize_t incr;
Brett Cannon4b964f92008-05-05 20:21:38 +0000277 PyObject *rval;
278 char *output;
279 char *input_str;
280
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000281 input_chars = PyString_GET_SIZE(pystr);
282 input_str = PyString_AS_STRING(pystr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000283
284 /* Fast path for a string that's already ASCII */
285 for (i = 0; i < input_chars; i++) {
286 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
287 if (!S_CHAR(c)) {
288 /* If we have to escape something, scan the string for unicode */
289 Py_ssize_t j;
290 for (j = i; j < input_chars; j++) {
291 c = (Py_UNICODE)(unsigned char)input_str[j];
292 if (c > 0x7f) {
293 /* We hit a non-ASCII character, bail to unicode mode */
294 PyObject *uni;
295 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
296 if (uni == NULL) {
297 return NULL;
298 }
299 rval = ascii_escape_unicode(uni);
300 Py_DECREF(uni);
301 return rval;
302 }
303 }
304 break;
305 }
306 }
307
Benjamin Petersonaa034202016-09-26 23:55:41 -0700308 output_size = input_chars;
309 incr = 2; /* for quotes */
310 if (i != input_chars) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000311 /* One char input can be up to 6 chars output, estimate 4 of these */
Benjamin Petersonaa034202016-09-26 23:55:41 -0700312 incr += MIN_EXPANSION * 4;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000313 }
Benjamin Petersonaa034202016-09-26 23:55:41 -0700314 if (PY_SSIZE_T_MAX - incr < output_size) {
315 PyErr_NoMemory();
316 return NULL;
317 }
318 output_size += incr;
319 if (PY_SSIZE_T_MAX / MIN_EXPANSION < input_chars ||
320 PY_SSIZE_T_MAX - 2 < input_chars * MIN_EXPANSION)
321 max_output_size = PY_SSIZE_T_MAX;
322 else
323 max_output_size = 2 + (input_chars * MIN_EXPANSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000324 rval = PyString_FromStringAndSize(NULL, output_size);
Brett Cannon4b964f92008-05-05 20:21:38 +0000325 if (rval == NULL) {
326 return NULL;
327 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000328 output = PyString_AS_STRING(rval);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000329 output[0] = '"';
330
331 /* We know that everything up to i is ASCII already */
332 chars = i + 1;
333 memcpy(&output[1], input_str, i);
334
335 for (; i < input_chars; i++) {
336 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
Brett Cannon4b964f92008-05-05 20:21:38 +0000337 if (S_CHAR(c)) {
338 output[chars++] = (char)c;
339 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000340 else {
Brett Cannon4b964f92008-05-05 20:21:38 +0000341 chars = ascii_escape_char(c, output, chars);
342 }
343 /* An ASCII char can't possibly expand to a surrogate! */
344 if (output_size - chars < (1 + MIN_EXPANSION)) {
Benjamin Petersonaa034202016-09-26 23:55:41 -0700345 if (output_size == PY_SSIZE_T_MAX) {
346 Py_DECREF(rval);
347 PyErr_NoMemory();
348 return NULL;
Brett Cannon4b964f92008-05-05 20:21:38 +0000349 }
Benjamin Petersonaa034202016-09-26 23:55:41 -0700350 /* There's more than four, so let's resize by a lot */
351 if (PY_SSIZE_T_MAX / 2 >= output_size && output_size * 2 < max_output_size)
352 output_size *= 2;
353 else
354 output_size = max_output_size;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000355 if (_PyString_Resize(&rval, output_size) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000356 return NULL;
357 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000358 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000359 }
360 }
361 output[chars++] = '"';
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000362 if (_PyString_Resize(&rval, chars) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000363 return NULL;
364 }
365 return rval;
366}
367
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000368static void
Brett Cannon4b964f92008-05-05 20:21:38 +0000369raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
370{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000371 /* Use the Python function json.decoder.errmsg to raise a nice
372 looking ValueError exception */
Brett Cannon4b964f92008-05-05 20:21:38 +0000373 static PyObject *errmsg_fn = NULL;
374 PyObject *pymsg;
375 if (errmsg_fn == NULL) {
376 PyObject *decoder = PyImport_ImportModule("json.decoder");
377 if (decoder == NULL)
378 return;
379 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000380 Py_DECREF(decoder);
Brett Cannon4b964f92008-05-05 20:21:38 +0000381 if (errmsg_fn == NULL)
382 return;
Brett Cannon4b964f92008-05-05 20:21:38 +0000383 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000384 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
Benjamin Peterson595e3cb2008-10-16 21:09:28 +0000385 if (pymsg) {
386 PyErr_SetObject(PyExc_ValueError, pymsg);
387 Py_DECREF(pymsg);
388 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000389}
390
391static PyObject *
392join_list_unicode(PyObject *lst)
393{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000394 /* return u''.join(lst) */
395 static PyObject *joinfn = NULL;
396 if (joinfn == NULL) {
397 PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
398 if (ustr == NULL)
399 return NULL;
400
401 joinfn = PyObject_GetAttrString(ustr, "join");
402 Py_DECREF(ustr);
403 if (joinfn == NULL)
404 return NULL;
Brett Cannon4b964f92008-05-05 20:21:38 +0000405 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000406 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
Brett Cannon4b964f92008-05-05 20:21:38 +0000407}
408
409static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000410_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
411 /* return (rval, idx) tuple, stealing reference to rval */
412 PyObject *tpl;
413 PyObject *pyidx;
414 /*
415 steal a reference to rval, returns (rval, idx)
416 */
417 if (rval == NULL) {
418 return NULL;
419 }
420 pyidx = PyInt_FromSsize_t(idx);
421 if (pyidx == NULL) {
422 Py_DECREF(rval);
423 return NULL;
424 }
425 tpl = PyTuple_New(2);
426 if (tpl == NULL) {
427 Py_DECREF(pyidx);
428 Py_DECREF(rval);
429 return NULL;
430 }
431 PyTuple_SET_ITEM(tpl, 0, rval);
432 PyTuple_SET_ITEM(tpl, 1, pyidx);
433 return tpl;
434}
435
436static PyObject *
437scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
438{
439 /* Read the JSON string from PyString pystr.
440 end is the index of the first character after the quote.
441 encoding is the encoding of pystr (must be an ASCII superset)
442 if strict is zero then literal control characters are allowed
443 *next_end_ptr is a return-by-reference index of the character
444 after the end quote
445
446 Return value is a new PyString (if ASCII-only) or PyUnicode
447 */
Brett Cannon4b964f92008-05-05 20:21:38 +0000448 PyObject *rval;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000449 Py_ssize_t len = PyString_GET_SIZE(pystr);
Brett Cannon4b964f92008-05-05 20:21:38 +0000450 Py_ssize_t begin = end - 1;
Brett Cannon8e9757e2010-05-03 23:43:49 +0000451 Py_ssize_t next;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000452 char *buf = PyString_AS_STRING(pystr);
Brett Cannon4b964f92008-05-05 20:21:38 +0000453 PyObject *chunks = PyList_New(0);
454 if (chunks == NULL) {
455 goto bail;
456 }
Bob Ippolitod648f642008-07-19 21:59:50 +0000457 if (end < 0 || len <= end) {
458 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
459 goto bail;
460 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000461 while (1) {
462 /* Find the end of the string or the next escape */
463 Py_UNICODE c = 0;
464 PyObject *chunk = NULL;
465 for (next = end; next < len; next++) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000466 c = (unsigned char)buf[next];
Brett Cannon4b964f92008-05-05 20:21:38 +0000467 if (c == '"' || c == '\\') {
468 break;
469 }
470 else if (strict && c <= 0x1f) {
Bob Ippolitod648f642008-07-19 21:59:50 +0000471 raise_errmsg("Invalid control character at", pystr, next);
Brett Cannon4b964f92008-05-05 20:21:38 +0000472 goto bail;
473 }
474 }
475 if (!(c == '"' || c == '\\')) {
476 raise_errmsg("Unterminated string starting at", pystr, begin);
477 goto bail;
478 }
479 /* Pick up this chunk if it's not zero length */
480 if (next != end) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000481 PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000482 if (strchunk == NULL) {
483 goto bail;
484 }
Barry Warsawfa658272010-11-02 21:03:09 +0000485 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
486 Py_DECREF(strchunk);
487 if (chunk == NULL) {
488 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000489 }
490 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000491 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000492 goto bail;
493 }
494 Py_DECREF(chunk);
495 }
496 next++;
497 if (c == '"') {
498 end = next;
499 break;
500 }
501 if (next == len) {
502 raise_errmsg("Unterminated string starting at", pystr, begin);
503 goto bail;
504 }
505 c = buf[next];
506 if (c != 'u') {
507 /* Non-unicode backslash escapes */
508 end = next + 1;
509 switch (c) {
510 case '"': break;
511 case '\\': break;
512 case '/': break;
513 case 'b': c = '\b'; break;
514 case 'f': c = '\f'; break;
515 case 'n': c = '\n'; break;
516 case 'r': c = '\r'; break;
517 case 't': c = '\t'; break;
518 default: c = 0;
519 }
520 if (c == 0) {
521 raise_errmsg("Invalid \\escape", pystr, end - 2);
522 goto bail;
523 }
524 }
525 else {
526 c = 0;
527 next++;
528 end = next + 4;
529 if (end >= len) {
530 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
531 goto bail;
532 }
533 /* Decode 4 hex digits */
534 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000535 Py_UNICODE digit = buf[next];
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000536 c <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000537 switch (digit) {
538 case '0': case '1': case '2': case '3': case '4':
539 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000540 c |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000541 case 'a': case 'b': case 'c': case 'd': case 'e':
542 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000543 c |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000544 case 'A': case 'B': case 'C': case 'D': case 'E':
545 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000546 c |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000547 default:
548 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
549 goto bail;
550 }
551 }
552#ifdef Py_UNICODE_WIDE
553 /* Surrogate pair */
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200554 if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
555 buf[next++] == '\\' &&
556 buf[next++] == 'u') {
Brett Cannon4b964f92008-05-05 20:21:38 +0000557 Py_UNICODE c2 = 0;
Brett Cannon4b964f92008-05-05 20:21:38 +0000558 end += 6;
559 /* Decode 4 hex digits */
560 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000561 Py_UNICODE digit = buf[next];
Antoine Pitrou22ad2452010-10-09 15:28:59 +0000562 c2 <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000563 switch (digit) {
564 case '0': case '1': case '2': case '3': case '4':
565 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000566 c2 |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000567 case 'a': case 'b': case 'c': case 'd': case 'e':
568 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000569 c2 |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000570 case 'A': case 'B': case 'C': case 'D': case 'E':
571 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000572 c2 |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000573 default:
574 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
575 goto bail;
576 }
577 }
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200578 if ((c2 & 0xfc00) == 0xdc00)
579 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
580 else
581 end -= 6;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000582 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000583#endif
584 }
Barry Warsawfa658272010-11-02 21:03:09 +0000585 chunk = PyUnicode_FromUnicode(&c, 1);
586 if (chunk == NULL) {
587 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000588 }
589 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000590 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000591 goto bail;
592 }
593 Py_DECREF(chunk);
594 }
595
Ezio Melottidf8a8f72011-05-04 14:40:53 +0300596 rval = join_list_unicode(chunks);
Brett Cannon4b964f92008-05-05 20:21:38 +0000597 if (rval == NULL) {
598 goto bail;
599 }
Benjamin Peterson336680e2008-10-16 21:48:06 +0000600 Py_CLEAR(chunks);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000601 *next_end_ptr = end;
602 return rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000603bail:
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000604 *next_end_ptr = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000605 Py_XDECREF(chunks);
606 return NULL;
607}
608
609
610static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000611scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Brett Cannon4b964f92008-05-05 20:21:38 +0000612{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000613 /* Read the JSON string from PyUnicode pystr.
614 end is the index of the first character after the quote.
615 if strict is zero then literal control characters are allowed
616 *next_end_ptr is a return-by-reference index of the character
617 after the end quote
618
619 Return value is a new PyUnicode
620 */
Brett Cannon4b964f92008-05-05 20:21:38 +0000621 PyObject *rval;
622 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
623 Py_ssize_t begin = end - 1;
Brett Cannon8e9757e2010-05-03 23:43:49 +0000624 Py_ssize_t next;
Brett Cannon4b964f92008-05-05 20:21:38 +0000625 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
626 PyObject *chunks = PyList_New(0);
627 if (chunks == NULL) {
628 goto bail;
629 }
Bob Ippolitod648f642008-07-19 21:59:50 +0000630 if (end < 0 || len <= end) {
631 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
632 goto bail;
633 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000634 while (1) {
635 /* Find the end of the string or the next escape */
636 Py_UNICODE c = 0;
637 PyObject *chunk = NULL;
638 for (next = end; next < len; next++) {
639 c = buf[next];
640 if (c == '"' || c == '\\') {
641 break;
642 }
643 else if (strict && c <= 0x1f) {
Bob Ippolitod648f642008-07-19 21:59:50 +0000644 raise_errmsg("Invalid control character at", pystr, next);
Brett Cannon4b964f92008-05-05 20:21:38 +0000645 goto bail;
646 }
647 }
648 if (!(c == '"' || c == '\\')) {
649 raise_errmsg("Unterminated string starting at", pystr, begin);
650 goto bail;
651 }
652 /* Pick up this chunk if it's not zero length */
653 if (next != end) {
654 chunk = PyUnicode_FromUnicode(&buf[end], next - end);
655 if (chunk == NULL) {
656 goto bail;
657 }
658 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson87e6ad22008-10-16 21:27:54 +0000659 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000660 goto bail;
661 }
662 Py_DECREF(chunk);
663 }
664 next++;
665 if (c == '"') {
666 end = next;
667 break;
668 }
669 if (next == len) {
670 raise_errmsg("Unterminated string starting at", pystr, begin);
671 goto bail;
672 }
673 c = buf[next];
674 if (c != 'u') {
675 /* Non-unicode backslash escapes */
676 end = next + 1;
677 switch (c) {
678 case '"': break;
679 case '\\': break;
680 case '/': break;
681 case 'b': c = '\b'; break;
682 case 'f': c = '\f'; break;
683 case 'n': c = '\n'; break;
684 case 'r': c = '\r'; break;
685 case 't': c = '\t'; break;
686 default: c = 0;
687 }
688 if (c == 0) {
689 raise_errmsg("Invalid \\escape", pystr, end - 2);
690 goto bail;
691 }
692 }
693 else {
694 c = 0;
695 next++;
696 end = next + 4;
697 if (end >= len) {
698 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
699 goto bail;
700 }
701 /* Decode 4 hex digits */
702 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000703 Py_UNICODE digit = buf[next];
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000704 c <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000705 switch (digit) {
706 case '0': case '1': case '2': case '3': case '4':
707 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000708 c |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000709 case 'a': case 'b': case 'c': case 'd': case 'e':
710 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000711 c |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000712 case 'A': case 'B': case 'C': case 'D': case 'E':
713 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000714 c |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000715 default:
716 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
717 goto bail;
718 }
719 }
720#ifdef Py_UNICODE_WIDE
721 /* Surrogate pair */
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200722 if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
723 buf[next++] == '\\' && buf[next++] == 'u') {
Brett Cannon4b964f92008-05-05 20:21:38 +0000724 Py_UNICODE c2 = 0;
Brett Cannon4b964f92008-05-05 20:21:38 +0000725 end += 6;
726 /* Decode 4 hex digits */
727 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000728 Py_UNICODE digit = buf[next];
Antoine Pitrou22ad2452010-10-09 15:28:59 +0000729 c2 <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000730 switch (digit) {
731 case '0': case '1': case '2': case '3': case '4':
732 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000733 c2 |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000734 case 'a': case 'b': case 'c': case 'd': case 'e':
735 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000736 c2 |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000737 case 'A': case 'B': case 'C': case 'D': case 'E':
738 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000739 c2 |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000740 default:
741 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
742 goto bail;
743 }
744 }
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200745 if ((c2 & 0xfc00) == 0xdc00)
746 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
747 else
748 end -= 6;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000749 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000750#endif
751 }
752 chunk = PyUnicode_FromUnicode(&c, 1);
753 if (chunk == NULL) {
754 goto bail;
755 }
756 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000757 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000758 goto bail;
759 }
760 Py_DECREF(chunk);
761 }
762
763 rval = join_list_unicode(chunks);
764 if (rval == NULL) {
765 goto bail;
766 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000767 Py_DECREF(chunks);
768 *next_end_ptr = end;
769 return rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000770bail:
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000771 *next_end_ptr = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000772 Py_XDECREF(chunks);
773 return NULL;
774}
775
776PyDoc_STRVAR(pydoc_scanstring,
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000777 "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
778 "\n"
779 "Scan the string s for a JSON string. End is the index of the\n"
780 "character in s after the quote that started the JSON string.\n"
781 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
782 "on attempt to decode an invalid string. If strict is False then literal\n"
783 "control characters are allowed in the string.\n"
784 "\n"
785 "Returns a tuple of the decoded string and the index of the character in s\n"
786 "after the end quote."
787);
Brett Cannon4b964f92008-05-05 20:21:38 +0000788
789static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000790py_scanstring(PyObject* self UNUSED, PyObject *args)
Brett Cannon4b964f92008-05-05 20:21:38 +0000791{
792 PyObject *pystr;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000793 PyObject *rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000794 Py_ssize_t end;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000795 Py_ssize_t next_end = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000796 char *encoding = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000797 int strict = 1;
798 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000799 return NULL;
800 }
801 if (encoding == NULL) {
802 encoding = DEFAULT_ENCODING;
803 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000804 if (PyString_Check(pystr)) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000805 rval = scanstring_str(pystr, end, encoding, strict, &next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000806 }
807 else if (PyUnicode_Check(pystr)) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000808 rval = scanstring_unicode(pystr, end, strict, &next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000809 }
810 else {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000811 PyErr_Format(PyExc_TypeError,
812 "first argument must be a string, not %.80s",
Brett Cannon4b964f92008-05-05 20:21:38 +0000813 Py_TYPE(pystr)->tp_name);
814 return NULL;
815 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000816 return _build_rval_index_tuple(rval, next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000817}
818
819PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000820 "encode_basestring_ascii(basestring) -> str\n"
821 "\n"
822 "Return an ASCII-only JSON representation of a Python string"
823);
Brett Cannon4b964f92008-05-05 20:21:38 +0000824
825static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000826py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Brett Cannon4b964f92008-05-05 20:21:38 +0000827{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000828 /* Return an ASCII-only JSON representation of a Python string */
Brett Cannon4b964f92008-05-05 20:21:38 +0000829 /* METH_O */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000830 if (PyString_Check(pystr)) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000831 return ascii_escape_str(pystr);
832 }
833 else if (PyUnicode_Check(pystr)) {
834 return ascii_escape_unicode(pystr);
835 }
836 else {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000837 PyErr_Format(PyExc_TypeError,
838 "first argument must be a string, not %.80s",
Brett Cannon4b964f92008-05-05 20:21:38 +0000839 Py_TYPE(pystr)->tp_name);
840 return NULL;
841 }
842}
843
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000844static void
845scanner_dealloc(PyObject *self)
846{
847 /* Deallocate scanner object */
848 scanner_clear(self);
849 Py_TYPE(self)->tp_free(self);
850}
851
852static int
853scanner_traverse(PyObject *self, visitproc visit, void *arg)
854{
855 PyScannerObject *s;
856 assert(PyScanner_Check(self));
857 s = (PyScannerObject *)self;
858 Py_VISIT(s->encoding);
859 Py_VISIT(s->strict);
860 Py_VISIT(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000861 Py_VISIT(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000862 Py_VISIT(s->parse_float);
863 Py_VISIT(s->parse_int);
864 Py_VISIT(s->parse_constant);
865 return 0;
866}
867
868static int
869scanner_clear(PyObject *self)
870{
871 PyScannerObject *s;
872 assert(PyScanner_Check(self));
873 s = (PyScannerObject *)self;
874 Py_CLEAR(s->encoding);
875 Py_CLEAR(s->strict);
876 Py_CLEAR(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000877 Py_CLEAR(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000878 Py_CLEAR(s->parse_float);
879 Py_CLEAR(s->parse_int);
880 Py_CLEAR(s->parse_constant);
881 return 0;
882}
883
884static PyObject *
885_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
886 /* Read a JSON object from PyString pystr.
887 idx is the index of the first character after the opening curly brace.
888 *next_idx_ptr is a return-by-reference index to the first character after
889 the closing curly brace.
890
891 Returns a new PyObject (usually a dict, but object_hook can change that)
892 */
893 char *str = PyString_AS_STRING(pystr);
894 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000895 PyObject *rval;
896 PyObject *pairs;
897 PyObject *item;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000898 PyObject *key = NULL;
899 PyObject *val = NULL;
900 char *encoding = PyString_AS_STRING(s->encoding);
901 int strict = PyObject_IsTrue(s->strict);
902 Py_ssize_t next_idx;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000903
Serhiy Storchaka5127ed72015-05-30 17:45:12 +0300904 if (strict < 0)
905 return NULL;
906
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000907 pairs = PyList_New(0);
908 if (pairs == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000909 return NULL;
910
911 /* skip whitespace after { */
912 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
913
914 /* only loop if the object is non-empty */
915 if (idx <= end_idx && str[idx] != '}') {
916 while (idx <= end_idx) {
917 /* read key */
918 if (str[idx] != '"') {
919 raise_errmsg("Expecting property name", pystr, idx);
920 goto bail;
921 }
922 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
923 if (key == NULL)
924 goto bail;
925 idx = next_idx;
926
927 /* skip whitespace between key and : delimiter, read :, skip whitespace */
928 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
929 if (idx > end_idx || str[idx] != ':') {
930 raise_errmsg("Expecting : delimiter", pystr, idx);
931 goto bail;
932 }
933 idx++;
934 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
935
936 /* read any JSON data type */
937 val = scan_once_str(s, pystr, idx, &next_idx);
938 if (val == NULL)
939 goto bail;
940
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000941 item = PyTuple_Pack(2, key, val);
942 if (item == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000943 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000944 Py_CLEAR(key);
945 Py_CLEAR(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000946 if (PyList_Append(pairs, item) == -1) {
947 Py_DECREF(item);
948 goto bail;
949 }
950 Py_DECREF(item);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000951 idx = next_idx;
952
953 /* skip whitespace before } or , */
954 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
955
956 /* bail if the object is closed or we didn't get the , delimiter */
957 if (idx > end_idx) break;
958 if (str[idx] == '}') {
959 break;
960 }
961 else if (str[idx] != ',') {
962 raise_errmsg("Expecting , delimiter", pystr, idx);
963 goto bail;
964 }
965 idx++;
966
967 /* skip whitespace after , delimiter */
968 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
969 }
970 }
971 /* verify that idx < end_idx, str[idx] should be '}' */
972 if (idx > end_idx || str[idx] != '}') {
973 raise_errmsg("Expecting object", pystr, end_idx);
974 goto bail;
975 }
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000976
977 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
978 if (s->pairs_hook != Py_None) {
979 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
980 if (val == NULL)
981 goto bail;
982 Py_DECREF(pairs);
983 *next_idx_ptr = idx + 1;
984 return val;
985 }
986
987 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
988 pairs, NULL);
989 if (rval == NULL)
990 goto bail;
991 Py_CLEAR(pairs);
992
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000993 /* if object_hook is not None: rval = object_hook(rval) */
994 if (s->object_hook != Py_None) {
995 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
996 if (val == NULL)
997 goto bail;
998 Py_DECREF(rval);
999 rval = val;
1000 val = NULL;
1001 }
1002 *next_idx_ptr = idx + 1;
1003 return rval;
1004bail:
1005 Py_XDECREF(key);
1006 Py_XDECREF(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001007 Py_XDECREF(pairs);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001008 return NULL;
1009}
1010
1011static PyObject *
1012_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1013 /* Read a JSON object from PyUnicode pystr.
1014 idx is the index of the first character after the opening curly brace.
1015 *next_idx_ptr is a return-by-reference index to the first character after
1016 the closing curly brace.
1017
1018 Returns a new PyObject (usually a dict, but object_hook can change that)
1019 */
1020 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1021 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001022 PyObject *rval;
1023 PyObject *pairs;
1024 PyObject *item;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001025 PyObject *key = NULL;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001026 PyObject *val = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001027 int strict = PyObject_IsTrue(s->strict);
1028 Py_ssize_t next_idx;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001029
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001030 if (strict < 0)
1031 return NULL;
1032
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001033 pairs = PyList_New(0);
1034 if (pairs == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001035 return NULL;
1036
1037 /* skip whitespace after { */
1038 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1039
1040 /* only loop if the object is non-empty */
1041 if (idx <= end_idx && str[idx] != '}') {
1042 while (idx <= end_idx) {
1043 /* read key */
1044 if (str[idx] != '"') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001045 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001046 goto bail;
1047 }
1048 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1049 if (key == NULL)
1050 goto bail;
1051 idx = next_idx;
1052
1053 /* skip whitespace between key and : delimiter, read :, skip whitespace */
1054 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1055 if (idx > end_idx || str[idx] != ':') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001056 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001057 goto bail;
1058 }
1059 idx++;
1060 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1061
1062 /* read any JSON term */
1063 val = scan_once_unicode(s, pystr, idx, &next_idx);
1064 if (val == NULL)
1065 goto bail;
1066
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001067 item = PyTuple_Pack(2, key, val);
1068 if (item == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001069 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001070 Py_CLEAR(key);
1071 Py_CLEAR(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001072 if (PyList_Append(pairs, item) == -1) {
1073 Py_DECREF(item);
1074 goto bail;
1075 }
1076 Py_DECREF(item);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001077 idx = next_idx;
1078
1079 /* skip whitespace before } or , */
1080 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1081
1082 /* bail if the object is closed or we didn't get the , delimiter */
1083 if (idx > end_idx) break;
1084 if (str[idx] == '}') {
1085 break;
1086 }
1087 else if (str[idx] != ',') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001088 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001089 goto bail;
1090 }
1091 idx++;
1092
1093 /* skip whitespace after , delimiter */
1094 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1095 }
1096 }
1097
1098 /* verify that idx < end_idx, str[idx] should be '}' */
1099 if (idx > end_idx || str[idx] != '}') {
1100 raise_errmsg("Expecting object", pystr, end_idx);
1101 goto bail;
1102 }
1103
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001104 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1105 if (s->pairs_hook != Py_None) {
1106 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1107 if (val == NULL)
1108 goto bail;
1109 Py_DECREF(pairs);
1110 *next_idx_ptr = idx + 1;
1111 return val;
1112 }
1113
1114 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
1115 pairs, NULL);
1116 if (rval == NULL)
1117 goto bail;
1118 Py_CLEAR(pairs);
1119
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001120 /* if object_hook is not None: rval = object_hook(rval) */
1121 if (s->object_hook != Py_None) {
1122 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1123 if (val == NULL)
1124 goto bail;
1125 Py_DECREF(rval);
1126 rval = val;
1127 val = NULL;
1128 }
1129 *next_idx_ptr = idx + 1;
1130 return rval;
1131bail:
1132 Py_XDECREF(key);
1133 Py_XDECREF(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001134 Py_XDECREF(pairs);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001135 return NULL;
1136}
1137
1138static PyObject *
1139_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1140 /* Read a JSON array from PyString pystr.
1141 idx is the index of the first character after the opening brace.
1142 *next_idx_ptr is a return-by-reference index to the first character after
1143 the closing brace.
1144
1145 Returns a new PyList
1146 */
1147 char *str = PyString_AS_STRING(pystr);
1148 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1149 PyObject *val = NULL;
1150 PyObject *rval = PyList_New(0);
1151 Py_ssize_t next_idx;
1152 if (rval == NULL)
1153 return NULL;
1154
1155 /* skip whitespace after [ */
1156 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1157
1158 /* only loop if the array is non-empty */
1159 if (idx <= end_idx && str[idx] != ']') {
1160 while (idx <= end_idx) {
1161
1162 /* read any JSON term and de-tuplefy the (rval, idx) */
1163 val = scan_once_str(s, pystr, idx, &next_idx);
1164 if (val == NULL)
1165 goto bail;
1166
1167 if (PyList_Append(rval, val) == -1)
1168 goto bail;
1169
1170 Py_CLEAR(val);
1171 idx = next_idx;
1172
1173 /* skip whitespace between term and , */
1174 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1175
1176 /* bail if the array is closed or we didn't get the , delimiter */
1177 if (idx > end_idx) break;
1178 if (str[idx] == ']') {
1179 break;
1180 }
1181 else if (str[idx] != ',') {
1182 raise_errmsg("Expecting , delimiter", pystr, idx);
1183 goto bail;
1184 }
1185 idx++;
1186
1187 /* skip whitespace after , */
1188 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1189 }
1190 }
1191
1192 /* verify that idx < end_idx, str[idx] should be ']' */
1193 if (idx > end_idx || str[idx] != ']') {
1194 raise_errmsg("Expecting object", pystr, end_idx);
1195 goto bail;
1196 }
1197 *next_idx_ptr = idx + 1;
1198 return rval;
1199bail:
1200 Py_XDECREF(val);
1201 Py_DECREF(rval);
1202 return NULL;
1203}
1204
1205static PyObject *
1206_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1207 /* Read a JSON array from PyString pystr.
1208 idx is the index of the first character after the opening brace.
1209 *next_idx_ptr is a return-by-reference index to the first character after
1210 the closing brace.
1211
1212 Returns a new PyList
1213 */
1214 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1215 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1216 PyObject *val = NULL;
1217 PyObject *rval = PyList_New(0);
1218 Py_ssize_t next_idx;
1219 if (rval == NULL)
1220 return NULL;
1221
1222 /* skip whitespace after [ */
1223 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1224
1225 /* only loop if the array is non-empty */
1226 if (idx <= end_idx && str[idx] != ']') {
1227 while (idx <= end_idx) {
1228
1229 /* read any JSON term */
1230 val = scan_once_unicode(s, pystr, idx, &next_idx);
1231 if (val == NULL)
1232 goto bail;
1233
1234 if (PyList_Append(rval, val) == -1)
1235 goto bail;
1236
1237 Py_CLEAR(val);
1238 idx = next_idx;
1239
1240 /* skip whitespace between term and , */
1241 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1242
1243 /* bail if the array is closed or we didn't get the , delimiter */
1244 if (idx > end_idx) break;
1245 if (str[idx] == ']') {
1246 break;
1247 }
1248 else if (str[idx] != ',') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001249 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001250 goto bail;
1251 }
1252 idx++;
1253
1254 /* skip whitespace after , */
1255 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1256 }
1257 }
1258
1259 /* verify that idx < end_idx, str[idx] should be ']' */
1260 if (idx > end_idx || str[idx] != ']') {
1261 raise_errmsg("Expecting object", pystr, end_idx);
1262 goto bail;
1263 }
1264 *next_idx_ptr = idx + 1;
1265 return rval;
1266bail:
1267 Py_XDECREF(val);
1268 Py_DECREF(rval);
1269 return NULL;
1270}
1271
1272static PyObject *
1273_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1274 /* Read a JSON constant from PyString pystr.
1275 constant is the constant string that was found
1276 ("NaN", "Infinity", "-Infinity").
1277 idx is the index of the first character of the constant
1278 *next_idx_ptr is a return-by-reference index to the first character after
1279 the constant.
1280
1281 Returns the result of parse_constant
1282 */
1283 PyObject *cstr;
1284 PyObject *rval;
1285 /* constant is "NaN", "Infinity", or "-Infinity" */
1286 cstr = PyString_InternFromString(constant);
1287 if (cstr == NULL)
1288 return NULL;
1289
1290 /* rval = parse_constant(constant) */
1291 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1292 idx += PyString_GET_SIZE(cstr);
1293 Py_DECREF(cstr);
1294 *next_idx_ptr = idx;
1295 return rval;
1296}
1297
1298static PyObject *
1299_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1300 /* Read a JSON number from PyString pystr.
1301 idx is the index of the first character of the number
1302 *next_idx_ptr is a return-by-reference index to the first character after
1303 the number.
1304
1305 Returns a new PyObject representation of that number:
1306 PyInt, PyLong, or PyFloat.
1307 May return other types if parse_int or parse_float are set
1308 */
1309 char *str = PyString_AS_STRING(pystr);
1310 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1311 Py_ssize_t idx = start;
1312 int is_float = 0;
1313 PyObject *rval;
1314 PyObject *numstr;
1315
1316 /* read a sign if it's there, make sure it's not the end of the string */
1317 if (str[idx] == '-') {
1318 idx++;
1319 if (idx > end_idx) {
1320 PyErr_SetNone(PyExc_StopIteration);
1321 return NULL;
1322 }
1323 }
1324
1325 /* read as many integer digits as we find as long as it doesn't start with 0 */
1326 if (str[idx] >= '1' && str[idx] <= '9') {
1327 idx++;
1328 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1329 }
1330 /* if it starts with 0 we only expect one integer digit */
1331 else if (str[idx] == '0') {
1332 idx++;
1333 }
1334 /* no integer digits, error */
1335 else {
1336 PyErr_SetNone(PyExc_StopIteration);
1337 return NULL;
1338 }
1339
1340 /* if the next char is '.' followed by a digit then read all float digits */
1341 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1342 is_float = 1;
1343 idx += 2;
1344 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1345 }
1346
1347 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1348 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1349
1350 /* save the index of the 'e' or 'E' just in case we need to backtrack */
1351 Py_ssize_t e_start = idx;
1352 idx++;
1353
1354 /* read an exponent sign if present */
1355 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1356
1357 /* read all digits */
1358 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1359
1360 /* if we got a digit, then parse as float. if not, backtrack */
1361 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1362 is_float = 1;
1363 }
1364 else {
1365 idx = e_start;
1366 }
1367 }
1368
1369 /* copy the section we determined to be a number */
1370 numstr = PyString_FromStringAndSize(&str[start], idx - start);
1371 if (numstr == NULL)
1372 return NULL;
1373 if (is_float) {
1374 /* parse as a float using a fast path if available, otherwise call user defined method */
1375 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1376 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1377 }
1378 else {
Eric Smith129c97d2009-10-28 08:44:37 +00001379 double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1380 NULL, NULL);
1381 if (d == -1.0 && PyErr_Occurred())
1382 return NULL;
1383 rval = PyFloat_FromDouble(d);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001384 }
1385 }
1386 else {
1387 /* parse as an int using a fast path if available, otherwise call user defined method */
1388 if (s->parse_int != (PyObject *)&PyInt_Type) {
1389 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1390 }
1391 else {
1392 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1393 }
1394 }
1395 Py_DECREF(numstr);
1396 *next_idx_ptr = idx;
1397 return rval;
1398}
1399
1400static PyObject *
1401_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1402 /* Read a JSON number from PyUnicode pystr.
1403 idx is the index of the first character of the number
1404 *next_idx_ptr is a return-by-reference index to the first character after
1405 the number.
1406
1407 Returns a new PyObject representation of that number:
1408 PyInt, PyLong, or PyFloat.
1409 May return other types if parse_int or parse_float are set
1410 */
1411 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1412 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1413 Py_ssize_t idx = start;
1414 int is_float = 0;
1415 PyObject *rval;
1416 PyObject *numstr;
1417
1418 /* read a sign if it's there, make sure it's not the end of the string */
1419 if (str[idx] == '-') {
1420 idx++;
1421 if (idx > end_idx) {
1422 PyErr_SetNone(PyExc_StopIteration);
1423 return NULL;
1424 }
1425 }
1426
1427 /* read as many integer digits as we find as long as it doesn't start with 0 */
1428 if (str[idx] >= '1' && str[idx] <= '9') {
1429 idx++;
1430 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1431 }
1432 /* if it starts with 0 we only expect one integer digit */
1433 else if (str[idx] == '0') {
1434 idx++;
1435 }
1436 /* no integer digits, error */
1437 else {
1438 PyErr_SetNone(PyExc_StopIteration);
1439 return NULL;
1440 }
1441
1442 /* if the next char is '.' followed by a digit then read all float digits */
1443 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1444 is_float = 1;
1445 idx += 2;
Bob Ippolito76a982a2009-03-29 22:33:58 +00001446 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001447 }
1448
1449 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1450 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1451 Py_ssize_t e_start = idx;
1452 idx++;
1453
1454 /* read an exponent sign if present */
1455 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1456
1457 /* read all digits */
1458 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1459
1460 /* if we got a digit, then parse as float. if not, backtrack */
1461 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1462 is_float = 1;
1463 }
1464 else {
1465 idx = e_start;
1466 }
1467 }
1468
1469 /* copy the section we determined to be a number */
1470 numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1471 if (numstr == NULL)
1472 return NULL;
1473 if (is_float) {
1474 /* parse as a float using a fast path if available, otherwise call user defined method */
1475 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1476 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1477 }
1478 else {
1479 rval = PyFloat_FromString(numstr, NULL);
1480 }
1481 }
1482 else {
1483 /* no fast path for unicode -> int, just call */
1484 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1485 }
1486 Py_DECREF(numstr);
1487 *next_idx_ptr = idx;
1488 return rval;
1489}
1490
1491static PyObject *
1492scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1493{
1494 /* Read one JSON term (of any kind) from PyString pystr.
1495 idx is the index of the first character of the term
1496 *next_idx_ptr is a return-by-reference index to the first character after
1497 the number.
1498
1499 Returns a new PyObject representation of the term.
1500 */
Ezio Melotticec46492011-05-07 17:40:23 +03001501 PyObject *res;
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001502 int strict;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001503 char *str = PyString_AS_STRING(pystr);
1504 Py_ssize_t length = PyString_GET_SIZE(pystr);
Benjamin Peterson3e5d87b2014-04-14 11:43:09 -04001505 if (idx < 0) {
1506 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1507 return NULL;
1508 }
1509 if (idx >= length) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001510 PyErr_SetNone(PyExc_StopIteration);
1511 return NULL;
1512 }
1513 switch (str[idx]) {
1514 case '"':
1515 /* string */
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001516 strict = PyObject_IsTrue(s->strict);
1517 if (strict < 0)
1518 return NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001519 return scanstring_str(pystr, idx + 1,
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001520 PyString_AS_STRING(s->encoding), strict, next_idx_ptr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001521 case '{':
1522 /* object */
Ezio Melotticec46492011-05-07 17:40:23 +03001523 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1524 "from a byte string"))
1525 return NULL;
1526 res = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1527 Py_LeaveRecursiveCall();
1528 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001529 case '[':
1530 /* array */
Ezio Melotticec46492011-05-07 17:40:23 +03001531 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1532 "from a byte string"))
1533 return NULL;
1534 res = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1535 Py_LeaveRecursiveCall();
1536 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001537 case 'n':
1538 /* null */
1539 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1540 Py_INCREF(Py_None);
1541 *next_idx_ptr = idx + 4;
1542 return Py_None;
1543 }
1544 break;
1545 case 't':
1546 /* true */
1547 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1548 Py_INCREF(Py_True);
1549 *next_idx_ptr = idx + 4;
1550 return Py_True;
1551 }
1552 break;
1553 case 'f':
1554 /* false */
1555 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1556 Py_INCREF(Py_False);
1557 *next_idx_ptr = idx + 5;
1558 return Py_False;
1559 }
1560 break;
1561 case 'N':
1562 /* NaN */
1563 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1564 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1565 }
1566 break;
1567 case 'I':
1568 /* Infinity */
1569 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1570 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1571 }
1572 break;
1573 case '-':
1574 /* -Infinity */
1575 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1576 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1577 }
1578 break;
1579 }
1580 /* Didn't find a string, object, array, or named constant. Look for a number. */
1581 return _match_number_str(s, pystr, idx, next_idx_ptr);
1582}
1583
1584static PyObject *
1585scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1586{
1587 /* Read one JSON term (of any kind) from PyUnicode pystr.
1588 idx is the index of the first character of the term
1589 *next_idx_ptr is a return-by-reference index to the first character after
1590 the number.
1591
1592 Returns a new PyObject representation of the term.
1593 */
Ezio Melotticec46492011-05-07 17:40:23 +03001594 PyObject *res;
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001595 int strict;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001596 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1597 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
Benjamin Peterson3e5d87b2014-04-14 11:43:09 -04001598 if (idx < 0) {
1599 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1600 return NULL;
1601 }
1602 if (idx >= length) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001603 PyErr_SetNone(PyExc_StopIteration);
1604 return NULL;
1605 }
1606 switch (str[idx]) {
1607 case '"':
1608 /* string */
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001609 strict = PyObject_IsTrue(s->strict);
1610 if (strict < 0)
1611 return NULL;
1612 return scanstring_unicode(pystr, idx + 1, strict, next_idx_ptr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001613 case '{':
1614 /* object */
Ezio Melotticec46492011-05-07 17:40:23 +03001615 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1616 "from a unicode string"))
1617 return NULL;
1618 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1619 Py_LeaveRecursiveCall();
1620 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001621 case '[':
1622 /* array */
Ezio Melotticec46492011-05-07 17:40:23 +03001623 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1624 "from a unicode string"))
1625 return NULL;
1626 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1627 Py_LeaveRecursiveCall();
1628 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001629 case 'n':
1630 /* null */
1631 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1632 Py_INCREF(Py_None);
1633 *next_idx_ptr = idx + 4;
1634 return Py_None;
1635 }
1636 break;
1637 case 't':
1638 /* true */
1639 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1640 Py_INCREF(Py_True);
1641 *next_idx_ptr = idx + 4;
1642 return Py_True;
1643 }
1644 break;
1645 case 'f':
1646 /* false */
1647 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1648 Py_INCREF(Py_False);
1649 *next_idx_ptr = idx + 5;
1650 return Py_False;
1651 }
1652 break;
1653 case 'N':
1654 /* NaN */
1655 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1656 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1657 }
1658 break;
1659 case 'I':
1660 /* Infinity */
1661 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1662 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1663 }
1664 break;
1665 case '-':
1666 /* -Infinity */
1667 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1668 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1669 }
1670 break;
1671 }
1672 /* Didn't find a string, object, array, or named constant. Look for a number. */
1673 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1674}
1675
1676static PyObject *
1677scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1678{
1679 /* Python callable interface to scan_once_{str,unicode} */
1680 PyObject *pystr;
1681 PyObject *rval;
1682 Py_ssize_t idx;
1683 Py_ssize_t next_idx = -1;
1684 static char *kwlist[] = {"string", "idx", NULL};
1685 PyScannerObject *s;
1686 assert(PyScanner_Check(self));
1687 s = (PyScannerObject *)self;
1688 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1689 return NULL;
1690
1691 if (PyString_Check(pystr)) {
1692 rval = scan_once_str(s, pystr, idx, &next_idx);
1693 }
1694 else if (PyUnicode_Check(pystr)) {
1695 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1696 }
1697 else {
1698 PyErr_Format(PyExc_TypeError,
1699 "first argument must be a string, not %.80s",
1700 Py_TYPE(pystr)->tp_name);
1701 return NULL;
1702 }
1703 return _build_rval_index_tuple(rval, next_idx);
1704}
1705
1706static PyObject *
1707scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1708{
1709 PyScannerObject *s;
1710 s = (PyScannerObject *)type->tp_alloc(type, 0);
1711 if (s != NULL) {
1712 s->encoding = NULL;
1713 s->strict = NULL;
1714 s->object_hook = NULL;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001715 s->pairs_hook = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001716 s->parse_float = NULL;
1717 s->parse_int = NULL;
1718 s->parse_constant = NULL;
1719 }
1720 return (PyObject *)s;
1721}
1722
1723static int
1724scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1725{
1726 /* Initialize Scanner object */
1727 PyObject *ctx;
1728 static char *kwlist[] = {"context", NULL};
1729 PyScannerObject *s;
1730
1731 assert(PyScanner_Check(self));
1732 s = (PyScannerObject *)self;
1733
1734 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1735 return -1;
1736
1737 /* PyString_AS_STRING is used on encoding */
1738 s->encoding = PyObject_GetAttrString(ctx, "encoding");
Antoine Pitrou187177f2009-12-08 15:40:51 +00001739 if (s->encoding == NULL)
1740 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001741 if (s->encoding == Py_None) {
1742 Py_DECREF(Py_None);
1743 s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1744 }
1745 else if (PyUnicode_Check(s->encoding)) {
1746 PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001747 Py_SETREF(s->encoding, tmp);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001748 }
Amaury Forgeot d'Arcdee76e62012-01-13 22:53:25 +01001749 if (s->encoding == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001750 goto bail;
Amaury Forgeot d'Arcdee76e62012-01-13 22:53:25 +01001751 if (!PyString_Check(s->encoding)) {
1752 PyErr_Format(PyExc_TypeError,
1753 "encoding must be a string, not %.80s",
1754 Py_TYPE(s->encoding)->tp_name);
1755 goto bail;
1756 }
1757
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001758
1759 /* All of these will fail "gracefully" so we don't need to verify them */
1760 s->strict = PyObject_GetAttrString(ctx, "strict");
1761 if (s->strict == NULL)
1762 goto bail;
1763 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1764 if (s->object_hook == NULL)
1765 goto bail;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001766 s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
Georg Brandld823bdc2011-01-02 14:20:16 +00001767 if (s->pairs_hook == NULL)
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001768 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001769 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1770 if (s->parse_float == NULL)
1771 goto bail;
1772 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1773 if (s->parse_int == NULL)
1774 goto bail;
1775 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1776 if (s->parse_constant == NULL)
1777 goto bail;
1778
1779 return 0;
1780
1781bail:
1782 Py_CLEAR(s->encoding);
1783 Py_CLEAR(s->strict);
1784 Py_CLEAR(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001785 Py_CLEAR(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001786 Py_CLEAR(s->parse_float);
1787 Py_CLEAR(s->parse_int);
1788 Py_CLEAR(s->parse_constant);
1789 return -1;
1790}
1791
1792PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1793
1794static
1795PyTypeObject PyScannerType = {
1796 PyObject_HEAD_INIT(NULL)
1797 0, /* tp_internal */
1798 "_json.Scanner", /* tp_name */
1799 sizeof(PyScannerObject), /* tp_basicsize */
1800 0, /* tp_itemsize */
1801 scanner_dealloc, /* tp_dealloc */
1802 0, /* tp_print */
1803 0, /* tp_getattr */
1804 0, /* tp_setattr */
1805 0, /* tp_compare */
1806 0, /* tp_repr */
1807 0, /* tp_as_number */
1808 0, /* tp_as_sequence */
1809 0, /* tp_as_mapping */
1810 0, /* tp_hash */
1811 scanner_call, /* tp_call */
1812 0, /* tp_str */
1813 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1814 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1815 0, /* tp_as_buffer */
1816 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1817 scanner_doc, /* tp_doc */
1818 scanner_traverse, /* tp_traverse */
1819 scanner_clear, /* tp_clear */
1820 0, /* tp_richcompare */
1821 0, /* tp_weaklistoffset */
1822 0, /* tp_iter */
1823 0, /* tp_iternext */
1824 0, /* tp_methods */
1825 scanner_members, /* tp_members */
1826 0, /* tp_getset */
1827 0, /* tp_base */
1828 0, /* tp_dict */
1829 0, /* tp_descr_get */
1830 0, /* tp_descr_set */
1831 0, /* tp_dictoffset */
1832 scanner_init, /* tp_init */
1833 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1834 scanner_new, /* tp_new */
1835 0,/* PyObject_GC_Del, */ /* tp_free */
1836};
1837
1838static PyObject *
1839encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1840{
1841 PyEncoderObject *s;
1842 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1843 if (s != NULL) {
1844 s->markers = NULL;
1845 s->defaultfn = NULL;
1846 s->encoder = NULL;
1847 s->indent = NULL;
1848 s->key_separator = NULL;
1849 s->item_separator = NULL;
1850 s->sort_keys = NULL;
1851 s->skipkeys = NULL;
1852 }
1853 return (PyObject *)s;
1854}
1855
1856static int
1857encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1858{
1859 /* initialize Encoder object */
1860 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1861
1862 PyEncoderObject *s;
Antoine Pitrou187177f2009-12-08 15:40:51 +00001863 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001864 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan_obj;
1865 int allow_nan;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001866
1867 assert(PyEncoder_Check(self));
1868 s = (PyEncoderObject *)self;
1869
1870 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
Antoine Pitrou187177f2009-12-08 15:40:51 +00001871 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001872 &sort_keys, &skipkeys, &allow_nan_obj))
1873 return -1;
1874
1875 allow_nan = PyObject_IsTrue(allow_nan_obj);
1876 if (allow_nan < 0)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001877 return -1;
1878
Serhiy Storchaka76a64ca2015-07-26 09:07:53 +03001879 if (markers != Py_None && !PyDict_Check(markers)) {
1880 PyErr_Format(PyExc_TypeError,
1881 "make_encoder() argument 1 must be dict or None, "
1882 "not %.200s", Py_TYPE(markers)->tp_name);
1883 return -1;
1884 }
1885
Antoine Pitrou187177f2009-12-08 15:40:51 +00001886 s->markers = markers;
1887 s->defaultfn = defaultfn;
1888 s->encoder = encoder;
1889 s->indent = indent;
1890 s->key_separator = key_separator;
1891 s->item_separator = item_separator;
1892 s->sort_keys = sort_keys;
1893 s->skipkeys = skipkeys;
1894 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001895 s->allow_nan = allow_nan;
Antoine Pitrou187177f2009-12-08 15:40:51 +00001896
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001897 Py_INCREF(s->markers);
1898 Py_INCREF(s->defaultfn);
1899 Py_INCREF(s->encoder);
1900 Py_INCREF(s->indent);
1901 Py_INCREF(s->key_separator);
1902 Py_INCREF(s->item_separator);
1903 Py_INCREF(s->sort_keys);
1904 Py_INCREF(s->skipkeys);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001905 return 0;
1906}
1907
1908static PyObject *
1909encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1910{
1911 /* Python callable interface to encode_listencode_obj */
1912 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1913 PyObject *obj;
1914 PyObject *rval;
1915 Py_ssize_t indent_level;
1916 PyEncoderObject *s;
1917 assert(PyEncoder_Check(self));
1918 s = (PyEncoderObject *)self;
1919 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1920 &obj, _convertPyInt_AsSsize_t, &indent_level))
1921 return NULL;
1922 rval = PyList_New(0);
1923 if (rval == NULL)
1924 return NULL;
1925 if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1926 Py_DECREF(rval);
1927 return NULL;
1928 }
1929 return rval;
1930}
1931
1932static PyObject *
1933_encoded_const(PyObject *obj)
1934{
1935 /* Return the JSON string representation of None, True, False */
1936 if (obj == Py_None) {
1937 static PyObject *s_null = NULL;
1938 if (s_null == NULL) {
1939 s_null = PyString_InternFromString("null");
1940 }
1941 Py_INCREF(s_null);
1942 return s_null;
1943 }
1944 else if (obj == Py_True) {
1945 static PyObject *s_true = NULL;
1946 if (s_true == NULL) {
1947 s_true = PyString_InternFromString("true");
1948 }
1949 Py_INCREF(s_true);
1950 return s_true;
1951 }
1952 else if (obj == Py_False) {
1953 static PyObject *s_false = NULL;
1954 if (s_false == NULL) {
1955 s_false = PyString_InternFromString("false");
1956 }
1957 Py_INCREF(s_false);
1958 return s_false;
1959 }
1960 else {
1961 PyErr_SetString(PyExc_ValueError, "not a const");
1962 return NULL;
1963 }
1964}
1965
1966static PyObject *
1967encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1968{
1969 /* Return the JSON representation of a PyFloat */
1970 double i = PyFloat_AS_DOUBLE(obj);
1971 if (!Py_IS_FINITE(i)) {
1972 if (!s->allow_nan) {
1973 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1974 return NULL;
1975 }
1976 if (i > 0) {
1977 return PyString_FromString("Infinity");
1978 }
1979 else if (i < 0) {
1980 return PyString_FromString("-Infinity");
1981 }
1982 else {
1983 return PyString_FromString("NaN");
1984 }
1985 }
Mark Dickinsone6239a32016-09-03 17:45:00 +01001986 /* Make sure to use the base float class repr method */
1987 return PyFloat_Type.tp_repr(obj);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001988}
1989
1990static PyObject *
1991encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1992{
1993 /* Return the JSON representation of a string */
1994 if (s->fast_encode)
1995 return py_encode_basestring_ascii(NULL, obj);
1996 else
1997 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1998}
1999
2000static int
2001_steal_list_append(PyObject *lst, PyObject *stolen)
2002{
2003 /* Append stolen and then decrement its reference count */
2004 int rval = PyList_Append(lst, stolen);
2005 Py_DECREF(stolen);
2006 return rval;
2007}
2008
2009static int
2010encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
2011{
2012 /* Encode Python object obj to a JSON term, rval is a PyList */
2013 PyObject *newobj;
2014 int rv;
2015
2016 if (obj == Py_None || obj == Py_True || obj == Py_False) {
2017 PyObject *cstr = _encoded_const(obj);
2018 if (cstr == NULL)
2019 return -1;
2020 return _steal_list_append(rval, cstr);
2021 }
2022 else if (PyString_Check(obj) || PyUnicode_Check(obj))
2023 {
2024 PyObject *encoded = encoder_encode_string(s, obj);
2025 if (encoded == NULL)
2026 return -1;
2027 return _steal_list_append(rval, encoded);
2028 }
2029 else if (PyInt_Check(obj) || PyLong_Check(obj)) {
2030 PyObject *encoded = PyObject_Str(obj);
2031 if (encoded == NULL)
2032 return -1;
2033 return _steal_list_append(rval, encoded);
2034 }
2035 else if (PyFloat_Check(obj)) {
2036 PyObject *encoded = encoder_encode_float(s, obj);
2037 if (encoded == NULL)
2038 return -1;
2039 return _steal_list_append(rval, encoded);
2040 }
2041 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002042 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2043 return -1;
2044 rv = encoder_listencode_list(s, rval, obj, indent_level);
2045 Py_LeaveRecursiveCall();
2046 return rv;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002047 }
2048 else if (PyDict_Check(obj)) {
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002049 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2050 return -1;
2051 rv = encoder_listencode_dict(s, rval, obj, indent_level);
2052 Py_LeaveRecursiveCall();
2053 return rv;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002054 }
2055 else {
2056 PyObject *ident = NULL;
2057 if (s->markers != Py_None) {
2058 int has_key;
2059 ident = PyLong_FromVoidPtr(obj);
2060 if (ident == NULL)
2061 return -1;
2062 has_key = PyDict_Contains(s->markers, ident);
2063 if (has_key) {
2064 if (has_key != -1)
2065 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2066 Py_DECREF(ident);
2067 return -1;
2068 }
2069 if (PyDict_SetItem(s->markers, ident, obj)) {
2070 Py_DECREF(ident);
2071 return -1;
2072 }
2073 }
2074 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2075 if (newobj == NULL) {
2076 Py_XDECREF(ident);
2077 return -1;
2078 }
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002079
Serhiy Storchakaf04790a2017-01-03 11:17:53 +02002080 if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
2081 Py_DECREF(newobj);
2082 Py_XDECREF(ident);
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002083 return -1;
Serhiy Storchakaf04790a2017-01-03 11:17:53 +02002084 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002085 rv = encoder_listencode_obj(s, rval, newobj, indent_level);
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002086 Py_LeaveRecursiveCall();
2087
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002088 Py_DECREF(newobj);
2089 if (rv) {
2090 Py_XDECREF(ident);
2091 return -1;
2092 }
2093 if (ident != NULL) {
2094 if (PyDict_DelItem(s->markers, ident)) {
2095 Py_XDECREF(ident);
2096 return -1;
2097 }
2098 Py_XDECREF(ident);
2099 }
2100 return rv;
2101 }
2102}
2103
2104static int
2105encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2106{
2107 /* Encode Python dict dct a JSON term, rval is a PyList */
2108 static PyObject *open_dict = NULL;
2109 static PyObject *close_dict = NULL;
2110 static PyObject *empty_dict = NULL;
2111 PyObject *kstr = NULL;
2112 PyObject *ident = NULL;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002113 PyObject *key = NULL;
2114 PyObject *value = NULL;
2115 PyObject *it = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002116 int skipkeys;
2117 Py_ssize_t idx;
2118
2119 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
2120 open_dict = PyString_InternFromString("{");
2121 close_dict = PyString_InternFromString("}");
2122 empty_dict = PyString_InternFromString("{}");
2123 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
2124 return -1;
2125 }
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002126 if (Py_SIZE(dct) == 0)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002127 return PyList_Append(rval, empty_dict);
2128
2129 if (s->markers != Py_None) {
2130 int has_key;
2131 ident = PyLong_FromVoidPtr(dct);
2132 if (ident == NULL)
2133 goto bail;
2134 has_key = PyDict_Contains(s->markers, ident);
2135 if (has_key) {
2136 if (has_key != -1)
2137 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2138 goto bail;
2139 }
2140 if (PyDict_SetItem(s->markers, ident, dct)) {
2141 goto bail;
2142 }
2143 }
2144
2145 if (PyList_Append(rval, open_dict))
2146 goto bail;
2147
2148 if (s->indent != Py_None) {
2149 /* TODO: DOES NOT RUN */
2150 indent_level += 1;
2151 /*
2152 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2153 separator = _item_separator + newline_indent
2154 buf += newline_indent
2155 */
2156 }
2157
2158 /* TODO: C speedup not implemented for sort_keys */
2159
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002160 it = PyObject_GetIter(dct);
2161 if (it == NULL)
2162 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002163 skipkeys = PyObject_IsTrue(s->skipkeys);
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03002164 if (skipkeys < 0)
2165 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002166 idx = 0;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002167 while ((key = PyIter_Next(it)) != NULL) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002168 PyObject *encoded;
2169
2170 if (PyString_Check(key) || PyUnicode_Check(key)) {
2171 Py_INCREF(key);
2172 kstr = key;
2173 }
2174 else if (PyFloat_Check(key)) {
2175 kstr = encoder_encode_float(s, key);
2176 if (kstr == NULL)
2177 goto bail;
2178 }
2179 else if (PyInt_Check(key) || PyLong_Check(key)) {
2180 kstr = PyObject_Str(key);
2181 if (kstr == NULL)
2182 goto bail;
2183 }
2184 else if (key == Py_True || key == Py_False || key == Py_None) {
2185 kstr = _encoded_const(key);
2186 if (kstr == NULL)
2187 goto bail;
2188 }
2189 else if (skipkeys) {
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002190 Py_DECREF(key);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002191 continue;
2192 }
2193 else {
2194 /* TODO: include repr of key */
Doug Hellmannf31db932010-07-21 12:36:33 +00002195 PyErr_SetString(PyExc_TypeError, "keys must be a string");
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002196 goto bail;
2197 }
2198
2199 if (idx) {
2200 if (PyList_Append(rval, s->item_separator))
2201 goto bail;
2202 }
2203
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002204 value = PyObject_GetItem(dct, key);
2205 if (value == NULL)
2206 goto bail;
2207
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002208 encoded = encoder_encode_string(s, kstr);
2209 Py_CLEAR(kstr);
2210 if (encoded == NULL)
2211 goto bail;
2212 if (PyList_Append(rval, encoded)) {
2213 Py_DECREF(encoded);
2214 goto bail;
2215 }
2216 Py_DECREF(encoded);
2217 if (PyList_Append(rval, s->key_separator))
2218 goto bail;
2219 if (encoder_listencode_obj(s, rval, value, indent_level))
2220 goto bail;
2221 idx += 1;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002222 Py_CLEAR(value);
2223 Py_DECREF(key);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002224 }
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002225 if (PyErr_Occurred())
2226 goto bail;
2227 Py_CLEAR(it);
2228
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002229 if (ident != NULL) {
2230 if (PyDict_DelItem(s->markers, ident))
2231 goto bail;
2232 Py_CLEAR(ident);
2233 }
2234 if (s->indent != Py_None) {
2235 /* TODO: DOES NOT RUN */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002236 /*
Brett Cannon8e9757e2010-05-03 23:43:49 +00002237 indent_level -= 1;
2238
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002239 yield '\n' + (' ' * (_indent * _current_indent_level))
2240 */
2241 }
2242 if (PyList_Append(rval, close_dict))
2243 goto bail;
2244 return 0;
2245
2246bail:
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002247 Py_XDECREF(it);
2248 Py_XDECREF(key);
2249 Py_XDECREF(value);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002250 Py_XDECREF(kstr);
2251 Py_XDECREF(ident);
2252 return -1;
2253}
2254
2255
2256static int
2257encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2258{
2259 /* Encode Python list seq to a JSON term, rval is a PyList */
2260 static PyObject *open_array = NULL;
2261 static PyObject *close_array = NULL;
2262 static PyObject *empty_array = NULL;
2263 PyObject *ident = NULL;
2264 PyObject *s_fast = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002265 Py_ssize_t i;
2266
2267 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2268 open_array = PyString_InternFromString("[");
2269 close_array = PyString_InternFromString("]");
2270 empty_array = PyString_InternFromString("[]");
2271 if (open_array == NULL || close_array == NULL || empty_array == NULL)
2272 return -1;
2273 }
2274 ident = NULL;
2275 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
2276 if (s_fast == NULL)
2277 return -1;
Antoine Pitroue9e35c32012-11-01 20:07:40 +01002278 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002279 Py_DECREF(s_fast);
2280 return PyList_Append(rval, empty_array);
2281 }
2282
2283 if (s->markers != Py_None) {
2284 int has_key;
2285 ident = PyLong_FromVoidPtr(seq);
2286 if (ident == NULL)
2287 goto bail;
2288 has_key = PyDict_Contains(s->markers, ident);
2289 if (has_key) {
2290 if (has_key != -1)
2291 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2292 goto bail;
2293 }
2294 if (PyDict_SetItem(s->markers, ident, seq)) {
2295 goto bail;
2296 }
2297 }
2298
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002299 if (PyList_Append(rval, open_array))
2300 goto bail;
2301 if (s->indent != Py_None) {
2302 /* TODO: DOES NOT RUN */
2303 indent_level += 1;
2304 /*
2305 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2306 separator = _item_separator + newline_indent
2307 buf += newline_indent
2308 */
2309 }
Antoine Pitroue9e35c32012-11-01 20:07:40 +01002310 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
2311 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002312 if (i) {
2313 if (PyList_Append(rval, s->item_separator))
2314 goto bail;
2315 }
2316 if (encoder_listencode_obj(s, rval, obj, indent_level))
2317 goto bail;
2318 }
2319 if (ident != NULL) {
2320 if (PyDict_DelItem(s->markers, ident))
2321 goto bail;
2322 Py_CLEAR(ident);
2323 }
2324 if (s->indent != Py_None) {
2325 /* TODO: DOES NOT RUN */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002326 /*
Brett Cannon8e9757e2010-05-03 23:43:49 +00002327 indent_level -= 1;
2328
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002329 yield '\n' + (' ' * (_indent * _current_indent_level))
2330 */
2331 }
2332 if (PyList_Append(rval, close_array))
2333 goto bail;
2334 Py_DECREF(s_fast);
2335 return 0;
2336
2337bail:
2338 Py_XDECREF(ident);
2339 Py_DECREF(s_fast);
2340 return -1;
2341}
2342
2343static void
2344encoder_dealloc(PyObject *self)
2345{
2346 /* Deallocate Encoder */
2347 encoder_clear(self);
2348 Py_TYPE(self)->tp_free(self);
2349}
2350
2351static int
2352encoder_traverse(PyObject *self, visitproc visit, void *arg)
2353{
2354 PyEncoderObject *s;
2355 assert(PyEncoder_Check(self));
2356 s = (PyEncoderObject *)self;
2357 Py_VISIT(s->markers);
2358 Py_VISIT(s->defaultfn);
2359 Py_VISIT(s->encoder);
2360 Py_VISIT(s->indent);
2361 Py_VISIT(s->key_separator);
2362 Py_VISIT(s->item_separator);
2363 Py_VISIT(s->sort_keys);
2364 Py_VISIT(s->skipkeys);
2365 return 0;
2366}
2367
2368static int
2369encoder_clear(PyObject *self)
2370{
2371 /* Deallocate Encoder */
2372 PyEncoderObject *s;
2373 assert(PyEncoder_Check(self));
2374 s = (PyEncoderObject *)self;
2375 Py_CLEAR(s->markers);
2376 Py_CLEAR(s->defaultfn);
2377 Py_CLEAR(s->encoder);
2378 Py_CLEAR(s->indent);
2379 Py_CLEAR(s->key_separator);
2380 Py_CLEAR(s->item_separator);
2381 Py_CLEAR(s->sort_keys);
2382 Py_CLEAR(s->skipkeys);
2383 return 0;
2384}
2385
2386PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2387
2388static
2389PyTypeObject PyEncoderType = {
2390 PyObject_HEAD_INIT(NULL)
2391 0, /* tp_internal */
2392 "_json.Encoder", /* tp_name */
2393 sizeof(PyEncoderObject), /* tp_basicsize */
2394 0, /* tp_itemsize */
2395 encoder_dealloc, /* tp_dealloc */
2396 0, /* tp_print */
2397 0, /* tp_getattr */
2398 0, /* tp_setattr */
2399 0, /* tp_compare */
2400 0, /* tp_repr */
2401 0, /* tp_as_number */
2402 0, /* tp_as_sequence */
2403 0, /* tp_as_mapping */
2404 0, /* tp_hash */
2405 encoder_call, /* tp_call */
2406 0, /* tp_str */
2407 0, /* tp_getattro */
2408 0, /* tp_setattro */
2409 0, /* tp_as_buffer */
2410 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2411 encoder_doc, /* tp_doc */
2412 encoder_traverse, /* tp_traverse */
2413 encoder_clear, /* tp_clear */
2414 0, /* tp_richcompare */
2415 0, /* tp_weaklistoffset */
2416 0, /* tp_iter */
2417 0, /* tp_iternext */
2418 0, /* tp_methods */
2419 encoder_members, /* tp_members */
2420 0, /* tp_getset */
2421 0, /* tp_base */
2422 0, /* tp_dict */
2423 0, /* tp_descr_get */
2424 0, /* tp_descr_set */
2425 0, /* tp_dictoffset */
2426 encoder_init, /* tp_init */
2427 0, /* tp_alloc */
2428 encoder_new, /* tp_new */
2429 0, /* tp_free */
2430};
2431
2432static PyMethodDef speedups_methods[] = {
2433 {"encode_basestring_ascii",
2434 (PyCFunction)py_encode_basestring_ascii,
2435 METH_O,
2436 pydoc_encode_basestring_ascii},
2437 {"scanstring",
2438 (PyCFunction)py_scanstring,
2439 METH_VARARGS,
2440 pydoc_scanstring},
Brett Cannon4b964f92008-05-05 20:21:38 +00002441 {NULL, NULL, 0, NULL}
2442};
2443
2444PyDoc_STRVAR(module_doc,
2445"json speedups\n");
2446
2447void
2448init_json(void)
2449{
2450 PyObject *m;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002451 PyScannerType.tp_new = PyType_GenericNew;
2452 if (PyType_Ready(&PyScannerType) < 0)
2453 return;
2454 PyEncoderType.tp_new = PyType_GenericNew;
2455 if (PyType_Ready(&PyEncoderType) < 0)
2456 return;
2457 m = Py_InitModule3("_json", speedups_methods, module_doc);
Serhiy Storchaka045c4512015-07-24 12:58:25 +03002458 if (m == NULL)
2459 return;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002460 Py_INCREF((PyObject*)&PyScannerType);
2461 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2462 Py_INCREF((PyObject*)&PyEncoderType);
2463 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
Brett Cannon4b964f92008-05-05 20:21:38 +00002464}