blob: 42c93aba1e22a28a6e17b46e29ad44b75991f13a [file] [log] [blame]
Brett Cannon4b964f92008-05-05 20:21:38 +00001#include "Python.h"
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002#include "structmember.h"
/* Compatibility shims for building against older Python 2 headers. */

/* Provide Py_TYPE() when the headers predate 2.6 and do not define it. */
#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif

/* Headers older than 2.5 have no Py_ssize_t: map it onto int and route the
   Ssize_t integer converters to their long-based counterparts. */
#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
typedef int Py_ssize_t;
#define PY_SSIZE_T_MAX INT_MAX
#define PY_SSIZE_T_MIN INT_MIN
#define PyInt_FromSsize_t PyInt_FromLong
#define PyInt_AsSsize_t PyInt_AsLong
#endif

/* Fallback finiteness test built from the infinity and NaN predicates. */
#ifndef Py_IS_FINITE
#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
#endif

/* UNUSED tags a deliberately ignored parameter; it expands to nothing
   on compilers other than GCC-compatible ones. */
#ifdef __GNUC__
#define UNUSED __attribute__((__unused__))
#else
#define UNUSED
#endif
Brett Cannon4b964f92008-05-05 20:21:38 +000022
23#define DEFAULT_ENCODING "utf-8"
Brett Cannon4b964f92008-05-05 20:21:38 +000024
Bob Ippolitod914e3f2009-03-17 23:19:00 +000025#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
26#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
27#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
28#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
29
30static PyTypeObject PyScannerType;
31static PyTypeObject PyEncoderType;
32
33typedef struct _PyScannerObject {
34 PyObject_HEAD
35 PyObject *encoding;
36 PyObject *strict;
37 PyObject *object_hook;
Raymond Hettinger91852ca2009-03-19 19:19:03 +000038 PyObject *pairs_hook;
Bob Ippolitod914e3f2009-03-17 23:19:00 +000039 PyObject *parse_float;
40 PyObject *parse_int;
41 PyObject *parse_constant;
42} PyScannerObject;
43
44static PyMemberDef scanner_members[] = {
45 {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
46 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
47 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
Raymond Hettinger91852ca2009-03-19 19:19:03 +000048 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
Bob Ippolitod914e3f2009-03-17 23:19:00 +000049 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
50 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
51 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
52 {NULL}
53};
54
55typedef struct _PyEncoderObject {
56 PyObject_HEAD
57 PyObject *markers;
58 PyObject *defaultfn;
59 PyObject *encoder;
60 PyObject *indent;
61 PyObject *key_separator;
62 PyObject *item_separator;
63 PyObject *sort_keys;
64 PyObject *skipkeys;
65 int fast_encode;
66 int allow_nan;
67} PyEncoderObject;
68
69static PyMemberDef encoder_members[] = {
70 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
71 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
72 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
73 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
74 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
75 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
76 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
77 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
78 {NULL}
79};
80
81static Py_ssize_t
82ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
83static PyObject *
84ascii_escape_unicode(PyObject *pystr);
85static PyObject *
86ascii_escape_str(PyObject *pystr);
87static PyObject *
88py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
89void init_json(void);
90static PyObject *
91scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
92static PyObject *
93scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
94static PyObject *
95_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
96static PyObject *
97scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Bob Ippolitod914e3f2009-03-17 23:19:00 +000098static void
99scanner_dealloc(PyObject *self);
100static int
101scanner_clear(PyObject *self);
102static PyObject *
103encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000104static void
105encoder_dealloc(PyObject *self);
106static int
107encoder_clear(PyObject *self);
108static int
109encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
110static int
111encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
112static int
113encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
114static PyObject *
Raymond Hettingera0b8d9a2009-03-19 19:24:43 +0000115_encoded_const(PyObject *obj);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000116static void
117raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
118static PyObject *
119encoder_encode_string(PyEncoderObject *s, PyObject *obj);
120static int
121_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
122static PyObject *
123_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
124static PyObject *
125encoder_encode_float(PyEncoderObject *s, PyObject *obj);
126
/* S_CHAR(c) is true when c can be copied into JSON output unescaped:
   printable ASCII (' ' through '~') other than backslash and double quote.
   The argument is parenthesized so that expressions such as `x & 0xff` or
   `a ? b : c` are classified correctly.  It is still evaluated more than
   once, so do not pass an expression with side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')

/* IS_WHITESPACE(c) is true for space, tab, line feed and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* Output bytes needed for one escaped input character: "\uXXXX" is 6 bytes.
   With wide Py_UNICODE a single character may need a surrogate pair,
   "\uXXXX\uXXXX", which doubles the worst case. */
#define MIN_EXPANSION 6
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
136
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000137static int
138_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
139{
140 /* PyObject to Py_ssize_t converter */
141 *size_ptr = PyInt_AsSsize_t(o);
Georg Brandlf71ba952009-05-05 07:48:12 +0000142 if (*size_ptr == -1 && PyErr_Occurred())
143 return 0;
144 return 1;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000145}
146
147static PyObject *
148_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
149{
150 /* Py_ssize_t to PyObject converter */
151 return PyInt_FromSsize_t(*size_ptr);
152}
153
Brett Cannon4b964f92008-05-05 20:21:38 +0000154static Py_ssize_t
155ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
156{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000157 /* Escape unicode code point c to ASCII escape sequences
158 in char *output. output must have at least 12 bytes unused to
159 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Brett Cannon4b964f92008-05-05 20:21:38 +0000160 output[chars++] = '\\';
161 switch (c) {
162 case '\\': output[chars++] = (char)c; break;
163 case '"': output[chars++] = (char)c; break;
164 case '\b': output[chars++] = 'b'; break;
165 case '\f': output[chars++] = 'f'; break;
166 case '\n': output[chars++] = 'n'; break;
167 case '\r': output[chars++] = 'r'; break;
168 case '\t': output[chars++] = 't'; break;
169 default:
170#ifdef Py_UNICODE_WIDE
171 if (c >= 0x10000) {
172 /* UTF-16 surrogate pair */
173 Py_UNICODE v = c - 0x10000;
174 c = 0xd800 | ((v >> 10) & 0x3ff);
175 output[chars++] = 'u';
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000176 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
177 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
178 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
179 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Brett Cannon4b964f92008-05-05 20:21:38 +0000180 c = 0xdc00 | (v & 0x3ff);
181 output[chars++] = '\\';
182 }
183#endif
184 output[chars++] = 'u';
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000185 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
186 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
187 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
188 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Brett Cannon4b964f92008-05-05 20:21:38 +0000189 }
190 return chars;
191}
192
193static PyObject *
194ascii_escape_unicode(PyObject *pystr)
195{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000196 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
Brett Cannon4b964f92008-05-05 20:21:38 +0000197 Py_ssize_t i;
198 Py_ssize_t input_chars;
199 Py_ssize_t output_size;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000200 Py_ssize_t max_output_size;
Brett Cannon4b964f92008-05-05 20:21:38 +0000201 Py_ssize_t chars;
Benjamin Petersonaa034202016-09-26 23:55:41 -0700202 Py_ssize_t incr;
Brett Cannon4b964f92008-05-05 20:21:38 +0000203 PyObject *rval;
204 char *output;
205 Py_UNICODE *input_unicode;
206
207 input_chars = PyUnicode_GET_SIZE(pystr);
208 input_unicode = PyUnicode_AS_UNICODE(pystr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000209
Benjamin Petersonaa034202016-09-26 23:55:41 -0700210 output_size = input_chars;
211 incr = 2; /* for quotes */
Brett Cannon4b964f92008-05-05 20:21:38 +0000212 /* One char input can be up to 6 chars output, estimate 4 of these */
Benjamin Petersonaa034202016-09-26 23:55:41 -0700213 incr += MIN_EXPANSION * 4;
214 if (PY_SSIZE_T_MAX - incr < output_size) {
215 PyErr_NoMemory();
Benjamin Peterson04a53852016-08-13 16:47:25 -0700216 return NULL;
217 }
Benjamin Petersonaa034202016-09-26 23:55:41 -0700218 output_size += incr;
219 if (PY_SSIZE_T_MAX / MAX_EXPANSION < input_chars ||
220 PY_SSIZE_T_MAX - 2 < input_chars * MAX_EXPANSION)
221 max_output_size = PY_SSIZE_T_MAX;
222 else
223 max_output_size = 2 + (input_chars * MAX_EXPANSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000224 rval = PyString_FromStringAndSize(NULL, output_size);
Brett Cannon4b964f92008-05-05 20:21:38 +0000225 if (rval == NULL) {
226 return NULL;
227 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000228 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000229 chars = 0;
230 output[chars++] = '"';
231 for (i = 0; i < input_chars; i++) {
232 Py_UNICODE c = input_unicode[i];
233 if (S_CHAR(c)) {
234 output[chars++] = (char)c;
235 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000236 else {
Brett Cannon4b964f92008-05-05 20:21:38 +0000237 chars = ascii_escape_char(c, output, chars);
238 }
239 if (output_size - chars < (1 + MAX_EXPANSION)) {
Benjamin Petersonaa034202016-09-26 23:55:41 -0700240 if (output_size == PY_SSIZE_T_MAX) {
241 Py_DECREF(rval);
242 PyErr_NoMemory();
243 return NULL;
244 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000245 /* There's more than four, so let's resize by a lot */
Benjamin Petersonaa034202016-09-26 23:55:41 -0700246 if (PY_SSIZE_T_MAX / 2 >= output_size && output_size * 2 < max_output_size)
247 output_size *= 2;
248 else
249 output_size = max_output_size;
250 if (_PyString_Resize(&rval, output_size) == -1) {
251 return NULL;
Brett Cannon4b964f92008-05-05 20:21:38 +0000252 }
Benjamin Petersonaa034202016-09-26 23:55:41 -0700253 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000254 }
255 }
256 output[chars++] = '"';
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000257 if (_PyString_Resize(&rval, chars) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000258 return NULL;
259 }
260 return rval;
261}
262
263static PyObject *
264ascii_escape_str(PyObject *pystr)
265{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000266 /* Take a PyString pystr and return a new ASCII-only escaped PyString */
Brett Cannon4b964f92008-05-05 20:21:38 +0000267 Py_ssize_t i;
268 Py_ssize_t input_chars;
269 Py_ssize_t output_size;
Benjamin Petersonaa034202016-09-26 23:55:41 -0700270 Py_ssize_t max_output_size;
Brett Cannon4b964f92008-05-05 20:21:38 +0000271 Py_ssize_t chars;
Benjamin Petersonaa034202016-09-26 23:55:41 -0700272 Py_ssize_t incr;
Brett Cannon4b964f92008-05-05 20:21:38 +0000273 PyObject *rval;
274 char *output;
275 char *input_str;
276
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000277 input_chars = PyString_GET_SIZE(pystr);
278 input_str = PyString_AS_STRING(pystr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000279
280 /* Fast path for a string that's already ASCII */
281 for (i = 0; i < input_chars; i++) {
282 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
283 if (!S_CHAR(c)) {
284 /* If we have to escape something, scan the string for unicode */
285 Py_ssize_t j;
286 for (j = i; j < input_chars; j++) {
287 c = (Py_UNICODE)(unsigned char)input_str[j];
288 if (c > 0x7f) {
289 /* We hit a non-ASCII character, bail to unicode mode */
290 PyObject *uni;
291 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
292 if (uni == NULL) {
293 return NULL;
294 }
295 rval = ascii_escape_unicode(uni);
296 Py_DECREF(uni);
297 return rval;
298 }
299 }
300 break;
301 }
302 }
303
Benjamin Petersonaa034202016-09-26 23:55:41 -0700304 output_size = input_chars;
305 incr = 2; /* for quotes */
306 if (i != input_chars) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000307 /* One char input can be up to 6 chars output, estimate 4 of these */
Benjamin Petersonaa034202016-09-26 23:55:41 -0700308 incr += MIN_EXPANSION * 4;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000309 }
Benjamin Petersonaa034202016-09-26 23:55:41 -0700310 if (PY_SSIZE_T_MAX - incr < output_size) {
311 PyErr_NoMemory();
312 return NULL;
313 }
314 output_size += incr;
315 if (PY_SSIZE_T_MAX / MIN_EXPANSION < input_chars ||
316 PY_SSIZE_T_MAX - 2 < input_chars * MIN_EXPANSION)
317 max_output_size = PY_SSIZE_T_MAX;
318 else
319 max_output_size = 2 + (input_chars * MIN_EXPANSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000320 rval = PyString_FromStringAndSize(NULL, output_size);
Brett Cannon4b964f92008-05-05 20:21:38 +0000321 if (rval == NULL) {
322 return NULL;
323 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000324 output = PyString_AS_STRING(rval);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000325 output[0] = '"';
326
327 /* We know that everything up to i is ASCII already */
328 chars = i + 1;
329 memcpy(&output[1], input_str, i);
330
331 for (; i < input_chars; i++) {
332 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
Brett Cannon4b964f92008-05-05 20:21:38 +0000333 if (S_CHAR(c)) {
334 output[chars++] = (char)c;
335 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000336 else {
Brett Cannon4b964f92008-05-05 20:21:38 +0000337 chars = ascii_escape_char(c, output, chars);
338 }
339 /* An ASCII char can't possibly expand to a surrogate! */
340 if (output_size - chars < (1 + MIN_EXPANSION)) {
Benjamin Petersonaa034202016-09-26 23:55:41 -0700341 if (output_size == PY_SSIZE_T_MAX) {
342 Py_DECREF(rval);
343 PyErr_NoMemory();
344 return NULL;
Brett Cannon4b964f92008-05-05 20:21:38 +0000345 }
Benjamin Petersonaa034202016-09-26 23:55:41 -0700346 /* There's more than four, so let's resize by a lot */
347 if (PY_SSIZE_T_MAX / 2 >= output_size && output_size * 2 < max_output_size)
348 output_size *= 2;
349 else
350 output_size = max_output_size;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000351 if (_PyString_Resize(&rval, output_size) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000352 return NULL;
353 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000354 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000355 }
356 }
357 output[chars++] = '"';
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000358 if (_PyString_Resize(&rval, chars) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000359 return NULL;
360 }
361 return rval;
362}
363
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000364static void
Brett Cannon4b964f92008-05-05 20:21:38 +0000365raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
366{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000367 /* Use the Python function json.decoder.errmsg to raise a nice
368 looking ValueError exception */
Brett Cannon4b964f92008-05-05 20:21:38 +0000369 static PyObject *errmsg_fn = NULL;
370 PyObject *pymsg;
371 if (errmsg_fn == NULL) {
372 PyObject *decoder = PyImport_ImportModule("json.decoder");
373 if (decoder == NULL)
374 return;
375 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000376 Py_DECREF(decoder);
Brett Cannon4b964f92008-05-05 20:21:38 +0000377 if (errmsg_fn == NULL)
378 return;
Brett Cannon4b964f92008-05-05 20:21:38 +0000379 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000380 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
Benjamin Peterson595e3cb2008-10-16 21:09:28 +0000381 if (pymsg) {
382 PyErr_SetObject(PyExc_ValueError, pymsg);
383 Py_DECREF(pymsg);
384 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000385}
386
387static PyObject *
388join_list_unicode(PyObject *lst)
389{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000390 /* return u''.join(lst) */
391 static PyObject *joinfn = NULL;
392 if (joinfn == NULL) {
393 PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
394 if (ustr == NULL)
395 return NULL;
396
397 joinfn = PyObject_GetAttrString(ustr, "join");
398 Py_DECREF(ustr);
399 if (joinfn == NULL)
400 return NULL;
Brett Cannon4b964f92008-05-05 20:21:38 +0000401 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000402 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
Brett Cannon4b964f92008-05-05 20:21:38 +0000403}
404
405static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000406_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
407 /* return (rval, idx) tuple, stealing reference to rval */
408 PyObject *tpl;
409 PyObject *pyidx;
410 /*
411 steal a reference to rval, returns (rval, idx)
412 */
413 if (rval == NULL) {
414 return NULL;
415 }
416 pyidx = PyInt_FromSsize_t(idx);
417 if (pyidx == NULL) {
418 Py_DECREF(rval);
419 return NULL;
420 }
421 tpl = PyTuple_New(2);
422 if (tpl == NULL) {
423 Py_DECREF(pyidx);
424 Py_DECREF(rval);
425 return NULL;
426 }
427 PyTuple_SET_ITEM(tpl, 0, rval);
428 PyTuple_SET_ITEM(tpl, 1, pyidx);
429 return tpl;
430}
431
432static PyObject *
433scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
434{
435 /* Read the JSON string from PyString pystr.
436 end is the index of the first character after the quote.
437 encoding is the encoding of pystr (must be an ASCII superset)
438 if strict is zero then literal control characters are allowed
439 *next_end_ptr is a return-by-reference index of the character
440 after the end quote
441
442 Return value is a new PyString (if ASCII-only) or PyUnicode
443 */
Brett Cannon4b964f92008-05-05 20:21:38 +0000444 PyObject *rval;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000445 Py_ssize_t len = PyString_GET_SIZE(pystr);
Brett Cannon4b964f92008-05-05 20:21:38 +0000446 Py_ssize_t begin = end - 1;
Brett Cannon8e9757e2010-05-03 23:43:49 +0000447 Py_ssize_t next;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000448 char *buf = PyString_AS_STRING(pystr);
Brett Cannon4b964f92008-05-05 20:21:38 +0000449 PyObject *chunks = PyList_New(0);
450 if (chunks == NULL) {
451 goto bail;
452 }
Bob Ippolitod648f642008-07-19 21:59:50 +0000453 if (end < 0 || len <= end) {
454 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
455 goto bail;
456 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000457 while (1) {
458 /* Find the end of the string or the next escape */
459 Py_UNICODE c = 0;
460 PyObject *chunk = NULL;
461 for (next = end; next < len; next++) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000462 c = (unsigned char)buf[next];
Brett Cannon4b964f92008-05-05 20:21:38 +0000463 if (c == '"' || c == '\\') {
464 break;
465 }
466 else if (strict && c <= 0x1f) {
Bob Ippolitod648f642008-07-19 21:59:50 +0000467 raise_errmsg("Invalid control character at", pystr, next);
Brett Cannon4b964f92008-05-05 20:21:38 +0000468 goto bail;
469 }
470 }
471 if (!(c == '"' || c == '\\')) {
472 raise_errmsg("Unterminated string starting at", pystr, begin);
473 goto bail;
474 }
475 /* Pick up this chunk if it's not zero length */
476 if (next != end) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000477 PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000478 if (strchunk == NULL) {
479 goto bail;
480 }
Barry Warsawfa658272010-11-02 21:03:09 +0000481 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
482 Py_DECREF(strchunk);
483 if (chunk == NULL) {
484 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000485 }
486 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000487 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000488 goto bail;
489 }
490 Py_DECREF(chunk);
491 }
492 next++;
493 if (c == '"') {
494 end = next;
495 break;
496 }
497 if (next == len) {
498 raise_errmsg("Unterminated string starting at", pystr, begin);
499 goto bail;
500 }
501 c = buf[next];
502 if (c != 'u') {
503 /* Non-unicode backslash escapes */
504 end = next + 1;
505 switch (c) {
506 case '"': break;
507 case '\\': break;
508 case '/': break;
509 case 'b': c = '\b'; break;
510 case 'f': c = '\f'; break;
511 case 'n': c = '\n'; break;
512 case 'r': c = '\r'; break;
513 case 't': c = '\t'; break;
514 default: c = 0;
515 }
516 if (c == 0) {
517 raise_errmsg("Invalid \\escape", pystr, end - 2);
518 goto bail;
519 }
520 }
521 else {
522 c = 0;
523 next++;
524 end = next + 4;
525 if (end >= len) {
526 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
527 goto bail;
528 }
529 /* Decode 4 hex digits */
530 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000531 Py_UNICODE digit = buf[next];
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000532 c <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000533 switch (digit) {
534 case '0': case '1': case '2': case '3': case '4':
535 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000536 c |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000537 case 'a': case 'b': case 'c': case 'd': case 'e':
538 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000539 c |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000540 case 'A': case 'B': case 'C': case 'D': case 'E':
541 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000542 c |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000543 default:
544 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
545 goto bail;
546 }
547 }
548#ifdef Py_UNICODE_WIDE
549 /* Surrogate pair */
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200550 if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
551 buf[next++] == '\\' &&
552 buf[next++] == 'u') {
Brett Cannon4b964f92008-05-05 20:21:38 +0000553 Py_UNICODE c2 = 0;
Brett Cannon4b964f92008-05-05 20:21:38 +0000554 end += 6;
555 /* Decode 4 hex digits */
556 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000557 Py_UNICODE digit = buf[next];
Antoine Pitrou22ad2452010-10-09 15:28:59 +0000558 c2 <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000559 switch (digit) {
560 case '0': case '1': case '2': case '3': case '4':
561 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000562 c2 |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000563 case 'a': case 'b': case 'c': case 'd': case 'e':
564 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000565 c2 |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000566 case 'A': case 'B': case 'C': case 'D': case 'E':
567 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000568 c2 |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000569 default:
570 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
571 goto bail;
572 }
573 }
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200574 if ((c2 & 0xfc00) == 0xdc00)
575 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
576 else
577 end -= 6;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000578 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000579#endif
580 }
Barry Warsawfa658272010-11-02 21:03:09 +0000581 chunk = PyUnicode_FromUnicode(&c, 1);
582 if (chunk == NULL) {
583 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000584 }
585 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000586 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000587 goto bail;
588 }
589 Py_DECREF(chunk);
590 }
591
Ezio Melottidf8a8f72011-05-04 14:40:53 +0300592 rval = join_list_unicode(chunks);
Brett Cannon4b964f92008-05-05 20:21:38 +0000593 if (rval == NULL) {
594 goto bail;
595 }
Benjamin Peterson336680e2008-10-16 21:48:06 +0000596 Py_CLEAR(chunks);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000597 *next_end_ptr = end;
598 return rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000599bail:
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000600 *next_end_ptr = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000601 Py_XDECREF(chunks);
602 return NULL;
603}
604
605
606static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000607scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Brett Cannon4b964f92008-05-05 20:21:38 +0000608{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000609 /* Read the JSON string from PyUnicode pystr.
610 end is the index of the first character after the quote.
611 if strict is zero then literal control characters are allowed
612 *next_end_ptr is a return-by-reference index of the character
613 after the end quote
614
615 Return value is a new PyUnicode
616 */
Brett Cannon4b964f92008-05-05 20:21:38 +0000617 PyObject *rval;
618 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
619 Py_ssize_t begin = end - 1;
Brett Cannon8e9757e2010-05-03 23:43:49 +0000620 Py_ssize_t next;
Brett Cannon4b964f92008-05-05 20:21:38 +0000621 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
622 PyObject *chunks = PyList_New(0);
623 if (chunks == NULL) {
624 goto bail;
625 }
Bob Ippolitod648f642008-07-19 21:59:50 +0000626 if (end < 0 || len <= end) {
627 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
628 goto bail;
629 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000630 while (1) {
631 /* Find the end of the string or the next escape */
632 Py_UNICODE c = 0;
633 PyObject *chunk = NULL;
634 for (next = end; next < len; next++) {
635 c = buf[next];
636 if (c == '"' || c == '\\') {
637 break;
638 }
639 else if (strict && c <= 0x1f) {
Bob Ippolitod648f642008-07-19 21:59:50 +0000640 raise_errmsg("Invalid control character at", pystr, next);
Brett Cannon4b964f92008-05-05 20:21:38 +0000641 goto bail;
642 }
643 }
644 if (!(c == '"' || c == '\\')) {
645 raise_errmsg("Unterminated string starting at", pystr, begin);
646 goto bail;
647 }
648 /* Pick up this chunk if it's not zero length */
649 if (next != end) {
650 chunk = PyUnicode_FromUnicode(&buf[end], next - end);
651 if (chunk == NULL) {
652 goto bail;
653 }
654 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson87e6ad22008-10-16 21:27:54 +0000655 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000656 goto bail;
657 }
658 Py_DECREF(chunk);
659 }
660 next++;
661 if (c == '"') {
662 end = next;
663 break;
664 }
665 if (next == len) {
666 raise_errmsg("Unterminated string starting at", pystr, begin);
667 goto bail;
668 }
669 c = buf[next];
670 if (c != 'u') {
671 /* Non-unicode backslash escapes */
672 end = next + 1;
673 switch (c) {
674 case '"': break;
675 case '\\': break;
676 case '/': break;
677 case 'b': c = '\b'; break;
678 case 'f': c = '\f'; break;
679 case 'n': c = '\n'; break;
680 case 'r': c = '\r'; break;
681 case 't': c = '\t'; break;
682 default: c = 0;
683 }
684 if (c == 0) {
685 raise_errmsg("Invalid \\escape", pystr, end - 2);
686 goto bail;
687 }
688 }
689 else {
690 c = 0;
691 next++;
692 end = next + 4;
693 if (end >= len) {
694 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
695 goto bail;
696 }
697 /* Decode 4 hex digits */
698 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000699 Py_UNICODE digit = buf[next];
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000700 c <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000701 switch (digit) {
702 case '0': case '1': case '2': case '3': case '4':
703 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000704 c |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000705 case 'a': case 'b': case 'c': case 'd': case 'e':
706 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000707 c |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000708 case 'A': case 'B': case 'C': case 'D': case 'E':
709 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000710 c |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000711 default:
712 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
713 goto bail;
714 }
715 }
716#ifdef Py_UNICODE_WIDE
717 /* Surrogate pair */
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200718 if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
719 buf[next++] == '\\' && buf[next++] == 'u') {
Brett Cannon4b964f92008-05-05 20:21:38 +0000720 Py_UNICODE c2 = 0;
Brett Cannon4b964f92008-05-05 20:21:38 +0000721 end += 6;
722 /* Decode 4 hex digits */
723 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000724 Py_UNICODE digit = buf[next];
Antoine Pitrou22ad2452010-10-09 15:28:59 +0000725 c2 <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000726 switch (digit) {
727 case '0': case '1': case '2': case '3': case '4':
728 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000729 c2 |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000730 case 'a': case 'b': case 'c': case 'd': case 'e':
731 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000732 c2 |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000733 case 'A': case 'B': case 'C': case 'D': case 'E':
734 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000735 c2 |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000736 default:
737 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
738 goto bail;
739 }
740 }
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200741 if ((c2 & 0xfc00) == 0xdc00)
742 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
743 else
744 end -= 6;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000745 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000746#endif
747 }
748 chunk = PyUnicode_FromUnicode(&c, 1);
749 if (chunk == NULL) {
750 goto bail;
751 }
752 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000753 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000754 goto bail;
755 }
756 Py_DECREF(chunk);
757 }
758
759 rval = join_list_unicode(chunks);
760 if (rval == NULL) {
761 goto bail;
762 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000763 Py_DECREF(chunks);
764 *next_end_ptr = end;
765 return rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000766bail:
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000767 *next_end_ptr = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000768 Py_XDECREF(chunks);
769 return NULL;
770}
771
772PyDoc_STRVAR(pydoc_scanstring,
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000773 "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
774 "\n"
775 "Scan the string s for a JSON string. End is the index of the\n"
776 "character in s after the quote that started the JSON string.\n"
777 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
778 "on attempt to decode an invalid string. If strict is False then literal\n"
779 "control characters are allowed in the string.\n"
780 "\n"
781 "Returns a tuple of the decoded string and the index of the character in s\n"
782 "after the end quote."
783);
Brett Cannon4b964f92008-05-05 20:21:38 +0000784
785static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000786py_scanstring(PyObject* self UNUSED, PyObject *args)
Brett Cannon4b964f92008-05-05 20:21:38 +0000787{
788 PyObject *pystr;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000789 PyObject *rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000790 Py_ssize_t end;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000791 Py_ssize_t next_end = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000792 char *encoding = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000793 int strict = 1;
794 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000795 return NULL;
796 }
797 if (encoding == NULL) {
798 encoding = DEFAULT_ENCODING;
799 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000800 if (PyString_Check(pystr)) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000801 rval = scanstring_str(pystr, end, encoding, strict, &next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000802 }
803 else if (PyUnicode_Check(pystr)) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000804 rval = scanstring_unicode(pystr, end, strict, &next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000805 }
806 else {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000807 PyErr_Format(PyExc_TypeError,
808 "first argument must be a string, not %.80s",
Brett Cannon4b964f92008-05-05 20:21:38 +0000809 Py_TYPE(pystr)->tp_name);
810 return NULL;
811 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000812 return _build_rval_index_tuple(rval, next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000813}
814
815PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000816 "encode_basestring_ascii(basestring) -> str\n"
817 "\n"
818 "Return an ASCII-only JSON representation of a Python string"
819);
Brett Cannon4b964f92008-05-05 20:21:38 +0000820
821static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000822py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Brett Cannon4b964f92008-05-05 20:21:38 +0000823{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000824 /* Return an ASCII-only JSON representation of a Python string */
Brett Cannon4b964f92008-05-05 20:21:38 +0000825 /* METH_O */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000826 if (PyString_Check(pystr)) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000827 return ascii_escape_str(pystr);
828 }
829 else if (PyUnicode_Check(pystr)) {
830 return ascii_escape_unicode(pystr);
831 }
832 else {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000833 PyErr_Format(PyExc_TypeError,
834 "first argument must be a string, not %.80s",
Brett Cannon4b964f92008-05-05 20:21:38 +0000835 Py_TYPE(pystr)->tp_name);
836 return NULL;
837 }
838}
839
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000840static void
841scanner_dealloc(PyObject *self)
842{
843 /* Deallocate scanner object */
844 scanner_clear(self);
845 Py_TYPE(self)->tp_free(self);
846}
847
848static int
849scanner_traverse(PyObject *self, visitproc visit, void *arg)
850{
851 PyScannerObject *s;
852 assert(PyScanner_Check(self));
853 s = (PyScannerObject *)self;
854 Py_VISIT(s->encoding);
855 Py_VISIT(s->strict);
856 Py_VISIT(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000857 Py_VISIT(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000858 Py_VISIT(s->parse_float);
859 Py_VISIT(s->parse_int);
860 Py_VISIT(s->parse_constant);
861 return 0;
862}
863
864static int
865scanner_clear(PyObject *self)
866{
867 PyScannerObject *s;
868 assert(PyScanner_Check(self));
869 s = (PyScannerObject *)self;
870 Py_CLEAR(s->encoding);
871 Py_CLEAR(s->strict);
872 Py_CLEAR(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000873 Py_CLEAR(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000874 Py_CLEAR(s->parse_float);
875 Py_CLEAR(s->parse_int);
876 Py_CLEAR(s->parse_constant);
877 return 0;
878}
879
880static PyObject *
881_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
882 /* Read a JSON object from PyString pystr.
883 idx is the index of the first character after the opening curly brace.
884 *next_idx_ptr is a return-by-reference index to the first character after
885 the closing curly brace.
886
887 Returns a new PyObject (usually a dict, but object_hook can change that)
888 */
889 char *str = PyString_AS_STRING(pystr);
890 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000891 PyObject *rval;
892 PyObject *pairs;
893 PyObject *item;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000894 PyObject *key = NULL;
895 PyObject *val = NULL;
896 char *encoding = PyString_AS_STRING(s->encoding);
897 int strict = PyObject_IsTrue(s->strict);
898 Py_ssize_t next_idx;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000899
Serhiy Storchaka5127ed72015-05-30 17:45:12 +0300900 if (strict < 0)
901 return NULL;
902
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000903 pairs = PyList_New(0);
904 if (pairs == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000905 return NULL;
906
907 /* skip whitespace after { */
908 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
909
910 /* only loop if the object is non-empty */
911 if (idx <= end_idx && str[idx] != '}') {
912 while (idx <= end_idx) {
913 /* read key */
914 if (str[idx] != '"') {
915 raise_errmsg("Expecting property name", pystr, idx);
916 goto bail;
917 }
918 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
919 if (key == NULL)
920 goto bail;
921 idx = next_idx;
922
923 /* skip whitespace between key and : delimiter, read :, skip whitespace */
924 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
925 if (idx > end_idx || str[idx] != ':') {
926 raise_errmsg("Expecting : delimiter", pystr, idx);
927 goto bail;
928 }
929 idx++;
930 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
931
932 /* read any JSON data type */
933 val = scan_once_str(s, pystr, idx, &next_idx);
934 if (val == NULL)
935 goto bail;
936
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000937 item = PyTuple_Pack(2, key, val);
938 if (item == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000939 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000940 Py_CLEAR(key);
941 Py_CLEAR(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000942 if (PyList_Append(pairs, item) == -1) {
943 Py_DECREF(item);
944 goto bail;
945 }
946 Py_DECREF(item);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000947 idx = next_idx;
948
949 /* skip whitespace before } or , */
950 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
951
952 /* bail if the object is closed or we didn't get the , delimiter */
953 if (idx > end_idx) break;
954 if (str[idx] == '}') {
955 break;
956 }
957 else if (str[idx] != ',') {
958 raise_errmsg("Expecting , delimiter", pystr, idx);
959 goto bail;
960 }
961 idx++;
962
963 /* skip whitespace after , delimiter */
964 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
965 }
966 }
967 /* verify that idx < end_idx, str[idx] should be '}' */
968 if (idx > end_idx || str[idx] != '}') {
969 raise_errmsg("Expecting object", pystr, end_idx);
970 goto bail;
971 }
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000972
973 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
974 if (s->pairs_hook != Py_None) {
975 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
976 if (val == NULL)
977 goto bail;
978 Py_DECREF(pairs);
979 *next_idx_ptr = idx + 1;
980 return val;
981 }
982
983 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
984 pairs, NULL);
985 if (rval == NULL)
986 goto bail;
987 Py_CLEAR(pairs);
988
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000989 /* if object_hook is not None: rval = object_hook(rval) */
990 if (s->object_hook != Py_None) {
991 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
992 if (val == NULL)
993 goto bail;
994 Py_DECREF(rval);
995 rval = val;
996 val = NULL;
997 }
998 *next_idx_ptr = idx + 1;
999 return rval;
1000bail:
1001 Py_XDECREF(key);
1002 Py_XDECREF(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001003 Py_XDECREF(pairs);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001004 return NULL;
1005}
1006
1007static PyObject *
1008_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1009 /* Read a JSON object from PyUnicode pystr.
1010 idx is the index of the first character after the opening curly brace.
1011 *next_idx_ptr is a return-by-reference index to the first character after
1012 the closing curly brace.
1013
1014 Returns a new PyObject (usually a dict, but object_hook can change that)
1015 */
1016 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1017 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001018 PyObject *rval;
1019 PyObject *pairs;
1020 PyObject *item;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001021 PyObject *key = NULL;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001022 PyObject *val = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001023 int strict = PyObject_IsTrue(s->strict);
1024 Py_ssize_t next_idx;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001025
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001026 if (strict < 0)
1027 return NULL;
1028
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001029 pairs = PyList_New(0);
1030 if (pairs == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001031 return NULL;
1032
1033 /* skip whitespace after { */
1034 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1035
1036 /* only loop if the object is non-empty */
1037 if (idx <= end_idx && str[idx] != '}') {
1038 while (idx <= end_idx) {
1039 /* read key */
1040 if (str[idx] != '"') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001041 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001042 goto bail;
1043 }
1044 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1045 if (key == NULL)
1046 goto bail;
1047 idx = next_idx;
1048
1049 /* skip whitespace between key and : delimiter, read :, skip whitespace */
1050 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1051 if (idx > end_idx || str[idx] != ':') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001052 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001053 goto bail;
1054 }
1055 idx++;
1056 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1057
1058 /* read any JSON term */
1059 val = scan_once_unicode(s, pystr, idx, &next_idx);
1060 if (val == NULL)
1061 goto bail;
1062
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001063 item = PyTuple_Pack(2, key, val);
1064 if (item == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001065 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001066 Py_CLEAR(key);
1067 Py_CLEAR(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001068 if (PyList_Append(pairs, item) == -1) {
1069 Py_DECREF(item);
1070 goto bail;
1071 }
1072 Py_DECREF(item);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001073 idx = next_idx;
1074
1075 /* skip whitespace before } or , */
1076 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1077
1078 /* bail if the object is closed or we didn't get the , delimiter */
1079 if (idx > end_idx) break;
1080 if (str[idx] == '}') {
1081 break;
1082 }
1083 else if (str[idx] != ',') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001084 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001085 goto bail;
1086 }
1087 idx++;
1088
1089 /* skip whitespace after , delimiter */
1090 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1091 }
1092 }
1093
1094 /* verify that idx < end_idx, str[idx] should be '}' */
1095 if (idx > end_idx || str[idx] != '}') {
1096 raise_errmsg("Expecting object", pystr, end_idx);
1097 goto bail;
1098 }
1099
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001100 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1101 if (s->pairs_hook != Py_None) {
1102 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1103 if (val == NULL)
1104 goto bail;
1105 Py_DECREF(pairs);
1106 *next_idx_ptr = idx + 1;
1107 return val;
1108 }
1109
1110 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
1111 pairs, NULL);
1112 if (rval == NULL)
1113 goto bail;
1114 Py_CLEAR(pairs);
1115
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001116 /* if object_hook is not None: rval = object_hook(rval) */
1117 if (s->object_hook != Py_None) {
1118 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1119 if (val == NULL)
1120 goto bail;
1121 Py_DECREF(rval);
1122 rval = val;
1123 val = NULL;
1124 }
1125 *next_idx_ptr = idx + 1;
1126 return rval;
1127bail:
1128 Py_XDECREF(key);
1129 Py_XDECREF(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001130 Py_XDECREF(pairs);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001131 return NULL;
1132}
1133
1134static PyObject *
1135_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1136 /* Read a JSON array from PyString pystr.
1137 idx is the index of the first character after the opening brace.
1138 *next_idx_ptr is a return-by-reference index to the first character after
1139 the closing brace.
1140
1141 Returns a new PyList
1142 */
1143 char *str = PyString_AS_STRING(pystr);
1144 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1145 PyObject *val = NULL;
1146 PyObject *rval = PyList_New(0);
1147 Py_ssize_t next_idx;
1148 if (rval == NULL)
1149 return NULL;
1150
1151 /* skip whitespace after [ */
1152 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1153
1154 /* only loop if the array is non-empty */
1155 if (idx <= end_idx && str[idx] != ']') {
1156 while (idx <= end_idx) {
1157
1158 /* read any JSON term and de-tuplefy the (rval, idx) */
1159 val = scan_once_str(s, pystr, idx, &next_idx);
1160 if (val == NULL)
1161 goto bail;
1162
1163 if (PyList_Append(rval, val) == -1)
1164 goto bail;
1165
1166 Py_CLEAR(val);
1167 idx = next_idx;
1168
1169 /* skip whitespace between term and , */
1170 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1171
1172 /* bail if the array is closed or we didn't get the , delimiter */
1173 if (idx > end_idx) break;
1174 if (str[idx] == ']') {
1175 break;
1176 }
1177 else if (str[idx] != ',') {
1178 raise_errmsg("Expecting , delimiter", pystr, idx);
1179 goto bail;
1180 }
1181 idx++;
1182
1183 /* skip whitespace after , */
1184 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1185 }
1186 }
1187
1188 /* verify that idx < end_idx, str[idx] should be ']' */
1189 if (idx > end_idx || str[idx] != ']') {
1190 raise_errmsg("Expecting object", pystr, end_idx);
1191 goto bail;
1192 }
1193 *next_idx_ptr = idx + 1;
1194 return rval;
1195bail:
1196 Py_XDECREF(val);
1197 Py_DECREF(rval);
1198 return NULL;
1199}
1200
1201static PyObject *
1202_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1203 /* Read a JSON array from PyString pystr.
1204 idx is the index of the first character after the opening brace.
1205 *next_idx_ptr is a return-by-reference index to the first character after
1206 the closing brace.
1207
1208 Returns a new PyList
1209 */
1210 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1211 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1212 PyObject *val = NULL;
1213 PyObject *rval = PyList_New(0);
1214 Py_ssize_t next_idx;
1215 if (rval == NULL)
1216 return NULL;
1217
1218 /* skip whitespace after [ */
1219 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1220
1221 /* only loop if the array is non-empty */
1222 if (idx <= end_idx && str[idx] != ']') {
1223 while (idx <= end_idx) {
1224
1225 /* read any JSON term */
1226 val = scan_once_unicode(s, pystr, idx, &next_idx);
1227 if (val == NULL)
1228 goto bail;
1229
1230 if (PyList_Append(rval, val) == -1)
1231 goto bail;
1232
1233 Py_CLEAR(val);
1234 idx = next_idx;
1235
1236 /* skip whitespace between term and , */
1237 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1238
1239 /* bail if the array is closed or we didn't get the , delimiter */
1240 if (idx > end_idx) break;
1241 if (str[idx] == ']') {
1242 break;
1243 }
1244 else if (str[idx] != ',') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001245 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001246 goto bail;
1247 }
1248 idx++;
1249
1250 /* skip whitespace after , */
1251 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1252 }
1253 }
1254
1255 /* verify that idx < end_idx, str[idx] should be ']' */
1256 if (idx > end_idx || str[idx] != ']') {
1257 raise_errmsg("Expecting object", pystr, end_idx);
1258 goto bail;
1259 }
1260 *next_idx_ptr = idx + 1;
1261 return rval;
1262bail:
1263 Py_XDECREF(val);
1264 Py_DECREF(rval);
1265 return NULL;
1266}
1267
1268static PyObject *
1269_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1270 /* Read a JSON constant from PyString pystr.
1271 constant is the constant string that was found
1272 ("NaN", "Infinity", "-Infinity").
1273 idx is the index of the first character of the constant
1274 *next_idx_ptr is a return-by-reference index to the first character after
1275 the constant.
1276
1277 Returns the result of parse_constant
1278 */
1279 PyObject *cstr;
1280 PyObject *rval;
1281 /* constant is "NaN", "Infinity", or "-Infinity" */
1282 cstr = PyString_InternFromString(constant);
1283 if (cstr == NULL)
1284 return NULL;
1285
1286 /* rval = parse_constant(constant) */
1287 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1288 idx += PyString_GET_SIZE(cstr);
1289 Py_DECREF(cstr);
1290 *next_idx_ptr = idx;
1291 return rval;
1292}
1293
1294static PyObject *
1295_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1296 /* Read a JSON number from PyString pystr.
1297 idx is the index of the first character of the number
1298 *next_idx_ptr is a return-by-reference index to the first character after
1299 the number.
1300
1301 Returns a new PyObject representation of that number:
1302 PyInt, PyLong, or PyFloat.
1303 May return other types if parse_int or parse_float are set
1304 */
1305 char *str = PyString_AS_STRING(pystr);
1306 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1307 Py_ssize_t idx = start;
1308 int is_float = 0;
1309 PyObject *rval;
1310 PyObject *numstr;
1311
1312 /* read a sign if it's there, make sure it's not the end of the string */
1313 if (str[idx] == '-') {
1314 idx++;
1315 if (idx > end_idx) {
1316 PyErr_SetNone(PyExc_StopIteration);
1317 return NULL;
1318 }
1319 }
1320
1321 /* read as many integer digits as we find as long as it doesn't start with 0 */
1322 if (str[idx] >= '1' && str[idx] <= '9') {
1323 idx++;
1324 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1325 }
1326 /* if it starts with 0 we only expect one integer digit */
1327 else if (str[idx] == '0') {
1328 idx++;
1329 }
1330 /* no integer digits, error */
1331 else {
1332 PyErr_SetNone(PyExc_StopIteration);
1333 return NULL;
1334 }
1335
1336 /* if the next char is '.' followed by a digit then read all float digits */
1337 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1338 is_float = 1;
1339 idx += 2;
1340 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1341 }
1342
1343 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1344 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1345
1346 /* save the index of the 'e' or 'E' just in case we need to backtrack */
1347 Py_ssize_t e_start = idx;
1348 idx++;
1349
1350 /* read an exponent sign if present */
1351 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1352
1353 /* read all digits */
1354 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1355
1356 /* if we got a digit, then parse as float. if not, backtrack */
1357 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1358 is_float = 1;
1359 }
1360 else {
1361 idx = e_start;
1362 }
1363 }
1364
1365 /* copy the section we determined to be a number */
1366 numstr = PyString_FromStringAndSize(&str[start], idx - start);
1367 if (numstr == NULL)
1368 return NULL;
1369 if (is_float) {
1370 /* parse as a float using a fast path if available, otherwise call user defined method */
1371 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1372 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1373 }
1374 else {
Eric Smith129c97d2009-10-28 08:44:37 +00001375 double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1376 NULL, NULL);
1377 if (d == -1.0 && PyErr_Occurred())
1378 return NULL;
1379 rval = PyFloat_FromDouble(d);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001380 }
1381 }
1382 else {
1383 /* parse as an int using a fast path if available, otherwise call user defined method */
1384 if (s->parse_int != (PyObject *)&PyInt_Type) {
1385 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1386 }
1387 else {
1388 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1389 }
1390 }
1391 Py_DECREF(numstr);
1392 *next_idx_ptr = idx;
1393 return rval;
1394}
1395
1396static PyObject *
1397_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1398 /* Read a JSON number from PyUnicode pystr.
1399 idx is the index of the first character of the number
1400 *next_idx_ptr is a return-by-reference index to the first character after
1401 the number.
1402
1403 Returns a new PyObject representation of that number:
1404 PyInt, PyLong, or PyFloat.
1405 May return other types if parse_int or parse_float are set
1406 */
1407 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1408 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1409 Py_ssize_t idx = start;
1410 int is_float = 0;
1411 PyObject *rval;
1412 PyObject *numstr;
1413
1414 /* read a sign if it's there, make sure it's not the end of the string */
1415 if (str[idx] == '-') {
1416 idx++;
1417 if (idx > end_idx) {
1418 PyErr_SetNone(PyExc_StopIteration);
1419 return NULL;
1420 }
1421 }
1422
1423 /* read as many integer digits as we find as long as it doesn't start with 0 */
1424 if (str[idx] >= '1' && str[idx] <= '9') {
1425 idx++;
1426 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1427 }
1428 /* if it starts with 0 we only expect one integer digit */
1429 else if (str[idx] == '0') {
1430 idx++;
1431 }
1432 /* no integer digits, error */
1433 else {
1434 PyErr_SetNone(PyExc_StopIteration);
1435 return NULL;
1436 }
1437
1438 /* if the next char is '.' followed by a digit then read all float digits */
1439 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1440 is_float = 1;
1441 idx += 2;
Bob Ippolito76a982a2009-03-29 22:33:58 +00001442 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001443 }
1444
1445 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1446 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1447 Py_ssize_t e_start = idx;
1448 idx++;
1449
1450 /* read an exponent sign if present */
1451 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1452
1453 /* read all digits */
1454 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1455
1456 /* if we got a digit, then parse as float. if not, backtrack */
1457 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1458 is_float = 1;
1459 }
1460 else {
1461 idx = e_start;
1462 }
1463 }
1464
1465 /* copy the section we determined to be a number */
1466 numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1467 if (numstr == NULL)
1468 return NULL;
1469 if (is_float) {
1470 /* parse as a float using a fast path if available, otherwise call user defined method */
1471 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1472 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1473 }
1474 else {
1475 rval = PyFloat_FromString(numstr, NULL);
1476 }
1477 }
1478 else {
1479 /* no fast path for unicode -> int, just call */
1480 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1481 }
1482 Py_DECREF(numstr);
1483 *next_idx_ptr = idx;
1484 return rval;
1485}
1486
1487static PyObject *
1488scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1489{
1490 /* Read one JSON term (of any kind) from PyString pystr.
1491 idx is the index of the first character of the term
1492 *next_idx_ptr is a return-by-reference index to the first character after
1493 the number.
1494
1495 Returns a new PyObject representation of the term.
1496 */
Ezio Melotticec46492011-05-07 17:40:23 +03001497 PyObject *res;
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001498 int strict;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001499 char *str = PyString_AS_STRING(pystr);
1500 Py_ssize_t length = PyString_GET_SIZE(pystr);
Benjamin Peterson3e5d87b2014-04-14 11:43:09 -04001501 if (idx < 0) {
1502 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1503 return NULL;
1504 }
1505 if (idx >= length) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001506 PyErr_SetNone(PyExc_StopIteration);
1507 return NULL;
1508 }
1509 switch (str[idx]) {
1510 case '"':
1511 /* string */
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001512 strict = PyObject_IsTrue(s->strict);
1513 if (strict < 0)
1514 return NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001515 return scanstring_str(pystr, idx + 1,
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001516 PyString_AS_STRING(s->encoding), strict, next_idx_ptr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001517 case '{':
1518 /* object */
Ezio Melotticec46492011-05-07 17:40:23 +03001519 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1520 "from a byte string"))
1521 return NULL;
1522 res = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1523 Py_LeaveRecursiveCall();
1524 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001525 case '[':
1526 /* array */
Ezio Melotticec46492011-05-07 17:40:23 +03001527 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1528 "from a byte string"))
1529 return NULL;
1530 res = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1531 Py_LeaveRecursiveCall();
1532 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001533 case 'n':
1534 /* null */
1535 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1536 Py_INCREF(Py_None);
1537 *next_idx_ptr = idx + 4;
1538 return Py_None;
1539 }
1540 break;
1541 case 't':
1542 /* true */
1543 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1544 Py_INCREF(Py_True);
1545 *next_idx_ptr = idx + 4;
1546 return Py_True;
1547 }
1548 break;
1549 case 'f':
1550 /* false */
1551 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1552 Py_INCREF(Py_False);
1553 *next_idx_ptr = idx + 5;
1554 return Py_False;
1555 }
1556 break;
1557 case 'N':
1558 /* NaN */
1559 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1560 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1561 }
1562 break;
1563 case 'I':
1564 /* Infinity */
1565 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1566 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1567 }
1568 break;
1569 case '-':
1570 /* -Infinity */
1571 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1572 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1573 }
1574 break;
1575 }
1576 /* Didn't find a string, object, array, or named constant. Look for a number. */
1577 return _match_number_str(s, pystr, idx, next_idx_ptr);
1578}
1579
1580static PyObject *
1581scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1582{
1583 /* Read one JSON term (of any kind) from PyUnicode pystr.
1584 idx is the index of the first character of the term
1585 *next_idx_ptr is a return-by-reference index to the first character after
1586 the number.
1587
1588 Returns a new PyObject representation of the term.
1589 */
Ezio Melotticec46492011-05-07 17:40:23 +03001590 PyObject *res;
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001591 int strict;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001592 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1593 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
Benjamin Peterson3e5d87b2014-04-14 11:43:09 -04001594 if (idx < 0) {
1595 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1596 return NULL;
1597 }
1598 if (idx >= length) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001599 PyErr_SetNone(PyExc_StopIteration);
1600 return NULL;
1601 }
1602 switch (str[idx]) {
1603 case '"':
1604 /* string */
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001605 strict = PyObject_IsTrue(s->strict);
1606 if (strict < 0)
1607 return NULL;
1608 return scanstring_unicode(pystr, idx + 1, strict, next_idx_ptr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001609 case '{':
1610 /* object */
Ezio Melotticec46492011-05-07 17:40:23 +03001611 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1612 "from a unicode string"))
1613 return NULL;
1614 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1615 Py_LeaveRecursiveCall();
1616 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001617 case '[':
1618 /* array */
Ezio Melotticec46492011-05-07 17:40:23 +03001619 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1620 "from a unicode string"))
1621 return NULL;
1622 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1623 Py_LeaveRecursiveCall();
1624 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001625 case 'n':
1626 /* null */
1627 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1628 Py_INCREF(Py_None);
1629 *next_idx_ptr = idx + 4;
1630 return Py_None;
1631 }
1632 break;
1633 case 't':
1634 /* true */
1635 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1636 Py_INCREF(Py_True);
1637 *next_idx_ptr = idx + 4;
1638 return Py_True;
1639 }
1640 break;
1641 case 'f':
1642 /* false */
1643 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1644 Py_INCREF(Py_False);
1645 *next_idx_ptr = idx + 5;
1646 return Py_False;
1647 }
1648 break;
1649 case 'N':
1650 /* NaN */
1651 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1652 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1653 }
1654 break;
1655 case 'I':
1656 /* Infinity */
1657 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1658 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1659 }
1660 break;
1661 case '-':
1662 /* -Infinity */
1663 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1664 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1665 }
1666 break;
1667 }
1668 /* Didn't find a string, object, array, or named constant. Look for a number. */
1669 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1670}
1671
1672static PyObject *
1673scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1674{
1675 /* Python callable interface to scan_once_{str,unicode} */
1676 PyObject *pystr;
1677 PyObject *rval;
1678 Py_ssize_t idx;
1679 Py_ssize_t next_idx = -1;
1680 static char *kwlist[] = {"string", "idx", NULL};
1681 PyScannerObject *s;
1682 assert(PyScanner_Check(self));
1683 s = (PyScannerObject *)self;
1684 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1685 return NULL;
1686
1687 if (PyString_Check(pystr)) {
1688 rval = scan_once_str(s, pystr, idx, &next_idx);
1689 }
1690 else if (PyUnicode_Check(pystr)) {
1691 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1692 }
1693 else {
1694 PyErr_Format(PyExc_TypeError,
1695 "first argument must be a string, not %.80s",
1696 Py_TYPE(pystr)->tp_name);
1697 return NULL;
1698 }
1699 return _build_rval_index_tuple(rval, next_idx);
1700}
1701
1702static PyObject *
1703scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1704{
1705 PyScannerObject *s;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001706 PyObject *ctx;
1707 static char *kwlist[] = {"context", NULL};
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001708
1709 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001710 return NULL;
1711
1712 s = (PyScannerObject *)type->tp_alloc(type, 0);
1713 if (s == NULL)
1714 return NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001715
1716 /* PyString_AS_STRING is used on encoding */
1717 s->encoding = PyObject_GetAttrString(ctx, "encoding");
Antoine Pitrou187177f2009-12-08 15:40:51 +00001718 if (s->encoding == NULL)
1719 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001720 if (s->encoding == Py_None) {
1721 Py_DECREF(Py_None);
1722 s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1723 }
1724 else if (PyUnicode_Check(s->encoding)) {
1725 PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001726 Py_SETREF(s->encoding, tmp);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001727 }
Amaury Forgeot d'Arcdee76e62012-01-13 22:53:25 +01001728 if (s->encoding == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001729 goto bail;
Amaury Forgeot d'Arcdee76e62012-01-13 22:53:25 +01001730 if (!PyString_Check(s->encoding)) {
1731 PyErr_Format(PyExc_TypeError,
1732 "encoding must be a string, not %.80s",
1733 Py_TYPE(s->encoding)->tp_name);
1734 goto bail;
1735 }
1736
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001737
1738 /* All of these will fail "gracefully" so we don't need to verify them */
1739 s->strict = PyObject_GetAttrString(ctx, "strict");
1740 if (s->strict == NULL)
1741 goto bail;
1742 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1743 if (s->object_hook == NULL)
1744 goto bail;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001745 s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
Georg Brandld823bdc2011-01-02 14:20:16 +00001746 if (s->pairs_hook == NULL)
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001747 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001748 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1749 if (s->parse_float == NULL)
1750 goto bail;
1751 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1752 if (s->parse_int == NULL)
1753 goto bail;
1754 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1755 if (s->parse_constant == NULL)
1756 goto bail;
1757
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001758 return (PyObject *)s;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001759
1760bail:
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001761 Py_DECREF(s);
1762 return NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001763}
1764
1765PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1766
1767static
1768PyTypeObject PyScannerType = {
1769 PyObject_HEAD_INIT(NULL)
1770 0, /* tp_internal */
1771 "_json.Scanner", /* tp_name */
1772 sizeof(PyScannerObject), /* tp_basicsize */
1773 0, /* tp_itemsize */
1774 scanner_dealloc, /* tp_dealloc */
1775 0, /* tp_print */
1776 0, /* tp_getattr */
1777 0, /* tp_setattr */
1778 0, /* tp_compare */
1779 0, /* tp_repr */
1780 0, /* tp_as_number */
1781 0, /* tp_as_sequence */
1782 0, /* tp_as_mapping */
1783 0, /* tp_hash */
1784 scanner_call, /* tp_call */
1785 0, /* tp_str */
1786 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1787 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1788 0, /* tp_as_buffer */
1789 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1790 scanner_doc, /* tp_doc */
1791 scanner_traverse, /* tp_traverse */
1792 scanner_clear, /* tp_clear */
1793 0, /* tp_richcompare */
1794 0, /* tp_weaklistoffset */
1795 0, /* tp_iter */
1796 0, /* tp_iternext */
1797 0, /* tp_methods */
1798 scanner_members, /* tp_members */
1799 0, /* tp_getset */
1800 0, /* tp_base */
1801 0, /* tp_dict */
1802 0, /* tp_descr_get */
1803 0, /* tp_descr_set */
1804 0, /* tp_dictoffset */
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001805 0, /* tp_init */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001806 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1807 scanner_new, /* tp_new */
1808 0,/* PyObject_GC_Del, */ /* tp_free */
1809};
1810
1811static PyObject *
1812encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1813{
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001814 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1815
1816 PyEncoderObject *s;
Antoine Pitrou187177f2009-12-08 15:40:51 +00001817 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001818 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan_obj;
1819 int allow_nan;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001820
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001821 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
Antoine Pitrou187177f2009-12-08 15:40:51 +00001822 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001823 &sort_keys, &skipkeys, &allow_nan_obj))
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001824 return NULL;
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001825
1826 allow_nan = PyObject_IsTrue(allow_nan_obj);
1827 if (allow_nan < 0)
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001828 return NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001829
Serhiy Storchaka76a64ca2015-07-26 09:07:53 +03001830 if (markers != Py_None && !PyDict_Check(markers)) {
1831 PyErr_Format(PyExc_TypeError,
1832 "make_encoder() argument 1 must be dict or None, "
1833 "not %.200s", Py_TYPE(markers)->tp_name);
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001834 return NULL;
Serhiy Storchaka76a64ca2015-07-26 09:07:53 +03001835 }
1836
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001837 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1838 if (s == NULL)
1839 return NULL;
1840
Antoine Pitrou187177f2009-12-08 15:40:51 +00001841 s->markers = markers;
1842 s->defaultfn = defaultfn;
1843 s->encoder = encoder;
1844 s->indent = indent;
1845 s->key_separator = key_separator;
1846 s->item_separator = item_separator;
1847 s->sort_keys = sort_keys;
1848 s->skipkeys = skipkeys;
1849 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001850 s->allow_nan = allow_nan;
Antoine Pitrou187177f2009-12-08 15:40:51 +00001851
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001852 Py_INCREF(s->markers);
1853 Py_INCREF(s->defaultfn);
1854 Py_INCREF(s->encoder);
1855 Py_INCREF(s->indent);
1856 Py_INCREF(s->key_separator);
1857 Py_INCREF(s->item_separator);
1858 Py_INCREF(s->sort_keys);
1859 Py_INCREF(s->skipkeys);
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001860 return (PyObject *)s;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001861}
1862
1863static PyObject *
1864encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1865{
1866 /* Python callable interface to encode_listencode_obj */
1867 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1868 PyObject *obj;
1869 PyObject *rval;
1870 Py_ssize_t indent_level;
1871 PyEncoderObject *s;
1872 assert(PyEncoder_Check(self));
1873 s = (PyEncoderObject *)self;
1874 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1875 &obj, _convertPyInt_AsSsize_t, &indent_level))
1876 return NULL;
1877 rval = PyList_New(0);
1878 if (rval == NULL)
1879 return NULL;
1880 if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1881 Py_DECREF(rval);
1882 return NULL;
1883 }
1884 return rval;
1885}
1886
1887static PyObject *
1888_encoded_const(PyObject *obj)
1889{
1890 /* Return the JSON string representation of None, True, False */
1891 if (obj == Py_None) {
1892 static PyObject *s_null = NULL;
1893 if (s_null == NULL) {
1894 s_null = PyString_InternFromString("null");
1895 }
1896 Py_INCREF(s_null);
1897 return s_null;
1898 }
1899 else if (obj == Py_True) {
1900 static PyObject *s_true = NULL;
1901 if (s_true == NULL) {
1902 s_true = PyString_InternFromString("true");
1903 }
1904 Py_INCREF(s_true);
1905 return s_true;
1906 }
1907 else if (obj == Py_False) {
1908 static PyObject *s_false = NULL;
1909 if (s_false == NULL) {
1910 s_false = PyString_InternFromString("false");
1911 }
1912 Py_INCREF(s_false);
1913 return s_false;
1914 }
1915 else {
1916 PyErr_SetString(PyExc_ValueError, "not a const");
1917 return NULL;
1918 }
1919}
1920
1921static PyObject *
1922encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1923{
1924 /* Return the JSON representation of a PyFloat */
1925 double i = PyFloat_AS_DOUBLE(obj);
1926 if (!Py_IS_FINITE(i)) {
1927 if (!s->allow_nan) {
1928 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1929 return NULL;
1930 }
1931 if (i > 0) {
1932 return PyString_FromString("Infinity");
1933 }
1934 else if (i < 0) {
1935 return PyString_FromString("-Infinity");
1936 }
1937 else {
1938 return PyString_FromString("NaN");
1939 }
1940 }
Mark Dickinsone6239a32016-09-03 17:45:00 +01001941 /* Make sure to use the base float class repr method */
1942 return PyFloat_Type.tp_repr(obj);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001943}
1944
1945static PyObject *
1946encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1947{
1948 /* Return the JSON representation of a string */
1949 if (s->fast_encode)
1950 return py_encode_basestring_ascii(NULL, obj);
1951 else
1952 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1953}
1954
1955static int
1956_steal_list_append(PyObject *lst, PyObject *stolen)
1957{
1958 /* Append stolen and then decrement its reference count */
1959 int rval = PyList_Append(lst, stolen);
1960 Py_DECREF(stolen);
1961 return rval;
1962}
1963
1964static int
1965encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
1966{
1967 /* Encode Python object obj to a JSON term, rval is a PyList */
1968 PyObject *newobj;
1969 int rv;
1970
1971 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1972 PyObject *cstr = _encoded_const(obj);
1973 if (cstr == NULL)
1974 return -1;
1975 return _steal_list_append(rval, cstr);
1976 }
1977 else if (PyString_Check(obj) || PyUnicode_Check(obj))
1978 {
1979 PyObject *encoded = encoder_encode_string(s, obj);
1980 if (encoded == NULL)
1981 return -1;
1982 return _steal_list_append(rval, encoded);
1983 }
1984 else if (PyInt_Check(obj) || PyLong_Check(obj)) {
1985 PyObject *encoded = PyObject_Str(obj);
1986 if (encoded == NULL)
1987 return -1;
1988 return _steal_list_append(rval, encoded);
1989 }
1990 else if (PyFloat_Check(obj)) {
1991 PyObject *encoded = encoder_encode_float(s, obj);
1992 if (encoded == NULL)
1993 return -1;
1994 return _steal_list_append(rval, encoded);
1995 }
1996 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melottic1ec7b52011-05-11 00:55:35 +03001997 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1998 return -1;
1999 rv = encoder_listencode_list(s, rval, obj, indent_level);
2000 Py_LeaveRecursiveCall();
2001 return rv;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002002 }
2003 else if (PyDict_Check(obj)) {
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002004 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2005 return -1;
2006 rv = encoder_listencode_dict(s, rval, obj, indent_level);
2007 Py_LeaveRecursiveCall();
2008 return rv;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002009 }
2010 else {
2011 PyObject *ident = NULL;
2012 if (s->markers != Py_None) {
2013 int has_key;
2014 ident = PyLong_FromVoidPtr(obj);
2015 if (ident == NULL)
2016 return -1;
2017 has_key = PyDict_Contains(s->markers, ident);
2018 if (has_key) {
2019 if (has_key != -1)
2020 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2021 Py_DECREF(ident);
2022 return -1;
2023 }
2024 if (PyDict_SetItem(s->markers, ident, obj)) {
2025 Py_DECREF(ident);
2026 return -1;
2027 }
2028 }
2029 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2030 if (newobj == NULL) {
2031 Py_XDECREF(ident);
2032 return -1;
2033 }
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002034
Serhiy Storchakaf04790a2017-01-03 11:17:53 +02002035 if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
2036 Py_DECREF(newobj);
2037 Py_XDECREF(ident);
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002038 return -1;
Serhiy Storchakaf04790a2017-01-03 11:17:53 +02002039 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002040 rv = encoder_listencode_obj(s, rval, newobj, indent_level);
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002041 Py_LeaveRecursiveCall();
2042
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002043 Py_DECREF(newobj);
2044 if (rv) {
2045 Py_XDECREF(ident);
2046 return -1;
2047 }
2048 if (ident != NULL) {
2049 if (PyDict_DelItem(s->markers, ident)) {
2050 Py_XDECREF(ident);
2051 return -1;
2052 }
2053 Py_XDECREF(ident);
2054 }
2055 return rv;
2056 }
2057}
2058
2059static int
2060encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2061{
2062 /* Encode Python dict dct a JSON term, rval is a PyList */
2063 static PyObject *open_dict = NULL;
2064 static PyObject *close_dict = NULL;
2065 static PyObject *empty_dict = NULL;
2066 PyObject *kstr = NULL;
2067 PyObject *ident = NULL;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002068 PyObject *key = NULL;
2069 PyObject *value = NULL;
2070 PyObject *it = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002071 int skipkeys;
2072 Py_ssize_t idx;
2073
2074 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
2075 open_dict = PyString_InternFromString("{");
2076 close_dict = PyString_InternFromString("}");
2077 empty_dict = PyString_InternFromString("{}");
2078 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
2079 return -1;
2080 }
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002081 if (Py_SIZE(dct) == 0)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002082 return PyList_Append(rval, empty_dict);
2083
2084 if (s->markers != Py_None) {
2085 int has_key;
2086 ident = PyLong_FromVoidPtr(dct);
2087 if (ident == NULL)
2088 goto bail;
2089 has_key = PyDict_Contains(s->markers, ident);
2090 if (has_key) {
2091 if (has_key != -1)
2092 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2093 goto bail;
2094 }
2095 if (PyDict_SetItem(s->markers, ident, dct)) {
2096 goto bail;
2097 }
2098 }
2099
2100 if (PyList_Append(rval, open_dict))
2101 goto bail;
2102
2103 if (s->indent != Py_None) {
2104 /* TODO: DOES NOT RUN */
2105 indent_level += 1;
2106 /*
2107 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2108 separator = _item_separator + newline_indent
2109 buf += newline_indent
2110 */
2111 }
2112
2113 /* TODO: C speedup not implemented for sort_keys */
2114
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002115 it = PyObject_GetIter(dct);
2116 if (it == NULL)
2117 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002118 skipkeys = PyObject_IsTrue(s->skipkeys);
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03002119 if (skipkeys < 0)
2120 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002121 idx = 0;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002122 while ((key = PyIter_Next(it)) != NULL) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002123 PyObject *encoded;
2124
2125 if (PyString_Check(key) || PyUnicode_Check(key)) {
2126 Py_INCREF(key);
2127 kstr = key;
2128 }
2129 else if (PyFloat_Check(key)) {
2130 kstr = encoder_encode_float(s, key);
2131 if (kstr == NULL)
2132 goto bail;
2133 }
2134 else if (PyInt_Check(key) || PyLong_Check(key)) {
2135 kstr = PyObject_Str(key);
2136 if (kstr == NULL)
2137 goto bail;
2138 }
2139 else if (key == Py_True || key == Py_False || key == Py_None) {
2140 kstr = _encoded_const(key);
2141 if (kstr == NULL)
2142 goto bail;
2143 }
2144 else if (skipkeys) {
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002145 Py_DECREF(key);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002146 continue;
2147 }
2148 else {
2149 /* TODO: include repr of key */
Doug Hellmannf31db932010-07-21 12:36:33 +00002150 PyErr_SetString(PyExc_TypeError, "keys must be a string");
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002151 goto bail;
2152 }
2153
2154 if (idx) {
2155 if (PyList_Append(rval, s->item_separator))
2156 goto bail;
2157 }
2158
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002159 value = PyObject_GetItem(dct, key);
2160 if (value == NULL)
2161 goto bail;
2162
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002163 encoded = encoder_encode_string(s, kstr);
2164 Py_CLEAR(kstr);
2165 if (encoded == NULL)
2166 goto bail;
2167 if (PyList_Append(rval, encoded)) {
2168 Py_DECREF(encoded);
2169 goto bail;
2170 }
2171 Py_DECREF(encoded);
2172 if (PyList_Append(rval, s->key_separator))
2173 goto bail;
2174 if (encoder_listencode_obj(s, rval, value, indent_level))
2175 goto bail;
2176 idx += 1;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002177 Py_CLEAR(value);
2178 Py_DECREF(key);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002179 }
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002180 if (PyErr_Occurred())
2181 goto bail;
2182 Py_CLEAR(it);
2183
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002184 if (ident != NULL) {
2185 if (PyDict_DelItem(s->markers, ident))
2186 goto bail;
2187 Py_CLEAR(ident);
2188 }
2189 if (s->indent != Py_None) {
2190 /* TODO: DOES NOT RUN */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002191 /*
Brett Cannon8e9757e2010-05-03 23:43:49 +00002192 indent_level -= 1;
2193
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002194 yield '\n' + (' ' * (_indent * _current_indent_level))
2195 */
2196 }
2197 if (PyList_Append(rval, close_dict))
2198 goto bail;
2199 return 0;
2200
2201bail:
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002202 Py_XDECREF(it);
2203 Py_XDECREF(key);
2204 Py_XDECREF(value);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002205 Py_XDECREF(kstr);
2206 Py_XDECREF(ident);
2207 return -1;
2208}
2209
2210
2211static int
2212encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2213{
2214 /* Encode Python list seq to a JSON term, rval is a PyList */
2215 static PyObject *open_array = NULL;
2216 static PyObject *close_array = NULL;
2217 static PyObject *empty_array = NULL;
2218 PyObject *ident = NULL;
2219 PyObject *s_fast = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002220 Py_ssize_t i;
2221
2222 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2223 open_array = PyString_InternFromString("[");
2224 close_array = PyString_InternFromString("]");
2225 empty_array = PyString_InternFromString("[]");
2226 if (open_array == NULL || close_array == NULL || empty_array == NULL)
2227 return -1;
2228 }
2229 ident = NULL;
2230 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
2231 if (s_fast == NULL)
2232 return -1;
Antoine Pitroue9e35c32012-11-01 20:07:40 +01002233 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002234 Py_DECREF(s_fast);
2235 return PyList_Append(rval, empty_array);
2236 }
2237
2238 if (s->markers != Py_None) {
2239 int has_key;
2240 ident = PyLong_FromVoidPtr(seq);
2241 if (ident == NULL)
2242 goto bail;
2243 has_key = PyDict_Contains(s->markers, ident);
2244 if (has_key) {
2245 if (has_key != -1)
2246 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2247 goto bail;
2248 }
2249 if (PyDict_SetItem(s->markers, ident, seq)) {
2250 goto bail;
2251 }
2252 }
2253
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002254 if (PyList_Append(rval, open_array))
2255 goto bail;
2256 if (s->indent != Py_None) {
2257 /* TODO: DOES NOT RUN */
2258 indent_level += 1;
2259 /*
2260 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2261 separator = _item_separator + newline_indent
2262 buf += newline_indent
2263 */
2264 }
Antoine Pitroue9e35c32012-11-01 20:07:40 +01002265 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
2266 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002267 if (i) {
2268 if (PyList_Append(rval, s->item_separator))
2269 goto bail;
2270 }
2271 if (encoder_listencode_obj(s, rval, obj, indent_level))
2272 goto bail;
2273 }
2274 if (ident != NULL) {
2275 if (PyDict_DelItem(s->markers, ident))
2276 goto bail;
2277 Py_CLEAR(ident);
2278 }
2279 if (s->indent != Py_None) {
2280 /* TODO: DOES NOT RUN */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002281 /*
Brett Cannon8e9757e2010-05-03 23:43:49 +00002282 indent_level -= 1;
2283
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002284 yield '\n' + (' ' * (_indent * _current_indent_level))
2285 */
2286 }
2287 if (PyList_Append(rval, close_array))
2288 goto bail;
2289 Py_DECREF(s_fast);
2290 return 0;
2291
2292bail:
2293 Py_XDECREF(ident);
2294 Py_DECREF(s_fast);
2295 return -1;
2296}
2297
2298static void
2299encoder_dealloc(PyObject *self)
2300{
2301 /* Deallocate Encoder */
2302 encoder_clear(self);
2303 Py_TYPE(self)->tp_free(self);
2304}
2305
2306static int
2307encoder_traverse(PyObject *self, visitproc visit, void *arg)
2308{
2309 PyEncoderObject *s;
2310 assert(PyEncoder_Check(self));
2311 s = (PyEncoderObject *)self;
2312 Py_VISIT(s->markers);
2313 Py_VISIT(s->defaultfn);
2314 Py_VISIT(s->encoder);
2315 Py_VISIT(s->indent);
2316 Py_VISIT(s->key_separator);
2317 Py_VISIT(s->item_separator);
2318 Py_VISIT(s->sort_keys);
2319 Py_VISIT(s->skipkeys);
2320 return 0;
2321}
2322
2323static int
2324encoder_clear(PyObject *self)
2325{
2326 /* Deallocate Encoder */
2327 PyEncoderObject *s;
2328 assert(PyEncoder_Check(self));
2329 s = (PyEncoderObject *)self;
2330 Py_CLEAR(s->markers);
2331 Py_CLEAR(s->defaultfn);
2332 Py_CLEAR(s->encoder);
2333 Py_CLEAR(s->indent);
2334 Py_CLEAR(s->key_separator);
2335 Py_CLEAR(s->item_separator);
2336 Py_CLEAR(s->sort_keys);
2337 Py_CLEAR(s->skipkeys);
2338 return 0;
2339}
2340
2341PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2342
2343static
2344PyTypeObject PyEncoderType = {
2345 PyObject_HEAD_INIT(NULL)
2346 0, /* tp_internal */
2347 "_json.Encoder", /* tp_name */
2348 sizeof(PyEncoderObject), /* tp_basicsize */
2349 0, /* tp_itemsize */
2350 encoder_dealloc, /* tp_dealloc */
2351 0, /* tp_print */
2352 0, /* tp_getattr */
2353 0, /* tp_setattr */
2354 0, /* tp_compare */
2355 0, /* tp_repr */
2356 0, /* tp_as_number */
2357 0, /* tp_as_sequence */
2358 0, /* tp_as_mapping */
2359 0, /* tp_hash */
2360 encoder_call, /* tp_call */
2361 0, /* tp_str */
2362 0, /* tp_getattro */
2363 0, /* tp_setattro */
2364 0, /* tp_as_buffer */
2365 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2366 encoder_doc, /* tp_doc */
2367 encoder_traverse, /* tp_traverse */
2368 encoder_clear, /* tp_clear */
2369 0, /* tp_richcompare */
2370 0, /* tp_weaklistoffset */
2371 0, /* tp_iter */
2372 0, /* tp_iternext */
2373 0, /* tp_methods */
2374 encoder_members, /* tp_members */
2375 0, /* tp_getset */
2376 0, /* tp_base */
2377 0, /* tp_dict */
2378 0, /* tp_descr_get */
2379 0, /* tp_descr_set */
2380 0, /* tp_dictoffset */
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03002381 0, /* tp_init */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002382 0, /* tp_alloc */
2383 encoder_new, /* tp_new */
2384 0, /* tp_free */
2385};
2386
2387static PyMethodDef speedups_methods[] = {
2388 {"encode_basestring_ascii",
2389 (PyCFunction)py_encode_basestring_ascii,
2390 METH_O,
2391 pydoc_encode_basestring_ascii},
2392 {"scanstring",
2393 (PyCFunction)py_scanstring,
2394 METH_VARARGS,
2395 pydoc_scanstring},
Brett Cannon4b964f92008-05-05 20:21:38 +00002396 {NULL, NULL, 0, NULL}
2397};
2398
2399PyDoc_STRVAR(module_doc,
2400"json speedups\n");
2401
2402void
2403init_json(void)
2404{
2405 PyObject *m;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002406 if (PyType_Ready(&PyScannerType) < 0)
2407 return;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002408 if (PyType_Ready(&PyEncoderType) < 0)
2409 return;
2410 m = Py_InitModule3("_json", speedups_methods, module_doc);
Serhiy Storchaka045c4512015-07-24 12:58:25 +03002411 if (m == NULL)
2412 return;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002413 Py_INCREF((PyObject*)&PyScannerType);
2414 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2415 Py_INCREF((PyObject*)&PyEncoderType);
2416 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
Brett Cannon4b964f92008-05-05 20:21:38 +00002417}