blob: 3a88882f0c986b4db7c0d76c450c74b1b1324fa3 [file] [log] [blame]
Brett Cannon4b964f92008-05-05 20:21:38 +00001#include "Python.h"
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002#include "structmember.h"
/* Compatibility shims so the module builds against older Python.h headers. */

/* Supply Py_TYPE() when PY_VERSION_HEX is below 0x02060000 and the header
   has not already defined it. */
#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif

/* Below 0x02050000 there is no Py_ssize_t: map it onto int and route the
   Ssize_t <-> PyInt conversions through the long-based functions. */
#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
typedef int Py_ssize_t;
#define PY_SSIZE_T_MAX INT_MAX
#define PY_SSIZE_T_MIN INT_MIN
#define PyInt_FromSsize_t PyInt_FromLong
#define PyInt_AsSsize_t PyInt_AsLong
#endif

/* A value is finite when it is neither infinite nor NaN. */
#if !defined(Py_IS_FINITE)
#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
#endif

/* UNUSED tags a deliberately ignored parameter; it expands to nothing on
   compilers that do not define __GNUC__. */
#if defined(__GNUC__)
#define UNUSED __attribute__((__unused__))
#else
#define UNUSED
#endif
Brett Cannon4b964f92008-05-05 20:21:38 +000022
23#define DEFAULT_ENCODING "utf-8"
Brett Cannon4b964f92008-05-05 20:21:38 +000024
Bob Ippolitod914e3f2009-03-17 23:19:00 +000025#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
26#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
27#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
28#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
29
30static PyTypeObject PyScannerType;
31static PyTypeObject PyEncoderType;
32
33typedef struct _PyScannerObject {
34 PyObject_HEAD
35 PyObject *encoding;
36 PyObject *strict;
37 PyObject *object_hook;
Raymond Hettinger91852ca2009-03-19 19:19:03 +000038 PyObject *pairs_hook;
Bob Ippolitod914e3f2009-03-17 23:19:00 +000039 PyObject *parse_float;
40 PyObject *parse_int;
41 PyObject *parse_constant;
42} PyScannerObject;
43
44static PyMemberDef scanner_members[] = {
45 {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
46 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
47 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
Raymond Hettinger91852ca2009-03-19 19:19:03 +000048 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
Bob Ippolitod914e3f2009-03-17 23:19:00 +000049 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
50 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
51 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
52 {NULL}
53};
54
55typedef struct _PyEncoderObject {
56 PyObject_HEAD
57 PyObject *markers;
58 PyObject *defaultfn;
59 PyObject *encoder;
60 PyObject *indent;
61 PyObject *key_separator;
62 PyObject *item_separator;
63 PyObject *sort_keys;
64 PyObject *skipkeys;
65 int fast_encode;
66 int allow_nan;
67} PyEncoderObject;
68
69static PyMemberDef encoder_members[] = {
70 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
71 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
72 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
73 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
74 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
75 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
76 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
77 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
78 {NULL}
79};
80
81static Py_ssize_t
82ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
83static PyObject *
84ascii_escape_unicode(PyObject *pystr);
85static PyObject *
86ascii_escape_str(PyObject *pystr);
87static PyObject *
88py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
89void init_json(void);
90static PyObject *
91scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
92static PyObject *
93scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
94static PyObject *
95_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
96static PyObject *
97scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Bob Ippolitod914e3f2009-03-17 23:19:00 +000098static void
99scanner_dealloc(PyObject *self);
100static int
101scanner_clear(PyObject *self);
102static PyObject *
103encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000104static void
105encoder_dealloc(PyObject *self);
106static int
107encoder_clear(PyObject *self);
108static int
109encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
110static int
111encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
112static int
113encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
114static PyObject *
Raymond Hettingera0b8d9a2009-03-19 19:24:43 +0000115_encoded_const(PyObject *obj);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000116static void
117raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
118static PyObject *
119encoder_encode_string(PyEncoderObject *s, PyObject *obj);
120static int
121_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
122static PyObject *
123_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
124static PyObject *
125encoder_encode_float(PyEncoderObject *s, PyObject *obj);
126
/* S_CHAR(c): true when c is a printable ASCII character (' '..'~') that
   can be copied into a JSON string without escaping, i.e. it is neither
   a backslash nor a double quote.  The argument is parenthesised so that
   expression arguments parse correctly; note it is still evaluated more
   than once, so it must have no side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')

/* IS_WHITESPACE(c): true for space, tab, line feed and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* Output bytes produced per escaped input character: "\uXXXX" is 6 bytes.
   On wide-unicode builds one input character may become a surrogate
   pair "\uXXXX\uXXXX", so the worst case doubles. */
#define MIN_EXPANSION 6
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
136
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000137static int
138_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
139{
140 /* PyObject to Py_ssize_t converter */
141 *size_ptr = PyInt_AsSsize_t(o);
Georg Brandlf71ba952009-05-05 07:48:12 +0000142 if (*size_ptr == -1 && PyErr_Occurred())
143 return 0;
144 return 1;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000145}
146
147static PyObject *
148_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
149{
150 /* Py_ssize_t to PyObject converter */
151 return PyInt_FromSsize_t(*size_ptr);
152}
153
Brett Cannon4b964f92008-05-05 20:21:38 +0000154static Py_ssize_t
155ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
156{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000157 /* Escape unicode code point c to ASCII escape sequences
158 in char *output. output must have at least 12 bytes unused to
159 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Brett Cannon4b964f92008-05-05 20:21:38 +0000160 output[chars++] = '\\';
161 switch (c) {
162 case '\\': output[chars++] = (char)c; break;
163 case '"': output[chars++] = (char)c; break;
164 case '\b': output[chars++] = 'b'; break;
165 case '\f': output[chars++] = 'f'; break;
166 case '\n': output[chars++] = 'n'; break;
167 case '\r': output[chars++] = 'r'; break;
168 case '\t': output[chars++] = 't'; break;
169 default:
170#ifdef Py_UNICODE_WIDE
171 if (c >= 0x10000) {
172 /* UTF-16 surrogate pair */
173 Py_UNICODE v = c - 0x10000;
174 c = 0xd800 | ((v >> 10) & 0x3ff);
175 output[chars++] = 'u';
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000176 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
177 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
178 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
179 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Brett Cannon4b964f92008-05-05 20:21:38 +0000180 c = 0xdc00 | (v & 0x3ff);
181 output[chars++] = '\\';
182 }
183#endif
184 output[chars++] = 'u';
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000185 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
186 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
187 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
188 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Brett Cannon4b964f92008-05-05 20:21:38 +0000189 }
190 return chars;
191}
192
193static PyObject *
194ascii_escape_unicode(PyObject *pystr)
195{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000196 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
Brett Cannon4b964f92008-05-05 20:21:38 +0000197 Py_ssize_t i;
198 Py_ssize_t input_chars;
199 Py_ssize_t output_size;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000200 Py_ssize_t max_output_size;
Brett Cannon4b964f92008-05-05 20:21:38 +0000201 Py_ssize_t chars;
Benjamin Petersonaa034202016-09-26 23:55:41 -0700202 Py_ssize_t incr;
Brett Cannon4b964f92008-05-05 20:21:38 +0000203 PyObject *rval;
204 char *output;
205 Py_UNICODE *input_unicode;
206
207 input_chars = PyUnicode_GET_SIZE(pystr);
208 input_unicode = PyUnicode_AS_UNICODE(pystr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000209
Benjamin Petersonaa034202016-09-26 23:55:41 -0700210 output_size = input_chars;
211 incr = 2; /* for quotes */
Brett Cannon4b964f92008-05-05 20:21:38 +0000212 /* One char input can be up to 6 chars output, estimate 4 of these */
Benjamin Petersonaa034202016-09-26 23:55:41 -0700213 incr += MIN_EXPANSION * 4;
214 if (PY_SSIZE_T_MAX - incr < output_size) {
215 PyErr_NoMemory();
Benjamin Peterson04a53852016-08-13 16:47:25 -0700216 return NULL;
217 }
Benjamin Petersonaa034202016-09-26 23:55:41 -0700218 output_size += incr;
219 if (PY_SSIZE_T_MAX / MAX_EXPANSION < input_chars ||
220 PY_SSIZE_T_MAX - 2 < input_chars * MAX_EXPANSION)
221 max_output_size = PY_SSIZE_T_MAX;
222 else
223 max_output_size = 2 + (input_chars * MAX_EXPANSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000224 rval = PyString_FromStringAndSize(NULL, output_size);
Brett Cannon4b964f92008-05-05 20:21:38 +0000225 if (rval == NULL) {
226 return NULL;
227 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000228 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000229 chars = 0;
230 output[chars++] = '"';
231 for (i = 0; i < input_chars; i++) {
232 Py_UNICODE c = input_unicode[i];
233 if (S_CHAR(c)) {
234 output[chars++] = (char)c;
235 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000236 else {
Brett Cannon4b964f92008-05-05 20:21:38 +0000237 chars = ascii_escape_char(c, output, chars);
238 }
239 if (output_size - chars < (1 + MAX_EXPANSION)) {
Benjamin Petersonaa034202016-09-26 23:55:41 -0700240 if (output_size == PY_SSIZE_T_MAX) {
241 Py_DECREF(rval);
242 PyErr_NoMemory();
243 return NULL;
244 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000245 /* There's more than four, so let's resize by a lot */
Benjamin Petersonaa034202016-09-26 23:55:41 -0700246 if (PY_SSIZE_T_MAX / 2 >= output_size && output_size * 2 < max_output_size)
247 output_size *= 2;
248 else
249 output_size = max_output_size;
250 if (_PyString_Resize(&rval, output_size) == -1) {
251 return NULL;
Brett Cannon4b964f92008-05-05 20:21:38 +0000252 }
Benjamin Petersonaa034202016-09-26 23:55:41 -0700253 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000254 }
255 }
256 output[chars++] = '"';
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000257 if (_PyString_Resize(&rval, chars) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000258 return NULL;
259 }
260 return rval;
261}
262
263static PyObject *
264ascii_escape_str(PyObject *pystr)
265{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000266 /* Take a PyString pystr and return a new ASCII-only escaped PyString */
Brett Cannon4b964f92008-05-05 20:21:38 +0000267 Py_ssize_t i;
268 Py_ssize_t input_chars;
269 Py_ssize_t output_size;
Benjamin Petersonaa034202016-09-26 23:55:41 -0700270 Py_ssize_t max_output_size;
Brett Cannon4b964f92008-05-05 20:21:38 +0000271 Py_ssize_t chars;
Benjamin Petersonaa034202016-09-26 23:55:41 -0700272 Py_ssize_t incr;
Brett Cannon4b964f92008-05-05 20:21:38 +0000273 PyObject *rval;
274 char *output;
275 char *input_str;
276
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000277 input_chars = PyString_GET_SIZE(pystr);
278 input_str = PyString_AS_STRING(pystr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000279
280 /* Fast path for a string that's already ASCII */
281 for (i = 0; i < input_chars; i++) {
282 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
283 if (!S_CHAR(c)) {
284 /* If we have to escape something, scan the string for unicode */
285 Py_ssize_t j;
286 for (j = i; j < input_chars; j++) {
287 c = (Py_UNICODE)(unsigned char)input_str[j];
288 if (c > 0x7f) {
289 /* We hit a non-ASCII character, bail to unicode mode */
290 PyObject *uni;
291 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
292 if (uni == NULL) {
293 return NULL;
294 }
295 rval = ascii_escape_unicode(uni);
296 Py_DECREF(uni);
297 return rval;
298 }
299 }
300 break;
301 }
302 }
303
Benjamin Petersonaa034202016-09-26 23:55:41 -0700304 output_size = input_chars;
305 incr = 2; /* for quotes */
306 if (i != input_chars) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000307 /* One char input can be up to 6 chars output, estimate 4 of these */
Benjamin Petersonaa034202016-09-26 23:55:41 -0700308 incr += MIN_EXPANSION * 4;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000309 }
Benjamin Petersonaa034202016-09-26 23:55:41 -0700310 if (PY_SSIZE_T_MAX - incr < output_size) {
311 PyErr_NoMemory();
312 return NULL;
313 }
314 output_size += incr;
315 if (PY_SSIZE_T_MAX / MIN_EXPANSION < input_chars ||
316 PY_SSIZE_T_MAX - 2 < input_chars * MIN_EXPANSION)
317 max_output_size = PY_SSIZE_T_MAX;
318 else
319 max_output_size = 2 + (input_chars * MIN_EXPANSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000320 rval = PyString_FromStringAndSize(NULL, output_size);
Brett Cannon4b964f92008-05-05 20:21:38 +0000321 if (rval == NULL) {
322 return NULL;
323 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000324 output = PyString_AS_STRING(rval);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000325 output[0] = '"';
326
327 /* We know that everything up to i is ASCII already */
328 chars = i + 1;
329 memcpy(&output[1], input_str, i);
330
331 for (; i < input_chars; i++) {
332 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
Brett Cannon4b964f92008-05-05 20:21:38 +0000333 if (S_CHAR(c)) {
334 output[chars++] = (char)c;
335 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000336 else {
Brett Cannon4b964f92008-05-05 20:21:38 +0000337 chars = ascii_escape_char(c, output, chars);
338 }
339 /* An ASCII char can't possibly expand to a surrogate! */
340 if (output_size - chars < (1 + MIN_EXPANSION)) {
Benjamin Petersonaa034202016-09-26 23:55:41 -0700341 if (output_size == PY_SSIZE_T_MAX) {
342 Py_DECREF(rval);
343 PyErr_NoMemory();
344 return NULL;
Brett Cannon4b964f92008-05-05 20:21:38 +0000345 }
Benjamin Petersonaa034202016-09-26 23:55:41 -0700346 /* There's more than four, so let's resize by a lot */
347 if (PY_SSIZE_T_MAX / 2 >= output_size && output_size * 2 < max_output_size)
348 output_size *= 2;
349 else
350 output_size = max_output_size;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000351 if (_PyString_Resize(&rval, output_size) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000352 return NULL;
353 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000354 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000355 }
356 }
357 output[chars++] = '"';
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000358 if (_PyString_Resize(&rval, chars) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000359 return NULL;
360 }
361 return rval;
362}
363
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000364static void
Brett Cannon4b964f92008-05-05 20:21:38 +0000365raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
366{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000367 /* Use the Python function json.decoder.errmsg to raise a nice
368 looking ValueError exception */
Brett Cannon4b964f92008-05-05 20:21:38 +0000369 static PyObject *errmsg_fn = NULL;
370 PyObject *pymsg;
371 if (errmsg_fn == NULL) {
372 PyObject *decoder = PyImport_ImportModule("json.decoder");
373 if (decoder == NULL)
374 return;
375 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000376 Py_DECREF(decoder);
Brett Cannon4b964f92008-05-05 20:21:38 +0000377 if (errmsg_fn == NULL)
378 return;
Brett Cannon4b964f92008-05-05 20:21:38 +0000379 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000380 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
Benjamin Peterson595e3cb2008-10-16 21:09:28 +0000381 if (pymsg) {
382 PyErr_SetObject(PyExc_ValueError, pymsg);
383 Py_DECREF(pymsg);
384 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000385}
386
387static PyObject *
388join_list_unicode(PyObject *lst)
389{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000390 /* return u''.join(lst) */
391 static PyObject *joinfn = NULL;
392 if (joinfn == NULL) {
393 PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
394 if (ustr == NULL)
395 return NULL;
396
397 joinfn = PyObject_GetAttrString(ustr, "join");
398 Py_DECREF(ustr);
399 if (joinfn == NULL)
400 return NULL;
Brett Cannon4b964f92008-05-05 20:21:38 +0000401 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000402 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
Brett Cannon4b964f92008-05-05 20:21:38 +0000403}
404
405static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000406_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
407 /* return (rval, idx) tuple, stealing reference to rval */
408 PyObject *tpl;
409 PyObject *pyidx;
410 /*
411 steal a reference to rval, returns (rval, idx)
412 */
413 if (rval == NULL) {
414 return NULL;
415 }
416 pyidx = PyInt_FromSsize_t(idx);
417 if (pyidx == NULL) {
418 Py_DECREF(rval);
419 return NULL;
420 }
421 tpl = PyTuple_New(2);
422 if (tpl == NULL) {
423 Py_DECREF(pyidx);
424 Py_DECREF(rval);
425 return NULL;
426 }
427 PyTuple_SET_ITEM(tpl, 0, rval);
428 PyTuple_SET_ITEM(tpl, 1, pyidx);
429 return tpl;
430}
431
432static PyObject *
433scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
434{
435 /* Read the JSON string from PyString pystr.
436 end is the index of the first character after the quote.
437 encoding is the encoding of pystr (must be an ASCII superset)
438 if strict is zero then literal control characters are allowed
439 *next_end_ptr is a return-by-reference index of the character
440 after the end quote
441
442 Return value is a new PyString (if ASCII-only) or PyUnicode
443 */
Brett Cannon4b964f92008-05-05 20:21:38 +0000444 PyObject *rval;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000445 Py_ssize_t len = PyString_GET_SIZE(pystr);
Brett Cannon4b964f92008-05-05 20:21:38 +0000446 Py_ssize_t begin = end - 1;
Brett Cannon8e9757e2010-05-03 23:43:49 +0000447 Py_ssize_t next;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000448 char *buf = PyString_AS_STRING(pystr);
Brett Cannon4b964f92008-05-05 20:21:38 +0000449 PyObject *chunks = PyList_New(0);
450 if (chunks == NULL) {
451 goto bail;
452 }
Bob Ippolitod648f642008-07-19 21:59:50 +0000453 if (end < 0 || len <= end) {
454 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
455 goto bail;
456 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000457 while (1) {
458 /* Find the end of the string or the next escape */
459 Py_UNICODE c = 0;
460 PyObject *chunk = NULL;
461 for (next = end; next < len; next++) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000462 c = (unsigned char)buf[next];
Brett Cannon4b964f92008-05-05 20:21:38 +0000463 if (c == '"' || c == '\\') {
464 break;
465 }
466 else if (strict && c <= 0x1f) {
Bob Ippolitod648f642008-07-19 21:59:50 +0000467 raise_errmsg("Invalid control character at", pystr, next);
Brett Cannon4b964f92008-05-05 20:21:38 +0000468 goto bail;
469 }
470 }
471 if (!(c == '"' || c == '\\')) {
472 raise_errmsg("Unterminated string starting at", pystr, begin);
473 goto bail;
474 }
475 /* Pick up this chunk if it's not zero length */
476 if (next != end) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000477 PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000478 if (strchunk == NULL) {
479 goto bail;
480 }
Barry Warsawfa658272010-11-02 21:03:09 +0000481 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
482 Py_DECREF(strchunk);
483 if (chunk == NULL) {
484 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000485 }
486 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000487 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000488 goto bail;
489 }
490 Py_DECREF(chunk);
491 }
492 next++;
493 if (c == '"') {
494 end = next;
495 break;
496 }
497 if (next == len) {
498 raise_errmsg("Unterminated string starting at", pystr, begin);
499 goto bail;
500 }
501 c = buf[next];
502 if (c != 'u') {
503 /* Non-unicode backslash escapes */
504 end = next + 1;
505 switch (c) {
506 case '"': break;
507 case '\\': break;
508 case '/': break;
509 case 'b': c = '\b'; break;
510 case 'f': c = '\f'; break;
511 case 'n': c = '\n'; break;
512 case 'r': c = '\r'; break;
513 case 't': c = '\t'; break;
514 default: c = 0;
515 }
516 if (c == 0) {
517 raise_errmsg("Invalid \\escape", pystr, end - 2);
518 goto bail;
519 }
520 }
521 else {
522 c = 0;
523 next++;
524 end = next + 4;
525 if (end >= len) {
526 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
527 goto bail;
528 }
529 /* Decode 4 hex digits */
530 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000531 Py_UNICODE digit = buf[next];
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000532 c <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000533 switch (digit) {
534 case '0': case '1': case '2': case '3': case '4':
535 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000536 c |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000537 case 'a': case 'b': case 'c': case 'd': case 'e':
538 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000539 c |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000540 case 'A': case 'B': case 'C': case 'D': case 'E':
541 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000542 c |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000543 default:
544 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
545 goto bail;
546 }
547 }
548#ifdef Py_UNICODE_WIDE
549 /* Surrogate pair */
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200550 if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
551 buf[next++] == '\\' &&
552 buf[next++] == 'u') {
Brett Cannon4b964f92008-05-05 20:21:38 +0000553 Py_UNICODE c2 = 0;
Brett Cannon4b964f92008-05-05 20:21:38 +0000554 end += 6;
555 /* Decode 4 hex digits */
556 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000557 Py_UNICODE digit = buf[next];
Antoine Pitrou22ad2452010-10-09 15:28:59 +0000558 c2 <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000559 switch (digit) {
560 case '0': case '1': case '2': case '3': case '4':
561 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000562 c2 |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000563 case 'a': case 'b': case 'c': case 'd': case 'e':
564 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000565 c2 |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000566 case 'A': case 'B': case 'C': case 'D': case 'E':
567 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000568 c2 |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000569 default:
570 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
571 goto bail;
572 }
573 }
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200574 if ((c2 & 0xfc00) == 0xdc00)
575 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
576 else
577 end -= 6;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000578 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000579#endif
580 }
Barry Warsawfa658272010-11-02 21:03:09 +0000581 chunk = PyUnicode_FromUnicode(&c, 1);
582 if (chunk == NULL) {
583 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000584 }
585 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000586 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000587 goto bail;
588 }
589 Py_DECREF(chunk);
590 }
591
Ezio Melottidf8a8f72011-05-04 14:40:53 +0300592 rval = join_list_unicode(chunks);
Brett Cannon4b964f92008-05-05 20:21:38 +0000593 if (rval == NULL) {
594 goto bail;
595 }
Benjamin Peterson336680e2008-10-16 21:48:06 +0000596 Py_CLEAR(chunks);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000597 *next_end_ptr = end;
598 return rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000599bail:
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000600 *next_end_ptr = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000601 Py_XDECREF(chunks);
602 return NULL;
603}
604
605
606static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000607scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Brett Cannon4b964f92008-05-05 20:21:38 +0000608{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000609 /* Read the JSON string from PyUnicode pystr.
610 end is the index of the first character after the quote.
611 if strict is zero then literal control characters are allowed
612 *next_end_ptr is a return-by-reference index of the character
613 after the end quote
614
615 Return value is a new PyUnicode
616 */
Brett Cannon4b964f92008-05-05 20:21:38 +0000617 PyObject *rval;
618 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
619 Py_ssize_t begin = end - 1;
Brett Cannon8e9757e2010-05-03 23:43:49 +0000620 Py_ssize_t next;
Brett Cannon4b964f92008-05-05 20:21:38 +0000621 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
622 PyObject *chunks = PyList_New(0);
623 if (chunks == NULL) {
624 goto bail;
625 }
Bob Ippolitod648f642008-07-19 21:59:50 +0000626 if (end < 0 || len <= end) {
627 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
628 goto bail;
629 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000630 while (1) {
631 /* Find the end of the string or the next escape */
632 Py_UNICODE c = 0;
633 PyObject *chunk = NULL;
634 for (next = end; next < len; next++) {
635 c = buf[next];
636 if (c == '"' || c == '\\') {
637 break;
638 }
639 else if (strict && c <= 0x1f) {
Bob Ippolitod648f642008-07-19 21:59:50 +0000640 raise_errmsg("Invalid control character at", pystr, next);
Brett Cannon4b964f92008-05-05 20:21:38 +0000641 goto bail;
642 }
643 }
644 if (!(c == '"' || c == '\\')) {
645 raise_errmsg("Unterminated string starting at", pystr, begin);
646 goto bail;
647 }
648 /* Pick up this chunk if it's not zero length */
649 if (next != end) {
650 chunk = PyUnicode_FromUnicode(&buf[end], next - end);
651 if (chunk == NULL) {
652 goto bail;
653 }
654 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson87e6ad22008-10-16 21:27:54 +0000655 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000656 goto bail;
657 }
658 Py_DECREF(chunk);
659 }
660 next++;
661 if (c == '"') {
662 end = next;
663 break;
664 }
665 if (next == len) {
666 raise_errmsg("Unterminated string starting at", pystr, begin);
667 goto bail;
668 }
669 c = buf[next];
670 if (c != 'u') {
671 /* Non-unicode backslash escapes */
672 end = next + 1;
673 switch (c) {
674 case '"': break;
675 case '\\': break;
676 case '/': break;
677 case 'b': c = '\b'; break;
678 case 'f': c = '\f'; break;
679 case 'n': c = '\n'; break;
680 case 'r': c = '\r'; break;
681 case 't': c = '\t'; break;
682 default: c = 0;
683 }
684 if (c == 0) {
685 raise_errmsg("Invalid \\escape", pystr, end - 2);
686 goto bail;
687 }
688 }
689 else {
690 c = 0;
691 next++;
692 end = next + 4;
693 if (end >= len) {
694 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
695 goto bail;
696 }
697 /* Decode 4 hex digits */
698 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000699 Py_UNICODE digit = buf[next];
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000700 c <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000701 switch (digit) {
702 case '0': case '1': case '2': case '3': case '4':
703 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000704 c |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000705 case 'a': case 'b': case 'c': case 'd': case 'e':
706 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000707 c |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000708 case 'A': case 'B': case 'C': case 'D': case 'E':
709 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000710 c |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000711 default:
712 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
713 goto bail;
714 }
715 }
716#ifdef Py_UNICODE_WIDE
717 /* Surrogate pair */
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200718 if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
719 buf[next++] == '\\' && buf[next++] == 'u') {
Brett Cannon4b964f92008-05-05 20:21:38 +0000720 Py_UNICODE c2 = 0;
Brett Cannon4b964f92008-05-05 20:21:38 +0000721 end += 6;
722 /* Decode 4 hex digits */
723 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000724 Py_UNICODE digit = buf[next];
Antoine Pitrou22ad2452010-10-09 15:28:59 +0000725 c2 <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000726 switch (digit) {
727 case '0': case '1': case '2': case '3': case '4':
728 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000729 c2 |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000730 case 'a': case 'b': case 'c': case 'd': case 'e':
731 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000732 c2 |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000733 case 'A': case 'B': case 'C': case 'D': case 'E':
734 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000735 c2 |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000736 default:
737 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
738 goto bail;
739 }
740 }
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200741 if ((c2 & 0xfc00) == 0xdc00)
742 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
743 else
744 end -= 6;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000745 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000746#endif
747 }
748 chunk = PyUnicode_FromUnicode(&c, 1);
749 if (chunk == NULL) {
750 goto bail;
751 }
752 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000753 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000754 goto bail;
755 }
756 Py_DECREF(chunk);
757 }
758
759 rval = join_list_unicode(chunks);
760 if (rval == NULL) {
761 goto bail;
762 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000763 Py_DECREF(chunks);
764 *next_end_ptr = end;
765 return rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000766bail:
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000767 *next_end_ptr = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000768 Py_XDECREF(chunks);
769 return NULL;
770}
771
772PyDoc_STRVAR(pydoc_scanstring,
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000773 "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
774 "\n"
775 "Scan the string s for a JSON string. End is the index of the\n"
776 "character in s after the quote that started the JSON string.\n"
777 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
778 "on attempt to decode an invalid string. If strict is False then literal\n"
779 "control characters are allowed in the string.\n"
780 "\n"
781 "Returns a tuple of the decoded string and the index of the character in s\n"
782 "after the end quote."
783);
Brett Cannon4b964f92008-05-05 20:21:38 +0000784
785static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000786py_scanstring(PyObject* self UNUSED, PyObject *args)
Brett Cannon4b964f92008-05-05 20:21:38 +0000787{
788 PyObject *pystr;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000789 PyObject *rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000790 Py_ssize_t end;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000791 Py_ssize_t next_end = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000792 char *encoding = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000793 int strict = 1;
794 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000795 return NULL;
796 }
797 if (encoding == NULL) {
798 encoding = DEFAULT_ENCODING;
799 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000800 if (PyString_Check(pystr)) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000801 rval = scanstring_str(pystr, end, encoding, strict, &next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000802 }
803 else if (PyUnicode_Check(pystr)) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000804 rval = scanstring_unicode(pystr, end, strict, &next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000805 }
806 else {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000807 PyErr_Format(PyExc_TypeError,
808 "first argument must be a string, not %.80s",
Brett Cannon4b964f92008-05-05 20:21:38 +0000809 Py_TYPE(pystr)->tp_name);
810 return NULL;
811 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000812 return _build_rval_index_tuple(rval, next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000813}
814
815PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000816 "encode_basestring_ascii(basestring) -> str\n"
817 "\n"
818 "Return an ASCII-only JSON representation of a Python string"
819);
Brett Cannon4b964f92008-05-05 20:21:38 +0000820
821static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000822py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Brett Cannon4b964f92008-05-05 20:21:38 +0000823{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000824 /* Return an ASCII-only JSON representation of a Python string */
Brett Cannon4b964f92008-05-05 20:21:38 +0000825 /* METH_O */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000826 if (PyString_Check(pystr)) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000827 return ascii_escape_str(pystr);
828 }
829 else if (PyUnicode_Check(pystr)) {
830 return ascii_escape_unicode(pystr);
831 }
832 else {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000833 PyErr_Format(PyExc_TypeError,
834 "first argument must be a string, not %.80s",
Brett Cannon4b964f92008-05-05 20:21:38 +0000835 Py_TYPE(pystr)->tp_name);
836 return NULL;
837 }
838}
839
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000840static void
841scanner_dealloc(PyObject *self)
842{
INADA Naoki4cde4bd2017-09-04 12:31:41 +0900843 /* bpo-31095: UnTrack is needed before calling any callbacks */
844 PyObject_GC_UnTrack(self);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000845 scanner_clear(self);
846 Py_TYPE(self)->tp_free(self);
847}
848
849static int
850scanner_traverse(PyObject *self, visitproc visit, void *arg)
851{
852 PyScannerObject *s;
853 assert(PyScanner_Check(self));
854 s = (PyScannerObject *)self;
855 Py_VISIT(s->encoding);
856 Py_VISIT(s->strict);
857 Py_VISIT(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000858 Py_VISIT(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000859 Py_VISIT(s->parse_float);
860 Py_VISIT(s->parse_int);
861 Py_VISIT(s->parse_constant);
862 return 0;
863}
864
865static int
866scanner_clear(PyObject *self)
867{
868 PyScannerObject *s;
869 assert(PyScanner_Check(self));
870 s = (PyScannerObject *)self;
871 Py_CLEAR(s->encoding);
872 Py_CLEAR(s->strict);
873 Py_CLEAR(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000874 Py_CLEAR(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000875 Py_CLEAR(s->parse_float);
876 Py_CLEAR(s->parse_int);
877 Py_CLEAR(s->parse_constant);
878 return 0;
879}
880
881static PyObject *
882_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
883 /* Read a JSON object from PyString pystr.
884 idx is the index of the first character after the opening curly brace.
885 *next_idx_ptr is a return-by-reference index to the first character after
886 the closing curly brace.
887
888 Returns a new PyObject (usually a dict, but object_hook can change that)
889 */
890 char *str = PyString_AS_STRING(pystr);
891 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000892 PyObject *rval;
893 PyObject *pairs;
894 PyObject *item;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000895 PyObject *key = NULL;
896 PyObject *val = NULL;
897 char *encoding = PyString_AS_STRING(s->encoding);
898 int strict = PyObject_IsTrue(s->strict);
899 Py_ssize_t next_idx;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000900
Serhiy Storchaka5127ed72015-05-30 17:45:12 +0300901 if (strict < 0)
902 return NULL;
903
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000904 pairs = PyList_New(0);
905 if (pairs == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000906 return NULL;
907
908 /* skip whitespace after { */
909 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
910
911 /* only loop if the object is non-empty */
912 if (idx <= end_idx && str[idx] != '}') {
913 while (idx <= end_idx) {
914 /* read key */
915 if (str[idx] != '"') {
916 raise_errmsg("Expecting property name", pystr, idx);
917 goto bail;
918 }
919 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
920 if (key == NULL)
921 goto bail;
922 idx = next_idx;
923
924 /* skip whitespace between key and : delimiter, read :, skip whitespace */
925 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
926 if (idx > end_idx || str[idx] != ':') {
927 raise_errmsg("Expecting : delimiter", pystr, idx);
928 goto bail;
929 }
930 idx++;
931 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
932
933 /* read any JSON data type */
934 val = scan_once_str(s, pystr, idx, &next_idx);
935 if (val == NULL)
936 goto bail;
937
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000938 item = PyTuple_Pack(2, key, val);
939 if (item == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000940 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000941 Py_CLEAR(key);
942 Py_CLEAR(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000943 if (PyList_Append(pairs, item) == -1) {
944 Py_DECREF(item);
945 goto bail;
946 }
947 Py_DECREF(item);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000948 idx = next_idx;
949
950 /* skip whitespace before } or , */
951 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
952
953 /* bail if the object is closed or we didn't get the , delimiter */
954 if (idx > end_idx) break;
955 if (str[idx] == '}') {
956 break;
957 }
958 else if (str[idx] != ',') {
959 raise_errmsg("Expecting , delimiter", pystr, idx);
960 goto bail;
961 }
962 idx++;
963
964 /* skip whitespace after , delimiter */
965 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
966 }
967 }
968 /* verify that idx < end_idx, str[idx] should be '}' */
969 if (idx > end_idx || str[idx] != '}') {
970 raise_errmsg("Expecting object", pystr, end_idx);
971 goto bail;
972 }
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000973
974 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
975 if (s->pairs_hook != Py_None) {
976 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
977 if (val == NULL)
978 goto bail;
979 Py_DECREF(pairs);
980 *next_idx_ptr = idx + 1;
981 return val;
982 }
983
984 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
985 pairs, NULL);
986 if (rval == NULL)
987 goto bail;
988 Py_CLEAR(pairs);
989
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000990 /* if object_hook is not None: rval = object_hook(rval) */
991 if (s->object_hook != Py_None) {
992 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
993 if (val == NULL)
994 goto bail;
995 Py_DECREF(rval);
996 rval = val;
997 val = NULL;
998 }
999 *next_idx_ptr = idx + 1;
1000 return rval;
1001bail:
1002 Py_XDECREF(key);
1003 Py_XDECREF(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001004 Py_XDECREF(pairs);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001005 return NULL;
1006}
1007
1008static PyObject *
1009_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1010 /* Read a JSON object from PyUnicode pystr.
1011 idx is the index of the first character after the opening curly brace.
1012 *next_idx_ptr is a return-by-reference index to the first character after
1013 the closing curly brace.
1014
1015 Returns a new PyObject (usually a dict, but object_hook can change that)
1016 */
1017 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1018 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001019 PyObject *rval;
1020 PyObject *pairs;
1021 PyObject *item;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001022 PyObject *key = NULL;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001023 PyObject *val = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001024 int strict = PyObject_IsTrue(s->strict);
1025 Py_ssize_t next_idx;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001026
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001027 if (strict < 0)
1028 return NULL;
1029
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001030 pairs = PyList_New(0);
1031 if (pairs == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001032 return NULL;
1033
1034 /* skip whitespace after { */
1035 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1036
1037 /* only loop if the object is non-empty */
1038 if (idx <= end_idx && str[idx] != '}') {
1039 while (idx <= end_idx) {
1040 /* read key */
1041 if (str[idx] != '"') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001042 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001043 goto bail;
1044 }
1045 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1046 if (key == NULL)
1047 goto bail;
1048 idx = next_idx;
1049
1050 /* skip whitespace between key and : delimiter, read :, skip whitespace */
1051 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1052 if (idx > end_idx || str[idx] != ':') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001053 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001054 goto bail;
1055 }
1056 idx++;
1057 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1058
1059 /* read any JSON term */
1060 val = scan_once_unicode(s, pystr, idx, &next_idx);
1061 if (val == NULL)
1062 goto bail;
1063
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001064 item = PyTuple_Pack(2, key, val);
1065 if (item == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001066 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001067 Py_CLEAR(key);
1068 Py_CLEAR(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001069 if (PyList_Append(pairs, item) == -1) {
1070 Py_DECREF(item);
1071 goto bail;
1072 }
1073 Py_DECREF(item);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001074 idx = next_idx;
1075
1076 /* skip whitespace before } or , */
1077 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1078
1079 /* bail if the object is closed or we didn't get the , delimiter */
1080 if (idx > end_idx) break;
1081 if (str[idx] == '}') {
1082 break;
1083 }
1084 else if (str[idx] != ',') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001085 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001086 goto bail;
1087 }
1088 idx++;
1089
1090 /* skip whitespace after , delimiter */
1091 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1092 }
1093 }
1094
1095 /* verify that idx < end_idx, str[idx] should be '}' */
1096 if (idx > end_idx || str[idx] != '}') {
1097 raise_errmsg("Expecting object", pystr, end_idx);
1098 goto bail;
1099 }
1100
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001101 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1102 if (s->pairs_hook != Py_None) {
1103 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1104 if (val == NULL)
1105 goto bail;
1106 Py_DECREF(pairs);
1107 *next_idx_ptr = idx + 1;
1108 return val;
1109 }
1110
1111 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
1112 pairs, NULL);
1113 if (rval == NULL)
1114 goto bail;
1115 Py_CLEAR(pairs);
1116
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001117 /* if object_hook is not None: rval = object_hook(rval) */
1118 if (s->object_hook != Py_None) {
1119 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1120 if (val == NULL)
1121 goto bail;
1122 Py_DECREF(rval);
1123 rval = val;
1124 val = NULL;
1125 }
1126 *next_idx_ptr = idx + 1;
1127 return rval;
1128bail:
1129 Py_XDECREF(key);
1130 Py_XDECREF(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001131 Py_XDECREF(pairs);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001132 return NULL;
1133}
1134
1135static PyObject *
1136_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1137 /* Read a JSON array from PyString pystr.
1138 idx is the index of the first character after the opening brace.
1139 *next_idx_ptr is a return-by-reference index to the first character after
1140 the closing brace.
1141
1142 Returns a new PyList
1143 */
1144 char *str = PyString_AS_STRING(pystr);
1145 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1146 PyObject *val = NULL;
1147 PyObject *rval = PyList_New(0);
1148 Py_ssize_t next_idx;
1149 if (rval == NULL)
1150 return NULL;
1151
1152 /* skip whitespace after [ */
1153 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1154
1155 /* only loop if the array is non-empty */
1156 if (idx <= end_idx && str[idx] != ']') {
1157 while (idx <= end_idx) {
1158
1159 /* read any JSON term and de-tuplefy the (rval, idx) */
1160 val = scan_once_str(s, pystr, idx, &next_idx);
1161 if (val == NULL)
1162 goto bail;
1163
1164 if (PyList_Append(rval, val) == -1)
1165 goto bail;
1166
1167 Py_CLEAR(val);
1168 idx = next_idx;
1169
1170 /* skip whitespace between term and , */
1171 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1172
1173 /* bail if the array is closed or we didn't get the , delimiter */
1174 if (idx > end_idx) break;
1175 if (str[idx] == ']') {
1176 break;
1177 }
1178 else if (str[idx] != ',') {
1179 raise_errmsg("Expecting , delimiter", pystr, idx);
1180 goto bail;
1181 }
1182 idx++;
1183
1184 /* skip whitespace after , */
1185 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1186 }
1187 }
1188
1189 /* verify that idx < end_idx, str[idx] should be ']' */
1190 if (idx > end_idx || str[idx] != ']') {
1191 raise_errmsg("Expecting object", pystr, end_idx);
1192 goto bail;
1193 }
1194 *next_idx_ptr = idx + 1;
1195 return rval;
1196bail:
1197 Py_XDECREF(val);
1198 Py_DECREF(rval);
1199 return NULL;
1200}
1201
1202static PyObject *
1203_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1204 /* Read a JSON array from PyString pystr.
1205 idx is the index of the first character after the opening brace.
1206 *next_idx_ptr is a return-by-reference index to the first character after
1207 the closing brace.
1208
1209 Returns a new PyList
1210 */
1211 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1212 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1213 PyObject *val = NULL;
1214 PyObject *rval = PyList_New(0);
1215 Py_ssize_t next_idx;
1216 if (rval == NULL)
1217 return NULL;
1218
1219 /* skip whitespace after [ */
1220 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1221
1222 /* only loop if the array is non-empty */
1223 if (idx <= end_idx && str[idx] != ']') {
1224 while (idx <= end_idx) {
1225
1226 /* read any JSON term */
1227 val = scan_once_unicode(s, pystr, idx, &next_idx);
1228 if (val == NULL)
1229 goto bail;
1230
1231 if (PyList_Append(rval, val) == -1)
1232 goto bail;
1233
1234 Py_CLEAR(val);
1235 idx = next_idx;
1236
1237 /* skip whitespace between term and , */
1238 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1239
1240 /* bail if the array is closed or we didn't get the , delimiter */
1241 if (idx > end_idx) break;
1242 if (str[idx] == ']') {
1243 break;
1244 }
1245 else if (str[idx] != ',') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001246 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001247 goto bail;
1248 }
1249 idx++;
1250
1251 /* skip whitespace after , */
1252 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1253 }
1254 }
1255
1256 /* verify that idx < end_idx, str[idx] should be ']' */
1257 if (idx > end_idx || str[idx] != ']') {
1258 raise_errmsg("Expecting object", pystr, end_idx);
1259 goto bail;
1260 }
1261 *next_idx_ptr = idx + 1;
1262 return rval;
1263bail:
1264 Py_XDECREF(val);
1265 Py_DECREF(rval);
1266 return NULL;
1267}
1268
1269static PyObject *
1270_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1271 /* Read a JSON constant from PyString pystr.
1272 constant is the constant string that was found
1273 ("NaN", "Infinity", "-Infinity").
1274 idx is the index of the first character of the constant
1275 *next_idx_ptr is a return-by-reference index to the first character after
1276 the constant.
1277
1278 Returns the result of parse_constant
1279 */
1280 PyObject *cstr;
1281 PyObject *rval;
1282 /* constant is "NaN", "Infinity", or "-Infinity" */
1283 cstr = PyString_InternFromString(constant);
1284 if (cstr == NULL)
1285 return NULL;
1286
1287 /* rval = parse_constant(constant) */
1288 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1289 idx += PyString_GET_SIZE(cstr);
1290 Py_DECREF(cstr);
1291 *next_idx_ptr = idx;
1292 return rval;
1293}
1294
1295static PyObject *
1296_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1297 /* Read a JSON number from PyString pystr.
1298 idx is the index of the first character of the number
1299 *next_idx_ptr is a return-by-reference index to the first character after
1300 the number.
1301
1302 Returns a new PyObject representation of that number:
1303 PyInt, PyLong, or PyFloat.
1304 May return other types if parse_int or parse_float are set
1305 */
1306 char *str = PyString_AS_STRING(pystr);
1307 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1308 Py_ssize_t idx = start;
1309 int is_float = 0;
1310 PyObject *rval;
1311 PyObject *numstr;
1312
1313 /* read a sign if it's there, make sure it's not the end of the string */
1314 if (str[idx] == '-') {
1315 idx++;
1316 if (idx > end_idx) {
1317 PyErr_SetNone(PyExc_StopIteration);
1318 return NULL;
1319 }
1320 }
1321
1322 /* read as many integer digits as we find as long as it doesn't start with 0 */
1323 if (str[idx] >= '1' && str[idx] <= '9') {
1324 idx++;
1325 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1326 }
1327 /* if it starts with 0 we only expect one integer digit */
1328 else if (str[idx] == '0') {
1329 idx++;
1330 }
1331 /* no integer digits, error */
1332 else {
1333 PyErr_SetNone(PyExc_StopIteration);
1334 return NULL;
1335 }
1336
1337 /* if the next char is '.' followed by a digit then read all float digits */
1338 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1339 is_float = 1;
1340 idx += 2;
1341 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1342 }
1343
1344 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1345 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1346
1347 /* save the index of the 'e' or 'E' just in case we need to backtrack */
1348 Py_ssize_t e_start = idx;
1349 idx++;
1350
1351 /* read an exponent sign if present */
1352 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1353
1354 /* read all digits */
1355 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1356
1357 /* if we got a digit, then parse as float. if not, backtrack */
1358 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1359 is_float = 1;
1360 }
1361 else {
1362 idx = e_start;
1363 }
1364 }
1365
1366 /* copy the section we determined to be a number */
1367 numstr = PyString_FromStringAndSize(&str[start], idx - start);
1368 if (numstr == NULL)
1369 return NULL;
1370 if (is_float) {
1371 /* parse as a float using a fast path if available, otherwise call user defined method */
1372 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1373 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1374 }
1375 else {
Eric Smith129c97d2009-10-28 08:44:37 +00001376 double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1377 NULL, NULL);
1378 if (d == -1.0 && PyErr_Occurred())
1379 return NULL;
1380 rval = PyFloat_FromDouble(d);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001381 }
1382 }
1383 else {
1384 /* parse as an int using a fast path if available, otherwise call user defined method */
1385 if (s->parse_int != (PyObject *)&PyInt_Type) {
1386 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1387 }
1388 else {
1389 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1390 }
1391 }
1392 Py_DECREF(numstr);
1393 *next_idx_ptr = idx;
1394 return rval;
1395}
1396
1397static PyObject *
1398_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1399 /* Read a JSON number from PyUnicode pystr.
1400 idx is the index of the first character of the number
1401 *next_idx_ptr is a return-by-reference index to the first character after
1402 the number.
1403
1404 Returns a new PyObject representation of that number:
1405 PyInt, PyLong, or PyFloat.
1406 May return other types if parse_int or parse_float are set
1407 */
1408 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1409 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1410 Py_ssize_t idx = start;
1411 int is_float = 0;
1412 PyObject *rval;
1413 PyObject *numstr;
1414
1415 /* read a sign if it's there, make sure it's not the end of the string */
1416 if (str[idx] == '-') {
1417 idx++;
1418 if (idx > end_idx) {
1419 PyErr_SetNone(PyExc_StopIteration);
1420 return NULL;
1421 }
1422 }
1423
1424 /* read as many integer digits as we find as long as it doesn't start with 0 */
1425 if (str[idx] >= '1' && str[idx] <= '9') {
1426 idx++;
1427 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1428 }
1429 /* if it starts with 0 we only expect one integer digit */
1430 else if (str[idx] == '0') {
1431 idx++;
1432 }
1433 /* no integer digits, error */
1434 else {
1435 PyErr_SetNone(PyExc_StopIteration);
1436 return NULL;
1437 }
1438
1439 /* if the next char is '.' followed by a digit then read all float digits */
1440 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1441 is_float = 1;
1442 idx += 2;
Bob Ippolito76a982a2009-03-29 22:33:58 +00001443 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001444 }
1445
1446 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1447 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1448 Py_ssize_t e_start = idx;
1449 idx++;
1450
1451 /* read an exponent sign if present */
1452 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1453
1454 /* read all digits */
1455 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1456
1457 /* if we got a digit, then parse as float. if not, backtrack */
1458 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1459 is_float = 1;
1460 }
1461 else {
1462 idx = e_start;
1463 }
1464 }
1465
1466 /* copy the section we determined to be a number */
1467 numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1468 if (numstr == NULL)
1469 return NULL;
1470 if (is_float) {
1471 /* parse as a float using a fast path if available, otherwise call user defined method */
1472 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1473 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1474 }
1475 else {
1476 rval = PyFloat_FromString(numstr, NULL);
1477 }
1478 }
1479 else {
1480 /* no fast path for unicode -> int, just call */
1481 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1482 }
1483 Py_DECREF(numstr);
1484 *next_idx_ptr = idx;
1485 return rval;
1486}
1487
1488static PyObject *
1489scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1490{
1491 /* Read one JSON term (of any kind) from PyString pystr.
1492 idx is the index of the first character of the term
1493 *next_idx_ptr is a return-by-reference index to the first character after
1494 the number.
1495
1496 Returns a new PyObject representation of the term.
1497 */
Ezio Melotticec46492011-05-07 17:40:23 +03001498 PyObject *res;
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001499 int strict;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001500 char *str = PyString_AS_STRING(pystr);
1501 Py_ssize_t length = PyString_GET_SIZE(pystr);
Benjamin Peterson3e5d87b2014-04-14 11:43:09 -04001502 if (idx < 0) {
1503 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1504 return NULL;
1505 }
1506 if (idx >= length) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001507 PyErr_SetNone(PyExc_StopIteration);
1508 return NULL;
1509 }
1510 switch (str[idx]) {
1511 case '"':
1512 /* string */
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001513 strict = PyObject_IsTrue(s->strict);
1514 if (strict < 0)
1515 return NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001516 return scanstring_str(pystr, idx + 1,
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001517 PyString_AS_STRING(s->encoding), strict, next_idx_ptr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001518 case '{':
1519 /* object */
Ezio Melotticec46492011-05-07 17:40:23 +03001520 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1521 "from a byte string"))
1522 return NULL;
1523 res = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1524 Py_LeaveRecursiveCall();
1525 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001526 case '[':
1527 /* array */
Ezio Melotticec46492011-05-07 17:40:23 +03001528 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1529 "from a byte string"))
1530 return NULL;
1531 res = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1532 Py_LeaveRecursiveCall();
1533 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001534 case 'n':
1535 /* null */
1536 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1537 Py_INCREF(Py_None);
1538 *next_idx_ptr = idx + 4;
1539 return Py_None;
1540 }
1541 break;
1542 case 't':
1543 /* true */
1544 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1545 Py_INCREF(Py_True);
1546 *next_idx_ptr = idx + 4;
1547 return Py_True;
1548 }
1549 break;
1550 case 'f':
1551 /* false */
1552 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1553 Py_INCREF(Py_False);
1554 *next_idx_ptr = idx + 5;
1555 return Py_False;
1556 }
1557 break;
1558 case 'N':
1559 /* NaN */
1560 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1561 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1562 }
1563 break;
1564 case 'I':
1565 /* Infinity */
1566 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1567 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1568 }
1569 break;
1570 case '-':
1571 /* -Infinity */
1572 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1573 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1574 }
1575 break;
1576 }
1577 /* Didn't find a string, object, array, or named constant. Look for a number. */
1578 return _match_number_str(s, pystr, idx, next_idx_ptr);
1579}
1580
1581static PyObject *
1582scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1583{
1584 /* Read one JSON term (of any kind) from PyUnicode pystr.
1585 idx is the index of the first character of the term
1586 *next_idx_ptr is a return-by-reference index to the first character after
1587 the number.
1588
1589 Returns a new PyObject representation of the term.
1590 */
Ezio Melotticec46492011-05-07 17:40:23 +03001591 PyObject *res;
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001592 int strict;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001593 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1594 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
Benjamin Peterson3e5d87b2014-04-14 11:43:09 -04001595 if (idx < 0) {
1596 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1597 return NULL;
1598 }
1599 if (idx >= length) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001600 PyErr_SetNone(PyExc_StopIteration);
1601 return NULL;
1602 }
1603 switch (str[idx]) {
1604 case '"':
1605 /* string */
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001606 strict = PyObject_IsTrue(s->strict);
1607 if (strict < 0)
1608 return NULL;
1609 return scanstring_unicode(pystr, idx + 1, strict, next_idx_ptr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001610 case '{':
1611 /* object */
Ezio Melotticec46492011-05-07 17:40:23 +03001612 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1613 "from a unicode string"))
1614 return NULL;
1615 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1616 Py_LeaveRecursiveCall();
1617 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001618 case '[':
1619 /* array */
Ezio Melotticec46492011-05-07 17:40:23 +03001620 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1621 "from a unicode string"))
1622 return NULL;
1623 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1624 Py_LeaveRecursiveCall();
1625 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001626 case 'n':
1627 /* null */
1628 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1629 Py_INCREF(Py_None);
1630 *next_idx_ptr = idx + 4;
1631 return Py_None;
1632 }
1633 break;
1634 case 't':
1635 /* true */
1636 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1637 Py_INCREF(Py_True);
1638 *next_idx_ptr = idx + 4;
1639 return Py_True;
1640 }
1641 break;
1642 case 'f':
1643 /* false */
1644 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1645 Py_INCREF(Py_False);
1646 *next_idx_ptr = idx + 5;
1647 return Py_False;
1648 }
1649 break;
1650 case 'N':
1651 /* NaN */
1652 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1653 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1654 }
1655 break;
1656 case 'I':
1657 /* Infinity */
1658 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1659 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1660 }
1661 break;
1662 case '-':
1663 /* -Infinity */
1664 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1665 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1666 }
1667 break;
1668 }
1669 /* Didn't find a string, object, array, or named constant. Look for a number. */
1670 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1671}
1672
1673static PyObject *
1674scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1675{
1676 /* Python callable interface to scan_once_{str,unicode} */
1677 PyObject *pystr;
1678 PyObject *rval;
1679 Py_ssize_t idx;
1680 Py_ssize_t next_idx = -1;
1681 static char *kwlist[] = {"string", "idx", NULL};
1682 PyScannerObject *s;
1683 assert(PyScanner_Check(self));
1684 s = (PyScannerObject *)self;
1685 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1686 return NULL;
1687
1688 if (PyString_Check(pystr)) {
1689 rval = scan_once_str(s, pystr, idx, &next_idx);
1690 }
1691 else if (PyUnicode_Check(pystr)) {
1692 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1693 }
1694 else {
1695 PyErr_Format(PyExc_TypeError,
1696 "first argument must be a string, not %.80s",
1697 Py_TYPE(pystr)->tp_name);
1698 return NULL;
1699 }
1700 return _build_rval_index_tuple(rval, next_idx);
1701}
1702
1703static PyObject *
1704scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1705{
1706 PyScannerObject *s;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001707 PyObject *ctx;
1708 static char *kwlist[] = {"context", NULL};
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001709
1710 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001711 return NULL;
1712
1713 s = (PyScannerObject *)type->tp_alloc(type, 0);
1714 if (s == NULL)
1715 return NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001716
1717 /* PyString_AS_STRING is used on encoding */
1718 s->encoding = PyObject_GetAttrString(ctx, "encoding");
Antoine Pitrou187177f2009-12-08 15:40:51 +00001719 if (s->encoding == NULL)
1720 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001721 if (s->encoding == Py_None) {
1722 Py_DECREF(Py_None);
1723 s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1724 }
1725 else if (PyUnicode_Check(s->encoding)) {
1726 PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001727 Py_SETREF(s->encoding, tmp);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001728 }
Amaury Forgeot d'Arcdee76e62012-01-13 22:53:25 +01001729 if (s->encoding == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001730 goto bail;
Amaury Forgeot d'Arcdee76e62012-01-13 22:53:25 +01001731 if (!PyString_Check(s->encoding)) {
1732 PyErr_Format(PyExc_TypeError,
1733 "encoding must be a string, not %.80s",
1734 Py_TYPE(s->encoding)->tp_name);
1735 goto bail;
1736 }
1737
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001738
1739 /* All of these will fail "gracefully" so we don't need to verify them */
1740 s->strict = PyObject_GetAttrString(ctx, "strict");
1741 if (s->strict == NULL)
1742 goto bail;
1743 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1744 if (s->object_hook == NULL)
1745 goto bail;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001746 s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
Georg Brandld823bdc2011-01-02 14:20:16 +00001747 if (s->pairs_hook == NULL)
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001748 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001749 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1750 if (s->parse_float == NULL)
1751 goto bail;
1752 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1753 if (s->parse_int == NULL)
1754 goto bail;
1755 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1756 if (s->parse_constant == NULL)
1757 goto bail;
1758
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001759 return (PyObject *)s;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001760
1761bail:
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001762 Py_DECREF(s);
1763 return NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001764}
1765
1766PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1767
1768static
1769PyTypeObject PyScannerType = {
Benjamin Petersona72d15c2017-09-13 21:20:29 -07001770 PyVarObject_HEAD_INIT(NULL, 0)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001771 "_json.Scanner", /* tp_name */
1772 sizeof(PyScannerObject), /* tp_basicsize */
1773 0, /* tp_itemsize */
1774 scanner_dealloc, /* tp_dealloc */
1775 0, /* tp_print */
1776 0, /* tp_getattr */
1777 0, /* tp_setattr */
1778 0, /* tp_compare */
1779 0, /* tp_repr */
1780 0, /* tp_as_number */
1781 0, /* tp_as_sequence */
1782 0, /* tp_as_mapping */
1783 0, /* tp_hash */
1784 scanner_call, /* tp_call */
1785 0, /* tp_str */
1786 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1787 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1788 0, /* tp_as_buffer */
1789 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1790 scanner_doc, /* tp_doc */
1791 scanner_traverse, /* tp_traverse */
1792 scanner_clear, /* tp_clear */
1793 0, /* tp_richcompare */
1794 0, /* tp_weaklistoffset */
1795 0, /* tp_iter */
1796 0, /* tp_iternext */
1797 0, /* tp_methods */
1798 scanner_members, /* tp_members */
1799 0, /* tp_getset */
1800 0, /* tp_base */
1801 0, /* tp_dict */
1802 0, /* tp_descr_get */
1803 0, /* tp_descr_set */
1804 0, /* tp_dictoffset */
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001805 0, /* tp_init */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001806 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1807 scanner_new, /* tp_new */
1808 0,/* PyObject_GC_Del, */ /* tp_free */
1809};
1810
1811static PyObject *
1812encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1813{
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001814 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1815
1816 PyEncoderObject *s;
Antoine Pitrou187177f2009-12-08 15:40:51 +00001817 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001818 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan_obj;
1819 int allow_nan;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001820
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001821 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
Antoine Pitrou187177f2009-12-08 15:40:51 +00001822 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001823 &sort_keys, &skipkeys, &allow_nan_obj))
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001824 return NULL;
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001825
1826 allow_nan = PyObject_IsTrue(allow_nan_obj);
1827 if (allow_nan < 0)
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001828 return NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001829
Serhiy Storchaka76a64ca2015-07-26 09:07:53 +03001830 if (markers != Py_None && !PyDict_Check(markers)) {
1831 PyErr_Format(PyExc_TypeError,
1832 "make_encoder() argument 1 must be dict or None, "
1833 "not %.200s", Py_TYPE(markers)->tp_name);
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001834 return NULL;
Serhiy Storchaka76a64ca2015-07-26 09:07:53 +03001835 }
1836
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001837 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1838 if (s == NULL)
1839 return NULL;
1840
Antoine Pitrou187177f2009-12-08 15:40:51 +00001841 s->markers = markers;
1842 s->defaultfn = defaultfn;
1843 s->encoder = encoder;
1844 s->indent = indent;
1845 s->key_separator = key_separator;
1846 s->item_separator = item_separator;
1847 s->sort_keys = sort_keys;
1848 s->skipkeys = skipkeys;
1849 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001850 s->allow_nan = allow_nan;
Antoine Pitrou187177f2009-12-08 15:40:51 +00001851
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001852 Py_INCREF(s->markers);
1853 Py_INCREF(s->defaultfn);
1854 Py_INCREF(s->encoder);
1855 Py_INCREF(s->indent);
1856 Py_INCREF(s->key_separator);
1857 Py_INCREF(s->item_separator);
1858 Py_INCREF(s->sort_keys);
1859 Py_INCREF(s->skipkeys);
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001860 return (PyObject *)s;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001861}
1862
1863static PyObject *
1864encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1865{
1866 /* Python callable interface to encode_listencode_obj */
1867 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1868 PyObject *obj;
1869 PyObject *rval;
1870 Py_ssize_t indent_level;
1871 PyEncoderObject *s;
1872 assert(PyEncoder_Check(self));
1873 s = (PyEncoderObject *)self;
1874 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1875 &obj, _convertPyInt_AsSsize_t, &indent_level))
1876 return NULL;
1877 rval = PyList_New(0);
1878 if (rval == NULL)
1879 return NULL;
1880 if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1881 Py_DECREF(rval);
1882 return NULL;
1883 }
1884 return rval;
1885}
1886
1887static PyObject *
1888_encoded_const(PyObject *obj)
1889{
1890 /* Return the JSON string representation of None, True, False */
1891 if (obj == Py_None) {
1892 static PyObject *s_null = NULL;
1893 if (s_null == NULL) {
1894 s_null = PyString_InternFromString("null");
1895 }
Miss Islington (bot)669429f2018-09-12 15:00:56 -07001896 Py_XINCREF(s_null);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001897 return s_null;
1898 }
1899 else if (obj == Py_True) {
1900 static PyObject *s_true = NULL;
1901 if (s_true == NULL) {
1902 s_true = PyString_InternFromString("true");
1903 }
Miss Islington (bot)669429f2018-09-12 15:00:56 -07001904 Py_XINCREF(s_true);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001905 return s_true;
1906 }
1907 else if (obj == Py_False) {
1908 static PyObject *s_false = NULL;
1909 if (s_false == NULL) {
1910 s_false = PyString_InternFromString("false");
1911 }
Miss Islington (bot)669429f2018-09-12 15:00:56 -07001912 Py_XINCREF(s_false);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001913 return s_false;
1914 }
1915 else {
1916 PyErr_SetString(PyExc_ValueError, "not a const");
1917 return NULL;
1918 }
1919}
1920
1921static PyObject *
1922encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1923{
1924 /* Return the JSON representation of a PyFloat */
1925 double i = PyFloat_AS_DOUBLE(obj);
1926 if (!Py_IS_FINITE(i)) {
1927 if (!s->allow_nan) {
1928 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1929 return NULL;
1930 }
1931 if (i > 0) {
1932 return PyString_FromString("Infinity");
1933 }
1934 else if (i < 0) {
1935 return PyString_FromString("-Infinity");
1936 }
1937 else {
1938 return PyString_FromString("NaN");
1939 }
1940 }
Mark Dickinsone6239a32016-09-03 17:45:00 +01001941 /* Make sure to use the base float class repr method */
1942 return PyFloat_Type.tp_repr(obj);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001943}
1944
1945static PyObject *
1946encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1947{
1948 /* Return the JSON representation of a string */
1949 if (s->fast_encode)
1950 return py_encode_basestring_ascii(NULL, obj);
1951 else
1952 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1953}
1954
1955static int
1956_steal_list_append(PyObject *lst, PyObject *stolen)
1957{
1958 /* Append stolen and then decrement its reference count */
1959 int rval = PyList_Append(lst, stolen);
1960 Py_DECREF(stolen);
1961 return rval;
1962}
1963
1964static int
1965encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
1966{
1967 /* Encode Python object obj to a JSON term, rval is a PyList */
1968 PyObject *newobj;
1969 int rv;
1970
1971 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1972 PyObject *cstr = _encoded_const(obj);
1973 if (cstr == NULL)
1974 return -1;
1975 return _steal_list_append(rval, cstr);
1976 }
1977 else if (PyString_Check(obj) || PyUnicode_Check(obj))
1978 {
1979 PyObject *encoded = encoder_encode_string(s, obj);
1980 if (encoded == NULL)
1981 return -1;
1982 return _steal_list_append(rval, encoded);
1983 }
Serhiy Storchaka48c8bf22018-07-31 09:09:36 +03001984 else if (_PyAnyInt_Check(obj)) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001985 PyObject *encoded = PyObject_Str(obj);
1986 if (encoded == NULL)
1987 return -1;
1988 return _steal_list_append(rval, encoded);
1989 }
1990 else if (PyFloat_Check(obj)) {
1991 PyObject *encoded = encoder_encode_float(s, obj);
1992 if (encoded == NULL)
1993 return -1;
1994 return _steal_list_append(rval, encoded);
1995 }
1996 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melottic1ec7b52011-05-11 00:55:35 +03001997 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1998 return -1;
1999 rv = encoder_listencode_list(s, rval, obj, indent_level);
2000 Py_LeaveRecursiveCall();
2001 return rv;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002002 }
2003 else if (PyDict_Check(obj)) {
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002004 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2005 return -1;
2006 rv = encoder_listencode_dict(s, rval, obj, indent_level);
2007 Py_LeaveRecursiveCall();
2008 return rv;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002009 }
2010 else {
2011 PyObject *ident = NULL;
2012 if (s->markers != Py_None) {
2013 int has_key;
2014 ident = PyLong_FromVoidPtr(obj);
2015 if (ident == NULL)
2016 return -1;
2017 has_key = PyDict_Contains(s->markers, ident);
2018 if (has_key) {
2019 if (has_key != -1)
2020 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2021 Py_DECREF(ident);
2022 return -1;
2023 }
2024 if (PyDict_SetItem(s->markers, ident, obj)) {
2025 Py_DECREF(ident);
2026 return -1;
2027 }
2028 }
2029 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2030 if (newobj == NULL) {
2031 Py_XDECREF(ident);
2032 return -1;
2033 }
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002034
Serhiy Storchakaf04790a2017-01-03 11:17:53 +02002035 if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
2036 Py_DECREF(newobj);
2037 Py_XDECREF(ident);
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002038 return -1;
Serhiy Storchakaf04790a2017-01-03 11:17:53 +02002039 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002040 rv = encoder_listencode_obj(s, rval, newobj, indent_level);
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002041 Py_LeaveRecursiveCall();
2042
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002043 Py_DECREF(newobj);
2044 if (rv) {
2045 Py_XDECREF(ident);
2046 return -1;
2047 }
2048 if (ident != NULL) {
2049 if (PyDict_DelItem(s->markers, ident)) {
2050 Py_XDECREF(ident);
2051 return -1;
2052 }
2053 Py_XDECREF(ident);
2054 }
2055 return rv;
2056 }
2057}
2058
2059static int
2060encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2061{
2062 /* Encode Python dict dct a JSON term, rval is a PyList */
2063 static PyObject *open_dict = NULL;
2064 static PyObject *close_dict = NULL;
2065 static PyObject *empty_dict = NULL;
2066 PyObject *kstr = NULL;
2067 PyObject *ident = NULL;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002068 PyObject *key = NULL;
2069 PyObject *value = NULL;
2070 PyObject *it = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002071 int skipkeys;
2072 Py_ssize_t idx;
2073
2074 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
2075 open_dict = PyString_InternFromString("{");
2076 close_dict = PyString_InternFromString("}");
2077 empty_dict = PyString_InternFromString("{}");
2078 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
2079 return -1;
2080 }
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002081 if (Py_SIZE(dct) == 0)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002082 return PyList_Append(rval, empty_dict);
2083
2084 if (s->markers != Py_None) {
2085 int has_key;
2086 ident = PyLong_FromVoidPtr(dct);
2087 if (ident == NULL)
2088 goto bail;
2089 has_key = PyDict_Contains(s->markers, ident);
2090 if (has_key) {
2091 if (has_key != -1)
2092 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2093 goto bail;
2094 }
2095 if (PyDict_SetItem(s->markers, ident, dct)) {
2096 goto bail;
2097 }
2098 }
2099
2100 if (PyList_Append(rval, open_dict))
2101 goto bail;
2102
2103 if (s->indent != Py_None) {
2104 /* TODO: DOES NOT RUN */
2105 indent_level += 1;
2106 /*
2107 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2108 separator = _item_separator + newline_indent
2109 buf += newline_indent
2110 */
2111 }
2112
2113 /* TODO: C speedup not implemented for sort_keys */
2114
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002115 it = PyObject_GetIter(dct);
2116 if (it == NULL)
2117 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002118 skipkeys = PyObject_IsTrue(s->skipkeys);
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03002119 if (skipkeys < 0)
2120 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002121 idx = 0;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002122 while ((key = PyIter_Next(it)) != NULL) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002123 PyObject *encoded;
2124
2125 if (PyString_Check(key) || PyUnicode_Check(key)) {
2126 Py_INCREF(key);
2127 kstr = key;
2128 }
2129 else if (PyFloat_Check(key)) {
2130 kstr = encoder_encode_float(s, key);
2131 if (kstr == NULL)
2132 goto bail;
2133 }
Serhiy Storchaka48c8bf22018-07-31 09:09:36 +03002134 else if (_PyAnyInt_Check(key)) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002135 kstr = PyObject_Str(key);
2136 if (kstr == NULL)
2137 goto bail;
2138 }
2139 else if (key == Py_True || key == Py_False || key == Py_None) {
2140 kstr = _encoded_const(key);
2141 if (kstr == NULL)
2142 goto bail;
2143 }
2144 else if (skipkeys) {
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002145 Py_DECREF(key);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002146 continue;
2147 }
2148 else {
2149 /* TODO: include repr of key */
Doug Hellmannf31db932010-07-21 12:36:33 +00002150 PyErr_SetString(PyExc_TypeError, "keys must be a string");
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002151 goto bail;
2152 }
2153
2154 if (idx) {
2155 if (PyList_Append(rval, s->item_separator))
2156 goto bail;
2157 }
2158
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002159 value = PyObject_GetItem(dct, key);
2160 if (value == NULL)
2161 goto bail;
2162
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002163 encoded = encoder_encode_string(s, kstr);
2164 Py_CLEAR(kstr);
2165 if (encoded == NULL)
2166 goto bail;
2167 if (PyList_Append(rval, encoded)) {
2168 Py_DECREF(encoded);
2169 goto bail;
2170 }
2171 Py_DECREF(encoded);
2172 if (PyList_Append(rval, s->key_separator))
2173 goto bail;
2174 if (encoder_listencode_obj(s, rval, value, indent_level))
2175 goto bail;
2176 idx += 1;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002177 Py_CLEAR(value);
2178 Py_DECREF(key);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002179 }
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002180 if (PyErr_Occurred())
2181 goto bail;
2182 Py_CLEAR(it);
2183
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002184 if (ident != NULL) {
2185 if (PyDict_DelItem(s->markers, ident))
2186 goto bail;
2187 Py_CLEAR(ident);
2188 }
2189 if (s->indent != Py_None) {
2190 /* TODO: DOES NOT RUN */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002191 /*
Brett Cannon8e9757e2010-05-03 23:43:49 +00002192 indent_level -= 1;
2193
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002194 yield '\n' + (' ' * (_indent * _current_indent_level))
2195 */
2196 }
2197 if (PyList_Append(rval, close_dict))
2198 goto bail;
2199 return 0;
2200
2201bail:
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002202 Py_XDECREF(it);
2203 Py_XDECREF(key);
2204 Py_XDECREF(value);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002205 Py_XDECREF(kstr);
2206 Py_XDECREF(ident);
2207 return -1;
2208}
2209
2210
2211static int
2212encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2213{
2214 /* Encode Python list seq to a JSON term, rval is a PyList */
2215 static PyObject *open_array = NULL;
2216 static PyObject *close_array = NULL;
2217 static PyObject *empty_array = NULL;
2218 PyObject *ident = NULL;
2219 PyObject *s_fast = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002220 Py_ssize_t i;
2221
2222 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2223 open_array = PyString_InternFromString("[");
2224 close_array = PyString_InternFromString("]");
2225 empty_array = PyString_InternFromString("[]");
2226 if (open_array == NULL || close_array == NULL || empty_array == NULL)
2227 return -1;
2228 }
2229 ident = NULL;
2230 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
2231 if (s_fast == NULL)
2232 return -1;
Antoine Pitroue9e35c32012-11-01 20:07:40 +01002233 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002234 Py_DECREF(s_fast);
2235 return PyList_Append(rval, empty_array);
2236 }
2237
2238 if (s->markers != Py_None) {
2239 int has_key;
2240 ident = PyLong_FromVoidPtr(seq);
2241 if (ident == NULL)
2242 goto bail;
2243 has_key = PyDict_Contains(s->markers, ident);
2244 if (has_key) {
2245 if (has_key != -1)
2246 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2247 goto bail;
2248 }
2249 if (PyDict_SetItem(s->markers, ident, seq)) {
2250 goto bail;
2251 }
2252 }
2253
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002254 if (PyList_Append(rval, open_array))
2255 goto bail;
2256 if (s->indent != Py_None) {
2257 /* TODO: DOES NOT RUN */
2258 indent_level += 1;
2259 /*
2260 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2261 separator = _item_separator + newline_indent
2262 buf += newline_indent
2263 */
2264 }
Antoine Pitroue9e35c32012-11-01 20:07:40 +01002265 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
2266 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002267 if (i) {
2268 if (PyList_Append(rval, s->item_separator))
2269 goto bail;
2270 }
2271 if (encoder_listencode_obj(s, rval, obj, indent_level))
2272 goto bail;
2273 }
2274 if (ident != NULL) {
2275 if (PyDict_DelItem(s->markers, ident))
2276 goto bail;
2277 Py_CLEAR(ident);
2278 }
2279 if (s->indent != Py_None) {
2280 /* TODO: DOES NOT RUN */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002281 /*
Brett Cannon8e9757e2010-05-03 23:43:49 +00002282 indent_level -= 1;
2283
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002284 yield '\n' + (' ' * (_indent * _current_indent_level))
2285 */
2286 }
2287 if (PyList_Append(rval, close_array))
2288 goto bail;
2289 Py_DECREF(s_fast);
2290 return 0;
2291
2292bail:
2293 Py_XDECREF(ident);
2294 Py_DECREF(s_fast);
2295 return -1;
2296}
2297
2298static void
2299encoder_dealloc(PyObject *self)
2300{
INADA Naoki4cde4bd2017-09-04 12:31:41 +09002301 /* bpo-31095: UnTrack is needed before calling any callbacks */
2302 PyObject_GC_UnTrack(self);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002303 encoder_clear(self);
2304 Py_TYPE(self)->tp_free(self);
2305}
2306
2307static int
2308encoder_traverse(PyObject *self, visitproc visit, void *arg)
2309{
2310 PyEncoderObject *s;
2311 assert(PyEncoder_Check(self));
2312 s = (PyEncoderObject *)self;
2313 Py_VISIT(s->markers);
2314 Py_VISIT(s->defaultfn);
2315 Py_VISIT(s->encoder);
2316 Py_VISIT(s->indent);
2317 Py_VISIT(s->key_separator);
2318 Py_VISIT(s->item_separator);
2319 Py_VISIT(s->sort_keys);
2320 Py_VISIT(s->skipkeys);
2321 return 0;
2322}
2323
2324static int
2325encoder_clear(PyObject *self)
2326{
2327 /* Deallocate Encoder */
2328 PyEncoderObject *s;
2329 assert(PyEncoder_Check(self));
2330 s = (PyEncoderObject *)self;
2331 Py_CLEAR(s->markers);
2332 Py_CLEAR(s->defaultfn);
2333 Py_CLEAR(s->encoder);
2334 Py_CLEAR(s->indent);
2335 Py_CLEAR(s->key_separator);
2336 Py_CLEAR(s->item_separator);
2337 Py_CLEAR(s->sort_keys);
2338 Py_CLEAR(s->skipkeys);
2339 return 0;
2340}
2341
2342PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2343
2344static
2345PyTypeObject PyEncoderType = {
Benjamin Petersona72d15c2017-09-13 21:20:29 -07002346 PyVarObject_HEAD_INIT(NULL, 0)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002347 "_json.Encoder", /* tp_name */
2348 sizeof(PyEncoderObject), /* tp_basicsize */
2349 0, /* tp_itemsize */
2350 encoder_dealloc, /* tp_dealloc */
2351 0, /* tp_print */
2352 0, /* tp_getattr */
2353 0, /* tp_setattr */
2354 0, /* tp_compare */
2355 0, /* tp_repr */
2356 0, /* tp_as_number */
2357 0, /* tp_as_sequence */
2358 0, /* tp_as_mapping */
2359 0, /* tp_hash */
2360 encoder_call, /* tp_call */
2361 0, /* tp_str */
2362 0, /* tp_getattro */
2363 0, /* tp_setattro */
2364 0, /* tp_as_buffer */
2365 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2366 encoder_doc, /* tp_doc */
2367 encoder_traverse, /* tp_traverse */
2368 encoder_clear, /* tp_clear */
2369 0, /* tp_richcompare */
2370 0, /* tp_weaklistoffset */
2371 0, /* tp_iter */
2372 0, /* tp_iternext */
2373 0, /* tp_methods */
2374 encoder_members, /* tp_members */
2375 0, /* tp_getset */
2376 0, /* tp_base */
2377 0, /* tp_dict */
2378 0, /* tp_descr_get */
2379 0, /* tp_descr_set */
2380 0, /* tp_dictoffset */
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03002381 0, /* tp_init */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002382 0, /* tp_alloc */
2383 encoder_new, /* tp_new */
2384 0, /* tp_free */
2385};
2386
2387static PyMethodDef speedups_methods[] = {
2388 {"encode_basestring_ascii",
2389 (PyCFunction)py_encode_basestring_ascii,
2390 METH_O,
2391 pydoc_encode_basestring_ascii},
2392 {"scanstring",
2393 (PyCFunction)py_scanstring,
2394 METH_VARARGS,
2395 pydoc_scanstring},
Brett Cannon4b964f92008-05-05 20:21:38 +00002396 {NULL, NULL, 0, NULL}
2397};
2398
2399PyDoc_STRVAR(module_doc,
2400"json speedups\n");
2401
2402void
2403init_json(void)
2404{
2405 PyObject *m;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002406 if (PyType_Ready(&PyScannerType) < 0)
2407 return;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002408 if (PyType_Ready(&PyEncoderType) < 0)
2409 return;
2410 m = Py_InitModule3("_json", speedups_methods, module_doc);
Serhiy Storchaka045c4512015-07-24 12:58:25 +03002411 if (m == NULL)
2412 return;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002413 Py_INCREF((PyObject*)&PyScannerType);
2414 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2415 Py_INCREF((PyObject*)&PyEncoderType);
2416 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
Brett Cannon4b964f92008-05-05 20:21:38 +00002417}