/* Source-viewer header (not part of the C source):
   blob be1e079696051c33a1a7eab6c691a697044ca6dd */
#include "Python.h"
#include "structmember.h"
/* Compatibility shims so the module builds against older Python headers. */

/* Provide Py_TYPE when PY_VERSION_HEX is below 0x02060000 and the headers
   have not already defined it. */
#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif
/* When PY_VERSION_HEX is below 0x02050000 and PY_SSIZE_T_MIN is missing,
   fall back to a plain int Py_ssize_t and route the Ssize_t conversion
   helpers to their long-based counterparts. */
#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
typedef int Py_ssize_t;
#define PY_SSIZE_T_MAX INT_MAX
#define PY_SSIZE_T_MIN INT_MIN
#define PyInt_FromSsize_t PyInt_FromLong
#define PyInt_AsSsize_t PyInt_AsLong
#endif
/* A value is finite when it is neither infinite nor NaN.
   Note: X is evaluated twice. */
#ifndef Py_IS_FINITE
#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
#endif

/* UNUSED marks a deliberately unused parameter; it expands to nothing on
   compilers that do not define __GNUC__. */
#ifdef __GNUC__
#define UNUSED __attribute__((__unused__))
#else
#define UNUSED
#endif

/* Encoding used for byte strings when the caller passes no encoding
   (see py_scanstring). */
#define DEFAULT_ENCODING "utf-8"

Bob Ippolitod914e3f2009-03-17 23:19:00 +000025#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
26#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
27#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
28#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
29
30static PyTypeObject PyScannerType;
31static PyTypeObject PyEncoderType;
32
33typedef struct _PyScannerObject {
34 PyObject_HEAD
35 PyObject *encoding;
36 PyObject *strict;
37 PyObject *object_hook;
Raymond Hettinger91852ca2009-03-19 19:19:03 +000038 PyObject *pairs_hook;
Bob Ippolitod914e3f2009-03-17 23:19:00 +000039 PyObject *parse_float;
40 PyObject *parse_int;
41 PyObject *parse_constant;
42} PyScannerObject;
43
44static PyMemberDef scanner_members[] = {
45 {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
46 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
47 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
Raymond Hettinger91852ca2009-03-19 19:19:03 +000048 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
Bob Ippolitod914e3f2009-03-17 23:19:00 +000049 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
50 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
51 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
52 {NULL}
53};
54
55typedef struct _PyEncoderObject {
56 PyObject_HEAD
57 PyObject *markers;
58 PyObject *defaultfn;
59 PyObject *encoder;
60 PyObject *indent;
61 PyObject *key_separator;
62 PyObject *item_separator;
63 PyObject *sort_keys;
64 PyObject *skipkeys;
65 int fast_encode;
66 int allow_nan;
67} PyEncoderObject;
68
69static PyMemberDef encoder_members[] = {
70 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
71 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
72 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
73 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
74 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
75 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
76 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
77 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
78 {NULL}
79};
80
81static Py_ssize_t
82ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
83static PyObject *
84ascii_escape_unicode(PyObject *pystr);
85static PyObject *
86ascii_escape_str(PyObject *pystr);
87static PyObject *
88py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
89void init_json(void);
90static PyObject *
91scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
92static PyObject *
93scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
94static PyObject *
95_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
96static PyObject *
97scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Bob Ippolitod914e3f2009-03-17 23:19:00 +000098static void
99scanner_dealloc(PyObject *self);
100static int
101scanner_clear(PyObject *self);
102static PyObject *
103encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000104static void
105encoder_dealloc(PyObject *self);
106static int
107encoder_clear(PyObject *self);
108static int
109encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
110static int
111encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
112static int
113encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
114static PyObject *
Raymond Hettingera0b8d9a2009-03-19 19:24:43 +0000115_encoded_const(PyObject *obj);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000116static void
117raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
118static PyObject *
119encoder_encode_string(PyEncoderObject *s, PyObject *obj);
120static int
121_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
122static PyObject *
123_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
124static PyObject *
125encoder_encode_float(PyEncoderObject *s, PyObject *obj);
126
/* S_CHAR(c): true when c can be copied into an ASCII JSON string verbatim,
   i.e. it lies in ' '..'~' and is neither a backslash nor a double quote.
   The argument is parenthesized so expression arguments bind correctly;
   note that it is still evaluated more than once. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* IS_WHITESPACE(c): true for space, tab, line feed and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* MIN_EXPANSION is the length of one "\uXXXX" escape.  When Py_UNICODE_WIDE
   is defined a single input character may need a surrogate pair
   ("\uXXXX\uXXXX"), so the worst case doubles. */
#define MIN_EXPANSION 6
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif

Bob Ippolitod914e3f2009-03-17 23:19:00 +0000137static int
138_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
139{
140 /* PyObject to Py_ssize_t converter */
141 *size_ptr = PyInt_AsSsize_t(o);
Georg Brandlf71ba952009-05-05 07:48:12 +0000142 if (*size_ptr == -1 && PyErr_Occurred())
143 return 0;
144 return 1;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000145}
146
147static PyObject *
148_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
149{
150 /* Py_ssize_t to PyObject converter */
151 return PyInt_FromSsize_t(*size_ptr);
152}
153
Brett Cannon4b964f92008-05-05 20:21:38 +0000154static Py_ssize_t
155ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
156{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000157 /* Escape unicode code point c to ASCII escape sequences
158 in char *output. output must have at least 12 bytes unused to
159 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Brett Cannon4b964f92008-05-05 20:21:38 +0000160 output[chars++] = '\\';
161 switch (c) {
162 case '\\': output[chars++] = (char)c; break;
163 case '"': output[chars++] = (char)c; break;
164 case '\b': output[chars++] = 'b'; break;
165 case '\f': output[chars++] = 'f'; break;
166 case '\n': output[chars++] = 'n'; break;
167 case '\r': output[chars++] = 'r'; break;
168 case '\t': output[chars++] = 't'; break;
169 default:
170#ifdef Py_UNICODE_WIDE
171 if (c >= 0x10000) {
172 /* UTF-16 surrogate pair */
173 Py_UNICODE v = c - 0x10000;
174 c = 0xd800 | ((v >> 10) & 0x3ff);
175 output[chars++] = 'u';
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000176 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
177 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
178 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
179 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Brett Cannon4b964f92008-05-05 20:21:38 +0000180 c = 0xdc00 | (v & 0x3ff);
181 output[chars++] = '\\';
182 }
183#endif
184 output[chars++] = 'u';
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000185 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
186 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
187 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
188 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Brett Cannon4b964f92008-05-05 20:21:38 +0000189 }
190 return chars;
191}
192
193static PyObject *
194ascii_escape_unicode(PyObject *pystr)
195{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000196 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
Brett Cannon4b964f92008-05-05 20:21:38 +0000197 Py_ssize_t i;
198 Py_ssize_t input_chars;
199 Py_ssize_t output_size;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000200 Py_ssize_t max_output_size;
Brett Cannon4b964f92008-05-05 20:21:38 +0000201 Py_ssize_t chars;
Benjamin Petersonaa034202016-09-26 23:55:41 -0700202 Py_ssize_t incr;
Brett Cannon4b964f92008-05-05 20:21:38 +0000203 PyObject *rval;
204 char *output;
205 Py_UNICODE *input_unicode;
206
207 input_chars = PyUnicode_GET_SIZE(pystr);
208 input_unicode = PyUnicode_AS_UNICODE(pystr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000209
Benjamin Petersonaa034202016-09-26 23:55:41 -0700210 output_size = input_chars;
211 incr = 2; /* for quotes */
Brett Cannon4b964f92008-05-05 20:21:38 +0000212 /* One char input can be up to 6 chars output, estimate 4 of these */
Benjamin Petersonaa034202016-09-26 23:55:41 -0700213 incr += MIN_EXPANSION * 4;
214 if (PY_SSIZE_T_MAX - incr < output_size) {
215 PyErr_NoMemory();
Benjamin Peterson04a53852016-08-13 16:47:25 -0700216 return NULL;
217 }
Benjamin Petersonaa034202016-09-26 23:55:41 -0700218 output_size += incr;
219 if (PY_SSIZE_T_MAX / MAX_EXPANSION < input_chars ||
220 PY_SSIZE_T_MAX - 2 < input_chars * MAX_EXPANSION)
221 max_output_size = PY_SSIZE_T_MAX;
222 else
223 max_output_size = 2 + (input_chars * MAX_EXPANSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000224 rval = PyString_FromStringAndSize(NULL, output_size);
Brett Cannon4b964f92008-05-05 20:21:38 +0000225 if (rval == NULL) {
226 return NULL;
227 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000228 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000229 chars = 0;
230 output[chars++] = '"';
231 for (i = 0; i < input_chars; i++) {
232 Py_UNICODE c = input_unicode[i];
233 if (S_CHAR(c)) {
234 output[chars++] = (char)c;
235 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000236 else {
Brett Cannon4b964f92008-05-05 20:21:38 +0000237 chars = ascii_escape_char(c, output, chars);
238 }
239 if (output_size - chars < (1 + MAX_EXPANSION)) {
Benjamin Petersonaa034202016-09-26 23:55:41 -0700240 if (output_size == PY_SSIZE_T_MAX) {
241 Py_DECREF(rval);
242 PyErr_NoMemory();
243 return NULL;
244 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000245 /* There's more than four, so let's resize by a lot */
Benjamin Petersonaa034202016-09-26 23:55:41 -0700246 if (PY_SSIZE_T_MAX / 2 >= output_size && output_size * 2 < max_output_size)
247 output_size *= 2;
248 else
249 output_size = max_output_size;
250 if (_PyString_Resize(&rval, output_size) == -1) {
251 return NULL;
Brett Cannon4b964f92008-05-05 20:21:38 +0000252 }
Benjamin Petersonaa034202016-09-26 23:55:41 -0700253 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000254 }
255 }
256 output[chars++] = '"';
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000257 if (_PyString_Resize(&rval, chars) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000258 return NULL;
259 }
260 return rval;
261}
262
263static PyObject *
264ascii_escape_str(PyObject *pystr)
265{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000266 /* Take a PyString pystr and return a new ASCII-only escaped PyString */
Brett Cannon4b964f92008-05-05 20:21:38 +0000267 Py_ssize_t i;
268 Py_ssize_t input_chars;
269 Py_ssize_t output_size;
Benjamin Petersonaa034202016-09-26 23:55:41 -0700270 Py_ssize_t max_output_size;
Brett Cannon4b964f92008-05-05 20:21:38 +0000271 Py_ssize_t chars;
Benjamin Petersonaa034202016-09-26 23:55:41 -0700272 Py_ssize_t incr;
Brett Cannon4b964f92008-05-05 20:21:38 +0000273 PyObject *rval;
274 char *output;
275 char *input_str;
276
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000277 input_chars = PyString_GET_SIZE(pystr);
278 input_str = PyString_AS_STRING(pystr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000279
280 /* Fast path for a string that's already ASCII */
281 for (i = 0; i < input_chars; i++) {
282 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
283 if (!S_CHAR(c)) {
284 /* If we have to escape something, scan the string for unicode */
285 Py_ssize_t j;
286 for (j = i; j < input_chars; j++) {
287 c = (Py_UNICODE)(unsigned char)input_str[j];
288 if (c > 0x7f) {
289 /* We hit a non-ASCII character, bail to unicode mode */
290 PyObject *uni;
291 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
292 if (uni == NULL) {
293 return NULL;
294 }
295 rval = ascii_escape_unicode(uni);
296 Py_DECREF(uni);
297 return rval;
298 }
299 }
300 break;
301 }
302 }
303
Benjamin Petersonaa034202016-09-26 23:55:41 -0700304 output_size = input_chars;
305 incr = 2; /* for quotes */
306 if (i != input_chars) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000307 /* One char input can be up to 6 chars output, estimate 4 of these */
Benjamin Petersonaa034202016-09-26 23:55:41 -0700308 incr += MIN_EXPANSION * 4;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000309 }
Benjamin Petersonaa034202016-09-26 23:55:41 -0700310 if (PY_SSIZE_T_MAX - incr < output_size) {
311 PyErr_NoMemory();
312 return NULL;
313 }
314 output_size += incr;
315 if (PY_SSIZE_T_MAX / MIN_EXPANSION < input_chars ||
316 PY_SSIZE_T_MAX - 2 < input_chars * MIN_EXPANSION)
317 max_output_size = PY_SSIZE_T_MAX;
318 else
319 max_output_size = 2 + (input_chars * MIN_EXPANSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000320 rval = PyString_FromStringAndSize(NULL, output_size);
Brett Cannon4b964f92008-05-05 20:21:38 +0000321 if (rval == NULL) {
322 return NULL;
323 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000324 output = PyString_AS_STRING(rval);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000325 output[0] = '"';
326
327 /* We know that everything up to i is ASCII already */
328 chars = i + 1;
329 memcpy(&output[1], input_str, i);
330
331 for (; i < input_chars; i++) {
332 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
Brett Cannon4b964f92008-05-05 20:21:38 +0000333 if (S_CHAR(c)) {
334 output[chars++] = (char)c;
335 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000336 else {
Brett Cannon4b964f92008-05-05 20:21:38 +0000337 chars = ascii_escape_char(c, output, chars);
338 }
339 /* An ASCII char can't possibly expand to a surrogate! */
340 if (output_size - chars < (1 + MIN_EXPANSION)) {
Benjamin Petersonaa034202016-09-26 23:55:41 -0700341 if (output_size == PY_SSIZE_T_MAX) {
342 Py_DECREF(rval);
343 PyErr_NoMemory();
344 return NULL;
Brett Cannon4b964f92008-05-05 20:21:38 +0000345 }
Benjamin Petersonaa034202016-09-26 23:55:41 -0700346 /* There's more than four, so let's resize by a lot */
347 if (PY_SSIZE_T_MAX / 2 >= output_size && output_size * 2 < max_output_size)
348 output_size *= 2;
349 else
350 output_size = max_output_size;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000351 if (_PyString_Resize(&rval, output_size) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000352 return NULL;
353 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000354 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000355 }
356 }
357 output[chars++] = '"';
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000358 if (_PyString_Resize(&rval, chars) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000359 return NULL;
360 }
361 return rval;
362}
363
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000364static void
Brett Cannon4b964f92008-05-05 20:21:38 +0000365raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
366{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000367 /* Use the Python function json.decoder.errmsg to raise a nice
368 looking ValueError exception */
Brett Cannon4b964f92008-05-05 20:21:38 +0000369 static PyObject *errmsg_fn = NULL;
370 PyObject *pymsg;
371 if (errmsg_fn == NULL) {
372 PyObject *decoder = PyImport_ImportModule("json.decoder");
373 if (decoder == NULL)
374 return;
375 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000376 Py_DECREF(decoder);
Brett Cannon4b964f92008-05-05 20:21:38 +0000377 if (errmsg_fn == NULL)
378 return;
Brett Cannon4b964f92008-05-05 20:21:38 +0000379 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000380 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
Benjamin Peterson595e3cb2008-10-16 21:09:28 +0000381 if (pymsg) {
382 PyErr_SetObject(PyExc_ValueError, pymsg);
383 Py_DECREF(pymsg);
384 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000385}
386
387static PyObject *
388join_list_unicode(PyObject *lst)
389{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000390 /* return u''.join(lst) */
391 static PyObject *joinfn = NULL;
392 if (joinfn == NULL) {
393 PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
394 if (ustr == NULL)
395 return NULL;
396
397 joinfn = PyObject_GetAttrString(ustr, "join");
398 Py_DECREF(ustr);
399 if (joinfn == NULL)
400 return NULL;
Brett Cannon4b964f92008-05-05 20:21:38 +0000401 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000402 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
Brett Cannon4b964f92008-05-05 20:21:38 +0000403}
404
405static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000406_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
407 /* return (rval, idx) tuple, stealing reference to rval */
408 PyObject *tpl;
409 PyObject *pyidx;
410 /*
411 steal a reference to rval, returns (rval, idx)
412 */
413 if (rval == NULL) {
414 return NULL;
415 }
416 pyidx = PyInt_FromSsize_t(idx);
417 if (pyidx == NULL) {
418 Py_DECREF(rval);
419 return NULL;
420 }
421 tpl = PyTuple_New(2);
422 if (tpl == NULL) {
423 Py_DECREF(pyidx);
424 Py_DECREF(rval);
425 return NULL;
426 }
427 PyTuple_SET_ITEM(tpl, 0, rval);
428 PyTuple_SET_ITEM(tpl, 1, pyidx);
429 return tpl;
430}
431
432static PyObject *
433scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
434{
435 /* Read the JSON string from PyString pystr.
436 end is the index of the first character after the quote.
437 encoding is the encoding of pystr (must be an ASCII superset)
438 if strict is zero then literal control characters are allowed
439 *next_end_ptr is a return-by-reference index of the character
440 after the end quote
441
442 Return value is a new PyString (if ASCII-only) or PyUnicode
443 */
Brett Cannon4b964f92008-05-05 20:21:38 +0000444 PyObject *rval;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000445 Py_ssize_t len = PyString_GET_SIZE(pystr);
Brett Cannon4b964f92008-05-05 20:21:38 +0000446 Py_ssize_t begin = end - 1;
Brett Cannon8e9757e2010-05-03 23:43:49 +0000447 Py_ssize_t next;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000448 char *buf = PyString_AS_STRING(pystr);
Brett Cannon4b964f92008-05-05 20:21:38 +0000449 PyObject *chunks = PyList_New(0);
450 if (chunks == NULL) {
451 goto bail;
452 }
Bob Ippolitod648f642008-07-19 21:59:50 +0000453 if (end < 0 || len <= end) {
454 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
455 goto bail;
456 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000457 while (1) {
458 /* Find the end of the string or the next escape */
459 Py_UNICODE c = 0;
460 PyObject *chunk = NULL;
461 for (next = end; next < len; next++) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000462 c = (unsigned char)buf[next];
Brett Cannon4b964f92008-05-05 20:21:38 +0000463 if (c == '"' || c == '\\') {
464 break;
465 }
466 else if (strict && c <= 0x1f) {
Bob Ippolitod648f642008-07-19 21:59:50 +0000467 raise_errmsg("Invalid control character at", pystr, next);
Brett Cannon4b964f92008-05-05 20:21:38 +0000468 goto bail;
469 }
470 }
471 if (!(c == '"' || c == '\\')) {
472 raise_errmsg("Unterminated string starting at", pystr, begin);
473 goto bail;
474 }
475 /* Pick up this chunk if it's not zero length */
476 if (next != end) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000477 PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000478 if (strchunk == NULL) {
479 goto bail;
480 }
Barry Warsawfa658272010-11-02 21:03:09 +0000481 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
482 Py_DECREF(strchunk);
483 if (chunk == NULL) {
484 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000485 }
486 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000487 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000488 goto bail;
489 }
490 Py_DECREF(chunk);
491 }
492 next++;
493 if (c == '"') {
494 end = next;
495 break;
496 }
497 if (next == len) {
498 raise_errmsg("Unterminated string starting at", pystr, begin);
499 goto bail;
500 }
501 c = buf[next];
502 if (c != 'u') {
503 /* Non-unicode backslash escapes */
504 end = next + 1;
505 switch (c) {
506 case '"': break;
507 case '\\': break;
508 case '/': break;
509 case 'b': c = '\b'; break;
510 case 'f': c = '\f'; break;
511 case 'n': c = '\n'; break;
512 case 'r': c = '\r'; break;
513 case 't': c = '\t'; break;
514 default: c = 0;
515 }
516 if (c == 0) {
517 raise_errmsg("Invalid \\escape", pystr, end - 2);
518 goto bail;
519 }
520 }
521 else {
522 c = 0;
523 next++;
524 end = next + 4;
525 if (end >= len) {
526 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
527 goto bail;
528 }
529 /* Decode 4 hex digits */
530 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000531 Py_UNICODE digit = buf[next];
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000532 c <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000533 switch (digit) {
534 case '0': case '1': case '2': case '3': case '4':
535 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000536 c |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000537 case 'a': case 'b': case 'c': case 'd': case 'e':
538 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000539 c |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000540 case 'A': case 'B': case 'C': case 'D': case 'E':
541 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000542 c |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000543 default:
544 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
545 goto bail;
546 }
547 }
548#ifdef Py_UNICODE_WIDE
549 /* Surrogate pair */
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200550 if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
551 buf[next++] == '\\' &&
552 buf[next++] == 'u') {
Brett Cannon4b964f92008-05-05 20:21:38 +0000553 Py_UNICODE c2 = 0;
Brett Cannon4b964f92008-05-05 20:21:38 +0000554 end += 6;
555 /* Decode 4 hex digits */
556 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000557 Py_UNICODE digit = buf[next];
Antoine Pitrou22ad2452010-10-09 15:28:59 +0000558 c2 <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000559 switch (digit) {
560 case '0': case '1': case '2': case '3': case '4':
561 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000562 c2 |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000563 case 'a': case 'b': case 'c': case 'd': case 'e':
564 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000565 c2 |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000566 case 'A': case 'B': case 'C': case 'D': case 'E':
567 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000568 c2 |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000569 default:
570 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
571 goto bail;
572 }
573 }
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200574 if ((c2 & 0xfc00) == 0xdc00)
575 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
576 else
577 end -= 6;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000578 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000579#endif
580 }
Barry Warsawfa658272010-11-02 21:03:09 +0000581 chunk = PyUnicode_FromUnicode(&c, 1);
582 if (chunk == NULL) {
583 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000584 }
585 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000586 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000587 goto bail;
588 }
589 Py_DECREF(chunk);
590 }
591
Ezio Melottidf8a8f72011-05-04 14:40:53 +0300592 rval = join_list_unicode(chunks);
Brett Cannon4b964f92008-05-05 20:21:38 +0000593 if (rval == NULL) {
594 goto bail;
595 }
Benjamin Peterson336680e2008-10-16 21:48:06 +0000596 Py_CLEAR(chunks);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000597 *next_end_ptr = end;
598 return rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000599bail:
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000600 *next_end_ptr = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000601 Py_XDECREF(chunks);
602 return NULL;
603}
604
605
606static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000607scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Brett Cannon4b964f92008-05-05 20:21:38 +0000608{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000609 /* Read the JSON string from PyUnicode pystr.
610 end is the index of the first character after the quote.
611 if strict is zero then literal control characters are allowed
612 *next_end_ptr is a return-by-reference index of the character
613 after the end quote
614
615 Return value is a new PyUnicode
616 */
Brett Cannon4b964f92008-05-05 20:21:38 +0000617 PyObject *rval;
618 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
619 Py_ssize_t begin = end - 1;
Brett Cannon8e9757e2010-05-03 23:43:49 +0000620 Py_ssize_t next;
Brett Cannon4b964f92008-05-05 20:21:38 +0000621 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
622 PyObject *chunks = PyList_New(0);
623 if (chunks == NULL) {
624 goto bail;
625 }
Bob Ippolitod648f642008-07-19 21:59:50 +0000626 if (end < 0 || len <= end) {
627 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
628 goto bail;
629 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000630 while (1) {
631 /* Find the end of the string or the next escape */
632 Py_UNICODE c = 0;
633 PyObject *chunk = NULL;
634 for (next = end; next < len; next++) {
635 c = buf[next];
636 if (c == '"' || c == '\\') {
637 break;
638 }
639 else if (strict && c <= 0x1f) {
Bob Ippolitod648f642008-07-19 21:59:50 +0000640 raise_errmsg("Invalid control character at", pystr, next);
Brett Cannon4b964f92008-05-05 20:21:38 +0000641 goto bail;
642 }
643 }
644 if (!(c == '"' || c == '\\')) {
645 raise_errmsg("Unterminated string starting at", pystr, begin);
646 goto bail;
647 }
648 /* Pick up this chunk if it's not zero length */
649 if (next != end) {
650 chunk = PyUnicode_FromUnicode(&buf[end], next - end);
651 if (chunk == NULL) {
652 goto bail;
653 }
654 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson87e6ad22008-10-16 21:27:54 +0000655 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000656 goto bail;
657 }
658 Py_DECREF(chunk);
659 }
660 next++;
661 if (c == '"') {
662 end = next;
663 break;
664 }
665 if (next == len) {
666 raise_errmsg("Unterminated string starting at", pystr, begin);
667 goto bail;
668 }
669 c = buf[next];
670 if (c != 'u') {
671 /* Non-unicode backslash escapes */
672 end = next + 1;
673 switch (c) {
674 case '"': break;
675 case '\\': break;
676 case '/': break;
677 case 'b': c = '\b'; break;
678 case 'f': c = '\f'; break;
679 case 'n': c = '\n'; break;
680 case 'r': c = '\r'; break;
681 case 't': c = '\t'; break;
682 default: c = 0;
683 }
684 if (c == 0) {
685 raise_errmsg("Invalid \\escape", pystr, end - 2);
686 goto bail;
687 }
688 }
689 else {
690 c = 0;
691 next++;
692 end = next + 4;
693 if (end >= len) {
694 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
695 goto bail;
696 }
697 /* Decode 4 hex digits */
698 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000699 Py_UNICODE digit = buf[next];
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000700 c <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000701 switch (digit) {
702 case '0': case '1': case '2': case '3': case '4':
703 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000704 c |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000705 case 'a': case 'b': case 'c': case 'd': case 'e':
706 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000707 c |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000708 case 'A': case 'B': case 'C': case 'D': case 'E':
709 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000710 c |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000711 default:
712 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
713 goto bail;
714 }
715 }
716#ifdef Py_UNICODE_WIDE
717 /* Surrogate pair */
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200718 if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
719 buf[next++] == '\\' && buf[next++] == 'u') {
Brett Cannon4b964f92008-05-05 20:21:38 +0000720 Py_UNICODE c2 = 0;
Brett Cannon4b964f92008-05-05 20:21:38 +0000721 end += 6;
722 /* Decode 4 hex digits */
723 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000724 Py_UNICODE digit = buf[next];
Antoine Pitrou22ad2452010-10-09 15:28:59 +0000725 c2 <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000726 switch (digit) {
727 case '0': case '1': case '2': case '3': case '4':
728 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000729 c2 |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000730 case 'a': case 'b': case 'c': case 'd': case 'e':
731 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000732 c2 |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000733 case 'A': case 'B': case 'C': case 'D': case 'E':
734 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000735 c2 |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000736 default:
737 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
738 goto bail;
739 }
740 }
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200741 if ((c2 & 0xfc00) == 0xdc00)
742 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
743 else
744 end -= 6;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000745 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000746#endif
747 }
748 chunk = PyUnicode_FromUnicode(&c, 1);
749 if (chunk == NULL) {
750 goto bail;
751 }
752 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000753 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000754 goto bail;
755 }
756 Py_DECREF(chunk);
757 }
758
759 rval = join_list_unicode(chunks);
760 if (rval == NULL) {
761 goto bail;
762 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000763 Py_DECREF(chunks);
764 *next_end_ptr = end;
765 return rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000766bail:
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000767 *next_end_ptr = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000768 Py_XDECREF(chunks);
769 return NULL;
770}
771
772PyDoc_STRVAR(pydoc_scanstring,
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000773 "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
774 "\n"
775 "Scan the string s for a JSON string. End is the index of the\n"
776 "character in s after the quote that started the JSON string.\n"
777 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
778 "on attempt to decode an invalid string. If strict is False then literal\n"
779 "control characters are allowed in the string.\n"
780 "\n"
781 "Returns a tuple of the decoded string and the index of the character in s\n"
782 "after the end quote."
783);
Brett Cannon4b964f92008-05-05 20:21:38 +0000784
785static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000786py_scanstring(PyObject* self UNUSED, PyObject *args)
Brett Cannon4b964f92008-05-05 20:21:38 +0000787{
788 PyObject *pystr;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000789 PyObject *rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000790 Py_ssize_t end;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000791 Py_ssize_t next_end = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000792 char *encoding = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000793 int strict = 1;
794 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000795 return NULL;
796 }
797 if (encoding == NULL) {
798 encoding = DEFAULT_ENCODING;
799 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000800 if (PyString_Check(pystr)) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000801 rval = scanstring_str(pystr, end, encoding, strict, &next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000802 }
803 else if (PyUnicode_Check(pystr)) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000804 rval = scanstring_unicode(pystr, end, strict, &next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000805 }
806 else {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000807 PyErr_Format(PyExc_TypeError,
808 "first argument must be a string, not %.80s",
Brett Cannon4b964f92008-05-05 20:21:38 +0000809 Py_TYPE(pystr)->tp_name);
810 return NULL;
811 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000812 return _build_rval_index_tuple(rval, next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000813}
814
815PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000816 "encode_basestring_ascii(basestring) -> str\n"
817 "\n"
818 "Return an ASCII-only JSON representation of a Python string"
819);
Brett Cannon4b964f92008-05-05 20:21:38 +0000820
821static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000822py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Brett Cannon4b964f92008-05-05 20:21:38 +0000823{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000824 /* Return an ASCII-only JSON representation of a Python string */
Brett Cannon4b964f92008-05-05 20:21:38 +0000825 /* METH_O */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000826 if (PyString_Check(pystr)) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000827 return ascii_escape_str(pystr);
828 }
829 else if (PyUnicode_Check(pystr)) {
830 return ascii_escape_unicode(pystr);
831 }
832 else {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000833 PyErr_Format(PyExc_TypeError,
834 "first argument must be a string, not %.80s",
Brett Cannon4b964f92008-05-05 20:21:38 +0000835 Py_TYPE(pystr)->tp_name);
836 return NULL;
837 }
838}
839
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000840static void
841scanner_dealloc(PyObject *self)
842{
INADA Naoki4cde4bd2017-09-04 12:31:41 +0900843 /* bpo-31095: UnTrack is needed before calling any callbacks */
844 PyObject_GC_UnTrack(self);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000845 scanner_clear(self);
846 Py_TYPE(self)->tp_free(self);
847}
848
849static int
850scanner_traverse(PyObject *self, visitproc visit, void *arg)
851{
852 PyScannerObject *s;
853 assert(PyScanner_Check(self));
854 s = (PyScannerObject *)self;
855 Py_VISIT(s->encoding);
856 Py_VISIT(s->strict);
857 Py_VISIT(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000858 Py_VISIT(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000859 Py_VISIT(s->parse_float);
860 Py_VISIT(s->parse_int);
861 Py_VISIT(s->parse_constant);
862 return 0;
863}
864
865static int
866scanner_clear(PyObject *self)
867{
868 PyScannerObject *s;
869 assert(PyScanner_Check(self));
870 s = (PyScannerObject *)self;
871 Py_CLEAR(s->encoding);
872 Py_CLEAR(s->strict);
873 Py_CLEAR(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000874 Py_CLEAR(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000875 Py_CLEAR(s->parse_float);
876 Py_CLEAR(s->parse_int);
877 Py_CLEAR(s->parse_constant);
878 return 0;
879}
880
881static PyObject *
882_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
883 /* Read a JSON object from PyString pystr.
884 idx is the index of the first character after the opening curly brace.
885 *next_idx_ptr is a return-by-reference index to the first character after
886 the closing curly brace.
887
888 Returns a new PyObject (usually a dict, but object_hook can change that)
889 */
890 char *str = PyString_AS_STRING(pystr);
891 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000892 PyObject *rval;
893 PyObject *pairs;
894 PyObject *item;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000895 PyObject *key = NULL;
896 PyObject *val = NULL;
897 char *encoding = PyString_AS_STRING(s->encoding);
898 int strict = PyObject_IsTrue(s->strict);
899 Py_ssize_t next_idx;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000900
Serhiy Storchaka5127ed72015-05-30 17:45:12 +0300901 if (strict < 0)
902 return NULL;
903
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000904 pairs = PyList_New(0);
905 if (pairs == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000906 return NULL;
907
908 /* skip whitespace after { */
909 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
910
911 /* only loop if the object is non-empty */
912 if (idx <= end_idx && str[idx] != '}') {
913 while (idx <= end_idx) {
914 /* read key */
915 if (str[idx] != '"') {
916 raise_errmsg("Expecting property name", pystr, idx);
917 goto bail;
918 }
919 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
920 if (key == NULL)
921 goto bail;
922 idx = next_idx;
923
924 /* skip whitespace between key and : delimiter, read :, skip whitespace */
925 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
926 if (idx > end_idx || str[idx] != ':') {
927 raise_errmsg("Expecting : delimiter", pystr, idx);
928 goto bail;
929 }
930 idx++;
931 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
932
933 /* read any JSON data type */
934 val = scan_once_str(s, pystr, idx, &next_idx);
935 if (val == NULL)
936 goto bail;
937
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000938 item = PyTuple_Pack(2, key, val);
939 if (item == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000940 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000941 Py_CLEAR(key);
942 Py_CLEAR(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000943 if (PyList_Append(pairs, item) == -1) {
944 Py_DECREF(item);
945 goto bail;
946 }
947 Py_DECREF(item);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000948 idx = next_idx;
949
950 /* skip whitespace before } or , */
951 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
952
953 /* bail if the object is closed or we didn't get the , delimiter */
954 if (idx > end_idx) break;
955 if (str[idx] == '}') {
956 break;
957 }
958 else if (str[idx] != ',') {
959 raise_errmsg("Expecting , delimiter", pystr, idx);
960 goto bail;
961 }
962 idx++;
963
964 /* skip whitespace after , delimiter */
965 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
966 }
967 }
968 /* verify that idx < end_idx, str[idx] should be '}' */
969 if (idx > end_idx || str[idx] != '}') {
970 raise_errmsg("Expecting object", pystr, end_idx);
971 goto bail;
972 }
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000973
974 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
975 if (s->pairs_hook != Py_None) {
976 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
977 if (val == NULL)
978 goto bail;
979 Py_DECREF(pairs);
980 *next_idx_ptr = idx + 1;
981 return val;
982 }
983
984 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
985 pairs, NULL);
986 if (rval == NULL)
987 goto bail;
988 Py_CLEAR(pairs);
989
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000990 /* if object_hook is not None: rval = object_hook(rval) */
991 if (s->object_hook != Py_None) {
992 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
993 if (val == NULL)
994 goto bail;
995 Py_DECREF(rval);
996 rval = val;
997 val = NULL;
998 }
999 *next_idx_ptr = idx + 1;
1000 return rval;
1001bail:
1002 Py_XDECREF(key);
1003 Py_XDECREF(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001004 Py_XDECREF(pairs);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001005 return NULL;
1006}
1007
1008static PyObject *
1009_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1010 /* Read a JSON object from PyUnicode pystr.
1011 idx is the index of the first character after the opening curly brace.
1012 *next_idx_ptr is a return-by-reference index to the first character after
1013 the closing curly brace.
1014
1015 Returns a new PyObject (usually a dict, but object_hook can change that)
1016 */
1017 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1018 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001019 PyObject *rval;
1020 PyObject *pairs;
1021 PyObject *item;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001022 PyObject *key = NULL;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001023 PyObject *val = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001024 int strict = PyObject_IsTrue(s->strict);
1025 Py_ssize_t next_idx;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001026
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001027 if (strict < 0)
1028 return NULL;
1029
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001030 pairs = PyList_New(0);
1031 if (pairs == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001032 return NULL;
1033
1034 /* skip whitespace after { */
1035 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1036
1037 /* only loop if the object is non-empty */
1038 if (idx <= end_idx && str[idx] != '}') {
1039 while (idx <= end_idx) {
1040 /* read key */
1041 if (str[idx] != '"') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001042 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001043 goto bail;
1044 }
1045 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1046 if (key == NULL)
1047 goto bail;
1048 idx = next_idx;
1049
1050 /* skip whitespace between key and : delimiter, read :, skip whitespace */
1051 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1052 if (idx > end_idx || str[idx] != ':') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001053 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001054 goto bail;
1055 }
1056 idx++;
1057 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1058
1059 /* read any JSON term */
1060 val = scan_once_unicode(s, pystr, idx, &next_idx);
1061 if (val == NULL)
1062 goto bail;
1063
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001064 item = PyTuple_Pack(2, key, val);
1065 if (item == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001066 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001067 Py_CLEAR(key);
1068 Py_CLEAR(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001069 if (PyList_Append(pairs, item) == -1) {
1070 Py_DECREF(item);
1071 goto bail;
1072 }
1073 Py_DECREF(item);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001074 idx = next_idx;
1075
1076 /* skip whitespace before } or , */
1077 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1078
1079 /* bail if the object is closed or we didn't get the , delimiter */
1080 if (idx > end_idx) break;
1081 if (str[idx] == '}') {
1082 break;
1083 }
1084 else if (str[idx] != ',') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001085 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001086 goto bail;
1087 }
1088 idx++;
1089
1090 /* skip whitespace after , delimiter */
1091 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1092 }
1093 }
1094
1095 /* verify that idx < end_idx, str[idx] should be '}' */
1096 if (idx > end_idx || str[idx] != '}') {
1097 raise_errmsg("Expecting object", pystr, end_idx);
1098 goto bail;
1099 }
1100
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001101 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1102 if (s->pairs_hook != Py_None) {
1103 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1104 if (val == NULL)
1105 goto bail;
1106 Py_DECREF(pairs);
1107 *next_idx_ptr = idx + 1;
1108 return val;
1109 }
1110
1111 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
1112 pairs, NULL);
1113 if (rval == NULL)
1114 goto bail;
1115 Py_CLEAR(pairs);
1116
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001117 /* if object_hook is not None: rval = object_hook(rval) */
1118 if (s->object_hook != Py_None) {
1119 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1120 if (val == NULL)
1121 goto bail;
1122 Py_DECREF(rval);
1123 rval = val;
1124 val = NULL;
1125 }
1126 *next_idx_ptr = idx + 1;
1127 return rval;
1128bail:
1129 Py_XDECREF(key);
1130 Py_XDECREF(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001131 Py_XDECREF(pairs);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001132 return NULL;
1133}
1134
1135static PyObject *
1136_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1137 /* Read a JSON array from PyString pystr.
1138 idx is the index of the first character after the opening brace.
1139 *next_idx_ptr is a return-by-reference index to the first character after
1140 the closing brace.
1141
1142 Returns a new PyList
1143 */
1144 char *str = PyString_AS_STRING(pystr);
1145 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1146 PyObject *val = NULL;
1147 PyObject *rval = PyList_New(0);
1148 Py_ssize_t next_idx;
1149 if (rval == NULL)
1150 return NULL;
1151
1152 /* skip whitespace after [ */
1153 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1154
1155 /* only loop if the array is non-empty */
1156 if (idx <= end_idx && str[idx] != ']') {
1157 while (idx <= end_idx) {
1158
1159 /* read any JSON term and de-tuplefy the (rval, idx) */
1160 val = scan_once_str(s, pystr, idx, &next_idx);
1161 if (val == NULL)
1162 goto bail;
1163
1164 if (PyList_Append(rval, val) == -1)
1165 goto bail;
1166
1167 Py_CLEAR(val);
1168 idx = next_idx;
1169
1170 /* skip whitespace between term and , */
1171 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1172
1173 /* bail if the array is closed or we didn't get the , delimiter */
1174 if (idx > end_idx) break;
1175 if (str[idx] == ']') {
1176 break;
1177 }
1178 else if (str[idx] != ',') {
1179 raise_errmsg("Expecting , delimiter", pystr, idx);
1180 goto bail;
1181 }
1182 idx++;
1183
1184 /* skip whitespace after , */
1185 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1186 }
1187 }
1188
1189 /* verify that idx < end_idx, str[idx] should be ']' */
1190 if (idx > end_idx || str[idx] != ']') {
1191 raise_errmsg("Expecting object", pystr, end_idx);
1192 goto bail;
1193 }
1194 *next_idx_ptr = idx + 1;
1195 return rval;
1196bail:
1197 Py_XDECREF(val);
1198 Py_DECREF(rval);
1199 return NULL;
1200}
1201
1202static PyObject *
1203_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1204 /* Read a JSON array from PyString pystr.
1205 idx is the index of the first character after the opening brace.
1206 *next_idx_ptr is a return-by-reference index to the first character after
1207 the closing brace.
1208
1209 Returns a new PyList
1210 */
1211 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1212 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1213 PyObject *val = NULL;
1214 PyObject *rval = PyList_New(0);
1215 Py_ssize_t next_idx;
1216 if (rval == NULL)
1217 return NULL;
1218
1219 /* skip whitespace after [ */
1220 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1221
1222 /* only loop if the array is non-empty */
1223 if (idx <= end_idx && str[idx] != ']') {
1224 while (idx <= end_idx) {
1225
1226 /* read any JSON term */
1227 val = scan_once_unicode(s, pystr, idx, &next_idx);
1228 if (val == NULL)
1229 goto bail;
1230
1231 if (PyList_Append(rval, val) == -1)
1232 goto bail;
1233
1234 Py_CLEAR(val);
1235 idx = next_idx;
1236
1237 /* skip whitespace between term and , */
1238 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1239
1240 /* bail if the array is closed or we didn't get the , delimiter */
1241 if (idx > end_idx) break;
1242 if (str[idx] == ']') {
1243 break;
1244 }
1245 else if (str[idx] != ',') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001246 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001247 goto bail;
1248 }
1249 idx++;
1250
1251 /* skip whitespace after , */
1252 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1253 }
1254 }
1255
1256 /* verify that idx < end_idx, str[idx] should be ']' */
1257 if (idx > end_idx || str[idx] != ']') {
1258 raise_errmsg("Expecting object", pystr, end_idx);
1259 goto bail;
1260 }
1261 *next_idx_ptr = idx + 1;
1262 return rval;
1263bail:
1264 Py_XDECREF(val);
1265 Py_DECREF(rval);
1266 return NULL;
1267}
1268
1269static PyObject *
1270_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1271 /* Read a JSON constant from PyString pystr.
1272 constant is the constant string that was found
1273 ("NaN", "Infinity", "-Infinity").
1274 idx is the index of the first character of the constant
1275 *next_idx_ptr is a return-by-reference index to the first character after
1276 the constant.
1277
1278 Returns the result of parse_constant
1279 */
1280 PyObject *cstr;
1281 PyObject *rval;
1282 /* constant is "NaN", "Infinity", or "-Infinity" */
1283 cstr = PyString_InternFromString(constant);
1284 if (cstr == NULL)
1285 return NULL;
1286
1287 /* rval = parse_constant(constant) */
1288 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1289 idx += PyString_GET_SIZE(cstr);
1290 Py_DECREF(cstr);
1291 *next_idx_ptr = idx;
1292 return rval;
1293}
1294
1295static PyObject *
1296_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1297 /* Read a JSON number from PyString pystr.
1298 idx is the index of the first character of the number
1299 *next_idx_ptr is a return-by-reference index to the first character after
1300 the number.
1301
1302 Returns a new PyObject representation of that number:
1303 PyInt, PyLong, or PyFloat.
1304 May return other types if parse_int or parse_float are set
1305 */
1306 char *str = PyString_AS_STRING(pystr);
1307 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1308 Py_ssize_t idx = start;
1309 int is_float = 0;
1310 PyObject *rval;
1311 PyObject *numstr;
1312
1313 /* read a sign if it's there, make sure it's not the end of the string */
1314 if (str[idx] == '-') {
1315 idx++;
1316 if (idx > end_idx) {
1317 PyErr_SetNone(PyExc_StopIteration);
1318 return NULL;
1319 }
1320 }
1321
1322 /* read as many integer digits as we find as long as it doesn't start with 0 */
1323 if (str[idx] >= '1' && str[idx] <= '9') {
1324 idx++;
1325 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1326 }
1327 /* if it starts with 0 we only expect one integer digit */
1328 else if (str[idx] == '0') {
1329 idx++;
1330 }
1331 /* no integer digits, error */
1332 else {
1333 PyErr_SetNone(PyExc_StopIteration);
1334 return NULL;
1335 }
1336
1337 /* if the next char is '.' followed by a digit then read all float digits */
1338 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1339 is_float = 1;
1340 idx += 2;
1341 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1342 }
1343
1344 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1345 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1346
1347 /* save the index of the 'e' or 'E' just in case we need to backtrack */
1348 Py_ssize_t e_start = idx;
1349 idx++;
1350
1351 /* read an exponent sign if present */
1352 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1353
1354 /* read all digits */
1355 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1356
1357 /* if we got a digit, then parse as float. if not, backtrack */
1358 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1359 is_float = 1;
1360 }
1361 else {
1362 idx = e_start;
1363 }
1364 }
1365
1366 /* copy the section we determined to be a number */
1367 numstr = PyString_FromStringAndSize(&str[start], idx - start);
1368 if (numstr == NULL)
1369 return NULL;
1370 if (is_float) {
1371 /* parse as a float using a fast path if available, otherwise call user defined method */
1372 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1373 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1374 }
1375 else {
Eric Smith129c97d2009-10-28 08:44:37 +00001376 double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1377 NULL, NULL);
1378 if (d == -1.0 && PyErr_Occurred())
1379 return NULL;
1380 rval = PyFloat_FromDouble(d);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001381 }
1382 }
1383 else {
1384 /* parse as an int using a fast path if available, otherwise call user defined method */
1385 if (s->parse_int != (PyObject *)&PyInt_Type) {
1386 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1387 }
1388 else {
1389 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1390 }
1391 }
1392 Py_DECREF(numstr);
1393 *next_idx_ptr = idx;
1394 return rval;
1395}
1396
1397static PyObject *
1398_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1399 /* Read a JSON number from PyUnicode pystr.
1400 idx is the index of the first character of the number
1401 *next_idx_ptr is a return-by-reference index to the first character after
1402 the number.
1403
1404 Returns a new PyObject representation of that number:
1405 PyInt, PyLong, or PyFloat.
1406 May return other types if parse_int or parse_float are set
1407 */
1408 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1409 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1410 Py_ssize_t idx = start;
1411 int is_float = 0;
1412 PyObject *rval;
1413 PyObject *numstr;
1414
1415 /* read a sign if it's there, make sure it's not the end of the string */
1416 if (str[idx] == '-') {
1417 idx++;
1418 if (idx > end_idx) {
1419 PyErr_SetNone(PyExc_StopIteration);
1420 return NULL;
1421 }
1422 }
1423
1424 /* read as many integer digits as we find as long as it doesn't start with 0 */
1425 if (str[idx] >= '1' && str[idx] <= '9') {
1426 idx++;
1427 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1428 }
1429 /* if it starts with 0 we only expect one integer digit */
1430 else if (str[idx] == '0') {
1431 idx++;
1432 }
1433 /* no integer digits, error */
1434 else {
1435 PyErr_SetNone(PyExc_StopIteration);
1436 return NULL;
1437 }
1438
1439 /* if the next char is '.' followed by a digit then read all float digits */
1440 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1441 is_float = 1;
1442 idx += 2;
Bob Ippolito76a982a2009-03-29 22:33:58 +00001443 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001444 }
1445
1446 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1447 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1448 Py_ssize_t e_start = idx;
1449 idx++;
1450
1451 /* read an exponent sign if present */
1452 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1453
1454 /* read all digits */
1455 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1456
1457 /* if we got a digit, then parse as float. if not, backtrack */
1458 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1459 is_float = 1;
1460 }
1461 else {
1462 idx = e_start;
1463 }
1464 }
1465
1466 /* copy the section we determined to be a number */
1467 numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1468 if (numstr == NULL)
1469 return NULL;
1470 if (is_float) {
1471 /* parse as a float using a fast path if available, otherwise call user defined method */
1472 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1473 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1474 }
1475 else {
1476 rval = PyFloat_FromString(numstr, NULL);
1477 }
1478 }
1479 else {
1480 /* no fast path for unicode -> int, just call */
1481 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1482 }
1483 Py_DECREF(numstr);
1484 *next_idx_ptr = idx;
1485 return rval;
1486}
1487
1488static PyObject *
1489scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1490{
1491 /* Read one JSON term (of any kind) from PyString pystr.
1492 idx is the index of the first character of the term
1493 *next_idx_ptr is a return-by-reference index to the first character after
1494 the number.
1495
1496 Returns a new PyObject representation of the term.
1497 */
Ezio Melotticec46492011-05-07 17:40:23 +03001498 PyObject *res;
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001499 int strict;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001500 char *str = PyString_AS_STRING(pystr);
1501 Py_ssize_t length = PyString_GET_SIZE(pystr);
Benjamin Peterson3e5d87b2014-04-14 11:43:09 -04001502 if (idx < 0) {
1503 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1504 return NULL;
1505 }
1506 if (idx >= length) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001507 PyErr_SetNone(PyExc_StopIteration);
1508 return NULL;
1509 }
1510 switch (str[idx]) {
1511 case '"':
1512 /* string */
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001513 strict = PyObject_IsTrue(s->strict);
1514 if (strict < 0)
1515 return NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001516 return scanstring_str(pystr, idx + 1,
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001517 PyString_AS_STRING(s->encoding), strict, next_idx_ptr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001518 case '{':
1519 /* object */
Ezio Melotticec46492011-05-07 17:40:23 +03001520 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1521 "from a byte string"))
1522 return NULL;
1523 res = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1524 Py_LeaveRecursiveCall();
1525 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001526 case '[':
1527 /* array */
Ezio Melotticec46492011-05-07 17:40:23 +03001528 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1529 "from a byte string"))
1530 return NULL;
1531 res = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1532 Py_LeaveRecursiveCall();
1533 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001534 case 'n':
1535 /* null */
1536 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1537 Py_INCREF(Py_None);
1538 *next_idx_ptr = idx + 4;
1539 return Py_None;
1540 }
1541 break;
1542 case 't':
1543 /* true */
1544 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1545 Py_INCREF(Py_True);
1546 *next_idx_ptr = idx + 4;
1547 return Py_True;
1548 }
1549 break;
1550 case 'f':
1551 /* false */
1552 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1553 Py_INCREF(Py_False);
1554 *next_idx_ptr = idx + 5;
1555 return Py_False;
1556 }
1557 break;
1558 case 'N':
1559 /* NaN */
1560 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1561 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1562 }
1563 break;
1564 case 'I':
1565 /* Infinity */
1566 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1567 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1568 }
1569 break;
1570 case '-':
1571 /* -Infinity */
1572 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1573 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1574 }
1575 break;
1576 }
1577 /* Didn't find a string, object, array, or named constant. Look for a number. */
1578 return _match_number_str(s, pystr, idx, next_idx_ptr);
1579}
1580
1581static PyObject *
1582scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1583{
1584 /* Read one JSON term (of any kind) from PyUnicode pystr.
1585 idx is the index of the first character of the term
1586 *next_idx_ptr is a return-by-reference index to the first character after
1587 the number.
1588
1589 Returns a new PyObject representation of the term.
1590 */
Ezio Melotticec46492011-05-07 17:40:23 +03001591 PyObject *res;
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001592 int strict;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001593 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1594 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
Benjamin Peterson3e5d87b2014-04-14 11:43:09 -04001595 if (idx < 0) {
1596 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1597 return NULL;
1598 }
1599 if (idx >= length) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001600 PyErr_SetNone(PyExc_StopIteration);
1601 return NULL;
1602 }
1603 switch (str[idx]) {
1604 case '"':
1605 /* string */
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001606 strict = PyObject_IsTrue(s->strict);
1607 if (strict < 0)
1608 return NULL;
1609 return scanstring_unicode(pystr, idx + 1, strict, next_idx_ptr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001610 case '{':
1611 /* object */
Ezio Melotticec46492011-05-07 17:40:23 +03001612 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1613 "from a unicode string"))
1614 return NULL;
1615 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1616 Py_LeaveRecursiveCall();
1617 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001618 case '[':
1619 /* array */
Ezio Melotticec46492011-05-07 17:40:23 +03001620 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1621 "from a unicode string"))
1622 return NULL;
1623 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1624 Py_LeaveRecursiveCall();
1625 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001626 case 'n':
1627 /* null */
1628 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1629 Py_INCREF(Py_None);
1630 *next_idx_ptr = idx + 4;
1631 return Py_None;
1632 }
1633 break;
1634 case 't':
1635 /* true */
1636 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1637 Py_INCREF(Py_True);
1638 *next_idx_ptr = idx + 4;
1639 return Py_True;
1640 }
1641 break;
1642 case 'f':
1643 /* false */
1644 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1645 Py_INCREF(Py_False);
1646 *next_idx_ptr = idx + 5;
1647 return Py_False;
1648 }
1649 break;
1650 case 'N':
1651 /* NaN */
1652 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1653 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1654 }
1655 break;
1656 case 'I':
1657 /* Infinity */
1658 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1659 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1660 }
1661 break;
1662 case '-':
1663 /* -Infinity */
1664 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1665 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1666 }
1667 break;
1668 }
1669 /* Didn't find a string, object, array, or named constant. Look for a number. */
1670 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1671}
1672
1673static PyObject *
1674scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1675{
1676 /* Python callable interface to scan_once_{str,unicode} */
1677 PyObject *pystr;
1678 PyObject *rval;
1679 Py_ssize_t idx;
1680 Py_ssize_t next_idx = -1;
1681 static char *kwlist[] = {"string", "idx", NULL};
1682 PyScannerObject *s;
1683 assert(PyScanner_Check(self));
1684 s = (PyScannerObject *)self;
1685 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1686 return NULL;
1687
1688 if (PyString_Check(pystr)) {
1689 rval = scan_once_str(s, pystr, idx, &next_idx);
1690 }
1691 else if (PyUnicode_Check(pystr)) {
1692 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1693 }
1694 else {
1695 PyErr_Format(PyExc_TypeError,
1696 "first argument must be a string, not %.80s",
1697 Py_TYPE(pystr)->tp_name);
1698 return NULL;
1699 }
1700 return _build_rval_index_tuple(rval, next_idx);
1701}
1702
1703static PyObject *
1704scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1705{
1706 PyScannerObject *s;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001707 PyObject *ctx;
1708 static char *kwlist[] = {"context", NULL};
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001709
1710 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001711 return NULL;
1712
1713 s = (PyScannerObject *)type->tp_alloc(type, 0);
1714 if (s == NULL)
1715 return NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001716
1717 /* PyString_AS_STRING is used on encoding */
1718 s->encoding = PyObject_GetAttrString(ctx, "encoding");
Antoine Pitrou187177f2009-12-08 15:40:51 +00001719 if (s->encoding == NULL)
1720 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001721 if (s->encoding == Py_None) {
1722 Py_DECREF(Py_None);
1723 s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1724 }
1725 else if (PyUnicode_Check(s->encoding)) {
1726 PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03001727 Py_SETREF(s->encoding, tmp);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001728 }
Amaury Forgeot d'Arcdee76e62012-01-13 22:53:25 +01001729 if (s->encoding == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001730 goto bail;
Amaury Forgeot d'Arcdee76e62012-01-13 22:53:25 +01001731 if (!PyString_Check(s->encoding)) {
1732 PyErr_Format(PyExc_TypeError,
1733 "encoding must be a string, not %.80s",
1734 Py_TYPE(s->encoding)->tp_name);
1735 goto bail;
1736 }
1737
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001738
1739 /* All of these will fail "gracefully" so we don't need to verify them */
1740 s->strict = PyObject_GetAttrString(ctx, "strict");
1741 if (s->strict == NULL)
1742 goto bail;
1743 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1744 if (s->object_hook == NULL)
1745 goto bail;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001746 s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
Georg Brandld823bdc2011-01-02 14:20:16 +00001747 if (s->pairs_hook == NULL)
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001748 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001749 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1750 if (s->parse_float == NULL)
1751 goto bail;
1752 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1753 if (s->parse_int == NULL)
1754 goto bail;
1755 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1756 if (s->parse_constant == NULL)
1757 goto bail;
1758
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001759 return (PyObject *)s;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001760
1761bail:
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001762 Py_DECREF(s);
1763 return NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001764}
1765
1766PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1767
1768static
1769PyTypeObject PyScannerType = {
1770 PyObject_HEAD_INIT(NULL)
1771 0, /* tp_internal */
1772 "_json.Scanner", /* tp_name */
1773 sizeof(PyScannerObject), /* tp_basicsize */
1774 0, /* tp_itemsize */
1775 scanner_dealloc, /* tp_dealloc */
1776 0, /* tp_print */
1777 0, /* tp_getattr */
1778 0, /* tp_setattr */
1779 0, /* tp_compare */
1780 0, /* tp_repr */
1781 0, /* tp_as_number */
1782 0, /* tp_as_sequence */
1783 0, /* tp_as_mapping */
1784 0, /* tp_hash */
1785 scanner_call, /* tp_call */
1786 0, /* tp_str */
1787 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1788 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1789 0, /* tp_as_buffer */
1790 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1791 scanner_doc, /* tp_doc */
1792 scanner_traverse, /* tp_traverse */
1793 scanner_clear, /* tp_clear */
1794 0, /* tp_richcompare */
1795 0, /* tp_weaklistoffset */
1796 0, /* tp_iter */
1797 0, /* tp_iternext */
1798 0, /* tp_methods */
1799 scanner_members, /* tp_members */
1800 0, /* tp_getset */
1801 0, /* tp_base */
1802 0, /* tp_dict */
1803 0, /* tp_descr_get */
1804 0, /* tp_descr_set */
1805 0, /* tp_dictoffset */
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001806 0, /* tp_init */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001807 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1808 scanner_new, /* tp_new */
1809 0,/* PyObject_GC_Del, */ /* tp_free */
1810};
1811
1812static PyObject *
1813encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1814{
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001815 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1816
1817 PyEncoderObject *s;
Antoine Pitrou187177f2009-12-08 15:40:51 +00001818 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001819 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan_obj;
1820 int allow_nan;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001821
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001822 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
Antoine Pitrou187177f2009-12-08 15:40:51 +00001823 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001824 &sort_keys, &skipkeys, &allow_nan_obj))
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001825 return NULL;
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001826
1827 allow_nan = PyObject_IsTrue(allow_nan_obj);
1828 if (allow_nan < 0)
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001829 return NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001830
Serhiy Storchaka76a64ca2015-07-26 09:07:53 +03001831 if (markers != Py_None && !PyDict_Check(markers)) {
1832 PyErr_Format(PyExc_TypeError,
1833 "make_encoder() argument 1 must be dict or None, "
1834 "not %.200s", Py_TYPE(markers)->tp_name);
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001835 return NULL;
Serhiy Storchaka76a64ca2015-07-26 09:07:53 +03001836 }
1837
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001838 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1839 if (s == NULL)
1840 return NULL;
1841
Antoine Pitrou187177f2009-12-08 15:40:51 +00001842 s->markers = markers;
1843 s->defaultfn = defaultfn;
1844 s->encoder = encoder;
1845 s->indent = indent;
1846 s->key_separator = key_separator;
1847 s->item_separator = item_separator;
1848 s->sort_keys = sort_keys;
1849 s->skipkeys = skipkeys;
1850 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001851 s->allow_nan = allow_nan;
Antoine Pitrou187177f2009-12-08 15:40:51 +00001852
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001853 Py_INCREF(s->markers);
1854 Py_INCREF(s->defaultfn);
1855 Py_INCREF(s->encoder);
1856 Py_INCREF(s->indent);
1857 Py_INCREF(s->key_separator);
1858 Py_INCREF(s->item_separator);
1859 Py_INCREF(s->sort_keys);
1860 Py_INCREF(s->skipkeys);
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03001861 return (PyObject *)s;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001862}
1863
1864static PyObject *
1865encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1866{
1867 /* Python callable interface to encode_listencode_obj */
1868 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1869 PyObject *obj;
1870 PyObject *rval;
1871 Py_ssize_t indent_level;
1872 PyEncoderObject *s;
1873 assert(PyEncoder_Check(self));
1874 s = (PyEncoderObject *)self;
1875 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1876 &obj, _convertPyInt_AsSsize_t, &indent_level))
1877 return NULL;
1878 rval = PyList_New(0);
1879 if (rval == NULL)
1880 return NULL;
1881 if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1882 Py_DECREF(rval);
1883 return NULL;
1884 }
1885 return rval;
1886}
1887
1888static PyObject *
1889_encoded_const(PyObject *obj)
1890{
1891 /* Return the JSON string representation of None, True, False */
1892 if (obj == Py_None) {
1893 static PyObject *s_null = NULL;
1894 if (s_null == NULL) {
1895 s_null = PyString_InternFromString("null");
1896 }
1897 Py_INCREF(s_null);
1898 return s_null;
1899 }
1900 else if (obj == Py_True) {
1901 static PyObject *s_true = NULL;
1902 if (s_true == NULL) {
1903 s_true = PyString_InternFromString("true");
1904 }
1905 Py_INCREF(s_true);
1906 return s_true;
1907 }
1908 else if (obj == Py_False) {
1909 static PyObject *s_false = NULL;
1910 if (s_false == NULL) {
1911 s_false = PyString_InternFromString("false");
1912 }
1913 Py_INCREF(s_false);
1914 return s_false;
1915 }
1916 else {
1917 PyErr_SetString(PyExc_ValueError, "not a const");
1918 return NULL;
1919 }
1920}
1921
1922static PyObject *
1923encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1924{
1925 /* Return the JSON representation of a PyFloat */
1926 double i = PyFloat_AS_DOUBLE(obj);
1927 if (!Py_IS_FINITE(i)) {
1928 if (!s->allow_nan) {
1929 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1930 return NULL;
1931 }
1932 if (i > 0) {
1933 return PyString_FromString("Infinity");
1934 }
1935 else if (i < 0) {
1936 return PyString_FromString("-Infinity");
1937 }
1938 else {
1939 return PyString_FromString("NaN");
1940 }
1941 }
Mark Dickinsone6239a32016-09-03 17:45:00 +01001942 /* Make sure to use the base float class repr method */
1943 return PyFloat_Type.tp_repr(obj);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001944}
1945
1946static PyObject *
1947encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1948{
1949 /* Return the JSON representation of a string */
1950 if (s->fast_encode)
1951 return py_encode_basestring_ascii(NULL, obj);
1952 else
1953 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1954}
1955
1956static int
1957_steal_list_append(PyObject *lst, PyObject *stolen)
1958{
1959 /* Append stolen and then decrement its reference count */
1960 int rval = PyList_Append(lst, stolen);
1961 Py_DECREF(stolen);
1962 return rval;
1963}
1964
1965static int
1966encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
1967{
1968 /* Encode Python object obj to a JSON term, rval is a PyList */
1969 PyObject *newobj;
1970 int rv;
1971
1972 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1973 PyObject *cstr = _encoded_const(obj);
1974 if (cstr == NULL)
1975 return -1;
1976 return _steal_list_append(rval, cstr);
1977 }
1978 else if (PyString_Check(obj) || PyUnicode_Check(obj))
1979 {
1980 PyObject *encoded = encoder_encode_string(s, obj);
1981 if (encoded == NULL)
1982 return -1;
1983 return _steal_list_append(rval, encoded);
1984 }
1985 else if (PyInt_Check(obj) || PyLong_Check(obj)) {
1986 PyObject *encoded = PyObject_Str(obj);
1987 if (encoded == NULL)
1988 return -1;
1989 return _steal_list_append(rval, encoded);
1990 }
1991 else if (PyFloat_Check(obj)) {
1992 PyObject *encoded = encoder_encode_float(s, obj);
1993 if (encoded == NULL)
1994 return -1;
1995 return _steal_list_append(rval, encoded);
1996 }
1997 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melottic1ec7b52011-05-11 00:55:35 +03001998 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1999 return -1;
2000 rv = encoder_listencode_list(s, rval, obj, indent_level);
2001 Py_LeaveRecursiveCall();
2002 return rv;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002003 }
2004 else if (PyDict_Check(obj)) {
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002005 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2006 return -1;
2007 rv = encoder_listencode_dict(s, rval, obj, indent_level);
2008 Py_LeaveRecursiveCall();
2009 return rv;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002010 }
2011 else {
2012 PyObject *ident = NULL;
2013 if (s->markers != Py_None) {
2014 int has_key;
2015 ident = PyLong_FromVoidPtr(obj);
2016 if (ident == NULL)
2017 return -1;
2018 has_key = PyDict_Contains(s->markers, ident);
2019 if (has_key) {
2020 if (has_key != -1)
2021 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2022 Py_DECREF(ident);
2023 return -1;
2024 }
2025 if (PyDict_SetItem(s->markers, ident, obj)) {
2026 Py_DECREF(ident);
2027 return -1;
2028 }
2029 }
2030 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2031 if (newobj == NULL) {
2032 Py_XDECREF(ident);
2033 return -1;
2034 }
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002035
Serhiy Storchakaf04790a2017-01-03 11:17:53 +02002036 if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
2037 Py_DECREF(newobj);
2038 Py_XDECREF(ident);
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002039 return -1;
Serhiy Storchakaf04790a2017-01-03 11:17:53 +02002040 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002041 rv = encoder_listencode_obj(s, rval, newobj, indent_level);
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002042 Py_LeaveRecursiveCall();
2043
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002044 Py_DECREF(newobj);
2045 if (rv) {
2046 Py_XDECREF(ident);
2047 return -1;
2048 }
2049 if (ident != NULL) {
2050 if (PyDict_DelItem(s->markers, ident)) {
2051 Py_XDECREF(ident);
2052 return -1;
2053 }
2054 Py_XDECREF(ident);
2055 }
2056 return rv;
2057 }
2058}
2059
2060static int
2061encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2062{
2063 /* Encode Python dict dct a JSON term, rval is a PyList */
2064 static PyObject *open_dict = NULL;
2065 static PyObject *close_dict = NULL;
2066 static PyObject *empty_dict = NULL;
2067 PyObject *kstr = NULL;
2068 PyObject *ident = NULL;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002069 PyObject *key = NULL;
2070 PyObject *value = NULL;
2071 PyObject *it = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002072 int skipkeys;
2073 Py_ssize_t idx;
2074
2075 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
2076 open_dict = PyString_InternFromString("{");
2077 close_dict = PyString_InternFromString("}");
2078 empty_dict = PyString_InternFromString("{}");
2079 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
2080 return -1;
2081 }
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002082 if (Py_SIZE(dct) == 0)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002083 return PyList_Append(rval, empty_dict);
2084
2085 if (s->markers != Py_None) {
2086 int has_key;
2087 ident = PyLong_FromVoidPtr(dct);
2088 if (ident == NULL)
2089 goto bail;
2090 has_key = PyDict_Contains(s->markers, ident);
2091 if (has_key) {
2092 if (has_key != -1)
2093 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2094 goto bail;
2095 }
2096 if (PyDict_SetItem(s->markers, ident, dct)) {
2097 goto bail;
2098 }
2099 }
2100
2101 if (PyList_Append(rval, open_dict))
2102 goto bail;
2103
2104 if (s->indent != Py_None) {
2105 /* TODO: DOES NOT RUN */
2106 indent_level += 1;
2107 /*
2108 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2109 separator = _item_separator + newline_indent
2110 buf += newline_indent
2111 */
2112 }
2113
2114 /* TODO: C speedup not implemented for sort_keys */
2115
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002116 it = PyObject_GetIter(dct);
2117 if (it == NULL)
2118 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002119 skipkeys = PyObject_IsTrue(s->skipkeys);
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03002120 if (skipkeys < 0)
2121 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002122 idx = 0;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002123 while ((key = PyIter_Next(it)) != NULL) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002124 PyObject *encoded;
2125
2126 if (PyString_Check(key) || PyUnicode_Check(key)) {
2127 Py_INCREF(key);
2128 kstr = key;
2129 }
2130 else if (PyFloat_Check(key)) {
2131 kstr = encoder_encode_float(s, key);
2132 if (kstr == NULL)
2133 goto bail;
2134 }
2135 else if (PyInt_Check(key) || PyLong_Check(key)) {
2136 kstr = PyObject_Str(key);
2137 if (kstr == NULL)
2138 goto bail;
2139 }
2140 else if (key == Py_True || key == Py_False || key == Py_None) {
2141 kstr = _encoded_const(key);
2142 if (kstr == NULL)
2143 goto bail;
2144 }
2145 else if (skipkeys) {
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002146 Py_DECREF(key);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002147 continue;
2148 }
2149 else {
2150 /* TODO: include repr of key */
Doug Hellmannf31db932010-07-21 12:36:33 +00002151 PyErr_SetString(PyExc_TypeError, "keys must be a string");
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002152 goto bail;
2153 }
2154
2155 if (idx) {
2156 if (PyList_Append(rval, s->item_separator))
2157 goto bail;
2158 }
2159
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002160 value = PyObject_GetItem(dct, key);
2161 if (value == NULL)
2162 goto bail;
2163
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002164 encoded = encoder_encode_string(s, kstr);
2165 Py_CLEAR(kstr);
2166 if (encoded == NULL)
2167 goto bail;
2168 if (PyList_Append(rval, encoded)) {
2169 Py_DECREF(encoded);
2170 goto bail;
2171 }
2172 Py_DECREF(encoded);
2173 if (PyList_Append(rval, s->key_separator))
2174 goto bail;
2175 if (encoder_listencode_obj(s, rval, value, indent_level))
2176 goto bail;
2177 idx += 1;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002178 Py_CLEAR(value);
2179 Py_DECREF(key);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002180 }
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002181 if (PyErr_Occurred())
2182 goto bail;
2183 Py_CLEAR(it);
2184
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002185 if (ident != NULL) {
2186 if (PyDict_DelItem(s->markers, ident))
2187 goto bail;
2188 Py_CLEAR(ident);
2189 }
2190 if (s->indent != Py_None) {
2191 /* TODO: DOES NOT RUN */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002192 /*
Brett Cannon8e9757e2010-05-03 23:43:49 +00002193 indent_level -= 1;
2194
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002195 yield '\n' + (' ' * (_indent * _current_indent_level))
2196 */
2197 }
2198 if (PyList_Append(rval, close_dict))
2199 goto bail;
2200 return 0;
2201
2202bail:
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002203 Py_XDECREF(it);
2204 Py_XDECREF(key);
2205 Py_XDECREF(value);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002206 Py_XDECREF(kstr);
2207 Py_XDECREF(ident);
2208 return -1;
2209}
2210
2211
2212static int
2213encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2214{
2215 /* Encode Python list seq to a JSON term, rval is a PyList */
2216 static PyObject *open_array = NULL;
2217 static PyObject *close_array = NULL;
2218 static PyObject *empty_array = NULL;
2219 PyObject *ident = NULL;
2220 PyObject *s_fast = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002221 Py_ssize_t i;
2222
2223 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2224 open_array = PyString_InternFromString("[");
2225 close_array = PyString_InternFromString("]");
2226 empty_array = PyString_InternFromString("[]");
2227 if (open_array == NULL || close_array == NULL || empty_array == NULL)
2228 return -1;
2229 }
2230 ident = NULL;
2231 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
2232 if (s_fast == NULL)
2233 return -1;
Antoine Pitroue9e35c32012-11-01 20:07:40 +01002234 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002235 Py_DECREF(s_fast);
2236 return PyList_Append(rval, empty_array);
2237 }
2238
2239 if (s->markers != Py_None) {
2240 int has_key;
2241 ident = PyLong_FromVoidPtr(seq);
2242 if (ident == NULL)
2243 goto bail;
2244 has_key = PyDict_Contains(s->markers, ident);
2245 if (has_key) {
2246 if (has_key != -1)
2247 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2248 goto bail;
2249 }
2250 if (PyDict_SetItem(s->markers, ident, seq)) {
2251 goto bail;
2252 }
2253 }
2254
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002255 if (PyList_Append(rval, open_array))
2256 goto bail;
2257 if (s->indent != Py_None) {
2258 /* TODO: DOES NOT RUN */
2259 indent_level += 1;
2260 /*
2261 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2262 separator = _item_separator + newline_indent
2263 buf += newline_indent
2264 */
2265 }
Antoine Pitroue9e35c32012-11-01 20:07:40 +01002266 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
2267 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002268 if (i) {
2269 if (PyList_Append(rval, s->item_separator))
2270 goto bail;
2271 }
2272 if (encoder_listencode_obj(s, rval, obj, indent_level))
2273 goto bail;
2274 }
2275 if (ident != NULL) {
2276 if (PyDict_DelItem(s->markers, ident))
2277 goto bail;
2278 Py_CLEAR(ident);
2279 }
2280 if (s->indent != Py_None) {
2281 /* TODO: DOES NOT RUN */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002282 /*
Brett Cannon8e9757e2010-05-03 23:43:49 +00002283 indent_level -= 1;
2284
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002285 yield '\n' + (' ' * (_indent * _current_indent_level))
2286 */
2287 }
2288 if (PyList_Append(rval, close_array))
2289 goto bail;
2290 Py_DECREF(s_fast);
2291 return 0;
2292
2293bail:
2294 Py_XDECREF(ident);
2295 Py_DECREF(s_fast);
2296 return -1;
2297}
2298
2299static void
2300encoder_dealloc(PyObject *self)
2301{
INADA Naoki4cde4bd2017-09-04 12:31:41 +09002302 /* bpo-31095: UnTrack is needed before calling any callbacks */
2303 PyObject_GC_UnTrack(self);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002304 encoder_clear(self);
2305 Py_TYPE(self)->tp_free(self);
2306}
2307
2308static int
2309encoder_traverse(PyObject *self, visitproc visit, void *arg)
2310{
2311 PyEncoderObject *s;
2312 assert(PyEncoder_Check(self));
2313 s = (PyEncoderObject *)self;
2314 Py_VISIT(s->markers);
2315 Py_VISIT(s->defaultfn);
2316 Py_VISIT(s->encoder);
2317 Py_VISIT(s->indent);
2318 Py_VISIT(s->key_separator);
2319 Py_VISIT(s->item_separator);
2320 Py_VISIT(s->sort_keys);
2321 Py_VISIT(s->skipkeys);
2322 return 0;
2323}
2324
2325static int
2326encoder_clear(PyObject *self)
2327{
2328 /* Deallocate Encoder */
2329 PyEncoderObject *s;
2330 assert(PyEncoder_Check(self));
2331 s = (PyEncoderObject *)self;
2332 Py_CLEAR(s->markers);
2333 Py_CLEAR(s->defaultfn);
2334 Py_CLEAR(s->encoder);
2335 Py_CLEAR(s->indent);
2336 Py_CLEAR(s->key_separator);
2337 Py_CLEAR(s->item_separator);
2338 Py_CLEAR(s->sort_keys);
2339 Py_CLEAR(s->skipkeys);
2340 return 0;
2341}
2342
2343PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2344
2345static
2346PyTypeObject PyEncoderType = {
2347 PyObject_HEAD_INIT(NULL)
2348 0, /* tp_internal */
2349 "_json.Encoder", /* tp_name */
2350 sizeof(PyEncoderObject), /* tp_basicsize */
2351 0, /* tp_itemsize */
2352 encoder_dealloc, /* tp_dealloc */
2353 0, /* tp_print */
2354 0, /* tp_getattr */
2355 0, /* tp_setattr */
2356 0, /* tp_compare */
2357 0, /* tp_repr */
2358 0, /* tp_as_number */
2359 0, /* tp_as_sequence */
2360 0, /* tp_as_mapping */
2361 0, /* tp_hash */
2362 encoder_call, /* tp_call */
2363 0, /* tp_str */
2364 0, /* tp_getattro */
2365 0, /* tp_setattro */
2366 0, /* tp_as_buffer */
2367 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2368 encoder_doc, /* tp_doc */
2369 encoder_traverse, /* tp_traverse */
2370 encoder_clear, /* tp_clear */
2371 0, /* tp_richcompare */
2372 0, /* tp_weaklistoffset */
2373 0, /* tp_iter */
2374 0, /* tp_iternext */
2375 0, /* tp_methods */
2376 encoder_members, /* tp_members */
2377 0, /* tp_getset */
2378 0, /* tp_base */
2379 0, /* tp_dict */
2380 0, /* tp_descr_get */
2381 0, /* tp_descr_set */
2382 0, /* tp_dictoffset */
Serhiy Storchaka5d7a18f2017-05-05 11:21:45 +03002383 0, /* tp_init */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002384 0, /* tp_alloc */
2385 encoder_new, /* tp_new */
2386 0, /* tp_free */
2387};
2388
2389static PyMethodDef speedups_methods[] = {
2390 {"encode_basestring_ascii",
2391 (PyCFunction)py_encode_basestring_ascii,
2392 METH_O,
2393 pydoc_encode_basestring_ascii},
2394 {"scanstring",
2395 (PyCFunction)py_scanstring,
2396 METH_VARARGS,
2397 pydoc_scanstring},
Brett Cannon4b964f92008-05-05 20:21:38 +00002398 {NULL, NULL, 0, NULL}
2399};
2400
2401PyDoc_STRVAR(module_doc,
2402"json speedups\n");
2403
2404void
2405init_json(void)
2406{
2407 PyObject *m;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002408 if (PyType_Ready(&PyScannerType) < 0)
2409 return;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002410 if (PyType_Ready(&PyEncoderType) < 0)
2411 return;
2412 m = Py_InitModule3("_json", speedups_methods, module_doc);
Serhiy Storchaka045c4512015-07-24 12:58:25 +03002413 if (m == NULL)
2414 return;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002415 Py_INCREF((PyObject*)&PyScannerType);
2416 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2417 Py_INCREF((PyObject*)&PyEncoderType);
2418 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
Brett Cannon4b964f92008-05-05 20:21:38 +00002419}