blob: 9c8f66ce110b333db755debedc0c6cfa553734ac [file] [log] [blame]
Brett Cannon4b964f92008-05-05 20:21:38 +00001#include "Python.h"
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002#include "structmember.h"
/* Compatibility shims so the module builds against older Python headers. */

/* Py_TYPE() is supplied here when the headers are older than 2.6 and do
   not already provide it. */
#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif

/* Headers older than 2.5 have no Py_ssize_t: map the type, its limits
   and its int conversion helpers onto plain int / long. */
#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
typedef int Py_ssize_t;
#define PY_SSIZE_T_MAX INT_MAX
#define PY_SSIZE_T_MIN INT_MIN
#define PyInt_FromSsize_t PyInt_FromLong
#define PyInt_AsSsize_t PyInt_AsLong
#endif

/* Fallback: a value is finite when it is neither infinite nor NaN. */
#if !defined(Py_IS_FINITE)
#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
#endif

/* UNUSED marks a deliberately unused parameter; it expands to nothing on
   compilers that do not define __GNUC__. */
#if defined(__GNUC__)
#define UNUSED __attribute__((__unused__))
#else
#define UNUSED
#endif
Brett Cannon4b964f92008-05-05 20:21:38 +000022
23#define DEFAULT_ENCODING "utf-8"
Brett Cannon4b964f92008-05-05 20:21:38 +000024
Bob Ippolitod914e3f2009-03-17 23:19:00 +000025#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
26#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
27#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
28#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
29
30static PyTypeObject PyScannerType;
31static PyTypeObject PyEncoderType;
32
33typedef struct _PyScannerObject {
34 PyObject_HEAD
35 PyObject *encoding;
36 PyObject *strict;
37 PyObject *object_hook;
Raymond Hettinger91852ca2009-03-19 19:19:03 +000038 PyObject *pairs_hook;
Bob Ippolitod914e3f2009-03-17 23:19:00 +000039 PyObject *parse_float;
40 PyObject *parse_int;
41 PyObject *parse_constant;
42} PyScannerObject;
43
44static PyMemberDef scanner_members[] = {
45 {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
46 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
47 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
Raymond Hettinger91852ca2009-03-19 19:19:03 +000048 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
Bob Ippolitod914e3f2009-03-17 23:19:00 +000049 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
50 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
51 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
52 {NULL}
53};
54
55typedef struct _PyEncoderObject {
56 PyObject_HEAD
57 PyObject *markers;
58 PyObject *defaultfn;
59 PyObject *encoder;
60 PyObject *indent;
61 PyObject *key_separator;
62 PyObject *item_separator;
63 PyObject *sort_keys;
64 PyObject *skipkeys;
65 int fast_encode;
66 int allow_nan;
67} PyEncoderObject;
68
69static PyMemberDef encoder_members[] = {
70 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
71 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
72 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
73 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
74 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
75 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
76 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
77 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
78 {NULL}
79};
80
81static Py_ssize_t
82ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
83static PyObject *
84ascii_escape_unicode(PyObject *pystr);
85static PyObject *
86ascii_escape_str(PyObject *pystr);
87static PyObject *
88py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
89void init_json(void);
90static PyObject *
91scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
92static PyObject *
93scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
94static PyObject *
95_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
96static PyObject *
97scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
98static int
99scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
100static void
101scanner_dealloc(PyObject *self);
102static int
103scanner_clear(PyObject *self);
104static PyObject *
105encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
106static int
107encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
108static void
109encoder_dealloc(PyObject *self);
110static int
111encoder_clear(PyObject *self);
112static int
113encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
114static int
115encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
116static int
117encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
118static PyObject *
Raymond Hettingera0b8d9a2009-03-19 19:24:43 +0000119_encoded_const(PyObject *obj);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000120static void
121raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
122static PyObject *
123encoder_encode_string(PyEncoderObject *s, PyObject *obj);
124static int
125_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
126static PyObject *
127_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
128static PyObject *
129encoder_encode_float(PyEncoderObject *s, PyObject *obj);
130
/* True when c may be copied to ASCII JSON output unescaped: a character
   in the range ' ' .. '~' other than backslash and double quote.  The
   argument is parenthesized so that expression arguments (for example a
   conditional expression) are evaluated as a unit; note that it is still
   evaluated more than once. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* True for the four whitespace characters tested: space, tab, LF, CR. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* Output bytes needed for one "\uXXXX" escape. */
#define MIN_EXPANSION 6
/* Worst-case output bytes for one input character: on wide-unicode builds
   a single character can become a surrogate pair "\uXXXX\uXXXX". */
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
140
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000141static int
142_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
143{
144 /* PyObject to Py_ssize_t converter */
145 *size_ptr = PyInt_AsSsize_t(o);
Georg Brandlf71ba952009-05-05 07:48:12 +0000146 if (*size_ptr == -1 && PyErr_Occurred())
147 return 0;
148 return 1;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000149}
150
151static PyObject *
152_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
153{
154 /* Py_ssize_t to PyObject converter */
155 return PyInt_FromSsize_t(*size_ptr);
156}
157
Brett Cannon4b964f92008-05-05 20:21:38 +0000158static Py_ssize_t
159ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
160{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000161 /* Escape unicode code point c to ASCII escape sequences
162 in char *output. output must have at least 12 bytes unused to
163 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Brett Cannon4b964f92008-05-05 20:21:38 +0000164 output[chars++] = '\\';
165 switch (c) {
166 case '\\': output[chars++] = (char)c; break;
167 case '"': output[chars++] = (char)c; break;
168 case '\b': output[chars++] = 'b'; break;
169 case '\f': output[chars++] = 'f'; break;
170 case '\n': output[chars++] = 'n'; break;
171 case '\r': output[chars++] = 'r'; break;
172 case '\t': output[chars++] = 't'; break;
173 default:
174#ifdef Py_UNICODE_WIDE
175 if (c >= 0x10000) {
176 /* UTF-16 surrogate pair */
177 Py_UNICODE v = c - 0x10000;
178 c = 0xd800 | ((v >> 10) & 0x3ff);
179 output[chars++] = 'u';
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000180 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
181 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
182 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
183 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Brett Cannon4b964f92008-05-05 20:21:38 +0000184 c = 0xdc00 | (v & 0x3ff);
185 output[chars++] = '\\';
186 }
187#endif
188 output[chars++] = 'u';
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000189 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
190 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
191 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
192 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Brett Cannon4b964f92008-05-05 20:21:38 +0000193 }
194 return chars;
195}
196
197static PyObject *
198ascii_escape_unicode(PyObject *pystr)
199{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000200 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
Brett Cannon4b964f92008-05-05 20:21:38 +0000201 Py_ssize_t i;
202 Py_ssize_t input_chars;
203 Py_ssize_t output_size;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000204 Py_ssize_t max_output_size;
Brett Cannon4b964f92008-05-05 20:21:38 +0000205 Py_ssize_t chars;
206 PyObject *rval;
207 char *output;
208 Py_UNICODE *input_unicode;
209
210 input_chars = PyUnicode_GET_SIZE(pystr);
211 input_unicode = PyUnicode_AS_UNICODE(pystr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000212
Brett Cannon4b964f92008-05-05 20:21:38 +0000213 /* One char input can be up to 6 chars output, estimate 4 of these */
214 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000215 max_output_size = 2 + (input_chars * MAX_EXPANSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000216 rval = PyString_FromStringAndSize(NULL, output_size);
Brett Cannon4b964f92008-05-05 20:21:38 +0000217 if (rval == NULL) {
218 return NULL;
219 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000220 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000221 chars = 0;
222 output[chars++] = '"';
223 for (i = 0; i < input_chars; i++) {
224 Py_UNICODE c = input_unicode[i];
225 if (S_CHAR(c)) {
226 output[chars++] = (char)c;
227 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000228 else {
Brett Cannon4b964f92008-05-05 20:21:38 +0000229 chars = ascii_escape_char(c, output, chars);
230 }
231 if (output_size - chars < (1 + MAX_EXPANSION)) {
232 /* There's more than four, so let's resize by a lot */
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000233 Py_ssize_t new_output_size = output_size * 2;
Brett Cannon4b964f92008-05-05 20:21:38 +0000234 /* This is an upper bound */
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000235 if (new_output_size > max_output_size) {
236 new_output_size = max_output_size;
Brett Cannon4b964f92008-05-05 20:21:38 +0000237 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000238 /* Make sure that the output size changed before resizing */
239 if (new_output_size != output_size) {
240 output_size = new_output_size;
241 if (_PyString_Resize(&rval, output_size) == -1) {
242 return NULL;
243 }
244 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000245 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000246 }
247 }
248 output[chars++] = '"';
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000249 if (_PyString_Resize(&rval, chars) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000250 return NULL;
251 }
252 return rval;
253}
254
255static PyObject *
256ascii_escape_str(PyObject *pystr)
257{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000258 /* Take a PyString pystr and return a new ASCII-only escaped PyString */
Brett Cannon4b964f92008-05-05 20:21:38 +0000259 Py_ssize_t i;
260 Py_ssize_t input_chars;
261 Py_ssize_t output_size;
262 Py_ssize_t chars;
263 PyObject *rval;
264 char *output;
265 char *input_str;
266
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000267 input_chars = PyString_GET_SIZE(pystr);
268 input_str = PyString_AS_STRING(pystr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000269
270 /* Fast path for a string that's already ASCII */
271 for (i = 0; i < input_chars; i++) {
272 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
273 if (!S_CHAR(c)) {
274 /* If we have to escape something, scan the string for unicode */
275 Py_ssize_t j;
276 for (j = i; j < input_chars; j++) {
277 c = (Py_UNICODE)(unsigned char)input_str[j];
278 if (c > 0x7f) {
279 /* We hit a non-ASCII character, bail to unicode mode */
280 PyObject *uni;
281 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
282 if (uni == NULL) {
283 return NULL;
284 }
285 rval = ascii_escape_unicode(uni);
286 Py_DECREF(uni);
287 return rval;
288 }
289 }
290 break;
291 }
292 }
293
294 if (i == input_chars) {
295 /* Input is already ASCII */
296 output_size = 2 + input_chars;
297 }
298 else {
299 /* One char input can be up to 6 chars output, estimate 4 of these */
300 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
301 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000302 rval = PyString_FromStringAndSize(NULL, output_size);
Brett Cannon4b964f92008-05-05 20:21:38 +0000303 if (rval == NULL) {
304 return NULL;
305 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000306 output = PyString_AS_STRING(rval);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000307 output[0] = '"';
308
309 /* We know that everything up to i is ASCII already */
310 chars = i + 1;
311 memcpy(&output[1], input_str, i);
312
313 for (; i < input_chars; i++) {
314 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
Brett Cannon4b964f92008-05-05 20:21:38 +0000315 if (S_CHAR(c)) {
316 output[chars++] = (char)c;
317 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000318 else {
Brett Cannon4b964f92008-05-05 20:21:38 +0000319 chars = ascii_escape_char(c, output, chars);
320 }
321 /* An ASCII char can't possibly expand to a surrogate! */
322 if (output_size - chars < (1 + MIN_EXPANSION)) {
323 /* There's more than four, so let's resize by a lot */
324 output_size *= 2;
325 if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
326 output_size = 2 + (input_chars * MIN_EXPANSION);
327 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000328 if (_PyString_Resize(&rval, output_size) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000329 return NULL;
330 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000331 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000332 }
333 }
334 output[chars++] = '"';
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000335 if (_PyString_Resize(&rval, chars) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000336 return NULL;
337 }
338 return rval;
339}
340
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000341static void
Brett Cannon4b964f92008-05-05 20:21:38 +0000342raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
343{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000344 /* Use the Python function json.decoder.errmsg to raise a nice
345 looking ValueError exception */
Brett Cannon4b964f92008-05-05 20:21:38 +0000346 static PyObject *errmsg_fn = NULL;
347 PyObject *pymsg;
348 if (errmsg_fn == NULL) {
349 PyObject *decoder = PyImport_ImportModule("json.decoder");
350 if (decoder == NULL)
351 return;
352 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000353 Py_DECREF(decoder);
Brett Cannon4b964f92008-05-05 20:21:38 +0000354 if (errmsg_fn == NULL)
355 return;
Brett Cannon4b964f92008-05-05 20:21:38 +0000356 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000357 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
Benjamin Peterson595e3cb2008-10-16 21:09:28 +0000358 if (pymsg) {
359 PyErr_SetObject(PyExc_ValueError, pymsg);
360 Py_DECREF(pymsg);
361 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000362}
363
364static PyObject *
365join_list_unicode(PyObject *lst)
366{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000367 /* return u''.join(lst) */
368 static PyObject *joinfn = NULL;
369 if (joinfn == NULL) {
370 PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
371 if (ustr == NULL)
372 return NULL;
373
374 joinfn = PyObject_GetAttrString(ustr, "join");
375 Py_DECREF(ustr);
376 if (joinfn == NULL)
377 return NULL;
Brett Cannon4b964f92008-05-05 20:21:38 +0000378 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000379 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
Brett Cannon4b964f92008-05-05 20:21:38 +0000380}
381
382static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000383_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
384 /* return (rval, idx) tuple, stealing reference to rval */
385 PyObject *tpl;
386 PyObject *pyidx;
387 /*
388 steal a reference to rval, returns (rval, idx)
389 */
390 if (rval == NULL) {
391 return NULL;
392 }
393 pyidx = PyInt_FromSsize_t(idx);
394 if (pyidx == NULL) {
395 Py_DECREF(rval);
396 return NULL;
397 }
398 tpl = PyTuple_New(2);
399 if (tpl == NULL) {
400 Py_DECREF(pyidx);
401 Py_DECREF(rval);
402 return NULL;
403 }
404 PyTuple_SET_ITEM(tpl, 0, rval);
405 PyTuple_SET_ITEM(tpl, 1, pyidx);
406 return tpl;
407}
408
409static PyObject *
410scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
411{
412 /* Read the JSON string from PyString pystr.
413 end is the index of the first character after the quote.
414 encoding is the encoding of pystr (must be an ASCII superset)
415 if strict is zero then literal control characters are allowed
416 *next_end_ptr is a return-by-reference index of the character
417 after the end quote
418
419 Return value is a new PyString (if ASCII-only) or PyUnicode
420 */
Brett Cannon4b964f92008-05-05 20:21:38 +0000421 PyObject *rval;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000422 Py_ssize_t len = PyString_GET_SIZE(pystr);
Brett Cannon4b964f92008-05-05 20:21:38 +0000423 Py_ssize_t begin = end - 1;
Brett Cannon8e9757e2010-05-03 23:43:49 +0000424 Py_ssize_t next;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000425 char *buf = PyString_AS_STRING(pystr);
Brett Cannon4b964f92008-05-05 20:21:38 +0000426 PyObject *chunks = PyList_New(0);
427 if (chunks == NULL) {
428 goto bail;
429 }
Bob Ippolitod648f642008-07-19 21:59:50 +0000430 if (end < 0 || len <= end) {
431 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
432 goto bail;
433 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000434 while (1) {
435 /* Find the end of the string or the next escape */
436 Py_UNICODE c = 0;
437 PyObject *chunk = NULL;
438 for (next = end; next < len; next++) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000439 c = (unsigned char)buf[next];
Brett Cannon4b964f92008-05-05 20:21:38 +0000440 if (c == '"' || c == '\\') {
441 break;
442 }
443 else if (strict && c <= 0x1f) {
Bob Ippolitod648f642008-07-19 21:59:50 +0000444 raise_errmsg("Invalid control character at", pystr, next);
Brett Cannon4b964f92008-05-05 20:21:38 +0000445 goto bail;
446 }
447 }
448 if (!(c == '"' || c == '\\')) {
449 raise_errmsg("Unterminated string starting at", pystr, begin);
450 goto bail;
451 }
452 /* Pick up this chunk if it's not zero length */
453 if (next != end) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000454 PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000455 if (strchunk == NULL) {
456 goto bail;
457 }
Barry Warsawfa658272010-11-02 21:03:09 +0000458 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
459 Py_DECREF(strchunk);
460 if (chunk == NULL) {
461 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000462 }
463 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000464 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000465 goto bail;
466 }
467 Py_DECREF(chunk);
468 }
469 next++;
470 if (c == '"') {
471 end = next;
472 break;
473 }
474 if (next == len) {
475 raise_errmsg("Unterminated string starting at", pystr, begin);
476 goto bail;
477 }
478 c = buf[next];
479 if (c != 'u') {
480 /* Non-unicode backslash escapes */
481 end = next + 1;
482 switch (c) {
483 case '"': break;
484 case '\\': break;
485 case '/': break;
486 case 'b': c = '\b'; break;
487 case 'f': c = '\f'; break;
488 case 'n': c = '\n'; break;
489 case 'r': c = '\r'; break;
490 case 't': c = '\t'; break;
491 default: c = 0;
492 }
493 if (c == 0) {
494 raise_errmsg("Invalid \\escape", pystr, end - 2);
495 goto bail;
496 }
497 }
498 else {
499 c = 0;
500 next++;
501 end = next + 4;
502 if (end >= len) {
503 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
504 goto bail;
505 }
506 /* Decode 4 hex digits */
507 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000508 Py_UNICODE digit = buf[next];
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000509 c <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000510 switch (digit) {
511 case '0': case '1': case '2': case '3': case '4':
512 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000513 c |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000514 case 'a': case 'b': case 'c': case 'd': case 'e':
515 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000516 c |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000517 case 'A': case 'B': case 'C': case 'D': case 'E':
518 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000519 c |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000520 default:
521 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
522 goto bail;
523 }
524 }
525#ifdef Py_UNICODE_WIDE
526 /* Surrogate pair */
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200527 if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
528 buf[next++] == '\\' &&
529 buf[next++] == 'u') {
Brett Cannon4b964f92008-05-05 20:21:38 +0000530 Py_UNICODE c2 = 0;
Brett Cannon4b964f92008-05-05 20:21:38 +0000531 end += 6;
532 /* Decode 4 hex digits */
533 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000534 Py_UNICODE digit = buf[next];
Antoine Pitrou22ad2452010-10-09 15:28:59 +0000535 c2 <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000536 switch (digit) {
537 case '0': case '1': case '2': case '3': case '4':
538 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000539 c2 |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000540 case 'a': case 'b': case 'c': case 'd': case 'e':
541 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000542 c2 |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000543 case 'A': case 'B': case 'C': case 'D': case 'E':
544 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000545 c2 |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000546 default:
547 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
548 goto bail;
549 }
550 }
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200551 if ((c2 & 0xfc00) == 0xdc00)
552 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
553 else
554 end -= 6;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000555 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000556#endif
557 }
Barry Warsawfa658272010-11-02 21:03:09 +0000558 chunk = PyUnicode_FromUnicode(&c, 1);
559 if (chunk == NULL) {
560 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000561 }
562 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000563 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000564 goto bail;
565 }
566 Py_DECREF(chunk);
567 }
568
Ezio Melottidf8a8f72011-05-04 14:40:53 +0300569 rval = join_list_unicode(chunks);
Brett Cannon4b964f92008-05-05 20:21:38 +0000570 if (rval == NULL) {
571 goto bail;
572 }
Benjamin Peterson336680e2008-10-16 21:48:06 +0000573 Py_CLEAR(chunks);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000574 *next_end_ptr = end;
575 return rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000576bail:
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000577 *next_end_ptr = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000578 Py_XDECREF(chunks);
579 return NULL;
580}
581
582
583static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000584scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Brett Cannon4b964f92008-05-05 20:21:38 +0000585{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000586 /* Read the JSON string from PyUnicode pystr.
587 end is the index of the first character after the quote.
588 if strict is zero then literal control characters are allowed
589 *next_end_ptr is a return-by-reference index of the character
590 after the end quote
591
592 Return value is a new PyUnicode
593 */
Brett Cannon4b964f92008-05-05 20:21:38 +0000594 PyObject *rval;
595 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
596 Py_ssize_t begin = end - 1;
Brett Cannon8e9757e2010-05-03 23:43:49 +0000597 Py_ssize_t next;
Brett Cannon4b964f92008-05-05 20:21:38 +0000598 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
599 PyObject *chunks = PyList_New(0);
600 if (chunks == NULL) {
601 goto bail;
602 }
Bob Ippolitod648f642008-07-19 21:59:50 +0000603 if (end < 0 || len <= end) {
604 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
605 goto bail;
606 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000607 while (1) {
608 /* Find the end of the string or the next escape */
609 Py_UNICODE c = 0;
610 PyObject *chunk = NULL;
611 for (next = end; next < len; next++) {
612 c = buf[next];
613 if (c == '"' || c == '\\') {
614 break;
615 }
616 else if (strict && c <= 0x1f) {
Bob Ippolitod648f642008-07-19 21:59:50 +0000617 raise_errmsg("Invalid control character at", pystr, next);
Brett Cannon4b964f92008-05-05 20:21:38 +0000618 goto bail;
619 }
620 }
621 if (!(c == '"' || c == '\\')) {
622 raise_errmsg("Unterminated string starting at", pystr, begin);
623 goto bail;
624 }
625 /* Pick up this chunk if it's not zero length */
626 if (next != end) {
627 chunk = PyUnicode_FromUnicode(&buf[end], next - end);
628 if (chunk == NULL) {
629 goto bail;
630 }
631 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson87e6ad22008-10-16 21:27:54 +0000632 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000633 goto bail;
634 }
635 Py_DECREF(chunk);
636 }
637 next++;
638 if (c == '"') {
639 end = next;
640 break;
641 }
642 if (next == len) {
643 raise_errmsg("Unterminated string starting at", pystr, begin);
644 goto bail;
645 }
646 c = buf[next];
647 if (c != 'u') {
648 /* Non-unicode backslash escapes */
649 end = next + 1;
650 switch (c) {
651 case '"': break;
652 case '\\': break;
653 case '/': break;
654 case 'b': c = '\b'; break;
655 case 'f': c = '\f'; break;
656 case 'n': c = '\n'; break;
657 case 'r': c = '\r'; break;
658 case 't': c = '\t'; break;
659 default: c = 0;
660 }
661 if (c == 0) {
662 raise_errmsg("Invalid \\escape", pystr, end - 2);
663 goto bail;
664 }
665 }
666 else {
667 c = 0;
668 next++;
669 end = next + 4;
670 if (end >= len) {
671 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
672 goto bail;
673 }
674 /* Decode 4 hex digits */
675 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000676 Py_UNICODE digit = buf[next];
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000677 c <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000678 switch (digit) {
679 case '0': case '1': case '2': case '3': case '4':
680 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000681 c |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000682 case 'a': case 'b': case 'c': case 'd': case 'e':
683 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000684 c |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000685 case 'A': case 'B': case 'C': case 'D': case 'E':
686 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000687 c |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000688 default:
689 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
690 goto bail;
691 }
692 }
693#ifdef Py_UNICODE_WIDE
694 /* Surrogate pair */
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200695 if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
696 buf[next++] == '\\' && buf[next++] == 'u') {
Brett Cannon4b964f92008-05-05 20:21:38 +0000697 Py_UNICODE c2 = 0;
Brett Cannon4b964f92008-05-05 20:21:38 +0000698 end += 6;
699 /* Decode 4 hex digits */
700 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000701 Py_UNICODE digit = buf[next];
Antoine Pitrou22ad2452010-10-09 15:28:59 +0000702 c2 <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000703 switch (digit) {
704 case '0': case '1': case '2': case '3': case '4':
705 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000706 c2 |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000707 case 'a': case 'b': case 'c': case 'd': case 'e':
708 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000709 c2 |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000710 case 'A': case 'B': case 'C': case 'D': case 'E':
711 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000712 c2 |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000713 default:
714 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
715 goto bail;
716 }
717 }
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200718 if ((c2 & 0xfc00) == 0xdc00)
719 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
720 else
721 end -= 6;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000722 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000723#endif
724 }
725 chunk = PyUnicode_FromUnicode(&c, 1);
726 if (chunk == NULL) {
727 goto bail;
728 }
729 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000730 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000731 goto bail;
732 }
733 Py_DECREF(chunk);
734 }
735
736 rval = join_list_unicode(chunks);
737 if (rval == NULL) {
738 goto bail;
739 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000740 Py_DECREF(chunks);
741 *next_end_ptr = end;
742 return rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000743bail:
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000744 *next_end_ptr = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000745 Py_XDECREF(chunks);
746 return NULL;
747}
748
749PyDoc_STRVAR(pydoc_scanstring,
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000750 "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
751 "\n"
752 "Scan the string s for a JSON string. End is the index of the\n"
753 "character in s after the quote that started the JSON string.\n"
754 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
755 "on attempt to decode an invalid string. If strict is False then literal\n"
756 "control characters are allowed in the string.\n"
757 "\n"
758 "Returns a tuple of the decoded string and the index of the character in s\n"
759 "after the end quote."
760);
Brett Cannon4b964f92008-05-05 20:21:38 +0000761
762static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000763py_scanstring(PyObject* self UNUSED, PyObject *args)
Brett Cannon4b964f92008-05-05 20:21:38 +0000764{
765 PyObject *pystr;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000766 PyObject *rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000767 Py_ssize_t end;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000768 Py_ssize_t next_end = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000769 char *encoding = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000770 int strict = 1;
771 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000772 return NULL;
773 }
774 if (encoding == NULL) {
775 encoding = DEFAULT_ENCODING;
776 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000777 if (PyString_Check(pystr)) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000778 rval = scanstring_str(pystr, end, encoding, strict, &next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000779 }
780 else if (PyUnicode_Check(pystr)) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000781 rval = scanstring_unicode(pystr, end, strict, &next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000782 }
783 else {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000784 PyErr_Format(PyExc_TypeError,
785 "first argument must be a string, not %.80s",
Brett Cannon4b964f92008-05-05 20:21:38 +0000786 Py_TYPE(pystr)->tp_name);
787 return NULL;
788 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000789 return _build_rval_index_tuple(rval, next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000790}
791
792PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000793 "encode_basestring_ascii(basestring) -> str\n"
794 "\n"
795 "Return an ASCII-only JSON representation of a Python string"
796);
Brett Cannon4b964f92008-05-05 20:21:38 +0000797
798static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000799py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Brett Cannon4b964f92008-05-05 20:21:38 +0000800{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000801 /* Return an ASCII-only JSON representation of a Python string */
Brett Cannon4b964f92008-05-05 20:21:38 +0000802 /* METH_O */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000803 if (PyString_Check(pystr)) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000804 return ascii_escape_str(pystr);
805 }
806 else if (PyUnicode_Check(pystr)) {
807 return ascii_escape_unicode(pystr);
808 }
809 else {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000810 PyErr_Format(PyExc_TypeError,
811 "first argument must be a string, not %.80s",
Brett Cannon4b964f92008-05-05 20:21:38 +0000812 Py_TYPE(pystr)->tp_name);
813 return NULL;
814 }
815}
816
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000817static void
818scanner_dealloc(PyObject *self)
819{
820 /* Deallocate scanner object */
821 scanner_clear(self);
822 Py_TYPE(self)->tp_free(self);
823}
824
825static int
826scanner_traverse(PyObject *self, visitproc visit, void *arg)
827{
828 PyScannerObject *s;
829 assert(PyScanner_Check(self));
830 s = (PyScannerObject *)self;
831 Py_VISIT(s->encoding);
832 Py_VISIT(s->strict);
833 Py_VISIT(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000834 Py_VISIT(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000835 Py_VISIT(s->parse_float);
836 Py_VISIT(s->parse_int);
837 Py_VISIT(s->parse_constant);
838 return 0;
839}
840
841static int
842scanner_clear(PyObject *self)
843{
844 PyScannerObject *s;
845 assert(PyScanner_Check(self));
846 s = (PyScannerObject *)self;
847 Py_CLEAR(s->encoding);
848 Py_CLEAR(s->strict);
849 Py_CLEAR(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000850 Py_CLEAR(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000851 Py_CLEAR(s->parse_float);
852 Py_CLEAR(s->parse_int);
853 Py_CLEAR(s->parse_constant);
854 return 0;
855}
856
857static PyObject *
858_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
859 /* Read a JSON object from PyString pystr.
860 idx is the index of the first character after the opening curly brace.
861 *next_idx_ptr is a return-by-reference index to the first character after
862 the closing curly brace.
863
864 Returns a new PyObject (usually a dict, but object_hook can change that)
865 */
866 char *str = PyString_AS_STRING(pystr);
867 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000868 PyObject *rval;
869 PyObject *pairs;
870 PyObject *item;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000871 PyObject *key = NULL;
872 PyObject *val = NULL;
873 char *encoding = PyString_AS_STRING(s->encoding);
874 int strict = PyObject_IsTrue(s->strict);
875 Py_ssize_t next_idx;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000876
Serhiy Storchaka5127ed72015-05-30 17:45:12 +0300877 if (strict < 0)
878 return NULL;
879
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000880 pairs = PyList_New(0);
881 if (pairs == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000882 return NULL;
883
884 /* skip whitespace after { */
885 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
886
887 /* only loop if the object is non-empty */
888 if (idx <= end_idx && str[idx] != '}') {
889 while (idx <= end_idx) {
890 /* read key */
891 if (str[idx] != '"') {
892 raise_errmsg("Expecting property name", pystr, idx);
893 goto bail;
894 }
895 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
896 if (key == NULL)
897 goto bail;
898 idx = next_idx;
899
900 /* skip whitespace between key and : delimiter, read :, skip whitespace */
901 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
902 if (idx > end_idx || str[idx] != ':') {
903 raise_errmsg("Expecting : delimiter", pystr, idx);
904 goto bail;
905 }
906 idx++;
907 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
908
909 /* read any JSON data type */
910 val = scan_once_str(s, pystr, idx, &next_idx);
911 if (val == NULL)
912 goto bail;
913
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000914 item = PyTuple_Pack(2, key, val);
915 if (item == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000916 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000917 Py_CLEAR(key);
918 Py_CLEAR(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000919 if (PyList_Append(pairs, item) == -1) {
920 Py_DECREF(item);
921 goto bail;
922 }
923 Py_DECREF(item);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000924 idx = next_idx;
925
926 /* skip whitespace before } or , */
927 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
928
929 /* bail if the object is closed or we didn't get the , delimiter */
930 if (idx > end_idx) break;
931 if (str[idx] == '}') {
932 break;
933 }
934 else if (str[idx] != ',') {
935 raise_errmsg("Expecting , delimiter", pystr, idx);
936 goto bail;
937 }
938 idx++;
939
940 /* skip whitespace after , delimiter */
941 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
942 }
943 }
944 /* verify that idx < end_idx, str[idx] should be '}' */
945 if (idx > end_idx || str[idx] != '}') {
946 raise_errmsg("Expecting object", pystr, end_idx);
947 goto bail;
948 }
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000949
950 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
951 if (s->pairs_hook != Py_None) {
952 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
953 if (val == NULL)
954 goto bail;
955 Py_DECREF(pairs);
956 *next_idx_ptr = idx + 1;
957 return val;
958 }
959
960 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
961 pairs, NULL);
962 if (rval == NULL)
963 goto bail;
964 Py_CLEAR(pairs);
965
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000966 /* if object_hook is not None: rval = object_hook(rval) */
967 if (s->object_hook != Py_None) {
968 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
969 if (val == NULL)
970 goto bail;
971 Py_DECREF(rval);
972 rval = val;
973 val = NULL;
974 }
975 *next_idx_ptr = idx + 1;
976 return rval;
977bail:
978 Py_XDECREF(key);
979 Py_XDECREF(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000980 Py_XDECREF(pairs);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000981 return NULL;
982}
983
984static PyObject *
985_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
986 /* Read a JSON object from PyUnicode pystr.
987 idx is the index of the first character after the opening curly brace.
988 *next_idx_ptr is a return-by-reference index to the first character after
989 the closing curly brace.
990
991 Returns a new PyObject (usually a dict, but object_hook can change that)
992 */
993 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
994 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000995 PyObject *rval;
996 PyObject *pairs;
997 PyObject *item;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000998 PyObject *key = NULL;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000999 PyObject *val = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001000 int strict = PyObject_IsTrue(s->strict);
1001 Py_ssize_t next_idx;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001002
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001003 if (strict < 0)
1004 return NULL;
1005
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001006 pairs = PyList_New(0);
1007 if (pairs == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001008 return NULL;
1009
1010 /* skip whitespace after { */
1011 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1012
1013 /* only loop if the object is non-empty */
1014 if (idx <= end_idx && str[idx] != '}') {
1015 while (idx <= end_idx) {
1016 /* read key */
1017 if (str[idx] != '"') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001018 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001019 goto bail;
1020 }
1021 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1022 if (key == NULL)
1023 goto bail;
1024 idx = next_idx;
1025
1026 /* skip whitespace between key and : delimiter, read :, skip whitespace */
1027 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1028 if (idx > end_idx || str[idx] != ':') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001029 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001030 goto bail;
1031 }
1032 idx++;
1033 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1034
1035 /* read any JSON term */
1036 val = scan_once_unicode(s, pystr, idx, &next_idx);
1037 if (val == NULL)
1038 goto bail;
1039
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001040 item = PyTuple_Pack(2, key, val);
1041 if (item == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001042 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001043 Py_CLEAR(key);
1044 Py_CLEAR(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001045 if (PyList_Append(pairs, item) == -1) {
1046 Py_DECREF(item);
1047 goto bail;
1048 }
1049 Py_DECREF(item);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001050 idx = next_idx;
1051
1052 /* skip whitespace before } or , */
1053 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1054
1055 /* bail if the object is closed or we didn't get the , delimiter */
1056 if (idx > end_idx) break;
1057 if (str[idx] == '}') {
1058 break;
1059 }
1060 else if (str[idx] != ',') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001061 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001062 goto bail;
1063 }
1064 idx++;
1065
1066 /* skip whitespace after , delimiter */
1067 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1068 }
1069 }
1070
1071 /* verify that idx < end_idx, str[idx] should be '}' */
1072 if (idx > end_idx || str[idx] != '}') {
1073 raise_errmsg("Expecting object", pystr, end_idx);
1074 goto bail;
1075 }
1076
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001077 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1078 if (s->pairs_hook != Py_None) {
1079 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1080 if (val == NULL)
1081 goto bail;
1082 Py_DECREF(pairs);
1083 *next_idx_ptr = idx + 1;
1084 return val;
1085 }
1086
1087 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
1088 pairs, NULL);
1089 if (rval == NULL)
1090 goto bail;
1091 Py_CLEAR(pairs);
1092
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001093 /* if object_hook is not None: rval = object_hook(rval) */
1094 if (s->object_hook != Py_None) {
1095 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1096 if (val == NULL)
1097 goto bail;
1098 Py_DECREF(rval);
1099 rval = val;
1100 val = NULL;
1101 }
1102 *next_idx_ptr = idx + 1;
1103 return rval;
1104bail:
1105 Py_XDECREF(key);
1106 Py_XDECREF(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001107 Py_XDECREF(pairs);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001108 return NULL;
1109}
1110
1111static PyObject *
1112_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1113 /* Read a JSON array from PyString pystr.
1114 idx is the index of the first character after the opening brace.
1115 *next_idx_ptr is a return-by-reference index to the first character after
1116 the closing brace.
1117
1118 Returns a new PyList
1119 */
1120 char *str = PyString_AS_STRING(pystr);
1121 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1122 PyObject *val = NULL;
1123 PyObject *rval = PyList_New(0);
1124 Py_ssize_t next_idx;
1125 if (rval == NULL)
1126 return NULL;
1127
1128 /* skip whitespace after [ */
1129 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1130
1131 /* only loop if the array is non-empty */
1132 if (idx <= end_idx && str[idx] != ']') {
1133 while (idx <= end_idx) {
1134
1135 /* read any JSON term and de-tuplefy the (rval, idx) */
1136 val = scan_once_str(s, pystr, idx, &next_idx);
1137 if (val == NULL)
1138 goto bail;
1139
1140 if (PyList_Append(rval, val) == -1)
1141 goto bail;
1142
1143 Py_CLEAR(val);
1144 idx = next_idx;
1145
1146 /* skip whitespace between term and , */
1147 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1148
1149 /* bail if the array is closed or we didn't get the , delimiter */
1150 if (idx > end_idx) break;
1151 if (str[idx] == ']') {
1152 break;
1153 }
1154 else if (str[idx] != ',') {
1155 raise_errmsg("Expecting , delimiter", pystr, idx);
1156 goto bail;
1157 }
1158 idx++;
1159
1160 /* skip whitespace after , */
1161 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1162 }
1163 }
1164
1165 /* verify that idx < end_idx, str[idx] should be ']' */
1166 if (idx > end_idx || str[idx] != ']') {
1167 raise_errmsg("Expecting object", pystr, end_idx);
1168 goto bail;
1169 }
1170 *next_idx_ptr = idx + 1;
1171 return rval;
1172bail:
1173 Py_XDECREF(val);
1174 Py_DECREF(rval);
1175 return NULL;
1176}
1177
1178static PyObject *
1179_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1180 /* Read a JSON array from PyString pystr.
1181 idx is the index of the first character after the opening brace.
1182 *next_idx_ptr is a return-by-reference index to the first character after
1183 the closing brace.
1184
1185 Returns a new PyList
1186 */
1187 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1188 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1189 PyObject *val = NULL;
1190 PyObject *rval = PyList_New(0);
1191 Py_ssize_t next_idx;
1192 if (rval == NULL)
1193 return NULL;
1194
1195 /* skip whitespace after [ */
1196 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1197
1198 /* only loop if the array is non-empty */
1199 if (idx <= end_idx && str[idx] != ']') {
1200 while (idx <= end_idx) {
1201
1202 /* read any JSON term */
1203 val = scan_once_unicode(s, pystr, idx, &next_idx);
1204 if (val == NULL)
1205 goto bail;
1206
1207 if (PyList_Append(rval, val) == -1)
1208 goto bail;
1209
1210 Py_CLEAR(val);
1211 idx = next_idx;
1212
1213 /* skip whitespace between term and , */
1214 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1215
1216 /* bail if the array is closed or we didn't get the , delimiter */
1217 if (idx > end_idx) break;
1218 if (str[idx] == ']') {
1219 break;
1220 }
1221 else if (str[idx] != ',') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001222 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001223 goto bail;
1224 }
1225 idx++;
1226
1227 /* skip whitespace after , */
1228 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1229 }
1230 }
1231
1232 /* verify that idx < end_idx, str[idx] should be ']' */
1233 if (idx > end_idx || str[idx] != ']') {
1234 raise_errmsg("Expecting object", pystr, end_idx);
1235 goto bail;
1236 }
1237 *next_idx_ptr = idx + 1;
1238 return rval;
1239bail:
1240 Py_XDECREF(val);
1241 Py_DECREF(rval);
1242 return NULL;
1243}
1244
1245static PyObject *
1246_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1247 /* Read a JSON constant from PyString pystr.
1248 constant is the constant string that was found
1249 ("NaN", "Infinity", "-Infinity").
1250 idx is the index of the first character of the constant
1251 *next_idx_ptr is a return-by-reference index to the first character after
1252 the constant.
1253
1254 Returns the result of parse_constant
1255 */
1256 PyObject *cstr;
1257 PyObject *rval;
1258 /* constant is "NaN", "Infinity", or "-Infinity" */
1259 cstr = PyString_InternFromString(constant);
1260 if (cstr == NULL)
1261 return NULL;
1262
1263 /* rval = parse_constant(constant) */
1264 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1265 idx += PyString_GET_SIZE(cstr);
1266 Py_DECREF(cstr);
1267 *next_idx_ptr = idx;
1268 return rval;
1269}
1270
1271static PyObject *
1272_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1273 /* Read a JSON number from PyString pystr.
1274 idx is the index of the first character of the number
1275 *next_idx_ptr is a return-by-reference index to the first character after
1276 the number.
1277
1278 Returns a new PyObject representation of that number:
1279 PyInt, PyLong, or PyFloat.
1280 May return other types if parse_int or parse_float are set
1281 */
1282 char *str = PyString_AS_STRING(pystr);
1283 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1284 Py_ssize_t idx = start;
1285 int is_float = 0;
1286 PyObject *rval;
1287 PyObject *numstr;
1288
1289 /* read a sign if it's there, make sure it's not the end of the string */
1290 if (str[idx] == '-') {
1291 idx++;
1292 if (idx > end_idx) {
1293 PyErr_SetNone(PyExc_StopIteration);
1294 return NULL;
1295 }
1296 }
1297
1298 /* read as many integer digits as we find as long as it doesn't start with 0 */
1299 if (str[idx] >= '1' && str[idx] <= '9') {
1300 idx++;
1301 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1302 }
1303 /* if it starts with 0 we only expect one integer digit */
1304 else if (str[idx] == '0') {
1305 idx++;
1306 }
1307 /* no integer digits, error */
1308 else {
1309 PyErr_SetNone(PyExc_StopIteration);
1310 return NULL;
1311 }
1312
1313 /* if the next char is '.' followed by a digit then read all float digits */
1314 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1315 is_float = 1;
1316 idx += 2;
1317 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1318 }
1319
1320 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1321 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1322
1323 /* save the index of the 'e' or 'E' just in case we need to backtrack */
1324 Py_ssize_t e_start = idx;
1325 idx++;
1326
1327 /* read an exponent sign if present */
1328 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1329
1330 /* read all digits */
1331 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1332
1333 /* if we got a digit, then parse as float. if not, backtrack */
1334 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1335 is_float = 1;
1336 }
1337 else {
1338 idx = e_start;
1339 }
1340 }
1341
1342 /* copy the section we determined to be a number */
1343 numstr = PyString_FromStringAndSize(&str[start], idx - start);
1344 if (numstr == NULL)
1345 return NULL;
1346 if (is_float) {
1347 /* parse as a float using a fast path if available, otherwise call user defined method */
1348 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1349 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1350 }
1351 else {
Eric Smith129c97d2009-10-28 08:44:37 +00001352 double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1353 NULL, NULL);
1354 if (d == -1.0 && PyErr_Occurred())
1355 return NULL;
1356 rval = PyFloat_FromDouble(d);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001357 }
1358 }
1359 else {
1360 /* parse as an int using a fast path if available, otherwise call user defined method */
1361 if (s->parse_int != (PyObject *)&PyInt_Type) {
1362 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1363 }
1364 else {
1365 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1366 }
1367 }
1368 Py_DECREF(numstr);
1369 *next_idx_ptr = idx;
1370 return rval;
1371}
1372
1373static PyObject *
1374_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1375 /* Read a JSON number from PyUnicode pystr.
1376 idx is the index of the first character of the number
1377 *next_idx_ptr is a return-by-reference index to the first character after
1378 the number.
1379
1380 Returns a new PyObject representation of that number:
1381 PyInt, PyLong, or PyFloat.
1382 May return other types if parse_int or parse_float are set
1383 */
1384 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1385 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1386 Py_ssize_t idx = start;
1387 int is_float = 0;
1388 PyObject *rval;
1389 PyObject *numstr;
1390
1391 /* read a sign if it's there, make sure it's not the end of the string */
1392 if (str[idx] == '-') {
1393 idx++;
1394 if (idx > end_idx) {
1395 PyErr_SetNone(PyExc_StopIteration);
1396 return NULL;
1397 }
1398 }
1399
1400 /* read as many integer digits as we find as long as it doesn't start with 0 */
1401 if (str[idx] >= '1' && str[idx] <= '9') {
1402 idx++;
1403 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1404 }
1405 /* if it starts with 0 we only expect one integer digit */
1406 else if (str[idx] == '0') {
1407 idx++;
1408 }
1409 /* no integer digits, error */
1410 else {
1411 PyErr_SetNone(PyExc_StopIteration);
1412 return NULL;
1413 }
1414
1415 /* if the next char is '.' followed by a digit then read all float digits */
1416 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1417 is_float = 1;
1418 idx += 2;
Bob Ippolito76a982a2009-03-29 22:33:58 +00001419 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001420 }
1421
1422 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1423 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1424 Py_ssize_t e_start = idx;
1425 idx++;
1426
1427 /* read an exponent sign if present */
1428 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1429
1430 /* read all digits */
1431 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1432
1433 /* if we got a digit, then parse as float. if not, backtrack */
1434 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1435 is_float = 1;
1436 }
1437 else {
1438 idx = e_start;
1439 }
1440 }
1441
1442 /* copy the section we determined to be a number */
1443 numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1444 if (numstr == NULL)
1445 return NULL;
1446 if (is_float) {
1447 /* parse as a float using a fast path if available, otherwise call user defined method */
1448 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1449 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1450 }
1451 else {
1452 rval = PyFloat_FromString(numstr, NULL);
1453 }
1454 }
1455 else {
1456 /* no fast path for unicode -> int, just call */
1457 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1458 }
1459 Py_DECREF(numstr);
1460 *next_idx_ptr = idx;
1461 return rval;
1462}
1463
1464static PyObject *
1465scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1466{
1467 /* Read one JSON term (of any kind) from PyString pystr.
1468 idx is the index of the first character of the term
1469 *next_idx_ptr is a return-by-reference index to the first character after
1470 the number.
1471
1472 Returns a new PyObject representation of the term.
1473 */
Ezio Melotticec46492011-05-07 17:40:23 +03001474 PyObject *res;
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001475 int strict;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001476 char *str = PyString_AS_STRING(pystr);
1477 Py_ssize_t length = PyString_GET_SIZE(pystr);
Benjamin Peterson3e5d87b2014-04-14 11:43:09 -04001478 if (idx < 0) {
1479 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1480 return NULL;
1481 }
1482 if (idx >= length) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001483 PyErr_SetNone(PyExc_StopIteration);
1484 return NULL;
1485 }
1486 switch (str[idx]) {
1487 case '"':
1488 /* string */
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001489 strict = PyObject_IsTrue(s->strict);
1490 if (strict < 0)
1491 return NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001492 return scanstring_str(pystr, idx + 1,
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001493 PyString_AS_STRING(s->encoding), strict, next_idx_ptr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001494 case '{':
1495 /* object */
Ezio Melotticec46492011-05-07 17:40:23 +03001496 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1497 "from a byte string"))
1498 return NULL;
1499 res = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1500 Py_LeaveRecursiveCall();
1501 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001502 case '[':
1503 /* array */
Ezio Melotticec46492011-05-07 17:40:23 +03001504 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1505 "from a byte string"))
1506 return NULL;
1507 res = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1508 Py_LeaveRecursiveCall();
1509 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001510 case 'n':
1511 /* null */
1512 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1513 Py_INCREF(Py_None);
1514 *next_idx_ptr = idx + 4;
1515 return Py_None;
1516 }
1517 break;
1518 case 't':
1519 /* true */
1520 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1521 Py_INCREF(Py_True);
1522 *next_idx_ptr = idx + 4;
1523 return Py_True;
1524 }
1525 break;
1526 case 'f':
1527 /* false */
1528 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1529 Py_INCREF(Py_False);
1530 *next_idx_ptr = idx + 5;
1531 return Py_False;
1532 }
1533 break;
1534 case 'N':
1535 /* NaN */
1536 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1537 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1538 }
1539 break;
1540 case 'I':
1541 /* Infinity */
1542 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1543 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1544 }
1545 break;
1546 case '-':
1547 /* -Infinity */
1548 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1549 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1550 }
1551 break;
1552 }
1553 /* Didn't find a string, object, array, or named constant. Look for a number. */
1554 return _match_number_str(s, pystr, idx, next_idx_ptr);
1555}
1556
1557static PyObject *
1558scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1559{
1560 /* Read one JSON term (of any kind) from PyUnicode pystr.
1561 idx is the index of the first character of the term
1562 *next_idx_ptr is a return-by-reference index to the first character after
1563 the number.
1564
1565 Returns a new PyObject representation of the term.
1566 */
Ezio Melotticec46492011-05-07 17:40:23 +03001567 PyObject *res;
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001568 int strict;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001569 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1570 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
Benjamin Peterson3e5d87b2014-04-14 11:43:09 -04001571 if (idx < 0) {
1572 PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1573 return NULL;
1574 }
1575 if (idx >= length) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001576 PyErr_SetNone(PyExc_StopIteration);
1577 return NULL;
1578 }
1579 switch (str[idx]) {
1580 case '"':
1581 /* string */
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001582 strict = PyObject_IsTrue(s->strict);
1583 if (strict < 0)
1584 return NULL;
1585 return scanstring_unicode(pystr, idx + 1, strict, next_idx_ptr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001586 case '{':
1587 /* object */
Ezio Melotticec46492011-05-07 17:40:23 +03001588 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1589 "from a unicode string"))
1590 return NULL;
1591 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1592 Py_LeaveRecursiveCall();
1593 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001594 case '[':
1595 /* array */
Ezio Melotticec46492011-05-07 17:40:23 +03001596 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1597 "from a unicode string"))
1598 return NULL;
1599 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1600 Py_LeaveRecursiveCall();
1601 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001602 case 'n':
1603 /* null */
1604 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1605 Py_INCREF(Py_None);
1606 *next_idx_ptr = idx + 4;
1607 return Py_None;
1608 }
1609 break;
1610 case 't':
1611 /* true */
1612 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1613 Py_INCREF(Py_True);
1614 *next_idx_ptr = idx + 4;
1615 return Py_True;
1616 }
1617 break;
1618 case 'f':
1619 /* false */
1620 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1621 Py_INCREF(Py_False);
1622 *next_idx_ptr = idx + 5;
1623 return Py_False;
1624 }
1625 break;
1626 case 'N':
1627 /* NaN */
1628 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1629 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1630 }
1631 break;
1632 case 'I':
1633 /* Infinity */
1634 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1635 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1636 }
1637 break;
1638 case '-':
1639 /* -Infinity */
1640 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1641 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1642 }
1643 break;
1644 }
1645 /* Didn't find a string, object, array, or named constant. Look for a number. */
1646 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1647}
1648
1649static PyObject *
1650scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1651{
1652 /* Python callable interface to scan_once_{str,unicode} */
1653 PyObject *pystr;
1654 PyObject *rval;
1655 Py_ssize_t idx;
1656 Py_ssize_t next_idx = -1;
1657 static char *kwlist[] = {"string", "idx", NULL};
1658 PyScannerObject *s;
1659 assert(PyScanner_Check(self));
1660 s = (PyScannerObject *)self;
1661 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1662 return NULL;
1663
1664 if (PyString_Check(pystr)) {
1665 rval = scan_once_str(s, pystr, idx, &next_idx);
1666 }
1667 else if (PyUnicode_Check(pystr)) {
1668 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1669 }
1670 else {
1671 PyErr_Format(PyExc_TypeError,
1672 "first argument must be a string, not %.80s",
1673 Py_TYPE(pystr)->tp_name);
1674 return NULL;
1675 }
1676 return _build_rval_index_tuple(rval, next_idx);
1677}
1678
1679static PyObject *
1680scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1681{
1682 PyScannerObject *s;
1683 s = (PyScannerObject *)type->tp_alloc(type, 0);
1684 if (s != NULL) {
1685 s->encoding = NULL;
1686 s->strict = NULL;
1687 s->object_hook = NULL;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001688 s->pairs_hook = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001689 s->parse_float = NULL;
1690 s->parse_int = NULL;
1691 s->parse_constant = NULL;
1692 }
1693 return (PyObject *)s;
1694}
1695
1696static int
1697scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1698{
1699 /* Initialize Scanner object */
1700 PyObject *ctx;
1701 static char *kwlist[] = {"context", NULL};
1702 PyScannerObject *s;
1703
1704 assert(PyScanner_Check(self));
1705 s = (PyScannerObject *)self;
1706
1707 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1708 return -1;
1709
1710 /* PyString_AS_STRING is used on encoding */
1711 s->encoding = PyObject_GetAttrString(ctx, "encoding");
Antoine Pitrou187177f2009-12-08 15:40:51 +00001712 if (s->encoding == NULL)
1713 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001714 if (s->encoding == Py_None) {
1715 Py_DECREF(Py_None);
1716 s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1717 }
1718 else if (PyUnicode_Check(s->encoding)) {
1719 PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
Serhiy Storchakabc62af12016-04-06 09:51:18 +03001720 Py_XSETREF(s->encoding, tmp);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001721 }
Amaury Forgeot d'Arcdee76e62012-01-13 22:53:25 +01001722 if (s->encoding == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001723 goto bail;
Amaury Forgeot d'Arcdee76e62012-01-13 22:53:25 +01001724 if (!PyString_Check(s->encoding)) {
1725 PyErr_Format(PyExc_TypeError,
1726 "encoding must be a string, not %.80s",
1727 Py_TYPE(s->encoding)->tp_name);
1728 goto bail;
1729 }
1730
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001731
1732 /* All of these will fail "gracefully" so we don't need to verify them */
1733 s->strict = PyObject_GetAttrString(ctx, "strict");
1734 if (s->strict == NULL)
1735 goto bail;
1736 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1737 if (s->object_hook == NULL)
1738 goto bail;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001739 s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
Georg Brandld823bdc2011-01-02 14:20:16 +00001740 if (s->pairs_hook == NULL)
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001741 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001742 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1743 if (s->parse_float == NULL)
1744 goto bail;
1745 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1746 if (s->parse_int == NULL)
1747 goto bail;
1748 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1749 if (s->parse_constant == NULL)
1750 goto bail;
1751
1752 return 0;
1753
1754bail:
1755 Py_CLEAR(s->encoding);
1756 Py_CLEAR(s->strict);
1757 Py_CLEAR(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001758 Py_CLEAR(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001759 Py_CLEAR(s->parse_float);
1760 Py_CLEAR(s->parse_int);
1761 Py_CLEAR(s->parse_constant);
1762 return -1;
1763}
1764
1765PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1766
1767static
1768PyTypeObject PyScannerType = {
1769 PyObject_HEAD_INIT(NULL)
1770 0, /* tp_internal */
1771 "_json.Scanner", /* tp_name */
1772 sizeof(PyScannerObject), /* tp_basicsize */
1773 0, /* tp_itemsize */
1774 scanner_dealloc, /* tp_dealloc */
1775 0, /* tp_print */
1776 0, /* tp_getattr */
1777 0, /* tp_setattr */
1778 0, /* tp_compare */
1779 0, /* tp_repr */
1780 0, /* tp_as_number */
1781 0, /* tp_as_sequence */
1782 0, /* tp_as_mapping */
1783 0, /* tp_hash */
1784 scanner_call, /* tp_call */
1785 0, /* tp_str */
1786 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1787 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1788 0, /* tp_as_buffer */
1789 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1790 scanner_doc, /* tp_doc */
1791 scanner_traverse, /* tp_traverse */
1792 scanner_clear, /* tp_clear */
1793 0, /* tp_richcompare */
1794 0, /* tp_weaklistoffset */
1795 0, /* tp_iter */
1796 0, /* tp_iternext */
1797 0, /* tp_methods */
1798 scanner_members, /* tp_members */
1799 0, /* tp_getset */
1800 0, /* tp_base */
1801 0, /* tp_dict */
1802 0, /* tp_descr_get */
1803 0, /* tp_descr_set */
1804 0, /* tp_dictoffset */
1805 scanner_init, /* tp_init */
1806 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1807 scanner_new, /* tp_new */
1808 0,/* PyObject_GC_Del, */ /* tp_free */
1809};
1810
1811static PyObject *
1812encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1813{
1814 PyEncoderObject *s;
1815 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1816 if (s != NULL) {
1817 s->markers = NULL;
1818 s->defaultfn = NULL;
1819 s->encoder = NULL;
1820 s->indent = NULL;
1821 s->key_separator = NULL;
1822 s->item_separator = NULL;
1823 s->sort_keys = NULL;
1824 s->skipkeys = NULL;
1825 }
1826 return (PyObject *)s;
1827}
1828
1829static int
1830encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1831{
1832 /* initialize Encoder object */
1833 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1834
1835 PyEncoderObject *s;
Antoine Pitrou187177f2009-12-08 15:40:51 +00001836 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001837 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan_obj;
1838 int allow_nan;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001839
1840 assert(PyEncoder_Check(self));
1841 s = (PyEncoderObject *)self;
1842
1843 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
Antoine Pitrou187177f2009-12-08 15:40:51 +00001844 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001845 &sort_keys, &skipkeys, &allow_nan_obj))
1846 return -1;
1847
1848 allow_nan = PyObject_IsTrue(allow_nan_obj);
1849 if (allow_nan < 0)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001850 return -1;
1851
Serhiy Storchaka76a64ca2015-07-26 09:07:53 +03001852 if (markers != Py_None && !PyDict_Check(markers)) {
1853 PyErr_Format(PyExc_TypeError,
1854 "make_encoder() argument 1 must be dict or None, "
1855 "not %.200s", Py_TYPE(markers)->tp_name);
1856 return -1;
1857 }
1858
Antoine Pitrou187177f2009-12-08 15:40:51 +00001859 s->markers = markers;
1860 s->defaultfn = defaultfn;
1861 s->encoder = encoder;
1862 s->indent = indent;
1863 s->key_separator = key_separator;
1864 s->item_separator = item_separator;
1865 s->sort_keys = sort_keys;
1866 s->skipkeys = skipkeys;
1867 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03001868 s->allow_nan = allow_nan;
Antoine Pitrou187177f2009-12-08 15:40:51 +00001869
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001870 Py_INCREF(s->markers);
1871 Py_INCREF(s->defaultfn);
1872 Py_INCREF(s->encoder);
1873 Py_INCREF(s->indent);
1874 Py_INCREF(s->key_separator);
1875 Py_INCREF(s->item_separator);
1876 Py_INCREF(s->sort_keys);
1877 Py_INCREF(s->skipkeys);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001878 return 0;
1879}
1880
1881static PyObject *
1882encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1883{
1884 /* Python callable interface to encode_listencode_obj */
1885 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1886 PyObject *obj;
1887 PyObject *rval;
1888 Py_ssize_t indent_level;
1889 PyEncoderObject *s;
1890 assert(PyEncoder_Check(self));
1891 s = (PyEncoderObject *)self;
1892 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1893 &obj, _convertPyInt_AsSsize_t, &indent_level))
1894 return NULL;
1895 rval = PyList_New(0);
1896 if (rval == NULL)
1897 return NULL;
1898 if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1899 Py_DECREF(rval);
1900 return NULL;
1901 }
1902 return rval;
1903}
1904
1905static PyObject *
1906_encoded_const(PyObject *obj)
1907{
1908 /* Return the JSON string representation of None, True, False */
1909 if (obj == Py_None) {
1910 static PyObject *s_null = NULL;
1911 if (s_null == NULL) {
1912 s_null = PyString_InternFromString("null");
1913 }
1914 Py_INCREF(s_null);
1915 return s_null;
1916 }
1917 else if (obj == Py_True) {
1918 static PyObject *s_true = NULL;
1919 if (s_true == NULL) {
1920 s_true = PyString_InternFromString("true");
1921 }
1922 Py_INCREF(s_true);
1923 return s_true;
1924 }
1925 else if (obj == Py_False) {
1926 static PyObject *s_false = NULL;
1927 if (s_false == NULL) {
1928 s_false = PyString_InternFromString("false");
1929 }
1930 Py_INCREF(s_false);
1931 return s_false;
1932 }
1933 else {
1934 PyErr_SetString(PyExc_ValueError, "not a const");
1935 return NULL;
1936 }
1937}
1938
1939static PyObject *
1940encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1941{
1942 /* Return the JSON representation of a PyFloat */
1943 double i = PyFloat_AS_DOUBLE(obj);
1944 if (!Py_IS_FINITE(i)) {
1945 if (!s->allow_nan) {
1946 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1947 return NULL;
1948 }
1949 if (i > 0) {
1950 return PyString_FromString("Infinity");
1951 }
1952 else if (i < 0) {
1953 return PyString_FromString("-Infinity");
1954 }
1955 else {
1956 return PyString_FromString("NaN");
1957 }
1958 }
1959 /* Use a better float format here? */
1960 return PyObject_Repr(obj);
1961}
1962
1963static PyObject *
1964encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1965{
1966 /* Return the JSON representation of a string */
1967 if (s->fast_encode)
1968 return py_encode_basestring_ascii(NULL, obj);
1969 else
1970 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1971}
1972
1973static int
1974_steal_list_append(PyObject *lst, PyObject *stolen)
1975{
1976 /* Append stolen and then decrement its reference count */
1977 int rval = PyList_Append(lst, stolen);
1978 Py_DECREF(stolen);
1979 return rval;
1980}
1981
1982static int
1983encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
1984{
1985 /* Encode Python object obj to a JSON term, rval is a PyList */
1986 PyObject *newobj;
1987 int rv;
1988
1989 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1990 PyObject *cstr = _encoded_const(obj);
1991 if (cstr == NULL)
1992 return -1;
1993 return _steal_list_append(rval, cstr);
1994 }
1995 else if (PyString_Check(obj) || PyUnicode_Check(obj))
1996 {
1997 PyObject *encoded = encoder_encode_string(s, obj);
1998 if (encoded == NULL)
1999 return -1;
2000 return _steal_list_append(rval, encoded);
2001 }
2002 else if (PyInt_Check(obj) || PyLong_Check(obj)) {
2003 PyObject *encoded = PyObject_Str(obj);
2004 if (encoded == NULL)
2005 return -1;
2006 return _steal_list_append(rval, encoded);
2007 }
2008 else if (PyFloat_Check(obj)) {
2009 PyObject *encoded = encoder_encode_float(s, obj);
2010 if (encoded == NULL)
2011 return -1;
2012 return _steal_list_append(rval, encoded);
2013 }
2014 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002015 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2016 return -1;
2017 rv = encoder_listencode_list(s, rval, obj, indent_level);
2018 Py_LeaveRecursiveCall();
2019 return rv;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002020 }
2021 else if (PyDict_Check(obj)) {
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002022 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2023 return -1;
2024 rv = encoder_listencode_dict(s, rval, obj, indent_level);
2025 Py_LeaveRecursiveCall();
2026 return rv;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002027 }
2028 else {
2029 PyObject *ident = NULL;
2030 if (s->markers != Py_None) {
2031 int has_key;
2032 ident = PyLong_FromVoidPtr(obj);
2033 if (ident == NULL)
2034 return -1;
2035 has_key = PyDict_Contains(s->markers, ident);
2036 if (has_key) {
2037 if (has_key != -1)
2038 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2039 Py_DECREF(ident);
2040 return -1;
2041 }
2042 if (PyDict_SetItem(s->markers, ident, obj)) {
2043 Py_DECREF(ident);
2044 return -1;
2045 }
2046 }
2047 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2048 if (newobj == NULL) {
2049 Py_XDECREF(ident);
2050 return -1;
2051 }
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002052
2053 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2054 return -1;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002055 rv = encoder_listencode_obj(s, rval, newobj, indent_level);
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002056 Py_LeaveRecursiveCall();
2057
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002058 Py_DECREF(newobj);
2059 if (rv) {
2060 Py_XDECREF(ident);
2061 return -1;
2062 }
2063 if (ident != NULL) {
2064 if (PyDict_DelItem(s->markers, ident)) {
2065 Py_XDECREF(ident);
2066 return -1;
2067 }
2068 Py_XDECREF(ident);
2069 }
2070 return rv;
2071 }
2072}
2073
2074static int
2075encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2076{
2077 /* Encode Python dict dct a JSON term, rval is a PyList */
2078 static PyObject *open_dict = NULL;
2079 static PyObject *close_dict = NULL;
2080 static PyObject *empty_dict = NULL;
2081 PyObject *kstr = NULL;
2082 PyObject *ident = NULL;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002083 PyObject *key = NULL;
2084 PyObject *value = NULL;
2085 PyObject *it = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002086 int skipkeys;
2087 Py_ssize_t idx;
2088
2089 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
2090 open_dict = PyString_InternFromString("{");
2091 close_dict = PyString_InternFromString("}");
2092 empty_dict = PyString_InternFromString("{}");
2093 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
2094 return -1;
2095 }
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002096 if (Py_SIZE(dct) == 0)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002097 return PyList_Append(rval, empty_dict);
2098
2099 if (s->markers != Py_None) {
2100 int has_key;
2101 ident = PyLong_FromVoidPtr(dct);
2102 if (ident == NULL)
2103 goto bail;
2104 has_key = PyDict_Contains(s->markers, ident);
2105 if (has_key) {
2106 if (has_key != -1)
2107 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2108 goto bail;
2109 }
2110 if (PyDict_SetItem(s->markers, ident, dct)) {
2111 goto bail;
2112 }
2113 }
2114
2115 if (PyList_Append(rval, open_dict))
2116 goto bail;
2117
2118 if (s->indent != Py_None) {
2119 /* TODO: DOES NOT RUN */
2120 indent_level += 1;
2121 /*
2122 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2123 separator = _item_separator + newline_indent
2124 buf += newline_indent
2125 */
2126 }
2127
2128 /* TODO: C speedup not implemented for sort_keys */
2129
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002130 it = PyObject_GetIter(dct);
2131 if (it == NULL)
2132 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002133 skipkeys = PyObject_IsTrue(s->skipkeys);
Serhiy Storchaka5127ed72015-05-30 17:45:12 +03002134 if (skipkeys < 0)
2135 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002136 idx = 0;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002137 while ((key = PyIter_Next(it)) != NULL) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002138 PyObject *encoded;
2139
2140 if (PyString_Check(key) || PyUnicode_Check(key)) {
2141 Py_INCREF(key);
2142 kstr = key;
2143 }
2144 else if (PyFloat_Check(key)) {
2145 kstr = encoder_encode_float(s, key);
2146 if (kstr == NULL)
2147 goto bail;
2148 }
2149 else if (PyInt_Check(key) || PyLong_Check(key)) {
2150 kstr = PyObject_Str(key);
2151 if (kstr == NULL)
2152 goto bail;
2153 }
2154 else if (key == Py_True || key == Py_False || key == Py_None) {
2155 kstr = _encoded_const(key);
2156 if (kstr == NULL)
2157 goto bail;
2158 }
2159 else if (skipkeys) {
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002160 Py_DECREF(key);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002161 continue;
2162 }
2163 else {
2164 /* TODO: include repr of key */
Doug Hellmannf31db932010-07-21 12:36:33 +00002165 PyErr_SetString(PyExc_TypeError, "keys must be a string");
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002166 goto bail;
2167 }
2168
2169 if (idx) {
2170 if (PyList_Append(rval, s->item_separator))
2171 goto bail;
2172 }
2173
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002174 value = PyObject_GetItem(dct, key);
2175 if (value == NULL)
2176 goto bail;
2177
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002178 encoded = encoder_encode_string(s, kstr);
2179 Py_CLEAR(kstr);
2180 if (encoded == NULL)
2181 goto bail;
2182 if (PyList_Append(rval, encoded)) {
2183 Py_DECREF(encoded);
2184 goto bail;
2185 }
2186 Py_DECREF(encoded);
2187 if (PyList_Append(rval, s->key_separator))
2188 goto bail;
2189 if (encoder_listencode_obj(s, rval, value, indent_level))
2190 goto bail;
2191 idx += 1;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002192 Py_CLEAR(value);
2193 Py_DECREF(key);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002194 }
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002195 if (PyErr_Occurred())
2196 goto bail;
2197 Py_CLEAR(it);
2198
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002199 if (ident != NULL) {
2200 if (PyDict_DelItem(s->markers, ident))
2201 goto bail;
2202 Py_CLEAR(ident);
2203 }
2204 if (s->indent != Py_None) {
2205 /* TODO: DOES NOT RUN */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002206 /*
Brett Cannon8e9757e2010-05-03 23:43:49 +00002207 indent_level -= 1;
2208
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002209 yield '\n' + (' ' * (_indent * _current_indent_level))
2210 */
2211 }
2212 if (PyList_Append(rval, close_dict))
2213 goto bail;
2214 return 0;
2215
2216bail:
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002217 Py_XDECREF(it);
2218 Py_XDECREF(key);
2219 Py_XDECREF(value);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002220 Py_XDECREF(kstr);
2221 Py_XDECREF(ident);
2222 return -1;
2223}
2224
2225
2226static int
2227encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2228{
2229 /* Encode Python list seq to a JSON term, rval is a PyList */
2230 static PyObject *open_array = NULL;
2231 static PyObject *close_array = NULL;
2232 static PyObject *empty_array = NULL;
2233 PyObject *ident = NULL;
2234 PyObject *s_fast = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002235 Py_ssize_t i;
2236
2237 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2238 open_array = PyString_InternFromString("[");
2239 close_array = PyString_InternFromString("]");
2240 empty_array = PyString_InternFromString("[]");
2241 if (open_array == NULL || close_array == NULL || empty_array == NULL)
2242 return -1;
2243 }
2244 ident = NULL;
2245 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
2246 if (s_fast == NULL)
2247 return -1;
Antoine Pitroue9e35c32012-11-01 20:07:40 +01002248 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002249 Py_DECREF(s_fast);
2250 return PyList_Append(rval, empty_array);
2251 }
2252
2253 if (s->markers != Py_None) {
2254 int has_key;
2255 ident = PyLong_FromVoidPtr(seq);
2256 if (ident == NULL)
2257 goto bail;
2258 has_key = PyDict_Contains(s->markers, ident);
2259 if (has_key) {
2260 if (has_key != -1)
2261 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2262 goto bail;
2263 }
2264 if (PyDict_SetItem(s->markers, ident, seq)) {
2265 goto bail;
2266 }
2267 }
2268
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002269 if (PyList_Append(rval, open_array))
2270 goto bail;
2271 if (s->indent != Py_None) {
2272 /* TODO: DOES NOT RUN */
2273 indent_level += 1;
2274 /*
2275 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2276 separator = _item_separator + newline_indent
2277 buf += newline_indent
2278 */
2279 }
Antoine Pitroue9e35c32012-11-01 20:07:40 +01002280 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
2281 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002282 if (i) {
2283 if (PyList_Append(rval, s->item_separator))
2284 goto bail;
2285 }
2286 if (encoder_listencode_obj(s, rval, obj, indent_level))
2287 goto bail;
2288 }
2289 if (ident != NULL) {
2290 if (PyDict_DelItem(s->markers, ident))
2291 goto bail;
2292 Py_CLEAR(ident);
2293 }
2294 if (s->indent != Py_None) {
2295 /* TODO: DOES NOT RUN */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002296 /*
Brett Cannon8e9757e2010-05-03 23:43:49 +00002297 indent_level -= 1;
2298
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002299 yield '\n' + (' ' * (_indent * _current_indent_level))
2300 */
2301 }
2302 if (PyList_Append(rval, close_array))
2303 goto bail;
2304 Py_DECREF(s_fast);
2305 return 0;
2306
2307bail:
2308 Py_XDECREF(ident);
2309 Py_DECREF(s_fast);
2310 return -1;
2311}
2312
2313static void
2314encoder_dealloc(PyObject *self)
2315{
2316 /* Deallocate Encoder */
2317 encoder_clear(self);
2318 Py_TYPE(self)->tp_free(self);
2319}
2320
2321static int
2322encoder_traverse(PyObject *self, visitproc visit, void *arg)
2323{
2324 PyEncoderObject *s;
2325 assert(PyEncoder_Check(self));
2326 s = (PyEncoderObject *)self;
2327 Py_VISIT(s->markers);
2328 Py_VISIT(s->defaultfn);
2329 Py_VISIT(s->encoder);
2330 Py_VISIT(s->indent);
2331 Py_VISIT(s->key_separator);
2332 Py_VISIT(s->item_separator);
2333 Py_VISIT(s->sort_keys);
2334 Py_VISIT(s->skipkeys);
2335 return 0;
2336}
2337
2338static int
2339encoder_clear(PyObject *self)
2340{
2341 /* Deallocate Encoder */
2342 PyEncoderObject *s;
2343 assert(PyEncoder_Check(self));
2344 s = (PyEncoderObject *)self;
2345 Py_CLEAR(s->markers);
2346 Py_CLEAR(s->defaultfn);
2347 Py_CLEAR(s->encoder);
2348 Py_CLEAR(s->indent);
2349 Py_CLEAR(s->key_separator);
2350 Py_CLEAR(s->item_separator);
2351 Py_CLEAR(s->sort_keys);
2352 Py_CLEAR(s->skipkeys);
2353 return 0;
2354}
2355
2356PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2357
2358static
2359PyTypeObject PyEncoderType = {
2360 PyObject_HEAD_INIT(NULL)
2361 0, /* tp_internal */
2362 "_json.Encoder", /* tp_name */
2363 sizeof(PyEncoderObject), /* tp_basicsize */
2364 0, /* tp_itemsize */
2365 encoder_dealloc, /* tp_dealloc */
2366 0, /* tp_print */
2367 0, /* tp_getattr */
2368 0, /* tp_setattr */
2369 0, /* tp_compare */
2370 0, /* tp_repr */
2371 0, /* tp_as_number */
2372 0, /* tp_as_sequence */
2373 0, /* tp_as_mapping */
2374 0, /* tp_hash */
2375 encoder_call, /* tp_call */
2376 0, /* tp_str */
2377 0, /* tp_getattro */
2378 0, /* tp_setattro */
2379 0, /* tp_as_buffer */
2380 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2381 encoder_doc, /* tp_doc */
2382 encoder_traverse, /* tp_traverse */
2383 encoder_clear, /* tp_clear */
2384 0, /* tp_richcompare */
2385 0, /* tp_weaklistoffset */
2386 0, /* tp_iter */
2387 0, /* tp_iternext */
2388 0, /* tp_methods */
2389 encoder_members, /* tp_members */
2390 0, /* tp_getset */
2391 0, /* tp_base */
2392 0, /* tp_dict */
2393 0, /* tp_descr_get */
2394 0, /* tp_descr_set */
2395 0, /* tp_dictoffset */
2396 encoder_init, /* tp_init */
2397 0, /* tp_alloc */
2398 encoder_new, /* tp_new */
2399 0, /* tp_free */
2400};
2401
2402static PyMethodDef speedups_methods[] = {
2403 {"encode_basestring_ascii",
2404 (PyCFunction)py_encode_basestring_ascii,
2405 METH_O,
2406 pydoc_encode_basestring_ascii},
2407 {"scanstring",
2408 (PyCFunction)py_scanstring,
2409 METH_VARARGS,
2410 pydoc_scanstring},
Brett Cannon4b964f92008-05-05 20:21:38 +00002411 {NULL, NULL, 0, NULL}
2412};
2413
2414PyDoc_STRVAR(module_doc,
2415"json speedups\n");
2416
2417void
2418init_json(void)
2419{
2420 PyObject *m;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002421 PyScannerType.tp_new = PyType_GenericNew;
2422 if (PyType_Ready(&PyScannerType) < 0)
2423 return;
2424 PyEncoderType.tp_new = PyType_GenericNew;
2425 if (PyType_Ready(&PyEncoderType) < 0)
2426 return;
2427 m = Py_InitModule3("_json", speedups_methods, module_doc);
Serhiy Storchaka045c4512015-07-24 12:58:25 +03002428 if (m == NULL)
2429 return;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002430 Py_INCREF((PyObject*)&PyScannerType);
2431 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2432 Py_INCREF((PyObject*)&PyEncoderType);
2433 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
Brett Cannon4b964f92008-05-05 20:21:38 +00002434}