blob: eb4368a229aff77d64dfb510d0c626a13c0c8267 [file] [log] [blame]
Brett Cannon4b964f92008-05-05 20:21:38 +00001#include "Python.h"
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002#include "structmember.h"
3#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
4#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
5#endif
6#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
7typedef int Py_ssize_t;
8#define PY_SSIZE_T_MAX INT_MAX
9#define PY_SSIZE_T_MIN INT_MIN
10#define PyInt_FromSsize_t PyInt_FromLong
11#define PyInt_AsSsize_t PyInt_AsLong
12#endif
13#ifndef Py_IS_FINITE
14#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
15#endif
16
17#ifdef __GNUC__
18#define UNUSED __attribute__((__unused__))
19#else
20#define UNUSED
21#endif
Brett Cannon4b964f92008-05-05 20:21:38 +000022
23#define DEFAULT_ENCODING "utf-8"
Brett Cannon4b964f92008-05-05 20:21:38 +000024
Bob Ippolitod914e3f2009-03-17 23:19:00 +000025#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
26#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
27#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
28#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
29
30static PyTypeObject PyScannerType;
31static PyTypeObject PyEncoderType;
32
33typedef struct _PyScannerObject {
34 PyObject_HEAD
35 PyObject *encoding;
36 PyObject *strict;
37 PyObject *object_hook;
Raymond Hettinger91852ca2009-03-19 19:19:03 +000038 PyObject *pairs_hook;
Bob Ippolitod914e3f2009-03-17 23:19:00 +000039 PyObject *parse_float;
40 PyObject *parse_int;
41 PyObject *parse_constant;
42} PyScannerObject;
43
44static PyMemberDef scanner_members[] = {
45 {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
46 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
47 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
Raymond Hettinger91852ca2009-03-19 19:19:03 +000048 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
Bob Ippolitod914e3f2009-03-17 23:19:00 +000049 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
50 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
51 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
52 {NULL}
53};
54
55typedef struct _PyEncoderObject {
56 PyObject_HEAD
57 PyObject *markers;
58 PyObject *defaultfn;
59 PyObject *encoder;
60 PyObject *indent;
61 PyObject *key_separator;
62 PyObject *item_separator;
63 PyObject *sort_keys;
64 PyObject *skipkeys;
65 int fast_encode;
66 int allow_nan;
67} PyEncoderObject;
68
69static PyMemberDef encoder_members[] = {
70 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
71 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
72 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
73 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
74 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
75 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
76 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
77 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
78 {NULL}
79};
80
81static Py_ssize_t
82ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
83static PyObject *
84ascii_escape_unicode(PyObject *pystr);
85static PyObject *
86ascii_escape_str(PyObject *pystr);
87static PyObject *
88py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
89void init_json(void);
90static PyObject *
91scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
92static PyObject *
93scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
94static PyObject *
95_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
96static PyObject *
97scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
98static int
99scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
100static void
101scanner_dealloc(PyObject *self);
102static int
103scanner_clear(PyObject *self);
104static PyObject *
105encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
106static int
107encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
108static void
109encoder_dealloc(PyObject *self);
110static int
111encoder_clear(PyObject *self);
112static int
113encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
114static int
115encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
116static int
117encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
118static PyObject *
Raymond Hettingera0b8d9a2009-03-19 19:24:43 +0000119_encoded_const(PyObject *obj);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000120static void
121raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
122static PyObject *
123encoder_encode_string(PyEncoderObject *s, PyObject *obj);
124static int
125_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
126static PyObject *
127_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
128static PyObject *
129encoder_encode_float(PyEncoderObject *s, PyObject *obj);
130
/* True when c may be copied verbatim into an ASCII JSON string: printable
   ASCII (' ' .. '~') other than the backslash and the double quote.
   The argument is parenthesized so that expression arguments (for example
   a conditional or a bitwise-or) are parsed as a single operand.
   Note: c is evaluated several times, so it must have no side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* True for the four whitespace characters tested here: space, tab, LF, CR.
   c is evaluated several times as well. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* Length of one "\uXXXX" escape. */
#define MIN_EXPANSION 6
/* Largest number of output bytes one input code unit can produce: with
   Py_UNICODE_WIDE a single code point may need a surrogate pair, i.e. two
   "\uXXXX" escapes. */
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
140
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000141static int
142_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
143{
144 /* PyObject to Py_ssize_t converter */
145 *size_ptr = PyInt_AsSsize_t(o);
Georg Brandlf71ba952009-05-05 07:48:12 +0000146 if (*size_ptr == -1 && PyErr_Occurred())
147 return 0;
148 return 1;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000149}
150
151static PyObject *
152_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
153{
154 /* Py_ssize_t to PyObject converter */
155 return PyInt_FromSsize_t(*size_ptr);
156}
157
Brett Cannon4b964f92008-05-05 20:21:38 +0000158static Py_ssize_t
159ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
160{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000161 /* Escape unicode code point c to ASCII escape sequences
162 in char *output. output must have at least 12 bytes unused to
163 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Brett Cannon4b964f92008-05-05 20:21:38 +0000164 output[chars++] = '\\';
165 switch (c) {
166 case '\\': output[chars++] = (char)c; break;
167 case '"': output[chars++] = (char)c; break;
168 case '\b': output[chars++] = 'b'; break;
169 case '\f': output[chars++] = 'f'; break;
170 case '\n': output[chars++] = 'n'; break;
171 case '\r': output[chars++] = 'r'; break;
172 case '\t': output[chars++] = 't'; break;
173 default:
174#ifdef Py_UNICODE_WIDE
175 if (c >= 0x10000) {
176 /* UTF-16 surrogate pair */
177 Py_UNICODE v = c - 0x10000;
178 c = 0xd800 | ((v >> 10) & 0x3ff);
179 output[chars++] = 'u';
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000180 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
181 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
182 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
183 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Brett Cannon4b964f92008-05-05 20:21:38 +0000184 c = 0xdc00 | (v & 0x3ff);
185 output[chars++] = '\\';
186 }
187#endif
188 output[chars++] = 'u';
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000189 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
190 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
191 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
192 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Brett Cannon4b964f92008-05-05 20:21:38 +0000193 }
194 return chars;
195}
196
197static PyObject *
198ascii_escape_unicode(PyObject *pystr)
199{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000200 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
Brett Cannon4b964f92008-05-05 20:21:38 +0000201 Py_ssize_t i;
202 Py_ssize_t input_chars;
203 Py_ssize_t output_size;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000204 Py_ssize_t max_output_size;
Brett Cannon4b964f92008-05-05 20:21:38 +0000205 Py_ssize_t chars;
206 PyObject *rval;
207 char *output;
208 Py_UNICODE *input_unicode;
209
210 input_chars = PyUnicode_GET_SIZE(pystr);
211 input_unicode = PyUnicode_AS_UNICODE(pystr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000212
Brett Cannon4b964f92008-05-05 20:21:38 +0000213 /* One char input can be up to 6 chars output, estimate 4 of these */
214 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000215 max_output_size = 2 + (input_chars * MAX_EXPANSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000216 rval = PyString_FromStringAndSize(NULL, output_size);
Brett Cannon4b964f92008-05-05 20:21:38 +0000217 if (rval == NULL) {
218 return NULL;
219 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000220 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000221 chars = 0;
222 output[chars++] = '"';
223 for (i = 0; i < input_chars; i++) {
224 Py_UNICODE c = input_unicode[i];
225 if (S_CHAR(c)) {
226 output[chars++] = (char)c;
227 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000228 else {
Brett Cannon4b964f92008-05-05 20:21:38 +0000229 chars = ascii_escape_char(c, output, chars);
230 }
231 if (output_size - chars < (1 + MAX_EXPANSION)) {
232 /* There's more than four, so let's resize by a lot */
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000233 Py_ssize_t new_output_size = output_size * 2;
Brett Cannon4b964f92008-05-05 20:21:38 +0000234 /* This is an upper bound */
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000235 if (new_output_size > max_output_size) {
236 new_output_size = max_output_size;
Brett Cannon4b964f92008-05-05 20:21:38 +0000237 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000238 /* Make sure that the output size changed before resizing */
239 if (new_output_size != output_size) {
240 output_size = new_output_size;
241 if (_PyString_Resize(&rval, output_size) == -1) {
242 return NULL;
243 }
244 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000245 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000246 }
247 }
248 output[chars++] = '"';
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000249 if (_PyString_Resize(&rval, chars) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000250 return NULL;
251 }
252 return rval;
253}
254
255static PyObject *
256ascii_escape_str(PyObject *pystr)
257{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000258 /* Take a PyString pystr and return a new ASCII-only escaped PyString */
Brett Cannon4b964f92008-05-05 20:21:38 +0000259 Py_ssize_t i;
260 Py_ssize_t input_chars;
261 Py_ssize_t output_size;
262 Py_ssize_t chars;
263 PyObject *rval;
264 char *output;
265 char *input_str;
266
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000267 input_chars = PyString_GET_SIZE(pystr);
268 input_str = PyString_AS_STRING(pystr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000269
270 /* Fast path for a string that's already ASCII */
271 for (i = 0; i < input_chars; i++) {
272 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
273 if (!S_CHAR(c)) {
274 /* If we have to escape something, scan the string for unicode */
275 Py_ssize_t j;
276 for (j = i; j < input_chars; j++) {
277 c = (Py_UNICODE)(unsigned char)input_str[j];
278 if (c > 0x7f) {
279 /* We hit a non-ASCII character, bail to unicode mode */
280 PyObject *uni;
281 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
282 if (uni == NULL) {
283 return NULL;
284 }
285 rval = ascii_escape_unicode(uni);
286 Py_DECREF(uni);
287 return rval;
288 }
289 }
290 break;
291 }
292 }
293
294 if (i == input_chars) {
295 /* Input is already ASCII */
296 output_size = 2 + input_chars;
297 }
298 else {
299 /* One char input can be up to 6 chars output, estimate 4 of these */
300 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
301 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000302 rval = PyString_FromStringAndSize(NULL, output_size);
Brett Cannon4b964f92008-05-05 20:21:38 +0000303 if (rval == NULL) {
304 return NULL;
305 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000306 output = PyString_AS_STRING(rval);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000307 output[0] = '"';
308
309 /* We know that everything up to i is ASCII already */
310 chars = i + 1;
311 memcpy(&output[1], input_str, i);
312
313 for (; i < input_chars; i++) {
314 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
Brett Cannon4b964f92008-05-05 20:21:38 +0000315 if (S_CHAR(c)) {
316 output[chars++] = (char)c;
317 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000318 else {
Brett Cannon4b964f92008-05-05 20:21:38 +0000319 chars = ascii_escape_char(c, output, chars);
320 }
321 /* An ASCII char can't possibly expand to a surrogate! */
322 if (output_size - chars < (1 + MIN_EXPANSION)) {
323 /* There's more than four, so let's resize by a lot */
324 output_size *= 2;
325 if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
326 output_size = 2 + (input_chars * MIN_EXPANSION);
327 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000328 if (_PyString_Resize(&rval, output_size) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000329 return NULL;
330 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000331 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000332 }
333 }
334 output[chars++] = '"';
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000335 if (_PyString_Resize(&rval, chars) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000336 return NULL;
337 }
338 return rval;
339}
340
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000341static void
Brett Cannon4b964f92008-05-05 20:21:38 +0000342raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
343{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000344 /* Use the Python function json.decoder.errmsg to raise a nice
345 looking ValueError exception */
Brett Cannon4b964f92008-05-05 20:21:38 +0000346 static PyObject *errmsg_fn = NULL;
347 PyObject *pymsg;
348 if (errmsg_fn == NULL) {
349 PyObject *decoder = PyImport_ImportModule("json.decoder");
350 if (decoder == NULL)
351 return;
352 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000353 Py_DECREF(decoder);
Brett Cannon4b964f92008-05-05 20:21:38 +0000354 if (errmsg_fn == NULL)
355 return;
Brett Cannon4b964f92008-05-05 20:21:38 +0000356 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000357 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
Benjamin Peterson595e3cb2008-10-16 21:09:28 +0000358 if (pymsg) {
359 PyErr_SetObject(PyExc_ValueError, pymsg);
360 Py_DECREF(pymsg);
361 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000362}
363
364static PyObject *
365join_list_unicode(PyObject *lst)
366{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000367 /* return u''.join(lst) */
368 static PyObject *joinfn = NULL;
369 if (joinfn == NULL) {
370 PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
371 if (ustr == NULL)
372 return NULL;
373
374 joinfn = PyObject_GetAttrString(ustr, "join");
375 Py_DECREF(ustr);
376 if (joinfn == NULL)
377 return NULL;
Brett Cannon4b964f92008-05-05 20:21:38 +0000378 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000379 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
Brett Cannon4b964f92008-05-05 20:21:38 +0000380}
381
382static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000383_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
384 /* return (rval, idx) tuple, stealing reference to rval */
385 PyObject *tpl;
386 PyObject *pyidx;
387 /*
388 steal a reference to rval, returns (rval, idx)
389 */
390 if (rval == NULL) {
391 return NULL;
392 }
393 pyidx = PyInt_FromSsize_t(idx);
394 if (pyidx == NULL) {
395 Py_DECREF(rval);
396 return NULL;
397 }
398 tpl = PyTuple_New(2);
399 if (tpl == NULL) {
400 Py_DECREF(pyidx);
401 Py_DECREF(rval);
402 return NULL;
403 }
404 PyTuple_SET_ITEM(tpl, 0, rval);
405 PyTuple_SET_ITEM(tpl, 1, pyidx);
406 return tpl;
407}
408
409static PyObject *
410scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
411{
412 /* Read the JSON string from PyString pystr.
413 end is the index of the first character after the quote.
414 encoding is the encoding of pystr (must be an ASCII superset)
415 if strict is zero then literal control characters are allowed
416 *next_end_ptr is a return-by-reference index of the character
417 after the end quote
418
419 Return value is a new PyString (if ASCII-only) or PyUnicode
420 */
Brett Cannon4b964f92008-05-05 20:21:38 +0000421 PyObject *rval;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000422 Py_ssize_t len = PyString_GET_SIZE(pystr);
Brett Cannon4b964f92008-05-05 20:21:38 +0000423 Py_ssize_t begin = end - 1;
Brett Cannon8e9757e2010-05-03 23:43:49 +0000424 Py_ssize_t next;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000425 char *buf = PyString_AS_STRING(pystr);
Brett Cannon4b964f92008-05-05 20:21:38 +0000426 PyObject *chunks = PyList_New(0);
427 if (chunks == NULL) {
428 goto bail;
429 }
Bob Ippolitod648f642008-07-19 21:59:50 +0000430 if (end < 0 || len <= end) {
431 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
432 goto bail;
433 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000434 while (1) {
435 /* Find the end of the string or the next escape */
436 Py_UNICODE c = 0;
437 PyObject *chunk = NULL;
438 for (next = end; next < len; next++) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000439 c = (unsigned char)buf[next];
Brett Cannon4b964f92008-05-05 20:21:38 +0000440 if (c == '"' || c == '\\') {
441 break;
442 }
443 else if (strict && c <= 0x1f) {
Bob Ippolitod648f642008-07-19 21:59:50 +0000444 raise_errmsg("Invalid control character at", pystr, next);
Brett Cannon4b964f92008-05-05 20:21:38 +0000445 goto bail;
446 }
447 }
448 if (!(c == '"' || c == '\\')) {
449 raise_errmsg("Unterminated string starting at", pystr, begin);
450 goto bail;
451 }
452 /* Pick up this chunk if it's not zero length */
453 if (next != end) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000454 PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000455 if (strchunk == NULL) {
456 goto bail;
457 }
Barry Warsawfa658272010-11-02 21:03:09 +0000458 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
459 Py_DECREF(strchunk);
460 if (chunk == NULL) {
461 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000462 }
463 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000464 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000465 goto bail;
466 }
467 Py_DECREF(chunk);
468 }
469 next++;
470 if (c == '"') {
471 end = next;
472 break;
473 }
474 if (next == len) {
475 raise_errmsg("Unterminated string starting at", pystr, begin);
476 goto bail;
477 }
478 c = buf[next];
479 if (c != 'u') {
480 /* Non-unicode backslash escapes */
481 end = next + 1;
482 switch (c) {
483 case '"': break;
484 case '\\': break;
485 case '/': break;
486 case 'b': c = '\b'; break;
487 case 'f': c = '\f'; break;
488 case 'n': c = '\n'; break;
489 case 'r': c = '\r'; break;
490 case 't': c = '\t'; break;
491 default: c = 0;
492 }
493 if (c == 0) {
494 raise_errmsg("Invalid \\escape", pystr, end - 2);
495 goto bail;
496 }
497 }
498 else {
499 c = 0;
500 next++;
501 end = next + 4;
502 if (end >= len) {
503 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
504 goto bail;
505 }
506 /* Decode 4 hex digits */
507 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000508 Py_UNICODE digit = buf[next];
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000509 c <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000510 switch (digit) {
511 case '0': case '1': case '2': case '3': case '4':
512 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000513 c |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000514 case 'a': case 'b': case 'c': case 'd': case 'e':
515 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000516 c |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000517 case 'A': case 'B': case 'C': case 'D': case 'E':
518 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000519 c |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000520 default:
521 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
522 goto bail;
523 }
524 }
525#ifdef Py_UNICODE_WIDE
526 /* Surrogate pair */
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200527 if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
528 buf[next++] == '\\' &&
529 buf[next++] == 'u') {
Brett Cannon4b964f92008-05-05 20:21:38 +0000530 Py_UNICODE c2 = 0;
Brett Cannon4b964f92008-05-05 20:21:38 +0000531 end += 6;
532 /* Decode 4 hex digits */
533 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000534 Py_UNICODE digit = buf[next];
Antoine Pitrou22ad2452010-10-09 15:28:59 +0000535 c2 <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000536 switch (digit) {
537 case '0': case '1': case '2': case '3': case '4':
538 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000539 c2 |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000540 case 'a': case 'b': case 'c': case 'd': case 'e':
541 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000542 c2 |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000543 case 'A': case 'B': case 'C': case 'D': case 'E':
544 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000545 c2 |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000546 default:
547 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
548 goto bail;
549 }
550 }
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200551 if ((c2 & 0xfc00) == 0xdc00)
552 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
553 else
554 end -= 6;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000555 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000556#endif
557 }
Barry Warsawfa658272010-11-02 21:03:09 +0000558 chunk = PyUnicode_FromUnicode(&c, 1);
559 if (chunk == NULL) {
560 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000561 }
562 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000563 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000564 goto bail;
565 }
566 Py_DECREF(chunk);
567 }
568
Ezio Melottidf8a8f72011-05-04 14:40:53 +0300569 rval = join_list_unicode(chunks);
Brett Cannon4b964f92008-05-05 20:21:38 +0000570 if (rval == NULL) {
571 goto bail;
572 }
Benjamin Peterson336680e2008-10-16 21:48:06 +0000573 Py_CLEAR(chunks);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000574 *next_end_ptr = end;
575 return rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000576bail:
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000577 *next_end_ptr = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000578 Py_XDECREF(chunks);
579 return NULL;
580}
581
582
583static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000584scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Brett Cannon4b964f92008-05-05 20:21:38 +0000585{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000586 /* Read the JSON string from PyUnicode pystr.
587 end is the index of the first character after the quote.
588 if strict is zero then literal control characters are allowed
589 *next_end_ptr is a return-by-reference index of the character
590 after the end quote
591
592 Return value is a new PyUnicode
593 */
Brett Cannon4b964f92008-05-05 20:21:38 +0000594 PyObject *rval;
595 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
596 Py_ssize_t begin = end - 1;
Brett Cannon8e9757e2010-05-03 23:43:49 +0000597 Py_ssize_t next;
Brett Cannon4b964f92008-05-05 20:21:38 +0000598 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
599 PyObject *chunks = PyList_New(0);
600 if (chunks == NULL) {
601 goto bail;
602 }
Bob Ippolitod648f642008-07-19 21:59:50 +0000603 if (end < 0 || len <= end) {
604 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
605 goto bail;
606 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000607 while (1) {
608 /* Find the end of the string or the next escape */
609 Py_UNICODE c = 0;
610 PyObject *chunk = NULL;
611 for (next = end; next < len; next++) {
612 c = buf[next];
613 if (c == '"' || c == '\\') {
614 break;
615 }
616 else if (strict && c <= 0x1f) {
Bob Ippolitod648f642008-07-19 21:59:50 +0000617 raise_errmsg("Invalid control character at", pystr, next);
Brett Cannon4b964f92008-05-05 20:21:38 +0000618 goto bail;
619 }
620 }
621 if (!(c == '"' || c == '\\')) {
622 raise_errmsg("Unterminated string starting at", pystr, begin);
623 goto bail;
624 }
625 /* Pick up this chunk if it's not zero length */
626 if (next != end) {
627 chunk = PyUnicode_FromUnicode(&buf[end], next - end);
628 if (chunk == NULL) {
629 goto bail;
630 }
631 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson87e6ad22008-10-16 21:27:54 +0000632 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000633 goto bail;
634 }
635 Py_DECREF(chunk);
636 }
637 next++;
638 if (c == '"') {
639 end = next;
640 break;
641 }
642 if (next == len) {
643 raise_errmsg("Unterminated string starting at", pystr, begin);
644 goto bail;
645 }
646 c = buf[next];
647 if (c != 'u') {
648 /* Non-unicode backslash escapes */
649 end = next + 1;
650 switch (c) {
651 case '"': break;
652 case '\\': break;
653 case '/': break;
654 case 'b': c = '\b'; break;
655 case 'f': c = '\f'; break;
656 case 'n': c = '\n'; break;
657 case 'r': c = '\r'; break;
658 case 't': c = '\t'; break;
659 default: c = 0;
660 }
661 if (c == 0) {
662 raise_errmsg("Invalid \\escape", pystr, end - 2);
663 goto bail;
664 }
665 }
666 else {
667 c = 0;
668 next++;
669 end = next + 4;
670 if (end >= len) {
671 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
672 goto bail;
673 }
674 /* Decode 4 hex digits */
675 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000676 Py_UNICODE digit = buf[next];
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000677 c <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000678 switch (digit) {
679 case '0': case '1': case '2': case '3': case '4':
680 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000681 c |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000682 case 'a': case 'b': case 'c': case 'd': case 'e':
683 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000684 c |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000685 case 'A': case 'B': case 'C': case 'D': case 'E':
686 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000687 c |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000688 default:
689 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
690 goto bail;
691 }
692 }
693#ifdef Py_UNICODE_WIDE
694 /* Surrogate pair */
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200695 if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
696 buf[next++] == '\\' && buf[next++] == 'u') {
Brett Cannon4b964f92008-05-05 20:21:38 +0000697 Py_UNICODE c2 = 0;
Brett Cannon4b964f92008-05-05 20:21:38 +0000698 end += 6;
699 /* Decode 4 hex digits */
700 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000701 Py_UNICODE digit = buf[next];
Antoine Pitrou22ad2452010-10-09 15:28:59 +0000702 c2 <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000703 switch (digit) {
704 case '0': case '1': case '2': case '3': case '4':
705 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000706 c2 |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000707 case 'a': case 'b': case 'c': case 'd': case 'e':
708 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000709 c2 |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000710 case 'A': case 'B': case 'C': case 'D': case 'E':
711 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000712 c2 |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000713 default:
714 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
715 goto bail;
716 }
717 }
Serhiy Storchakadafda9b2013-11-26 21:25:15 +0200718 if ((c2 & 0xfc00) == 0xdc00)
719 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
720 else
721 end -= 6;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000722 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000723#endif
724 }
725 chunk = PyUnicode_FromUnicode(&c, 1);
726 if (chunk == NULL) {
727 goto bail;
728 }
729 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000730 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000731 goto bail;
732 }
733 Py_DECREF(chunk);
734 }
735
736 rval = join_list_unicode(chunks);
737 if (rval == NULL) {
738 goto bail;
739 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000740 Py_DECREF(chunks);
741 *next_end_ptr = end;
742 return rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000743bail:
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000744 *next_end_ptr = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000745 Py_XDECREF(chunks);
746 return NULL;
747}
748
749PyDoc_STRVAR(pydoc_scanstring,
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000750 "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
751 "\n"
752 "Scan the string s for a JSON string. End is the index of the\n"
753 "character in s after the quote that started the JSON string.\n"
754 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
755 "on attempt to decode an invalid string. If strict is False then literal\n"
756 "control characters are allowed in the string.\n"
757 "\n"
758 "Returns a tuple of the decoded string and the index of the character in s\n"
759 "after the end quote."
760);
Brett Cannon4b964f92008-05-05 20:21:38 +0000761
762static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000763py_scanstring(PyObject* self UNUSED, PyObject *args)
Brett Cannon4b964f92008-05-05 20:21:38 +0000764{
765 PyObject *pystr;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000766 PyObject *rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000767 Py_ssize_t end;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000768 Py_ssize_t next_end = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000769 char *encoding = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000770 int strict = 1;
771 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000772 return NULL;
773 }
774 if (encoding == NULL) {
775 encoding = DEFAULT_ENCODING;
776 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000777 if (PyString_Check(pystr)) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000778 rval = scanstring_str(pystr, end, encoding, strict, &next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000779 }
780 else if (PyUnicode_Check(pystr)) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000781 rval = scanstring_unicode(pystr, end, strict, &next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000782 }
783 else {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000784 PyErr_Format(PyExc_TypeError,
785 "first argument must be a string, not %.80s",
Brett Cannon4b964f92008-05-05 20:21:38 +0000786 Py_TYPE(pystr)->tp_name);
787 return NULL;
788 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000789 return _build_rval_index_tuple(rval, next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000790}
791
792PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000793 "encode_basestring_ascii(basestring) -> str\n"
794 "\n"
795 "Return an ASCII-only JSON representation of a Python string"
796);
Brett Cannon4b964f92008-05-05 20:21:38 +0000797
798static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000799py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Brett Cannon4b964f92008-05-05 20:21:38 +0000800{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000801 /* Return an ASCII-only JSON representation of a Python string */
Brett Cannon4b964f92008-05-05 20:21:38 +0000802 /* METH_O */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000803 if (PyString_Check(pystr)) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000804 return ascii_escape_str(pystr);
805 }
806 else if (PyUnicode_Check(pystr)) {
807 return ascii_escape_unicode(pystr);
808 }
809 else {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000810 PyErr_Format(PyExc_TypeError,
811 "first argument must be a string, not %.80s",
Brett Cannon4b964f92008-05-05 20:21:38 +0000812 Py_TYPE(pystr)->tp_name);
813 return NULL;
814 }
815}
816
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000817static void
818scanner_dealloc(PyObject *self)
819{
820 /* Deallocate scanner object */
821 scanner_clear(self);
822 Py_TYPE(self)->tp_free(self);
823}
824
825static int
826scanner_traverse(PyObject *self, visitproc visit, void *arg)
827{
828 PyScannerObject *s;
829 assert(PyScanner_Check(self));
830 s = (PyScannerObject *)self;
831 Py_VISIT(s->encoding);
832 Py_VISIT(s->strict);
833 Py_VISIT(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000834 Py_VISIT(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000835 Py_VISIT(s->parse_float);
836 Py_VISIT(s->parse_int);
837 Py_VISIT(s->parse_constant);
838 return 0;
839}
840
841static int
842scanner_clear(PyObject *self)
843{
844 PyScannerObject *s;
845 assert(PyScanner_Check(self));
846 s = (PyScannerObject *)self;
847 Py_CLEAR(s->encoding);
848 Py_CLEAR(s->strict);
849 Py_CLEAR(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000850 Py_CLEAR(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000851 Py_CLEAR(s->parse_float);
852 Py_CLEAR(s->parse_int);
853 Py_CLEAR(s->parse_constant);
854 return 0;
855}
856
857static PyObject *
858_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
859 /* Read a JSON object from PyString pystr.
860 idx is the index of the first character after the opening curly brace.
861 *next_idx_ptr is a return-by-reference index to the first character after
862 the closing curly brace.
863
864 Returns a new PyObject (usually a dict, but object_hook can change that)
865 */
866 char *str = PyString_AS_STRING(pystr);
867 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000868 PyObject *rval;
869 PyObject *pairs;
870 PyObject *item;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000871 PyObject *key = NULL;
872 PyObject *val = NULL;
873 char *encoding = PyString_AS_STRING(s->encoding);
874 int strict = PyObject_IsTrue(s->strict);
875 Py_ssize_t next_idx;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000876
877 pairs = PyList_New(0);
878 if (pairs == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000879 return NULL;
880
881 /* skip whitespace after { */
882 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
883
884 /* only loop if the object is non-empty */
885 if (idx <= end_idx && str[idx] != '}') {
886 while (idx <= end_idx) {
887 /* read key */
888 if (str[idx] != '"') {
889 raise_errmsg("Expecting property name", pystr, idx);
890 goto bail;
891 }
892 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
893 if (key == NULL)
894 goto bail;
895 idx = next_idx;
896
897 /* skip whitespace between key and : delimiter, read :, skip whitespace */
898 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
899 if (idx > end_idx || str[idx] != ':') {
900 raise_errmsg("Expecting : delimiter", pystr, idx);
901 goto bail;
902 }
903 idx++;
904 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
905
906 /* read any JSON data type */
907 val = scan_once_str(s, pystr, idx, &next_idx);
908 if (val == NULL)
909 goto bail;
910
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000911 item = PyTuple_Pack(2, key, val);
912 if (item == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000913 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000914 Py_CLEAR(key);
915 Py_CLEAR(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000916 if (PyList_Append(pairs, item) == -1) {
917 Py_DECREF(item);
918 goto bail;
919 }
920 Py_DECREF(item);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000921 idx = next_idx;
922
923 /* skip whitespace before } or , */
924 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
925
926 /* bail if the object is closed or we didn't get the , delimiter */
927 if (idx > end_idx) break;
928 if (str[idx] == '}') {
929 break;
930 }
931 else if (str[idx] != ',') {
932 raise_errmsg("Expecting , delimiter", pystr, idx);
933 goto bail;
934 }
935 idx++;
936
937 /* skip whitespace after , delimiter */
938 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
939 }
940 }
941 /* verify that idx < end_idx, str[idx] should be '}' */
942 if (idx > end_idx || str[idx] != '}') {
943 raise_errmsg("Expecting object", pystr, end_idx);
944 goto bail;
945 }
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000946
947 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
948 if (s->pairs_hook != Py_None) {
949 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
950 if (val == NULL)
951 goto bail;
952 Py_DECREF(pairs);
953 *next_idx_ptr = idx + 1;
954 return val;
955 }
956
957 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
958 pairs, NULL);
959 if (rval == NULL)
960 goto bail;
961 Py_CLEAR(pairs);
962
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000963 /* if object_hook is not None: rval = object_hook(rval) */
964 if (s->object_hook != Py_None) {
965 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
966 if (val == NULL)
967 goto bail;
968 Py_DECREF(rval);
969 rval = val;
970 val = NULL;
971 }
972 *next_idx_ptr = idx + 1;
973 return rval;
974bail:
975 Py_XDECREF(key);
976 Py_XDECREF(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000977 Py_XDECREF(pairs);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000978 return NULL;
979}
980
981static PyObject *
982_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
983 /* Read a JSON object from PyUnicode pystr.
984 idx is the index of the first character after the opening curly brace.
985 *next_idx_ptr is a return-by-reference index to the first character after
986 the closing curly brace.
987
988 Returns a new PyObject (usually a dict, but object_hook can change that)
989 */
990 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
991 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000992 PyObject *rval;
993 PyObject *pairs;
994 PyObject *item;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000995 PyObject *key = NULL;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000996 PyObject *val = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000997 int strict = PyObject_IsTrue(s->strict);
998 Py_ssize_t next_idx;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000999
1000 pairs = PyList_New(0);
1001 if (pairs == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001002 return NULL;
1003
1004 /* skip whitespace after { */
1005 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1006
1007 /* only loop if the object is non-empty */
1008 if (idx <= end_idx && str[idx] != '}') {
1009 while (idx <= end_idx) {
1010 /* read key */
1011 if (str[idx] != '"') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001012 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001013 goto bail;
1014 }
1015 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1016 if (key == NULL)
1017 goto bail;
1018 idx = next_idx;
1019
1020 /* skip whitespace between key and : delimiter, read :, skip whitespace */
1021 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1022 if (idx > end_idx || str[idx] != ':') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001023 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001024 goto bail;
1025 }
1026 idx++;
1027 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1028
1029 /* read any JSON term */
1030 val = scan_once_unicode(s, pystr, idx, &next_idx);
1031 if (val == NULL)
1032 goto bail;
1033
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001034 item = PyTuple_Pack(2, key, val);
1035 if (item == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001036 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001037 Py_CLEAR(key);
1038 Py_CLEAR(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001039 if (PyList_Append(pairs, item) == -1) {
1040 Py_DECREF(item);
1041 goto bail;
1042 }
1043 Py_DECREF(item);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001044 idx = next_idx;
1045
1046 /* skip whitespace before } or , */
1047 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1048
1049 /* bail if the object is closed or we didn't get the , delimiter */
1050 if (idx > end_idx) break;
1051 if (str[idx] == '}') {
1052 break;
1053 }
1054 else if (str[idx] != ',') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001055 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001056 goto bail;
1057 }
1058 idx++;
1059
1060 /* skip whitespace after , delimiter */
1061 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1062 }
1063 }
1064
1065 /* verify that idx < end_idx, str[idx] should be '}' */
1066 if (idx > end_idx || str[idx] != '}') {
1067 raise_errmsg("Expecting object", pystr, end_idx);
1068 goto bail;
1069 }
1070
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001071 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1072 if (s->pairs_hook != Py_None) {
1073 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1074 if (val == NULL)
1075 goto bail;
1076 Py_DECREF(pairs);
1077 *next_idx_ptr = idx + 1;
1078 return val;
1079 }
1080
1081 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
1082 pairs, NULL);
1083 if (rval == NULL)
1084 goto bail;
1085 Py_CLEAR(pairs);
1086
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001087 /* if object_hook is not None: rval = object_hook(rval) */
1088 if (s->object_hook != Py_None) {
1089 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1090 if (val == NULL)
1091 goto bail;
1092 Py_DECREF(rval);
1093 rval = val;
1094 val = NULL;
1095 }
1096 *next_idx_ptr = idx + 1;
1097 return rval;
1098bail:
1099 Py_XDECREF(key);
1100 Py_XDECREF(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001101 Py_XDECREF(pairs);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001102 return NULL;
1103}
1104
1105static PyObject *
1106_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1107 /* Read a JSON array from PyString pystr.
1108 idx is the index of the first character after the opening brace.
1109 *next_idx_ptr is a return-by-reference index to the first character after
1110 the closing brace.
1111
1112 Returns a new PyList
1113 */
1114 char *str = PyString_AS_STRING(pystr);
1115 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1116 PyObject *val = NULL;
1117 PyObject *rval = PyList_New(0);
1118 Py_ssize_t next_idx;
1119 if (rval == NULL)
1120 return NULL;
1121
1122 /* skip whitespace after [ */
1123 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1124
1125 /* only loop if the array is non-empty */
1126 if (idx <= end_idx && str[idx] != ']') {
1127 while (idx <= end_idx) {
1128
1129 /* read any JSON term and de-tuplefy the (rval, idx) */
1130 val = scan_once_str(s, pystr, idx, &next_idx);
1131 if (val == NULL)
1132 goto bail;
1133
1134 if (PyList_Append(rval, val) == -1)
1135 goto bail;
1136
1137 Py_CLEAR(val);
1138 idx = next_idx;
1139
1140 /* skip whitespace between term and , */
1141 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1142
1143 /* bail if the array is closed or we didn't get the , delimiter */
1144 if (idx > end_idx) break;
1145 if (str[idx] == ']') {
1146 break;
1147 }
1148 else if (str[idx] != ',') {
1149 raise_errmsg("Expecting , delimiter", pystr, idx);
1150 goto bail;
1151 }
1152 idx++;
1153
1154 /* skip whitespace after , */
1155 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1156 }
1157 }
1158
1159 /* verify that idx < end_idx, str[idx] should be ']' */
1160 if (idx > end_idx || str[idx] != ']') {
1161 raise_errmsg("Expecting object", pystr, end_idx);
1162 goto bail;
1163 }
1164 *next_idx_ptr = idx + 1;
1165 return rval;
1166bail:
1167 Py_XDECREF(val);
1168 Py_DECREF(rval);
1169 return NULL;
1170}
1171
1172static PyObject *
1173_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1174 /* Read a JSON array from PyString pystr.
1175 idx is the index of the first character after the opening brace.
1176 *next_idx_ptr is a return-by-reference index to the first character after
1177 the closing brace.
1178
1179 Returns a new PyList
1180 */
1181 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1182 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1183 PyObject *val = NULL;
1184 PyObject *rval = PyList_New(0);
1185 Py_ssize_t next_idx;
1186 if (rval == NULL)
1187 return NULL;
1188
1189 /* skip whitespace after [ */
1190 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1191
1192 /* only loop if the array is non-empty */
1193 if (idx <= end_idx && str[idx] != ']') {
1194 while (idx <= end_idx) {
1195
1196 /* read any JSON term */
1197 val = scan_once_unicode(s, pystr, idx, &next_idx);
1198 if (val == NULL)
1199 goto bail;
1200
1201 if (PyList_Append(rval, val) == -1)
1202 goto bail;
1203
1204 Py_CLEAR(val);
1205 idx = next_idx;
1206
1207 /* skip whitespace between term and , */
1208 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1209
1210 /* bail if the array is closed or we didn't get the , delimiter */
1211 if (idx > end_idx) break;
1212 if (str[idx] == ']') {
1213 break;
1214 }
1215 else if (str[idx] != ',') {
Antoine Pitroud9a51372012-06-29 01:58:26 +02001216 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001217 goto bail;
1218 }
1219 idx++;
1220
1221 /* skip whitespace after , */
1222 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1223 }
1224 }
1225
1226 /* verify that idx < end_idx, str[idx] should be ']' */
1227 if (idx > end_idx || str[idx] != ']') {
1228 raise_errmsg("Expecting object", pystr, end_idx);
1229 goto bail;
1230 }
1231 *next_idx_ptr = idx + 1;
1232 return rval;
1233bail:
1234 Py_XDECREF(val);
1235 Py_DECREF(rval);
1236 return NULL;
1237}
1238
1239static PyObject *
1240_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1241 /* Read a JSON constant from PyString pystr.
1242 constant is the constant string that was found
1243 ("NaN", "Infinity", "-Infinity").
1244 idx is the index of the first character of the constant
1245 *next_idx_ptr is a return-by-reference index to the first character after
1246 the constant.
1247
1248 Returns the result of parse_constant
1249 */
1250 PyObject *cstr;
1251 PyObject *rval;
1252 /* constant is "NaN", "Infinity", or "-Infinity" */
1253 cstr = PyString_InternFromString(constant);
1254 if (cstr == NULL)
1255 return NULL;
1256
1257 /* rval = parse_constant(constant) */
1258 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1259 idx += PyString_GET_SIZE(cstr);
1260 Py_DECREF(cstr);
1261 *next_idx_ptr = idx;
1262 return rval;
1263}
1264
1265static PyObject *
1266_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1267 /* Read a JSON number from PyString pystr.
1268 idx is the index of the first character of the number
1269 *next_idx_ptr is a return-by-reference index to the first character after
1270 the number.
1271
1272 Returns a new PyObject representation of that number:
1273 PyInt, PyLong, or PyFloat.
1274 May return other types if parse_int or parse_float are set
1275 */
1276 char *str = PyString_AS_STRING(pystr);
1277 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1278 Py_ssize_t idx = start;
1279 int is_float = 0;
1280 PyObject *rval;
1281 PyObject *numstr;
1282
1283 /* read a sign if it's there, make sure it's not the end of the string */
1284 if (str[idx] == '-') {
1285 idx++;
1286 if (idx > end_idx) {
1287 PyErr_SetNone(PyExc_StopIteration);
1288 return NULL;
1289 }
1290 }
1291
1292 /* read as many integer digits as we find as long as it doesn't start with 0 */
1293 if (str[idx] >= '1' && str[idx] <= '9') {
1294 idx++;
1295 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1296 }
1297 /* if it starts with 0 we only expect one integer digit */
1298 else if (str[idx] == '0') {
1299 idx++;
1300 }
1301 /* no integer digits, error */
1302 else {
1303 PyErr_SetNone(PyExc_StopIteration);
1304 return NULL;
1305 }
1306
1307 /* if the next char is '.' followed by a digit then read all float digits */
1308 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1309 is_float = 1;
1310 idx += 2;
1311 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1312 }
1313
1314 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1315 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1316
1317 /* save the index of the 'e' or 'E' just in case we need to backtrack */
1318 Py_ssize_t e_start = idx;
1319 idx++;
1320
1321 /* read an exponent sign if present */
1322 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1323
1324 /* read all digits */
1325 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1326
1327 /* if we got a digit, then parse as float. if not, backtrack */
1328 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1329 is_float = 1;
1330 }
1331 else {
1332 idx = e_start;
1333 }
1334 }
1335
1336 /* copy the section we determined to be a number */
1337 numstr = PyString_FromStringAndSize(&str[start], idx - start);
1338 if (numstr == NULL)
1339 return NULL;
1340 if (is_float) {
1341 /* parse as a float using a fast path if available, otherwise call user defined method */
1342 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1343 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1344 }
1345 else {
Eric Smith129c97d2009-10-28 08:44:37 +00001346 double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1347 NULL, NULL);
1348 if (d == -1.0 && PyErr_Occurred())
1349 return NULL;
1350 rval = PyFloat_FromDouble(d);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001351 }
1352 }
1353 else {
1354 /* parse as an int using a fast path if available, otherwise call user defined method */
1355 if (s->parse_int != (PyObject *)&PyInt_Type) {
1356 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1357 }
1358 else {
1359 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1360 }
1361 }
1362 Py_DECREF(numstr);
1363 *next_idx_ptr = idx;
1364 return rval;
1365}
1366
1367static PyObject *
1368_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1369 /* Read a JSON number from PyUnicode pystr.
1370 idx is the index of the first character of the number
1371 *next_idx_ptr is a return-by-reference index to the first character after
1372 the number.
1373
1374 Returns a new PyObject representation of that number:
1375 PyInt, PyLong, or PyFloat.
1376 May return other types if parse_int or parse_float are set
1377 */
1378 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1379 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1380 Py_ssize_t idx = start;
1381 int is_float = 0;
1382 PyObject *rval;
1383 PyObject *numstr;
1384
1385 /* read a sign if it's there, make sure it's not the end of the string */
1386 if (str[idx] == '-') {
1387 idx++;
1388 if (idx > end_idx) {
1389 PyErr_SetNone(PyExc_StopIteration);
1390 return NULL;
1391 }
1392 }
1393
1394 /* read as many integer digits as we find as long as it doesn't start with 0 */
1395 if (str[idx] >= '1' && str[idx] <= '9') {
1396 idx++;
1397 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1398 }
1399 /* if it starts with 0 we only expect one integer digit */
1400 else if (str[idx] == '0') {
1401 idx++;
1402 }
1403 /* no integer digits, error */
1404 else {
1405 PyErr_SetNone(PyExc_StopIteration);
1406 return NULL;
1407 }
1408
1409 /* if the next char is '.' followed by a digit then read all float digits */
1410 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1411 is_float = 1;
1412 idx += 2;
Bob Ippolito76a982a2009-03-29 22:33:58 +00001413 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001414 }
1415
1416 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1417 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1418 Py_ssize_t e_start = idx;
1419 idx++;
1420
1421 /* read an exponent sign if present */
1422 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1423
1424 /* read all digits */
1425 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1426
1427 /* if we got a digit, then parse as float. if not, backtrack */
1428 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1429 is_float = 1;
1430 }
1431 else {
1432 idx = e_start;
1433 }
1434 }
1435
1436 /* copy the section we determined to be a number */
1437 numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1438 if (numstr == NULL)
1439 return NULL;
1440 if (is_float) {
1441 /* parse as a float using a fast path if available, otherwise call user defined method */
1442 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1443 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1444 }
1445 else {
1446 rval = PyFloat_FromString(numstr, NULL);
1447 }
1448 }
1449 else {
1450 /* no fast path for unicode -> int, just call */
1451 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1452 }
1453 Py_DECREF(numstr);
1454 *next_idx_ptr = idx;
1455 return rval;
1456}
1457
1458static PyObject *
1459scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1460{
1461 /* Read one JSON term (of any kind) from PyString pystr.
1462 idx is the index of the first character of the term
1463 *next_idx_ptr is a return-by-reference index to the first character after
1464 the number.
1465
1466 Returns a new PyObject representation of the term.
1467 */
Ezio Melotticec46492011-05-07 17:40:23 +03001468 PyObject *res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001469 char *str = PyString_AS_STRING(pystr);
1470 Py_ssize_t length = PyString_GET_SIZE(pystr);
1471 if (idx >= length) {
1472 PyErr_SetNone(PyExc_StopIteration);
1473 return NULL;
1474 }
1475 switch (str[idx]) {
1476 case '"':
1477 /* string */
1478 return scanstring_str(pystr, idx + 1,
1479 PyString_AS_STRING(s->encoding),
1480 PyObject_IsTrue(s->strict),
1481 next_idx_ptr);
1482 case '{':
1483 /* object */
Ezio Melotticec46492011-05-07 17:40:23 +03001484 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1485 "from a byte string"))
1486 return NULL;
1487 res = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1488 Py_LeaveRecursiveCall();
1489 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001490 case '[':
1491 /* array */
Ezio Melotticec46492011-05-07 17:40:23 +03001492 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1493 "from a byte string"))
1494 return NULL;
1495 res = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1496 Py_LeaveRecursiveCall();
1497 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001498 case 'n':
1499 /* null */
1500 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1501 Py_INCREF(Py_None);
1502 *next_idx_ptr = idx + 4;
1503 return Py_None;
1504 }
1505 break;
1506 case 't':
1507 /* true */
1508 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1509 Py_INCREF(Py_True);
1510 *next_idx_ptr = idx + 4;
1511 return Py_True;
1512 }
1513 break;
1514 case 'f':
1515 /* false */
1516 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1517 Py_INCREF(Py_False);
1518 *next_idx_ptr = idx + 5;
1519 return Py_False;
1520 }
1521 break;
1522 case 'N':
1523 /* NaN */
1524 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1525 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1526 }
1527 break;
1528 case 'I':
1529 /* Infinity */
1530 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1531 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1532 }
1533 break;
1534 case '-':
1535 /* -Infinity */
1536 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1537 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1538 }
1539 break;
1540 }
1541 /* Didn't find a string, object, array, or named constant. Look for a number. */
1542 return _match_number_str(s, pystr, idx, next_idx_ptr);
1543}
1544
1545static PyObject *
1546scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1547{
1548 /* Read one JSON term (of any kind) from PyUnicode pystr.
1549 idx is the index of the first character of the term
1550 *next_idx_ptr is a return-by-reference index to the first character after
1551 the number.
1552
1553 Returns a new PyObject representation of the term.
1554 */
Ezio Melotticec46492011-05-07 17:40:23 +03001555 PyObject *res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001556 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1557 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1558 if (idx >= length) {
1559 PyErr_SetNone(PyExc_StopIteration);
1560 return NULL;
1561 }
1562 switch (str[idx]) {
1563 case '"':
1564 /* string */
1565 return scanstring_unicode(pystr, idx + 1,
1566 PyObject_IsTrue(s->strict),
1567 next_idx_ptr);
1568 case '{':
1569 /* object */
Ezio Melotticec46492011-05-07 17:40:23 +03001570 if (Py_EnterRecursiveCall(" while decoding a JSON object "
1571 "from a unicode string"))
1572 return NULL;
1573 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1574 Py_LeaveRecursiveCall();
1575 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001576 case '[':
1577 /* array */
Ezio Melotticec46492011-05-07 17:40:23 +03001578 if (Py_EnterRecursiveCall(" while decoding a JSON array "
1579 "from a unicode string"))
1580 return NULL;
1581 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1582 Py_LeaveRecursiveCall();
1583 return res;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001584 case 'n':
1585 /* null */
1586 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1587 Py_INCREF(Py_None);
1588 *next_idx_ptr = idx + 4;
1589 return Py_None;
1590 }
1591 break;
1592 case 't':
1593 /* true */
1594 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1595 Py_INCREF(Py_True);
1596 *next_idx_ptr = idx + 4;
1597 return Py_True;
1598 }
1599 break;
1600 case 'f':
1601 /* false */
1602 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1603 Py_INCREF(Py_False);
1604 *next_idx_ptr = idx + 5;
1605 return Py_False;
1606 }
1607 break;
1608 case 'N':
1609 /* NaN */
1610 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1611 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1612 }
1613 break;
1614 case 'I':
1615 /* Infinity */
1616 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1617 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1618 }
1619 break;
1620 case '-':
1621 /* -Infinity */
1622 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1623 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1624 }
1625 break;
1626 }
1627 /* Didn't find a string, object, array, or named constant. Look for a number. */
1628 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1629}
1630
1631static PyObject *
1632scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1633{
1634 /* Python callable interface to scan_once_{str,unicode} */
1635 PyObject *pystr;
1636 PyObject *rval;
1637 Py_ssize_t idx;
1638 Py_ssize_t next_idx = -1;
1639 static char *kwlist[] = {"string", "idx", NULL};
1640 PyScannerObject *s;
1641 assert(PyScanner_Check(self));
1642 s = (PyScannerObject *)self;
1643 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1644 return NULL;
1645
1646 if (PyString_Check(pystr)) {
1647 rval = scan_once_str(s, pystr, idx, &next_idx);
1648 }
1649 else if (PyUnicode_Check(pystr)) {
1650 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1651 }
1652 else {
1653 PyErr_Format(PyExc_TypeError,
1654 "first argument must be a string, not %.80s",
1655 Py_TYPE(pystr)->tp_name);
1656 return NULL;
1657 }
1658 return _build_rval_index_tuple(rval, next_idx);
1659}
1660
1661static PyObject *
1662scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1663{
1664 PyScannerObject *s;
1665 s = (PyScannerObject *)type->tp_alloc(type, 0);
1666 if (s != NULL) {
1667 s->encoding = NULL;
1668 s->strict = NULL;
1669 s->object_hook = NULL;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001670 s->pairs_hook = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001671 s->parse_float = NULL;
1672 s->parse_int = NULL;
1673 s->parse_constant = NULL;
1674 }
1675 return (PyObject *)s;
1676}
1677
1678static int
1679scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1680{
1681 /* Initialize Scanner object */
1682 PyObject *ctx;
1683 static char *kwlist[] = {"context", NULL};
1684 PyScannerObject *s;
1685
1686 assert(PyScanner_Check(self));
1687 s = (PyScannerObject *)self;
1688
1689 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1690 return -1;
1691
1692 /* PyString_AS_STRING is used on encoding */
1693 s->encoding = PyObject_GetAttrString(ctx, "encoding");
Antoine Pitrou187177f2009-12-08 15:40:51 +00001694 if (s->encoding == NULL)
1695 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001696 if (s->encoding == Py_None) {
1697 Py_DECREF(Py_None);
1698 s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1699 }
1700 else if (PyUnicode_Check(s->encoding)) {
1701 PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
1702 Py_DECREF(s->encoding);
1703 s->encoding = tmp;
1704 }
Amaury Forgeot d'Arcdee76e62012-01-13 22:53:25 +01001705 if (s->encoding == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001706 goto bail;
Amaury Forgeot d'Arcdee76e62012-01-13 22:53:25 +01001707 if (!PyString_Check(s->encoding)) {
1708 PyErr_Format(PyExc_TypeError,
1709 "encoding must be a string, not %.80s",
1710 Py_TYPE(s->encoding)->tp_name);
1711 goto bail;
1712 }
1713
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001714
1715 /* All of these will fail "gracefully" so we don't need to verify them */
1716 s->strict = PyObject_GetAttrString(ctx, "strict");
1717 if (s->strict == NULL)
1718 goto bail;
1719 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1720 if (s->object_hook == NULL)
1721 goto bail;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001722 s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
Georg Brandld823bdc2011-01-02 14:20:16 +00001723 if (s->pairs_hook == NULL)
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001724 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001725 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1726 if (s->parse_float == NULL)
1727 goto bail;
1728 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1729 if (s->parse_int == NULL)
1730 goto bail;
1731 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1732 if (s->parse_constant == NULL)
1733 goto bail;
1734
1735 return 0;
1736
1737bail:
1738 Py_CLEAR(s->encoding);
1739 Py_CLEAR(s->strict);
1740 Py_CLEAR(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001741 Py_CLEAR(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001742 Py_CLEAR(s->parse_float);
1743 Py_CLEAR(s->parse_int);
1744 Py_CLEAR(s->parse_constant);
1745 return -1;
1746}
1747
1748PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1749
1750static
1751PyTypeObject PyScannerType = {
1752 PyObject_HEAD_INIT(NULL)
1753 0, /* tp_internal */
1754 "_json.Scanner", /* tp_name */
1755 sizeof(PyScannerObject), /* tp_basicsize */
1756 0, /* tp_itemsize */
1757 scanner_dealloc, /* tp_dealloc */
1758 0, /* tp_print */
1759 0, /* tp_getattr */
1760 0, /* tp_setattr */
1761 0, /* tp_compare */
1762 0, /* tp_repr */
1763 0, /* tp_as_number */
1764 0, /* tp_as_sequence */
1765 0, /* tp_as_mapping */
1766 0, /* tp_hash */
1767 scanner_call, /* tp_call */
1768 0, /* tp_str */
1769 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1770 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1771 0, /* tp_as_buffer */
1772 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1773 scanner_doc, /* tp_doc */
1774 scanner_traverse, /* tp_traverse */
1775 scanner_clear, /* tp_clear */
1776 0, /* tp_richcompare */
1777 0, /* tp_weaklistoffset */
1778 0, /* tp_iter */
1779 0, /* tp_iternext */
1780 0, /* tp_methods */
1781 scanner_members, /* tp_members */
1782 0, /* tp_getset */
1783 0, /* tp_base */
1784 0, /* tp_dict */
1785 0, /* tp_descr_get */
1786 0, /* tp_descr_set */
1787 0, /* tp_dictoffset */
1788 scanner_init, /* tp_init */
1789 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1790 scanner_new, /* tp_new */
1791 0,/* PyObject_GC_Del, */ /* tp_free */
1792};
1793
1794static PyObject *
1795encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1796{
1797 PyEncoderObject *s;
1798 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1799 if (s != NULL) {
1800 s->markers = NULL;
1801 s->defaultfn = NULL;
1802 s->encoder = NULL;
1803 s->indent = NULL;
1804 s->key_separator = NULL;
1805 s->item_separator = NULL;
1806 s->sort_keys = NULL;
1807 s->skipkeys = NULL;
1808 }
1809 return (PyObject *)s;
1810}
1811
1812static int
1813encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1814{
1815 /* initialize Encoder object */
1816 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1817
1818 PyEncoderObject *s;
Antoine Pitrou187177f2009-12-08 15:40:51 +00001819 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1820 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001821
1822 assert(PyEncoder_Check(self));
1823 s = (PyEncoderObject *)self;
1824
1825 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
Antoine Pitrou187177f2009-12-08 15:40:51 +00001826 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
1827 &sort_keys, &skipkeys, &allow_nan))
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001828 return -1;
1829
Antoine Pitrou187177f2009-12-08 15:40:51 +00001830 s->markers = markers;
1831 s->defaultfn = defaultfn;
1832 s->encoder = encoder;
1833 s->indent = indent;
1834 s->key_separator = key_separator;
1835 s->item_separator = item_separator;
1836 s->sort_keys = sort_keys;
1837 s->skipkeys = skipkeys;
1838 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
1839 s->allow_nan = PyObject_IsTrue(allow_nan);
1840
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001841 Py_INCREF(s->markers);
1842 Py_INCREF(s->defaultfn);
1843 Py_INCREF(s->encoder);
1844 Py_INCREF(s->indent);
1845 Py_INCREF(s->key_separator);
1846 Py_INCREF(s->item_separator);
1847 Py_INCREF(s->sort_keys);
1848 Py_INCREF(s->skipkeys);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001849 return 0;
1850}
1851
1852static PyObject *
1853encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1854{
1855 /* Python callable interface to encode_listencode_obj */
1856 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1857 PyObject *obj;
1858 PyObject *rval;
1859 Py_ssize_t indent_level;
1860 PyEncoderObject *s;
1861 assert(PyEncoder_Check(self));
1862 s = (PyEncoderObject *)self;
1863 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1864 &obj, _convertPyInt_AsSsize_t, &indent_level))
1865 return NULL;
1866 rval = PyList_New(0);
1867 if (rval == NULL)
1868 return NULL;
1869 if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1870 Py_DECREF(rval);
1871 return NULL;
1872 }
1873 return rval;
1874}
1875
1876static PyObject *
1877_encoded_const(PyObject *obj)
1878{
1879 /* Return the JSON string representation of None, True, False */
1880 if (obj == Py_None) {
1881 static PyObject *s_null = NULL;
1882 if (s_null == NULL) {
1883 s_null = PyString_InternFromString("null");
1884 }
1885 Py_INCREF(s_null);
1886 return s_null;
1887 }
1888 else if (obj == Py_True) {
1889 static PyObject *s_true = NULL;
1890 if (s_true == NULL) {
1891 s_true = PyString_InternFromString("true");
1892 }
1893 Py_INCREF(s_true);
1894 return s_true;
1895 }
1896 else if (obj == Py_False) {
1897 static PyObject *s_false = NULL;
1898 if (s_false == NULL) {
1899 s_false = PyString_InternFromString("false");
1900 }
1901 Py_INCREF(s_false);
1902 return s_false;
1903 }
1904 else {
1905 PyErr_SetString(PyExc_ValueError, "not a const");
1906 return NULL;
1907 }
1908}
1909
1910static PyObject *
1911encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1912{
1913 /* Return the JSON representation of a PyFloat */
1914 double i = PyFloat_AS_DOUBLE(obj);
1915 if (!Py_IS_FINITE(i)) {
1916 if (!s->allow_nan) {
1917 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1918 return NULL;
1919 }
1920 if (i > 0) {
1921 return PyString_FromString("Infinity");
1922 }
1923 else if (i < 0) {
1924 return PyString_FromString("-Infinity");
1925 }
1926 else {
1927 return PyString_FromString("NaN");
1928 }
1929 }
1930 /* Use a better float format here? */
1931 return PyObject_Repr(obj);
1932}
1933
1934static PyObject *
1935encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1936{
1937 /* Return the JSON representation of a string */
1938 if (s->fast_encode)
1939 return py_encode_basestring_ascii(NULL, obj);
1940 else
1941 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1942}
1943
1944static int
1945_steal_list_append(PyObject *lst, PyObject *stolen)
1946{
1947 /* Append stolen and then decrement its reference count */
1948 int rval = PyList_Append(lst, stolen);
1949 Py_DECREF(stolen);
1950 return rval;
1951}
1952
1953static int
1954encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
1955{
1956 /* Encode Python object obj to a JSON term, rval is a PyList */
1957 PyObject *newobj;
1958 int rv;
1959
1960 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1961 PyObject *cstr = _encoded_const(obj);
1962 if (cstr == NULL)
1963 return -1;
1964 return _steal_list_append(rval, cstr);
1965 }
1966 else if (PyString_Check(obj) || PyUnicode_Check(obj))
1967 {
1968 PyObject *encoded = encoder_encode_string(s, obj);
1969 if (encoded == NULL)
1970 return -1;
1971 return _steal_list_append(rval, encoded);
1972 }
1973 else if (PyInt_Check(obj) || PyLong_Check(obj)) {
1974 PyObject *encoded = PyObject_Str(obj);
1975 if (encoded == NULL)
1976 return -1;
1977 return _steal_list_append(rval, encoded);
1978 }
1979 else if (PyFloat_Check(obj)) {
1980 PyObject *encoded = encoder_encode_float(s, obj);
1981 if (encoded == NULL)
1982 return -1;
1983 return _steal_list_append(rval, encoded);
1984 }
1985 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melottic1ec7b52011-05-11 00:55:35 +03001986 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1987 return -1;
1988 rv = encoder_listencode_list(s, rval, obj, indent_level);
1989 Py_LeaveRecursiveCall();
1990 return rv;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001991 }
1992 else if (PyDict_Check(obj)) {
Ezio Melottic1ec7b52011-05-11 00:55:35 +03001993 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1994 return -1;
1995 rv = encoder_listencode_dict(s, rval, obj, indent_level);
1996 Py_LeaveRecursiveCall();
1997 return rv;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001998 }
1999 else {
2000 PyObject *ident = NULL;
2001 if (s->markers != Py_None) {
2002 int has_key;
2003 ident = PyLong_FromVoidPtr(obj);
2004 if (ident == NULL)
2005 return -1;
2006 has_key = PyDict_Contains(s->markers, ident);
2007 if (has_key) {
2008 if (has_key != -1)
2009 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2010 Py_DECREF(ident);
2011 return -1;
2012 }
2013 if (PyDict_SetItem(s->markers, ident, obj)) {
2014 Py_DECREF(ident);
2015 return -1;
2016 }
2017 }
2018 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2019 if (newobj == NULL) {
2020 Py_XDECREF(ident);
2021 return -1;
2022 }
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002023
2024 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2025 return -1;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002026 rv = encoder_listencode_obj(s, rval, newobj, indent_level);
Ezio Melottic1ec7b52011-05-11 00:55:35 +03002027 Py_LeaveRecursiveCall();
2028
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002029 Py_DECREF(newobj);
2030 if (rv) {
2031 Py_XDECREF(ident);
2032 return -1;
2033 }
2034 if (ident != NULL) {
2035 if (PyDict_DelItem(s->markers, ident)) {
2036 Py_XDECREF(ident);
2037 return -1;
2038 }
2039 Py_XDECREF(ident);
2040 }
2041 return rv;
2042 }
2043}
2044
2045static int
2046encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2047{
2048 /* Encode Python dict dct a JSON term, rval is a PyList */
2049 static PyObject *open_dict = NULL;
2050 static PyObject *close_dict = NULL;
2051 static PyObject *empty_dict = NULL;
2052 PyObject *kstr = NULL;
2053 PyObject *ident = NULL;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002054 PyObject *key = NULL;
2055 PyObject *value = NULL;
2056 PyObject *it = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002057 int skipkeys;
2058 Py_ssize_t idx;
2059
2060 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
2061 open_dict = PyString_InternFromString("{");
2062 close_dict = PyString_InternFromString("}");
2063 empty_dict = PyString_InternFromString("{}");
2064 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
2065 return -1;
2066 }
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002067 if (Py_SIZE(dct) == 0)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002068 return PyList_Append(rval, empty_dict);
2069
2070 if (s->markers != Py_None) {
2071 int has_key;
2072 ident = PyLong_FromVoidPtr(dct);
2073 if (ident == NULL)
2074 goto bail;
2075 has_key = PyDict_Contains(s->markers, ident);
2076 if (has_key) {
2077 if (has_key != -1)
2078 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2079 goto bail;
2080 }
2081 if (PyDict_SetItem(s->markers, ident, dct)) {
2082 goto bail;
2083 }
2084 }
2085
2086 if (PyList_Append(rval, open_dict))
2087 goto bail;
2088
2089 if (s->indent != Py_None) {
2090 /* TODO: DOES NOT RUN */
2091 indent_level += 1;
2092 /*
2093 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2094 separator = _item_separator + newline_indent
2095 buf += newline_indent
2096 */
2097 }
2098
2099 /* TODO: C speedup not implemented for sort_keys */
2100
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002101 it = PyObject_GetIter(dct);
2102 if (it == NULL)
2103 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002104 skipkeys = PyObject_IsTrue(s->skipkeys);
2105 idx = 0;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002106 while ((key = PyIter_Next(it)) != NULL) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002107 PyObject *encoded;
2108
2109 if (PyString_Check(key) || PyUnicode_Check(key)) {
2110 Py_INCREF(key);
2111 kstr = key;
2112 }
2113 else if (PyFloat_Check(key)) {
2114 kstr = encoder_encode_float(s, key);
2115 if (kstr == NULL)
2116 goto bail;
2117 }
2118 else if (PyInt_Check(key) || PyLong_Check(key)) {
2119 kstr = PyObject_Str(key);
2120 if (kstr == NULL)
2121 goto bail;
2122 }
2123 else if (key == Py_True || key == Py_False || key == Py_None) {
2124 kstr = _encoded_const(key);
2125 if (kstr == NULL)
2126 goto bail;
2127 }
2128 else if (skipkeys) {
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002129 Py_DECREF(key);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002130 continue;
2131 }
2132 else {
2133 /* TODO: include repr of key */
Doug Hellmannf31db932010-07-21 12:36:33 +00002134 PyErr_SetString(PyExc_TypeError, "keys must be a string");
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002135 goto bail;
2136 }
2137
2138 if (idx) {
2139 if (PyList_Append(rval, s->item_separator))
2140 goto bail;
2141 }
2142
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002143 value = PyObject_GetItem(dct, key);
2144 if (value == NULL)
2145 goto bail;
2146
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002147 encoded = encoder_encode_string(s, kstr);
2148 Py_CLEAR(kstr);
2149 if (encoded == NULL)
2150 goto bail;
2151 if (PyList_Append(rval, encoded)) {
2152 Py_DECREF(encoded);
2153 goto bail;
2154 }
2155 Py_DECREF(encoded);
2156 if (PyList_Append(rval, s->key_separator))
2157 goto bail;
2158 if (encoder_listencode_obj(s, rval, value, indent_level))
2159 goto bail;
2160 idx += 1;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002161 Py_CLEAR(value);
2162 Py_DECREF(key);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002163 }
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002164 if (PyErr_Occurred())
2165 goto bail;
2166 Py_CLEAR(it);
2167
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002168 if (ident != NULL) {
2169 if (PyDict_DelItem(s->markers, ident))
2170 goto bail;
2171 Py_CLEAR(ident);
2172 }
2173 if (s->indent != Py_None) {
2174 /* TODO: DOES NOT RUN */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002175 /*
Brett Cannon8e9757e2010-05-03 23:43:49 +00002176 indent_level -= 1;
2177
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002178 yield '\n' + (' ' * (_indent * _current_indent_level))
2179 */
2180 }
2181 if (PyList_Append(rval, close_dict))
2182 goto bail;
2183 return 0;
2184
2185bail:
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002186 Py_XDECREF(it);
2187 Py_XDECREF(key);
2188 Py_XDECREF(value);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002189 Py_XDECREF(kstr);
2190 Py_XDECREF(ident);
2191 return -1;
2192}
2193
2194
2195static int
2196encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2197{
2198 /* Encode Python list seq to a JSON term, rval is a PyList */
2199 static PyObject *open_array = NULL;
2200 static PyObject *close_array = NULL;
2201 static PyObject *empty_array = NULL;
2202 PyObject *ident = NULL;
2203 PyObject *s_fast = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002204 Py_ssize_t i;
2205
2206 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2207 open_array = PyString_InternFromString("[");
2208 close_array = PyString_InternFromString("]");
2209 empty_array = PyString_InternFromString("[]");
2210 if (open_array == NULL || close_array == NULL || empty_array == NULL)
2211 return -1;
2212 }
2213 ident = NULL;
2214 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
2215 if (s_fast == NULL)
2216 return -1;
Antoine Pitroue9e35c32012-11-01 20:07:40 +01002217 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002218 Py_DECREF(s_fast);
2219 return PyList_Append(rval, empty_array);
2220 }
2221
2222 if (s->markers != Py_None) {
2223 int has_key;
2224 ident = PyLong_FromVoidPtr(seq);
2225 if (ident == NULL)
2226 goto bail;
2227 has_key = PyDict_Contains(s->markers, ident);
2228 if (has_key) {
2229 if (has_key != -1)
2230 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2231 goto bail;
2232 }
2233 if (PyDict_SetItem(s->markers, ident, seq)) {
2234 goto bail;
2235 }
2236 }
2237
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002238 if (PyList_Append(rval, open_array))
2239 goto bail;
2240 if (s->indent != Py_None) {
2241 /* TODO: DOES NOT RUN */
2242 indent_level += 1;
2243 /*
2244 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2245 separator = _item_separator + newline_indent
2246 buf += newline_indent
2247 */
2248 }
Antoine Pitroue9e35c32012-11-01 20:07:40 +01002249 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
2250 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002251 if (i) {
2252 if (PyList_Append(rval, s->item_separator))
2253 goto bail;
2254 }
2255 if (encoder_listencode_obj(s, rval, obj, indent_level))
2256 goto bail;
2257 }
2258 if (ident != NULL) {
2259 if (PyDict_DelItem(s->markers, ident))
2260 goto bail;
2261 Py_CLEAR(ident);
2262 }
2263 if (s->indent != Py_None) {
2264 /* TODO: DOES NOT RUN */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002265 /*
Brett Cannon8e9757e2010-05-03 23:43:49 +00002266 indent_level -= 1;
2267
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002268 yield '\n' + (' ' * (_indent * _current_indent_level))
2269 */
2270 }
2271 if (PyList_Append(rval, close_array))
2272 goto bail;
2273 Py_DECREF(s_fast);
2274 return 0;
2275
2276bail:
2277 Py_XDECREF(ident);
2278 Py_DECREF(s_fast);
2279 return -1;
2280}
2281
2282static void
2283encoder_dealloc(PyObject *self)
2284{
2285 /* Deallocate Encoder */
2286 encoder_clear(self);
2287 Py_TYPE(self)->tp_free(self);
2288}
2289
2290static int
2291encoder_traverse(PyObject *self, visitproc visit, void *arg)
2292{
2293 PyEncoderObject *s;
2294 assert(PyEncoder_Check(self));
2295 s = (PyEncoderObject *)self;
2296 Py_VISIT(s->markers);
2297 Py_VISIT(s->defaultfn);
2298 Py_VISIT(s->encoder);
2299 Py_VISIT(s->indent);
2300 Py_VISIT(s->key_separator);
2301 Py_VISIT(s->item_separator);
2302 Py_VISIT(s->sort_keys);
2303 Py_VISIT(s->skipkeys);
2304 return 0;
2305}
2306
2307static int
2308encoder_clear(PyObject *self)
2309{
2310 /* Deallocate Encoder */
2311 PyEncoderObject *s;
2312 assert(PyEncoder_Check(self));
2313 s = (PyEncoderObject *)self;
2314 Py_CLEAR(s->markers);
2315 Py_CLEAR(s->defaultfn);
2316 Py_CLEAR(s->encoder);
2317 Py_CLEAR(s->indent);
2318 Py_CLEAR(s->key_separator);
2319 Py_CLEAR(s->item_separator);
2320 Py_CLEAR(s->sort_keys);
2321 Py_CLEAR(s->skipkeys);
2322 return 0;
2323}
2324
2325PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2326
2327static
2328PyTypeObject PyEncoderType = {
2329 PyObject_HEAD_INIT(NULL)
2330 0, /* tp_internal */
2331 "_json.Encoder", /* tp_name */
2332 sizeof(PyEncoderObject), /* tp_basicsize */
2333 0, /* tp_itemsize */
2334 encoder_dealloc, /* tp_dealloc */
2335 0, /* tp_print */
2336 0, /* tp_getattr */
2337 0, /* tp_setattr */
2338 0, /* tp_compare */
2339 0, /* tp_repr */
2340 0, /* tp_as_number */
2341 0, /* tp_as_sequence */
2342 0, /* tp_as_mapping */
2343 0, /* tp_hash */
2344 encoder_call, /* tp_call */
2345 0, /* tp_str */
2346 0, /* tp_getattro */
2347 0, /* tp_setattro */
2348 0, /* tp_as_buffer */
2349 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2350 encoder_doc, /* tp_doc */
2351 encoder_traverse, /* tp_traverse */
2352 encoder_clear, /* tp_clear */
2353 0, /* tp_richcompare */
2354 0, /* tp_weaklistoffset */
2355 0, /* tp_iter */
2356 0, /* tp_iternext */
2357 0, /* tp_methods */
2358 encoder_members, /* tp_members */
2359 0, /* tp_getset */
2360 0, /* tp_base */
2361 0, /* tp_dict */
2362 0, /* tp_descr_get */
2363 0, /* tp_descr_set */
2364 0, /* tp_dictoffset */
2365 encoder_init, /* tp_init */
2366 0, /* tp_alloc */
2367 encoder_new, /* tp_new */
2368 0, /* tp_free */
2369};
2370
2371static PyMethodDef speedups_methods[] = {
2372 {"encode_basestring_ascii",
2373 (PyCFunction)py_encode_basestring_ascii,
2374 METH_O,
2375 pydoc_encode_basestring_ascii},
2376 {"scanstring",
2377 (PyCFunction)py_scanstring,
2378 METH_VARARGS,
2379 pydoc_scanstring},
Brett Cannon4b964f92008-05-05 20:21:38 +00002380 {NULL, NULL, 0, NULL}
2381};
2382
2383PyDoc_STRVAR(module_doc,
2384"json speedups\n");
2385
2386void
2387init_json(void)
2388{
2389 PyObject *m;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002390 PyScannerType.tp_new = PyType_GenericNew;
2391 if (PyType_Ready(&PyScannerType) < 0)
2392 return;
2393 PyEncoderType.tp_new = PyType_GenericNew;
2394 if (PyType_Ready(&PyEncoderType) < 0)
2395 return;
2396 m = Py_InitModule3("_json", speedups_methods, module_doc);
2397 Py_INCREF((PyObject*)&PyScannerType);
2398 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2399 Py_INCREF((PyObject*)&PyEncoderType);
2400 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
Brett Cannon4b964f92008-05-05 20:21:38 +00002401}