blob: 8d63edb2bf7a1997fc6128a8875a4dadf95330ba [file] [log] [blame]
Brett Cannon4b964f92008-05-05 20:21:38 +00001#include "Python.h"
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002#include "structmember.h"
/* Compatibility shims for interpreters that lack the APIs used below. */

/* Py_TYPE(): supplied here only when PY_VERSION_HEX is below 0x02060000
   and the macro is not already defined. */
#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif

/* Py_ssize_t and friends: fall back to plain int when PY_VERSION_HEX is
   below 0x02050000 and PY_SSIZE_T_MIN is not defined. */
#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
typedef int Py_ssize_t;
#define PY_SSIZE_T_MAX INT_MAX
#define PY_SSIZE_T_MIN INT_MIN
#define PyInt_FromSsize_t PyInt_FromLong
#define PyInt_AsSsize_t PyInt_AsLong
#endif

/* Py_IS_FINITE(): true when X is neither an infinity nor a NaN. */
#if !defined(Py_IS_FINITE)
#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
#endif

/* UNUSED marks a parameter as intentionally unused; it expands to nothing
   on compilers that do not define __GNUC__. */
#if defined(__GNUC__)
#define UNUSED __attribute__((__unused__))
#else
#define UNUSED
#endif
Brett Cannon4b964f92008-05-05 20:21:38 +000022
23#define DEFAULT_ENCODING "utf-8"
Brett Cannon4b964f92008-05-05 20:21:38 +000024
Bob Ippolitod914e3f2009-03-17 23:19:00 +000025#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
26#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
27#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
28#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
29
30static PyTypeObject PyScannerType;
31static PyTypeObject PyEncoderType;
32
33typedef struct _PyScannerObject {
34 PyObject_HEAD
35 PyObject *encoding;
36 PyObject *strict;
37 PyObject *object_hook;
Raymond Hettinger91852ca2009-03-19 19:19:03 +000038 PyObject *pairs_hook;
Bob Ippolitod914e3f2009-03-17 23:19:00 +000039 PyObject *parse_float;
40 PyObject *parse_int;
41 PyObject *parse_constant;
42} PyScannerObject;
43
44static PyMemberDef scanner_members[] = {
45 {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
46 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
47 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
Raymond Hettinger91852ca2009-03-19 19:19:03 +000048 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
Bob Ippolitod914e3f2009-03-17 23:19:00 +000049 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
50 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
51 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
52 {NULL}
53};
54
55typedef struct _PyEncoderObject {
56 PyObject_HEAD
57 PyObject *markers;
58 PyObject *defaultfn;
59 PyObject *encoder;
60 PyObject *indent;
61 PyObject *key_separator;
62 PyObject *item_separator;
63 PyObject *sort_keys;
64 PyObject *skipkeys;
65 int fast_encode;
66 int allow_nan;
67} PyEncoderObject;
68
69static PyMemberDef encoder_members[] = {
70 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
71 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
72 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
73 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
74 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
75 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
76 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
77 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
78 {NULL}
79};
80
81static Py_ssize_t
82ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
83static PyObject *
84ascii_escape_unicode(PyObject *pystr);
85static PyObject *
86ascii_escape_str(PyObject *pystr);
87static PyObject *
88py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
89void init_json(void);
90static PyObject *
91scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
92static PyObject *
93scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
94static PyObject *
95_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
96static PyObject *
97scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
98static int
99scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
100static void
101scanner_dealloc(PyObject *self);
102static int
103scanner_clear(PyObject *self);
104static PyObject *
105encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
106static int
107encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
108static void
109encoder_dealloc(PyObject *self);
110static int
111encoder_clear(PyObject *self);
112static int
113encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
114static int
115encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
116static int
117encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
118static PyObject *
Raymond Hettingera0b8d9a2009-03-19 19:24:43 +0000119_encoded_const(PyObject *obj);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000120static void
121raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
122static PyObject *
123encoder_encode_string(PyEncoderObject *s, PyObject *obj);
124static int
125_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
126static PyObject *
127_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
128static PyObject *
129encoder_encode_float(PyEncoderObject *s, PyObject *obj);
130
/* S_CHAR(c): true when c is printable ASCII (' ' .. '~') that can be copied
   into a JSON string unescaped, i.e. anything in that range except the
   backslash and the double quote.  The argument is parenthesized so that
   expressions such as `x & 0xff` are classified correctly; note that it is
   still evaluated more than once, so it must be free of side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')

/* IS_WHITESPACE(c): true for space, tab, line feed and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* Size in bytes of one "\uXXXX" escape. */
#define MIN_EXPANSION 6
/* Worst-case number of output bytes produced for a single input code unit:
   with wide Py_UNICODE a code point may need a surrogate pair, i.e. two
   "\uXXXX" escapes. */
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
140
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000141static int
142_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
143{
144 /* PyObject to Py_ssize_t converter */
145 *size_ptr = PyInt_AsSsize_t(o);
Georg Brandlf71ba952009-05-05 07:48:12 +0000146 if (*size_ptr == -1 && PyErr_Occurred())
147 return 0;
148 return 1;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000149}
150
151static PyObject *
152_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
153{
154 /* Py_ssize_t to PyObject converter */
155 return PyInt_FromSsize_t(*size_ptr);
156}
157
Brett Cannon4b964f92008-05-05 20:21:38 +0000158static Py_ssize_t
159ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
160{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000161 /* Escape unicode code point c to ASCII escape sequences
162 in char *output. output must have at least 12 bytes unused to
163 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Brett Cannon4b964f92008-05-05 20:21:38 +0000164 output[chars++] = '\\';
165 switch (c) {
166 case '\\': output[chars++] = (char)c; break;
167 case '"': output[chars++] = (char)c; break;
168 case '\b': output[chars++] = 'b'; break;
169 case '\f': output[chars++] = 'f'; break;
170 case '\n': output[chars++] = 'n'; break;
171 case '\r': output[chars++] = 'r'; break;
172 case '\t': output[chars++] = 't'; break;
173 default:
174#ifdef Py_UNICODE_WIDE
175 if (c >= 0x10000) {
176 /* UTF-16 surrogate pair */
177 Py_UNICODE v = c - 0x10000;
178 c = 0xd800 | ((v >> 10) & 0x3ff);
179 output[chars++] = 'u';
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000180 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
181 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
182 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
183 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Brett Cannon4b964f92008-05-05 20:21:38 +0000184 c = 0xdc00 | (v & 0x3ff);
185 output[chars++] = '\\';
186 }
187#endif
188 output[chars++] = 'u';
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000189 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
190 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
191 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
192 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Brett Cannon4b964f92008-05-05 20:21:38 +0000193 }
194 return chars;
195}
196
197static PyObject *
198ascii_escape_unicode(PyObject *pystr)
199{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000200 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
Brett Cannon4b964f92008-05-05 20:21:38 +0000201 Py_ssize_t i;
202 Py_ssize_t input_chars;
203 Py_ssize_t output_size;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000204 Py_ssize_t max_output_size;
Brett Cannon4b964f92008-05-05 20:21:38 +0000205 Py_ssize_t chars;
206 PyObject *rval;
207 char *output;
208 Py_UNICODE *input_unicode;
209
210 input_chars = PyUnicode_GET_SIZE(pystr);
211 input_unicode = PyUnicode_AS_UNICODE(pystr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000212
Brett Cannon4b964f92008-05-05 20:21:38 +0000213 /* One char input can be up to 6 chars output, estimate 4 of these */
214 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000215 max_output_size = 2 + (input_chars * MAX_EXPANSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000216 rval = PyString_FromStringAndSize(NULL, output_size);
Brett Cannon4b964f92008-05-05 20:21:38 +0000217 if (rval == NULL) {
218 return NULL;
219 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000220 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000221 chars = 0;
222 output[chars++] = '"';
223 for (i = 0; i < input_chars; i++) {
224 Py_UNICODE c = input_unicode[i];
225 if (S_CHAR(c)) {
226 output[chars++] = (char)c;
227 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000228 else {
Brett Cannon4b964f92008-05-05 20:21:38 +0000229 chars = ascii_escape_char(c, output, chars);
230 }
231 if (output_size - chars < (1 + MAX_EXPANSION)) {
232 /* There's more than four, so let's resize by a lot */
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000233 Py_ssize_t new_output_size = output_size * 2;
Brett Cannon4b964f92008-05-05 20:21:38 +0000234 /* This is an upper bound */
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000235 if (new_output_size > max_output_size) {
236 new_output_size = max_output_size;
Brett Cannon4b964f92008-05-05 20:21:38 +0000237 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000238 /* Make sure that the output size changed before resizing */
239 if (new_output_size != output_size) {
240 output_size = new_output_size;
241 if (_PyString_Resize(&rval, output_size) == -1) {
242 return NULL;
243 }
244 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000245 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000246 }
247 }
248 output[chars++] = '"';
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000249 if (_PyString_Resize(&rval, chars) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000250 return NULL;
251 }
252 return rval;
253}
254
255static PyObject *
256ascii_escape_str(PyObject *pystr)
257{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000258 /* Take a PyString pystr and return a new ASCII-only escaped PyString */
Brett Cannon4b964f92008-05-05 20:21:38 +0000259 Py_ssize_t i;
260 Py_ssize_t input_chars;
261 Py_ssize_t output_size;
262 Py_ssize_t chars;
263 PyObject *rval;
264 char *output;
265 char *input_str;
266
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000267 input_chars = PyString_GET_SIZE(pystr);
268 input_str = PyString_AS_STRING(pystr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000269
270 /* Fast path for a string that's already ASCII */
271 for (i = 0; i < input_chars; i++) {
272 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
273 if (!S_CHAR(c)) {
274 /* If we have to escape something, scan the string for unicode */
275 Py_ssize_t j;
276 for (j = i; j < input_chars; j++) {
277 c = (Py_UNICODE)(unsigned char)input_str[j];
278 if (c > 0x7f) {
279 /* We hit a non-ASCII character, bail to unicode mode */
280 PyObject *uni;
281 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
282 if (uni == NULL) {
283 return NULL;
284 }
285 rval = ascii_escape_unicode(uni);
286 Py_DECREF(uni);
287 return rval;
288 }
289 }
290 break;
291 }
292 }
293
294 if (i == input_chars) {
295 /* Input is already ASCII */
296 output_size = 2 + input_chars;
297 }
298 else {
299 /* One char input can be up to 6 chars output, estimate 4 of these */
300 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
301 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000302 rval = PyString_FromStringAndSize(NULL, output_size);
Brett Cannon4b964f92008-05-05 20:21:38 +0000303 if (rval == NULL) {
304 return NULL;
305 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000306 output = PyString_AS_STRING(rval);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000307 output[0] = '"';
308
309 /* We know that everything up to i is ASCII already */
310 chars = i + 1;
311 memcpy(&output[1], input_str, i);
312
313 for (; i < input_chars; i++) {
314 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
Brett Cannon4b964f92008-05-05 20:21:38 +0000315 if (S_CHAR(c)) {
316 output[chars++] = (char)c;
317 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000318 else {
Brett Cannon4b964f92008-05-05 20:21:38 +0000319 chars = ascii_escape_char(c, output, chars);
320 }
321 /* An ASCII char can't possibly expand to a surrogate! */
322 if (output_size - chars < (1 + MIN_EXPANSION)) {
323 /* There's more than four, so let's resize by a lot */
324 output_size *= 2;
325 if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
326 output_size = 2 + (input_chars * MIN_EXPANSION);
327 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000328 if (_PyString_Resize(&rval, output_size) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000329 return NULL;
330 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000331 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000332 }
333 }
334 output[chars++] = '"';
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000335 if (_PyString_Resize(&rval, chars) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000336 return NULL;
337 }
338 return rval;
339}
340
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000341static void
Brett Cannon4b964f92008-05-05 20:21:38 +0000342raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
343{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000344 /* Use the Python function json.decoder.errmsg to raise a nice
345 looking ValueError exception */
Brett Cannon4b964f92008-05-05 20:21:38 +0000346 static PyObject *errmsg_fn = NULL;
347 PyObject *pymsg;
348 if (errmsg_fn == NULL) {
349 PyObject *decoder = PyImport_ImportModule("json.decoder");
350 if (decoder == NULL)
351 return;
352 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000353 Py_DECREF(decoder);
Brett Cannon4b964f92008-05-05 20:21:38 +0000354 if (errmsg_fn == NULL)
355 return;
Brett Cannon4b964f92008-05-05 20:21:38 +0000356 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000357 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
Benjamin Peterson595e3cb2008-10-16 21:09:28 +0000358 if (pymsg) {
359 PyErr_SetObject(PyExc_ValueError, pymsg);
360 Py_DECREF(pymsg);
361 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000362}
363
364static PyObject *
365join_list_unicode(PyObject *lst)
366{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000367 /* return u''.join(lst) */
368 static PyObject *joinfn = NULL;
369 if (joinfn == NULL) {
370 PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
371 if (ustr == NULL)
372 return NULL;
373
374 joinfn = PyObject_GetAttrString(ustr, "join");
375 Py_DECREF(ustr);
376 if (joinfn == NULL)
377 return NULL;
Brett Cannon4b964f92008-05-05 20:21:38 +0000378 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000379 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
Brett Cannon4b964f92008-05-05 20:21:38 +0000380}
381
382static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000383join_list_string(PyObject *lst)
Brett Cannon4b964f92008-05-05 20:21:38 +0000384{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000385 /* return ''.join(lst) */
386 static PyObject *joinfn = NULL;
387 if (joinfn == NULL) {
388 PyObject *ustr = PyString_FromStringAndSize(NULL, 0);
389 if (ustr == NULL)
390 return NULL;
391
392 joinfn = PyObject_GetAttrString(ustr, "join");
393 Py_DECREF(ustr);
394 if (joinfn == NULL)
395 return NULL;
396 }
397 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
398}
399
400static PyObject *
401_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
402 /* return (rval, idx) tuple, stealing reference to rval */
403 PyObject *tpl;
404 PyObject *pyidx;
405 /*
406 steal a reference to rval, returns (rval, idx)
407 */
408 if (rval == NULL) {
409 return NULL;
410 }
411 pyidx = PyInt_FromSsize_t(idx);
412 if (pyidx == NULL) {
413 Py_DECREF(rval);
414 return NULL;
415 }
416 tpl = PyTuple_New(2);
417 if (tpl == NULL) {
418 Py_DECREF(pyidx);
419 Py_DECREF(rval);
420 return NULL;
421 }
422 PyTuple_SET_ITEM(tpl, 0, rval);
423 PyTuple_SET_ITEM(tpl, 1, pyidx);
424 return tpl;
425}
426
427static PyObject *
428scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
429{
430 /* Read the JSON string from PyString pystr.
431 end is the index of the first character after the quote.
432 encoding is the encoding of pystr (must be an ASCII superset)
433 if strict is zero then literal control characters are allowed
434 *next_end_ptr is a return-by-reference index of the character
435 after the end quote
436
437 Return value is a new PyString (if ASCII-only) or PyUnicode
438 */
Brett Cannon4b964f92008-05-05 20:21:38 +0000439 PyObject *rval;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000440 Py_ssize_t len = PyString_GET_SIZE(pystr);
Brett Cannon4b964f92008-05-05 20:21:38 +0000441 Py_ssize_t begin = end - 1;
Brett Cannon8e9757e2010-05-03 23:43:49 +0000442 Py_ssize_t next;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000443 char *buf = PyString_AS_STRING(pystr);
Brett Cannon4b964f92008-05-05 20:21:38 +0000444 PyObject *chunks = PyList_New(0);
445 if (chunks == NULL) {
446 goto bail;
447 }
Bob Ippolitod648f642008-07-19 21:59:50 +0000448 if (end < 0 || len <= end) {
449 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
450 goto bail;
451 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000452 while (1) {
453 /* Find the end of the string or the next escape */
454 Py_UNICODE c = 0;
455 PyObject *chunk = NULL;
456 for (next = end; next < len; next++) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000457 c = (unsigned char)buf[next];
Brett Cannon4b964f92008-05-05 20:21:38 +0000458 if (c == '"' || c == '\\') {
459 break;
460 }
461 else if (strict && c <= 0x1f) {
Bob Ippolitod648f642008-07-19 21:59:50 +0000462 raise_errmsg("Invalid control character at", pystr, next);
Brett Cannon4b964f92008-05-05 20:21:38 +0000463 goto bail;
464 }
465 }
466 if (!(c == '"' || c == '\\')) {
467 raise_errmsg("Unterminated string starting at", pystr, begin);
468 goto bail;
469 }
470 /* Pick up this chunk if it's not zero length */
471 if (next != end) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000472 PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000473 if (strchunk == NULL) {
474 goto bail;
475 }
Barry Warsawfa658272010-11-02 21:03:09 +0000476 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
477 Py_DECREF(strchunk);
478 if (chunk == NULL) {
479 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000480 }
481 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000482 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000483 goto bail;
484 }
485 Py_DECREF(chunk);
486 }
487 next++;
488 if (c == '"') {
489 end = next;
490 break;
491 }
492 if (next == len) {
493 raise_errmsg("Unterminated string starting at", pystr, begin);
494 goto bail;
495 }
496 c = buf[next];
497 if (c != 'u') {
498 /* Non-unicode backslash escapes */
499 end = next + 1;
500 switch (c) {
501 case '"': break;
502 case '\\': break;
503 case '/': break;
504 case 'b': c = '\b'; break;
505 case 'f': c = '\f'; break;
506 case 'n': c = '\n'; break;
507 case 'r': c = '\r'; break;
508 case 't': c = '\t'; break;
509 default: c = 0;
510 }
511 if (c == 0) {
512 raise_errmsg("Invalid \\escape", pystr, end - 2);
513 goto bail;
514 }
515 }
516 else {
517 c = 0;
518 next++;
519 end = next + 4;
520 if (end >= len) {
521 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
522 goto bail;
523 }
524 /* Decode 4 hex digits */
525 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000526 Py_UNICODE digit = buf[next];
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000527 c <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000528 switch (digit) {
529 case '0': case '1': case '2': case '3': case '4':
530 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000531 c |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000532 case 'a': case 'b': case 'c': case 'd': case 'e':
533 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000534 c |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000535 case 'A': case 'B': case 'C': case 'D': case 'E':
536 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000537 c |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000538 default:
539 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
540 goto bail;
541 }
542 }
543#ifdef Py_UNICODE_WIDE
544 /* Surrogate pair */
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000545 if ((c & 0xfc00) == 0xd800) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000546 Py_UNICODE c2 = 0;
547 if (end + 6 >= len) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000548 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
549 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000550 }
551 if (buf[next++] != '\\' || buf[next++] != 'u') {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000552 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
553 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000554 }
555 end += 6;
556 /* Decode 4 hex digits */
557 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000558 Py_UNICODE digit = buf[next];
Antoine Pitrou22ad2452010-10-09 15:28:59 +0000559 c2 <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000560 switch (digit) {
561 case '0': case '1': case '2': case '3': case '4':
562 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000563 c2 |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000564 case 'a': case 'b': case 'c': case 'd': case 'e':
565 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000566 c2 |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000567 case 'A': case 'B': case 'C': case 'D': case 'E':
568 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000569 c2 |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000570 default:
571 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
572 goto bail;
573 }
574 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000575 if ((c2 & 0xfc00) != 0xdc00) {
576 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
577 goto bail;
578 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000579 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
580 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000581 else if ((c & 0xfc00) == 0xdc00) {
582 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
583 goto bail;
584 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000585#endif
586 }
Barry Warsawfa658272010-11-02 21:03:09 +0000587 chunk = PyUnicode_FromUnicode(&c, 1);
588 if (chunk == NULL) {
589 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000590 }
591 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000592 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000593 goto bail;
594 }
595 Py_DECREF(chunk);
596 }
597
Ezio Melottidf8a8f72011-05-04 14:40:53 +0300598 rval = join_list_unicode(chunks);
Brett Cannon4b964f92008-05-05 20:21:38 +0000599 if (rval == NULL) {
600 goto bail;
601 }
Benjamin Peterson336680e2008-10-16 21:48:06 +0000602 Py_CLEAR(chunks);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000603 *next_end_ptr = end;
604 return rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000605bail:
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000606 *next_end_ptr = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000607 Py_XDECREF(chunks);
608 return NULL;
609}
610
611
612static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000613scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Brett Cannon4b964f92008-05-05 20:21:38 +0000614{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000615 /* Read the JSON string from PyUnicode pystr.
616 end is the index of the first character after the quote.
617 if strict is zero then literal control characters are allowed
618 *next_end_ptr is a return-by-reference index of the character
619 after the end quote
620
621 Return value is a new PyUnicode
622 */
Brett Cannon4b964f92008-05-05 20:21:38 +0000623 PyObject *rval;
624 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
625 Py_ssize_t begin = end - 1;
Brett Cannon8e9757e2010-05-03 23:43:49 +0000626 Py_ssize_t next;
Brett Cannon4b964f92008-05-05 20:21:38 +0000627 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
628 PyObject *chunks = PyList_New(0);
629 if (chunks == NULL) {
630 goto bail;
631 }
Bob Ippolitod648f642008-07-19 21:59:50 +0000632 if (end < 0 || len <= end) {
633 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
634 goto bail;
635 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000636 while (1) {
637 /* Find the end of the string or the next escape */
638 Py_UNICODE c = 0;
639 PyObject *chunk = NULL;
640 for (next = end; next < len; next++) {
641 c = buf[next];
642 if (c == '"' || c == '\\') {
643 break;
644 }
645 else if (strict && c <= 0x1f) {
Bob Ippolitod648f642008-07-19 21:59:50 +0000646 raise_errmsg("Invalid control character at", pystr, next);
Brett Cannon4b964f92008-05-05 20:21:38 +0000647 goto bail;
648 }
649 }
650 if (!(c == '"' || c == '\\')) {
651 raise_errmsg("Unterminated string starting at", pystr, begin);
652 goto bail;
653 }
654 /* Pick up this chunk if it's not zero length */
655 if (next != end) {
656 chunk = PyUnicode_FromUnicode(&buf[end], next - end);
657 if (chunk == NULL) {
658 goto bail;
659 }
660 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson87e6ad22008-10-16 21:27:54 +0000661 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000662 goto bail;
663 }
664 Py_DECREF(chunk);
665 }
666 next++;
667 if (c == '"') {
668 end = next;
669 break;
670 }
671 if (next == len) {
672 raise_errmsg("Unterminated string starting at", pystr, begin);
673 goto bail;
674 }
675 c = buf[next];
676 if (c != 'u') {
677 /* Non-unicode backslash escapes */
678 end = next + 1;
679 switch (c) {
680 case '"': break;
681 case '\\': break;
682 case '/': break;
683 case 'b': c = '\b'; break;
684 case 'f': c = '\f'; break;
685 case 'n': c = '\n'; break;
686 case 'r': c = '\r'; break;
687 case 't': c = '\t'; break;
688 default: c = 0;
689 }
690 if (c == 0) {
691 raise_errmsg("Invalid \\escape", pystr, end - 2);
692 goto bail;
693 }
694 }
695 else {
696 c = 0;
697 next++;
698 end = next + 4;
699 if (end >= len) {
700 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
701 goto bail;
702 }
703 /* Decode 4 hex digits */
704 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000705 Py_UNICODE digit = buf[next];
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000706 c <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000707 switch (digit) {
708 case '0': case '1': case '2': case '3': case '4':
709 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000710 c |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000711 case 'a': case 'b': case 'c': case 'd': case 'e':
712 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000713 c |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000714 case 'A': case 'B': case 'C': case 'D': case 'E':
715 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000716 c |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000717 default:
718 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
719 goto bail;
720 }
721 }
722#ifdef Py_UNICODE_WIDE
723 /* Surrogate pair */
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000724 if ((c & 0xfc00) == 0xd800) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000725 Py_UNICODE c2 = 0;
726 if (end + 6 >= len) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000727 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
728 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000729 }
730 if (buf[next++] != '\\' || buf[next++] != 'u') {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000731 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
732 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000733 }
734 end += 6;
735 /* Decode 4 hex digits */
736 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000737 Py_UNICODE digit = buf[next];
Antoine Pitrou22ad2452010-10-09 15:28:59 +0000738 c2 <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000739 switch (digit) {
740 case '0': case '1': case '2': case '3': case '4':
741 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000742 c2 |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000743 case 'a': case 'b': case 'c': case 'd': case 'e':
744 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000745 c2 |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000746 case 'A': case 'B': case 'C': case 'D': case 'E':
747 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000748 c2 |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000749 default:
750 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
751 goto bail;
752 }
753 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000754 if ((c2 & 0xfc00) != 0xdc00) {
755 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
756 goto bail;
757 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000758 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
759 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000760 else if ((c & 0xfc00) == 0xdc00) {
761 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
762 goto bail;
763 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000764#endif
765 }
766 chunk = PyUnicode_FromUnicode(&c, 1);
767 if (chunk == NULL) {
768 goto bail;
769 }
770 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000771 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000772 goto bail;
773 }
774 Py_DECREF(chunk);
775 }
776
777 rval = join_list_unicode(chunks);
778 if (rval == NULL) {
779 goto bail;
780 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000781 Py_DECREF(chunks);
782 *next_end_ptr = end;
783 return rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000784bail:
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000785 *next_end_ptr = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000786 Py_XDECREF(chunks);
787 return NULL;
788}
789
790PyDoc_STRVAR(pydoc_scanstring,
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000791 "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
792 "\n"
793 "Scan the string s for a JSON string. End is the index of the\n"
794 "character in s after the quote that started the JSON string.\n"
795 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
796 "on attempt to decode an invalid string. If strict is False then literal\n"
797 "control characters are allowed in the string.\n"
798 "\n"
799 "Returns a tuple of the decoded string and the index of the character in s\n"
800 "after the end quote."
801);
Brett Cannon4b964f92008-05-05 20:21:38 +0000802
803static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000804py_scanstring(PyObject* self UNUSED, PyObject *args)
Brett Cannon4b964f92008-05-05 20:21:38 +0000805{
806 PyObject *pystr;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000807 PyObject *rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000808 Py_ssize_t end;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000809 Py_ssize_t next_end = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000810 char *encoding = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000811 int strict = 1;
812 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000813 return NULL;
814 }
815 if (encoding == NULL) {
816 encoding = DEFAULT_ENCODING;
817 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000818 if (PyString_Check(pystr)) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000819 rval = scanstring_str(pystr, end, encoding, strict, &next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000820 }
821 else if (PyUnicode_Check(pystr)) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000822 rval = scanstring_unicode(pystr, end, strict, &next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000823 }
824 else {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000825 PyErr_Format(PyExc_TypeError,
826 "first argument must be a string, not %.80s",
Brett Cannon4b964f92008-05-05 20:21:38 +0000827 Py_TYPE(pystr)->tp_name);
828 return NULL;
829 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000830 return _build_rval_index_tuple(rval, next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000831}
832
833PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000834 "encode_basestring_ascii(basestring) -> str\n"
835 "\n"
836 "Return an ASCII-only JSON representation of a Python string"
837);
Brett Cannon4b964f92008-05-05 20:21:38 +0000838
839static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000840py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Brett Cannon4b964f92008-05-05 20:21:38 +0000841{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000842 /* Return an ASCII-only JSON representation of a Python string */
Brett Cannon4b964f92008-05-05 20:21:38 +0000843 /* METH_O */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000844 if (PyString_Check(pystr)) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000845 return ascii_escape_str(pystr);
846 }
847 else if (PyUnicode_Check(pystr)) {
848 return ascii_escape_unicode(pystr);
849 }
850 else {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000851 PyErr_Format(PyExc_TypeError,
852 "first argument must be a string, not %.80s",
Brett Cannon4b964f92008-05-05 20:21:38 +0000853 Py_TYPE(pystr)->tp_name);
854 return NULL;
855 }
856}
857
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000858static void
859scanner_dealloc(PyObject *self)
860{
861 /* Deallocate scanner object */
862 scanner_clear(self);
863 Py_TYPE(self)->tp_free(self);
864}
865
866static int
867scanner_traverse(PyObject *self, visitproc visit, void *arg)
868{
869 PyScannerObject *s;
870 assert(PyScanner_Check(self));
871 s = (PyScannerObject *)self;
872 Py_VISIT(s->encoding);
873 Py_VISIT(s->strict);
874 Py_VISIT(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000875 Py_VISIT(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000876 Py_VISIT(s->parse_float);
877 Py_VISIT(s->parse_int);
878 Py_VISIT(s->parse_constant);
879 return 0;
880}
881
882static int
883scanner_clear(PyObject *self)
884{
885 PyScannerObject *s;
886 assert(PyScanner_Check(self));
887 s = (PyScannerObject *)self;
888 Py_CLEAR(s->encoding);
889 Py_CLEAR(s->strict);
890 Py_CLEAR(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000891 Py_CLEAR(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000892 Py_CLEAR(s->parse_float);
893 Py_CLEAR(s->parse_int);
894 Py_CLEAR(s->parse_constant);
895 return 0;
896}
897
898static PyObject *
899_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
900 /* Read a JSON object from PyString pystr.
901 idx is the index of the first character after the opening curly brace.
902 *next_idx_ptr is a return-by-reference index to the first character after
903 the closing curly brace.
904
905 Returns a new PyObject (usually a dict, but object_hook can change that)
906 */
907 char *str = PyString_AS_STRING(pystr);
908 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000909 PyObject *rval;
910 PyObject *pairs;
911 PyObject *item;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000912 PyObject *key = NULL;
913 PyObject *val = NULL;
914 char *encoding = PyString_AS_STRING(s->encoding);
915 int strict = PyObject_IsTrue(s->strict);
916 Py_ssize_t next_idx;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000917
918 pairs = PyList_New(0);
919 if (pairs == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000920 return NULL;
921
922 /* skip whitespace after { */
923 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
924
925 /* only loop if the object is non-empty */
926 if (idx <= end_idx && str[idx] != '}') {
927 while (idx <= end_idx) {
928 /* read key */
929 if (str[idx] != '"') {
930 raise_errmsg("Expecting property name", pystr, idx);
931 goto bail;
932 }
933 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
934 if (key == NULL)
935 goto bail;
936 idx = next_idx;
937
938 /* skip whitespace between key and : delimiter, read :, skip whitespace */
939 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
940 if (idx > end_idx || str[idx] != ':') {
941 raise_errmsg("Expecting : delimiter", pystr, idx);
942 goto bail;
943 }
944 idx++;
945 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
946
947 /* read any JSON data type */
948 val = scan_once_str(s, pystr, idx, &next_idx);
949 if (val == NULL)
950 goto bail;
951
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000952 item = PyTuple_Pack(2, key, val);
953 if (item == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000954 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000955 Py_CLEAR(key);
956 Py_CLEAR(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000957 if (PyList_Append(pairs, item) == -1) {
958 Py_DECREF(item);
959 goto bail;
960 }
961 Py_DECREF(item);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000962 idx = next_idx;
963
964 /* skip whitespace before } or , */
965 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
966
967 /* bail if the object is closed or we didn't get the , delimiter */
968 if (idx > end_idx) break;
969 if (str[idx] == '}') {
970 break;
971 }
972 else if (str[idx] != ',') {
973 raise_errmsg("Expecting , delimiter", pystr, idx);
974 goto bail;
975 }
976 idx++;
977
978 /* skip whitespace after , delimiter */
979 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
980 }
981 }
982 /* verify that idx < end_idx, str[idx] should be '}' */
983 if (idx > end_idx || str[idx] != '}') {
984 raise_errmsg("Expecting object", pystr, end_idx);
985 goto bail;
986 }
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000987
988 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
989 if (s->pairs_hook != Py_None) {
990 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
991 if (val == NULL)
992 goto bail;
993 Py_DECREF(pairs);
994 *next_idx_ptr = idx + 1;
995 return val;
996 }
997
998 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
999 pairs, NULL);
1000 if (rval == NULL)
1001 goto bail;
1002 Py_CLEAR(pairs);
1003
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001004 /* if object_hook is not None: rval = object_hook(rval) */
1005 if (s->object_hook != Py_None) {
1006 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1007 if (val == NULL)
1008 goto bail;
1009 Py_DECREF(rval);
1010 rval = val;
1011 val = NULL;
1012 }
1013 *next_idx_ptr = idx + 1;
1014 return rval;
1015bail:
1016 Py_XDECREF(key);
1017 Py_XDECREF(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001018 Py_XDECREF(pairs);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001019 return NULL;
1020}
1021
1022static PyObject *
1023_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1024 /* Read a JSON object from PyUnicode pystr.
1025 idx is the index of the first character after the opening curly brace.
1026 *next_idx_ptr is a return-by-reference index to the first character after
1027 the closing curly brace.
1028
1029 Returns a new PyObject (usually a dict, but object_hook can change that)
1030 */
1031 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1032 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001033 PyObject *rval;
1034 PyObject *pairs;
1035 PyObject *item;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001036 PyObject *key = NULL;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001037 PyObject *val = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001038 int strict = PyObject_IsTrue(s->strict);
1039 Py_ssize_t next_idx;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001040
1041 pairs = PyList_New(0);
1042 if (pairs == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001043 return NULL;
1044
1045 /* skip whitespace after { */
1046 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1047
1048 /* only loop if the object is non-empty */
1049 if (idx <= end_idx && str[idx] != '}') {
1050 while (idx <= end_idx) {
1051 /* read key */
1052 if (str[idx] != '"') {
1053 raise_errmsg("Expecting property name", pystr, idx);
1054 goto bail;
1055 }
1056 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1057 if (key == NULL)
1058 goto bail;
1059 idx = next_idx;
1060
1061 /* skip whitespace between key and : delimiter, read :, skip whitespace */
1062 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1063 if (idx > end_idx || str[idx] != ':') {
1064 raise_errmsg("Expecting : delimiter", pystr, idx);
1065 goto bail;
1066 }
1067 idx++;
1068 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1069
1070 /* read any JSON term */
1071 val = scan_once_unicode(s, pystr, idx, &next_idx);
1072 if (val == NULL)
1073 goto bail;
1074
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001075 item = PyTuple_Pack(2, key, val);
1076 if (item == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001077 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001078 Py_CLEAR(key);
1079 Py_CLEAR(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001080 if (PyList_Append(pairs, item) == -1) {
1081 Py_DECREF(item);
1082 goto bail;
1083 }
1084 Py_DECREF(item);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001085 idx = next_idx;
1086
1087 /* skip whitespace before } or , */
1088 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1089
1090 /* bail if the object is closed or we didn't get the , delimiter */
1091 if (idx > end_idx) break;
1092 if (str[idx] == '}') {
1093 break;
1094 }
1095 else if (str[idx] != ',') {
1096 raise_errmsg("Expecting , delimiter", pystr, idx);
1097 goto bail;
1098 }
1099 idx++;
1100
1101 /* skip whitespace after , delimiter */
1102 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1103 }
1104 }
1105
1106 /* verify that idx < end_idx, str[idx] should be '}' */
1107 if (idx > end_idx || str[idx] != '}') {
1108 raise_errmsg("Expecting object", pystr, end_idx);
1109 goto bail;
1110 }
1111
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001112 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1113 if (s->pairs_hook != Py_None) {
1114 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1115 if (val == NULL)
1116 goto bail;
1117 Py_DECREF(pairs);
1118 *next_idx_ptr = idx + 1;
1119 return val;
1120 }
1121
1122 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
1123 pairs, NULL);
1124 if (rval == NULL)
1125 goto bail;
1126 Py_CLEAR(pairs);
1127
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001128 /* if object_hook is not None: rval = object_hook(rval) */
1129 if (s->object_hook != Py_None) {
1130 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1131 if (val == NULL)
1132 goto bail;
1133 Py_DECREF(rval);
1134 rval = val;
1135 val = NULL;
1136 }
1137 *next_idx_ptr = idx + 1;
1138 return rval;
1139bail:
1140 Py_XDECREF(key);
1141 Py_XDECREF(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001142 Py_XDECREF(pairs);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001143 return NULL;
1144}
1145
1146static PyObject *
1147_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1148 /* Read a JSON array from PyString pystr.
1149 idx is the index of the first character after the opening brace.
1150 *next_idx_ptr is a return-by-reference index to the first character after
1151 the closing brace.
1152
1153 Returns a new PyList
1154 */
1155 char *str = PyString_AS_STRING(pystr);
1156 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1157 PyObject *val = NULL;
1158 PyObject *rval = PyList_New(0);
1159 Py_ssize_t next_idx;
1160 if (rval == NULL)
1161 return NULL;
1162
1163 /* skip whitespace after [ */
1164 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1165
1166 /* only loop if the array is non-empty */
1167 if (idx <= end_idx && str[idx] != ']') {
1168 while (idx <= end_idx) {
1169
1170 /* read any JSON term and de-tuplefy the (rval, idx) */
1171 val = scan_once_str(s, pystr, idx, &next_idx);
1172 if (val == NULL)
1173 goto bail;
1174
1175 if (PyList_Append(rval, val) == -1)
1176 goto bail;
1177
1178 Py_CLEAR(val);
1179 idx = next_idx;
1180
1181 /* skip whitespace between term and , */
1182 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1183
1184 /* bail if the array is closed or we didn't get the , delimiter */
1185 if (idx > end_idx) break;
1186 if (str[idx] == ']') {
1187 break;
1188 }
1189 else if (str[idx] != ',') {
1190 raise_errmsg("Expecting , delimiter", pystr, idx);
1191 goto bail;
1192 }
1193 idx++;
1194
1195 /* skip whitespace after , */
1196 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1197 }
1198 }
1199
1200 /* verify that idx < end_idx, str[idx] should be ']' */
1201 if (idx > end_idx || str[idx] != ']') {
1202 raise_errmsg("Expecting object", pystr, end_idx);
1203 goto bail;
1204 }
1205 *next_idx_ptr = idx + 1;
1206 return rval;
1207bail:
1208 Py_XDECREF(val);
1209 Py_DECREF(rval);
1210 return NULL;
1211}
1212
1213static PyObject *
1214_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1215 /* Read a JSON array from PyString pystr.
1216 idx is the index of the first character after the opening brace.
1217 *next_idx_ptr is a return-by-reference index to the first character after
1218 the closing brace.
1219
1220 Returns a new PyList
1221 */
1222 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1223 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1224 PyObject *val = NULL;
1225 PyObject *rval = PyList_New(0);
1226 Py_ssize_t next_idx;
1227 if (rval == NULL)
1228 return NULL;
1229
1230 /* skip whitespace after [ */
1231 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1232
1233 /* only loop if the array is non-empty */
1234 if (idx <= end_idx && str[idx] != ']') {
1235 while (idx <= end_idx) {
1236
1237 /* read any JSON term */
1238 val = scan_once_unicode(s, pystr, idx, &next_idx);
1239 if (val == NULL)
1240 goto bail;
1241
1242 if (PyList_Append(rval, val) == -1)
1243 goto bail;
1244
1245 Py_CLEAR(val);
1246 idx = next_idx;
1247
1248 /* skip whitespace between term and , */
1249 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1250
1251 /* bail if the array is closed or we didn't get the , delimiter */
1252 if (idx > end_idx) break;
1253 if (str[idx] == ']') {
1254 break;
1255 }
1256 else if (str[idx] != ',') {
1257 raise_errmsg("Expecting , delimiter", pystr, idx);
1258 goto bail;
1259 }
1260 idx++;
1261
1262 /* skip whitespace after , */
1263 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1264 }
1265 }
1266
1267 /* verify that idx < end_idx, str[idx] should be ']' */
1268 if (idx > end_idx || str[idx] != ']') {
1269 raise_errmsg("Expecting object", pystr, end_idx);
1270 goto bail;
1271 }
1272 *next_idx_ptr = idx + 1;
1273 return rval;
1274bail:
1275 Py_XDECREF(val);
1276 Py_DECREF(rval);
1277 return NULL;
1278}
1279
1280static PyObject *
1281_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1282 /* Read a JSON constant from PyString pystr.
1283 constant is the constant string that was found
1284 ("NaN", "Infinity", "-Infinity").
1285 idx is the index of the first character of the constant
1286 *next_idx_ptr is a return-by-reference index to the first character after
1287 the constant.
1288
1289 Returns the result of parse_constant
1290 */
1291 PyObject *cstr;
1292 PyObject *rval;
1293 /* constant is "NaN", "Infinity", or "-Infinity" */
1294 cstr = PyString_InternFromString(constant);
1295 if (cstr == NULL)
1296 return NULL;
1297
1298 /* rval = parse_constant(constant) */
1299 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1300 idx += PyString_GET_SIZE(cstr);
1301 Py_DECREF(cstr);
1302 *next_idx_ptr = idx;
1303 return rval;
1304}
1305
1306static PyObject *
1307_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1308 /* Read a JSON number from PyString pystr.
1309 idx is the index of the first character of the number
1310 *next_idx_ptr is a return-by-reference index to the first character after
1311 the number.
1312
1313 Returns a new PyObject representation of that number:
1314 PyInt, PyLong, or PyFloat.
1315 May return other types if parse_int or parse_float are set
1316 */
1317 char *str = PyString_AS_STRING(pystr);
1318 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1319 Py_ssize_t idx = start;
1320 int is_float = 0;
1321 PyObject *rval;
1322 PyObject *numstr;
1323
1324 /* read a sign if it's there, make sure it's not the end of the string */
1325 if (str[idx] == '-') {
1326 idx++;
1327 if (idx > end_idx) {
1328 PyErr_SetNone(PyExc_StopIteration);
1329 return NULL;
1330 }
1331 }
1332
1333 /* read as many integer digits as we find as long as it doesn't start with 0 */
1334 if (str[idx] >= '1' && str[idx] <= '9') {
1335 idx++;
1336 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1337 }
1338 /* if it starts with 0 we only expect one integer digit */
1339 else if (str[idx] == '0') {
1340 idx++;
1341 }
1342 /* no integer digits, error */
1343 else {
1344 PyErr_SetNone(PyExc_StopIteration);
1345 return NULL;
1346 }
1347
1348 /* if the next char is '.' followed by a digit then read all float digits */
1349 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1350 is_float = 1;
1351 idx += 2;
1352 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1353 }
1354
1355 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1356 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1357
1358 /* save the index of the 'e' or 'E' just in case we need to backtrack */
1359 Py_ssize_t e_start = idx;
1360 idx++;
1361
1362 /* read an exponent sign if present */
1363 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1364
1365 /* read all digits */
1366 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1367
1368 /* if we got a digit, then parse as float. if not, backtrack */
1369 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1370 is_float = 1;
1371 }
1372 else {
1373 idx = e_start;
1374 }
1375 }
1376
1377 /* copy the section we determined to be a number */
1378 numstr = PyString_FromStringAndSize(&str[start], idx - start);
1379 if (numstr == NULL)
1380 return NULL;
1381 if (is_float) {
1382 /* parse as a float using a fast path if available, otherwise call user defined method */
1383 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1384 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1385 }
1386 else {
Eric Smith129c97d2009-10-28 08:44:37 +00001387 double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1388 NULL, NULL);
1389 if (d == -1.0 && PyErr_Occurred())
1390 return NULL;
1391 rval = PyFloat_FromDouble(d);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001392 }
1393 }
1394 else {
1395 /* parse as an int using a fast path if available, otherwise call user defined method */
1396 if (s->parse_int != (PyObject *)&PyInt_Type) {
1397 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1398 }
1399 else {
1400 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1401 }
1402 }
1403 Py_DECREF(numstr);
1404 *next_idx_ptr = idx;
1405 return rval;
1406}
1407
1408static PyObject *
1409_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1410 /* Read a JSON number from PyUnicode pystr.
1411 idx is the index of the first character of the number
1412 *next_idx_ptr is a return-by-reference index to the first character after
1413 the number.
1414
1415 Returns a new PyObject representation of that number:
1416 PyInt, PyLong, or PyFloat.
1417 May return other types if parse_int or parse_float are set
1418 */
1419 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1420 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1421 Py_ssize_t idx = start;
1422 int is_float = 0;
1423 PyObject *rval;
1424 PyObject *numstr;
1425
1426 /* read a sign if it's there, make sure it's not the end of the string */
1427 if (str[idx] == '-') {
1428 idx++;
1429 if (idx > end_idx) {
1430 PyErr_SetNone(PyExc_StopIteration);
1431 return NULL;
1432 }
1433 }
1434
1435 /* read as many integer digits as we find as long as it doesn't start with 0 */
1436 if (str[idx] >= '1' && str[idx] <= '9') {
1437 idx++;
1438 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1439 }
1440 /* if it starts with 0 we only expect one integer digit */
1441 else if (str[idx] == '0') {
1442 idx++;
1443 }
1444 /* no integer digits, error */
1445 else {
1446 PyErr_SetNone(PyExc_StopIteration);
1447 return NULL;
1448 }
1449
1450 /* if the next char is '.' followed by a digit then read all float digits */
1451 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1452 is_float = 1;
1453 idx += 2;
Bob Ippolito76a982a2009-03-29 22:33:58 +00001454 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001455 }
1456
1457 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1458 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1459 Py_ssize_t e_start = idx;
1460 idx++;
1461
1462 /* read an exponent sign if present */
1463 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1464
1465 /* read all digits */
1466 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1467
1468 /* if we got a digit, then parse as float. if not, backtrack */
1469 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1470 is_float = 1;
1471 }
1472 else {
1473 idx = e_start;
1474 }
1475 }
1476
1477 /* copy the section we determined to be a number */
1478 numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1479 if (numstr == NULL)
1480 return NULL;
1481 if (is_float) {
1482 /* parse as a float using a fast path if available, otherwise call user defined method */
1483 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1484 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1485 }
1486 else {
1487 rval = PyFloat_FromString(numstr, NULL);
1488 }
1489 }
1490 else {
1491 /* no fast path for unicode -> int, just call */
1492 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1493 }
1494 Py_DECREF(numstr);
1495 *next_idx_ptr = idx;
1496 return rval;
1497}
1498
1499static PyObject *
1500scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1501{
1502 /* Read one JSON term (of any kind) from PyString pystr.
1503 idx is the index of the first character of the term
1504 *next_idx_ptr is a return-by-reference index to the first character after
1505 the number.
1506
1507 Returns a new PyObject representation of the term.
1508 */
1509 char *str = PyString_AS_STRING(pystr);
1510 Py_ssize_t length = PyString_GET_SIZE(pystr);
1511 if (idx >= length) {
1512 PyErr_SetNone(PyExc_StopIteration);
1513 return NULL;
1514 }
1515 switch (str[idx]) {
1516 case '"':
1517 /* string */
1518 return scanstring_str(pystr, idx + 1,
1519 PyString_AS_STRING(s->encoding),
1520 PyObject_IsTrue(s->strict),
1521 next_idx_ptr);
1522 case '{':
1523 /* object */
1524 return _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1525 case '[':
1526 /* array */
1527 return _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1528 case 'n':
1529 /* null */
1530 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1531 Py_INCREF(Py_None);
1532 *next_idx_ptr = idx + 4;
1533 return Py_None;
1534 }
1535 break;
1536 case 't':
1537 /* true */
1538 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1539 Py_INCREF(Py_True);
1540 *next_idx_ptr = idx + 4;
1541 return Py_True;
1542 }
1543 break;
1544 case 'f':
1545 /* false */
1546 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1547 Py_INCREF(Py_False);
1548 *next_idx_ptr = idx + 5;
1549 return Py_False;
1550 }
1551 break;
1552 case 'N':
1553 /* NaN */
1554 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1555 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1556 }
1557 break;
1558 case 'I':
1559 /* Infinity */
1560 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1561 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1562 }
1563 break;
1564 case '-':
1565 /* -Infinity */
1566 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1567 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1568 }
1569 break;
1570 }
1571 /* Didn't find a string, object, array, or named constant. Look for a number. */
1572 return _match_number_str(s, pystr, idx, next_idx_ptr);
1573}
1574
1575static PyObject *
1576scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1577{
1578 /* Read one JSON term (of any kind) from PyUnicode pystr.
1579 idx is the index of the first character of the term
1580 *next_idx_ptr is a return-by-reference index to the first character after
1581 the number.
1582
1583 Returns a new PyObject representation of the term.
1584 */
1585 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1586 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1587 if (idx >= length) {
1588 PyErr_SetNone(PyExc_StopIteration);
1589 return NULL;
1590 }
1591 switch (str[idx]) {
1592 case '"':
1593 /* string */
1594 return scanstring_unicode(pystr, idx + 1,
1595 PyObject_IsTrue(s->strict),
1596 next_idx_ptr);
1597 case '{':
1598 /* object */
1599 return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1600 case '[':
1601 /* array */
1602 return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1603 case 'n':
1604 /* null */
1605 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1606 Py_INCREF(Py_None);
1607 *next_idx_ptr = idx + 4;
1608 return Py_None;
1609 }
1610 break;
1611 case 't':
1612 /* true */
1613 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1614 Py_INCREF(Py_True);
1615 *next_idx_ptr = idx + 4;
1616 return Py_True;
1617 }
1618 break;
1619 case 'f':
1620 /* false */
1621 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1622 Py_INCREF(Py_False);
1623 *next_idx_ptr = idx + 5;
1624 return Py_False;
1625 }
1626 break;
1627 case 'N':
1628 /* NaN */
1629 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1630 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1631 }
1632 break;
1633 case 'I':
1634 /* Infinity */
1635 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1636 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1637 }
1638 break;
1639 case '-':
1640 /* -Infinity */
1641 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1642 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1643 }
1644 break;
1645 }
1646 /* Didn't find a string, object, array, or named constant. Look for a number. */
1647 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1648}
1649
1650static PyObject *
1651scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1652{
1653 /* Python callable interface to scan_once_{str,unicode} */
1654 PyObject *pystr;
1655 PyObject *rval;
1656 Py_ssize_t idx;
1657 Py_ssize_t next_idx = -1;
1658 static char *kwlist[] = {"string", "idx", NULL};
1659 PyScannerObject *s;
1660 assert(PyScanner_Check(self));
1661 s = (PyScannerObject *)self;
1662 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1663 return NULL;
1664
1665 if (PyString_Check(pystr)) {
1666 rval = scan_once_str(s, pystr, idx, &next_idx);
1667 }
1668 else if (PyUnicode_Check(pystr)) {
1669 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1670 }
1671 else {
1672 PyErr_Format(PyExc_TypeError,
1673 "first argument must be a string, not %.80s",
1674 Py_TYPE(pystr)->tp_name);
1675 return NULL;
1676 }
1677 return _build_rval_index_tuple(rval, next_idx);
1678}
1679
1680static PyObject *
1681scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1682{
1683 PyScannerObject *s;
1684 s = (PyScannerObject *)type->tp_alloc(type, 0);
1685 if (s != NULL) {
1686 s->encoding = NULL;
1687 s->strict = NULL;
1688 s->object_hook = NULL;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001689 s->pairs_hook = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001690 s->parse_float = NULL;
1691 s->parse_int = NULL;
1692 s->parse_constant = NULL;
1693 }
1694 return (PyObject *)s;
1695}
1696
1697static int
1698scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1699{
1700 /* Initialize Scanner object */
1701 PyObject *ctx;
1702 static char *kwlist[] = {"context", NULL};
1703 PyScannerObject *s;
1704
1705 assert(PyScanner_Check(self));
1706 s = (PyScannerObject *)self;
1707
1708 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1709 return -1;
1710
1711 /* PyString_AS_STRING is used on encoding */
1712 s->encoding = PyObject_GetAttrString(ctx, "encoding");
Antoine Pitrou187177f2009-12-08 15:40:51 +00001713 if (s->encoding == NULL)
1714 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001715 if (s->encoding == Py_None) {
1716 Py_DECREF(Py_None);
1717 s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1718 }
1719 else if (PyUnicode_Check(s->encoding)) {
1720 PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
1721 Py_DECREF(s->encoding);
1722 s->encoding = tmp;
1723 }
1724 if (s->encoding == NULL || !PyString_Check(s->encoding))
1725 goto bail;
1726
1727 /* All of these will fail "gracefully" so we don't need to verify them */
1728 s->strict = PyObject_GetAttrString(ctx, "strict");
1729 if (s->strict == NULL)
1730 goto bail;
1731 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1732 if (s->object_hook == NULL)
1733 goto bail;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001734 s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
Georg Brandld823bdc2011-01-02 14:20:16 +00001735 if (s->pairs_hook == NULL)
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001736 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001737 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1738 if (s->parse_float == NULL)
1739 goto bail;
1740 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1741 if (s->parse_int == NULL)
1742 goto bail;
1743 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1744 if (s->parse_constant == NULL)
1745 goto bail;
1746
1747 return 0;
1748
1749bail:
1750 Py_CLEAR(s->encoding);
1751 Py_CLEAR(s->strict);
1752 Py_CLEAR(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001753 Py_CLEAR(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001754 Py_CLEAR(s->parse_float);
1755 Py_CLEAR(s->parse_int);
1756 Py_CLEAR(s->parse_constant);
1757 return -1;
1758}
1759
1760PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1761
1762static
1763PyTypeObject PyScannerType = {
1764 PyObject_HEAD_INIT(NULL)
1765 0, /* tp_internal */
1766 "_json.Scanner", /* tp_name */
1767 sizeof(PyScannerObject), /* tp_basicsize */
1768 0, /* tp_itemsize */
1769 scanner_dealloc, /* tp_dealloc */
1770 0, /* tp_print */
1771 0, /* tp_getattr */
1772 0, /* tp_setattr */
1773 0, /* tp_compare */
1774 0, /* tp_repr */
1775 0, /* tp_as_number */
1776 0, /* tp_as_sequence */
1777 0, /* tp_as_mapping */
1778 0, /* tp_hash */
1779 scanner_call, /* tp_call */
1780 0, /* tp_str */
1781 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1782 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1783 0, /* tp_as_buffer */
1784 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1785 scanner_doc, /* tp_doc */
1786 scanner_traverse, /* tp_traverse */
1787 scanner_clear, /* tp_clear */
1788 0, /* tp_richcompare */
1789 0, /* tp_weaklistoffset */
1790 0, /* tp_iter */
1791 0, /* tp_iternext */
1792 0, /* tp_methods */
1793 scanner_members, /* tp_members */
1794 0, /* tp_getset */
1795 0, /* tp_base */
1796 0, /* tp_dict */
1797 0, /* tp_descr_get */
1798 0, /* tp_descr_set */
1799 0, /* tp_dictoffset */
1800 scanner_init, /* tp_init */
1801 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1802 scanner_new, /* tp_new */
1803 0,/* PyObject_GC_Del, */ /* tp_free */
1804};
1805
1806static PyObject *
1807encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1808{
1809 PyEncoderObject *s;
1810 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1811 if (s != NULL) {
1812 s->markers = NULL;
1813 s->defaultfn = NULL;
1814 s->encoder = NULL;
1815 s->indent = NULL;
1816 s->key_separator = NULL;
1817 s->item_separator = NULL;
1818 s->sort_keys = NULL;
1819 s->skipkeys = NULL;
1820 }
1821 return (PyObject *)s;
1822}
1823
1824static int
1825encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1826{
1827 /* initialize Encoder object */
1828 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1829
1830 PyEncoderObject *s;
Antoine Pitrou187177f2009-12-08 15:40:51 +00001831 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1832 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001833
1834 assert(PyEncoder_Check(self));
1835 s = (PyEncoderObject *)self;
1836
1837 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
Antoine Pitrou187177f2009-12-08 15:40:51 +00001838 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
1839 &sort_keys, &skipkeys, &allow_nan))
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001840 return -1;
1841
Antoine Pitrou187177f2009-12-08 15:40:51 +00001842 s->markers = markers;
1843 s->defaultfn = defaultfn;
1844 s->encoder = encoder;
1845 s->indent = indent;
1846 s->key_separator = key_separator;
1847 s->item_separator = item_separator;
1848 s->sort_keys = sort_keys;
1849 s->skipkeys = skipkeys;
1850 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
1851 s->allow_nan = PyObject_IsTrue(allow_nan);
1852
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001853 Py_INCREF(s->markers);
1854 Py_INCREF(s->defaultfn);
1855 Py_INCREF(s->encoder);
1856 Py_INCREF(s->indent);
1857 Py_INCREF(s->key_separator);
1858 Py_INCREF(s->item_separator);
1859 Py_INCREF(s->sort_keys);
1860 Py_INCREF(s->skipkeys);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001861 return 0;
1862}
1863
1864static PyObject *
1865encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1866{
1867 /* Python callable interface to encode_listencode_obj */
1868 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1869 PyObject *obj;
1870 PyObject *rval;
1871 Py_ssize_t indent_level;
1872 PyEncoderObject *s;
1873 assert(PyEncoder_Check(self));
1874 s = (PyEncoderObject *)self;
1875 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1876 &obj, _convertPyInt_AsSsize_t, &indent_level))
1877 return NULL;
1878 rval = PyList_New(0);
1879 if (rval == NULL)
1880 return NULL;
1881 if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1882 Py_DECREF(rval);
1883 return NULL;
1884 }
1885 return rval;
1886}
1887
1888static PyObject *
1889_encoded_const(PyObject *obj)
1890{
1891 /* Return the JSON string representation of None, True, False */
1892 if (obj == Py_None) {
1893 static PyObject *s_null = NULL;
1894 if (s_null == NULL) {
1895 s_null = PyString_InternFromString("null");
1896 }
1897 Py_INCREF(s_null);
1898 return s_null;
1899 }
1900 else if (obj == Py_True) {
1901 static PyObject *s_true = NULL;
1902 if (s_true == NULL) {
1903 s_true = PyString_InternFromString("true");
1904 }
1905 Py_INCREF(s_true);
1906 return s_true;
1907 }
1908 else if (obj == Py_False) {
1909 static PyObject *s_false = NULL;
1910 if (s_false == NULL) {
1911 s_false = PyString_InternFromString("false");
1912 }
1913 Py_INCREF(s_false);
1914 return s_false;
1915 }
1916 else {
1917 PyErr_SetString(PyExc_ValueError, "not a const");
1918 return NULL;
1919 }
1920}
1921
1922static PyObject *
1923encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1924{
1925 /* Return the JSON representation of a PyFloat */
1926 double i = PyFloat_AS_DOUBLE(obj);
1927 if (!Py_IS_FINITE(i)) {
1928 if (!s->allow_nan) {
1929 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1930 return NULL;
1931 }
1932 if (i > 0) {
1933 return PyString_FromString("Infinity");
1934 }
1935 else if (i < 0) {
1936 return PyString_FromString("-Infinity");
1937 }
1938 else {
1939 return PyString_FromString("NaN");
1940 }
1941 }
1942 /* Use a better float format here? */
1943 return PyObject_Repr(obj);
1944}
1945
1946static PyObject *
1947encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1948{
1949 /* Return the JSON representation of a string */
1950 if (s->fast_encode)
1951 return py_encode_basestring_ascii(NULL, obj);
1952 else
1953 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1954}
1955
1956static int
1957_steal_list_append(PyObject *lst, PyObject *stolen)
1958{
1959 /* Append stolen and then decrement its reference count */
1960 int rval = PyList_Append(lst, stolen);
1961 Py_DECREF(stolen);
1962 return rval;
1963}
1964
1965static int
1966encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
1967{
1968 /* Encode Python object obj to a JSON term, rval is a PyList */
1969 PyObject *newobj;
1970 int rv;
1971
1972 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1973 PyObject *cstr = _encoded_const(obj);
1974 if (cstr == NULL)
1975 return -1;
1976 return _steal_list_append(rval, cstr);
1977 }
1978 else if (PyString_Check(obj) || PyUnicode_Check(obj))
1979 {
1980 PyObject *encoded = encoder_encode_string(s, obj);
1981 if (encoded == NULL)
1982 return -1;
1983 return _steal_list_append(rval, encoded);
1984 }
1985 else if (PyInt_Check(obj) || PyLong_Check(obj)) {
1986 PyObject *encoded = PyObject_Str(obj);
1987 if (encoded == NULL)
1988 return -1;
1989 return _steal_list_append(rval, encoded);
1990 }
1991 else if (PyFloat_Check(obj)) {
1992 PyObject *encoded = encoder_encode_float(s, obj);
1993 if (encoded == NULL)
1994 return -1;
1995 return _steal_list_append(rval, encoded);
1996 }
1997 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
1998 return encoder_listencode_list(s, rval, obj, indent_level);
1999 }
2000 else if (PyDict_Check(obj)) {
2001 return encoder_listencode_dict(s, rval, obj, indent_level);
2002 }
2003 else {
2004 PyObject *ident = NULL;
2005 if (s->markers != Py_None) {
2006 int has_key;
2007 ident = PyLong_FromVoidPtr(obj);
2008 if (ident == NULL)
2009 return -1;
2010 has_key = PyDict_Contains(s->markers, ident);
2011 if (has_key) {
2012 if (has_key != -1)
2013 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2014 Py_DECREF(ident);
2015 return -1;
2016 }
2017 if (PyDict_SetItem(s->markers, ident, obj)) {
2018 Py_DECREF(ident);
2019 return -1;
2020 }
2021 }
2022 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2023 if (newobj == NULL) {
2024 Py_XDECREF(ident);
2025 return -1;
2026 }
2027 rv = encoder_listencode_obj(s, rval, newobj, indent_level);
2028 Py_DECREF(newobj);
2029 if (rv) {
2030 Py_XDECREF(ident);
2031 return -1;
2032 }
2033 if (ident != NULL) {
2034 if (PyDict_DelItem(s->markers, ident)) {
2035 Py_XDECREF(ident);
2036 return -1;
2037 }
2038 Py_XDECREF(ident);
2039 }
2040 return rv;
2041 }
2042}
2043
2044static int
2045encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2046{
2047 /* Encode Python dict dct a JSON term, rval is a PyList */
2048 static PyObject *open_dict = NULL;
2049 static PyObject *close_dict = NULL;
2050 static PyObject *empty_dict = NULL;
2051 PyObject *kstr = NULL;
2052 PyObject *ident = NULL;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002053 PyObject *key = NULL;
2054 PyObject *value = NULL;
2055 PyObject *it = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002056 int skipkeys;
2057 Py_ssize_t idx;
2058
2059 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
2060 open_dict = PyString_InternFromString("{");
2061 close_dict = PyString_InternFromString("}");
2062 empty_dict = PyString_InternFromString("{}");
2063 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
2064 return -1;
2065 }
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002066 if (Py_SIZE(dct) == 0)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002067 return PyList_Append(rval, empty_dict);
2068
2069 if (s->markers != Py_None) {
2070 int has_key;
2071 ident = PyLong_FromVoidPtr(dct);
2072 if (ident == NULL)
2073 goto bail;
2074 has_key = PyDict_Contains(s->markers, ident);
2075 if (has_key) {
2076 if (has_key != -1)
2077 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2078 goto bail;
2079 }
2080 if (PyDict_SetItem(s->markers, ident, dct)) {
2081 goto bail;
2082 }
2083 }
2084
2085 if (PyList_Append(rval, open_dict))
2086 goto bail;
2087
2088 if (s->indent != Py_None) {
2089 /* TODO: DOES NOT RUN */
2090 indent_level += 1;
2091 /*
2092 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2093 separator = _item_separator + newline_indent
2094 buf += newline_indent
2095 */
2096 }
2097
2098 /* TODO: C speedup not implemented for sort_keys */
2099
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002100 it = PyObject_GetIter(dct);
2101 if (it == NULL)
2102 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002103 skipkeys = PyObject_IsTrue(s->skipkeys);
2104 idx = 0;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002105 while ((key = PyIter_Next(it)) != NULL) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002106 PyObject *encoded;
2107
2108 if (PyString_Check(key) || PyUnicode_Check(key)) {
2109 Py_INCREF(key);
2110 kstr = key;
2111 }
2112 else if (PyFloat_Check(key)) {
2113 kstr = encoder_encode_float(s, key);
2114 if (kstr == NULL)
2115 goto bail;
2116 }
2117 else if (PyInt_Check(key) || PyLong_Check(key)) {
2118 kstr = PyObject_Str(key);
2119 if (kstr == NULL)
2120 goto bail;
2121 }
2122 else if (key == Py_True || key == Py_False || key == Py_None) {
2123 kstr = _encoded_const(key);
2124 if (kstr == NULL)
2125 goto bail;
2126 }
2127 else if (skipkeys) {
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002128 Py_DECREF(key);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002129 continue;
2130 }
2131 else {
2132 /* TODO: include repr of key */
Doug Hellmannf31db932010-07-21 12:36:33 +00002133 PyErr_SetString(PyExc_TypeError, "keys must be a string");
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002134 goto bail;
2135 }
2136
2137 if (idx) {
2138 if (PyList_Append(rval, s->item_separator))
2139 goto bail;
2140 }
2141
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002142 value = PyObject_GetItem(dct, key);
2143 if (value == NULL)
2144 goto bail;
2145
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002146 encoded = encoder_encode_string(s, kstr);
2147 Py_CLEAR(kstr);
2148 if (encoded == NULL)
2149 goto bail;
2150 if (PyList_Append(rval, encoded)) {
2151 Py_DECREF(encoded);
2152 goto bail;
2153 }
2154 Py_DECREF(encoded);
2155 if (PyList_Append(rval, s->key_separator))
2156 goto bail;
2157 if (encoder_listencode_obj(s, rval, value, indent_level))
2158 goto bail;
2159 idx += 1;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002160 Py_CLEAR(value);
2161 Py_DECREF(key);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002162 }
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002163 if (PyErr_Occurred())
2164 goto bail;
2165 Py_CLEAR(it);
2166
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002167 if (ident != NULL) {
2168 if (PyDict_DelItem(s->markers, ident))
2169 goto bail;
2170 Py_CLEAR(ident);
2171 }
2172 if (s->indent != Py_None) {
2173 /* TODO: DOES NOT RUN */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002174 /*
Brett Cannon8e9757e2010-05-03 23:43:49 +00002175 indent_level -= 1;
2176
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002177 yield '\n' + (' ' * (_indent * _current_indent_level))
2178 */
2179 }
2180 if (PyList_Append(rval, close_dict))
2181 goto bail;
2182 return 0;
2183
2184bail:
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002185 Py_XDECREF(it);
2186 Py_XDECREF(key);
2187 Py_XDECREF(value);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002188 Py_XDECREF(kstr);
2189 Py_XDECREF(ident);
2190 return -1;
2191}
2192
2193
2194static int
2195encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2196{
2197 /* Encode Python list seq to a JSON term, rval is a PyList */
2198 static PyObject *open_array = NULL;
2199 static PyObject *close_array = NULL;
2200 static PyObject *empty_array = NULL;
2201 PyObject *ident = NULL;
2202 PyObject *s_fast = NULL;
2203 Py_ssize_t num_items;
2204 PyObject **seq_items;
2205 Py_ssize_t i;
2206
2207 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2208 open_array = PyString_InternFromString("[");
2209 close_array = PyString_InternFromString("]");
2210 empty_array = PyString_InternFromString("[]");
2211 if (open_array == NULL || close_array == NULL || empty_array == NULL)
2212 return -1;
2213 }
2214 ident = NULL;
2215 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
2216 if (s_fast == NULL)
2217 return -1;
2218 num_items = PySequence_Fast_GET_SIZE(s_fast);
2219 if (num_items == 0) {
2220 Py_DECREF(s_fast);
2221 return PyList_Append(rval, empty_array);
2222 }
2223
2224 if (s->markers != Py_None) {
2225 int has_key;
2226 ident = PyLong_FromVoidPtr(seq);
2227 if (ident == NULL)
2228 goto bail;
2229 has_key = PyDict_Contains(s->markers, ident);
2230 if (has_key) {
2231 if (has_key != -1)
2232 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2233 goto bail;
2234 }
2235 if (PyDict_SetItem(s->markers, ident, seq)) {
2236 goto bail;
2237 }
2238 }
2239
2240 seq_items = PySequence_Fast_ITEMS(s_fast);
2241 if (PyList_Append(rval, open_array))
2242 goto bail;
2243 if (s->indent != Py_None) {
2244 /* TODO: DOES NOT RUN */
2245 indent_level += 1;
2246 /*
2247 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2248 separator = _item_separator + newline_indent
2249 buf += newline_indent
2250 */
2251 }
2252 for (i = 0; i < num_items; i++) {
2253 PyObject *obj = seq_items[i];
2254 if (i) {
2255 if (PyList_Append(rval, s->item_separator))
2256 goto bail;
2257 }
2258 if (encoder_listencode_obj(s, rval, obj, indent_level))
2259 goto bail;
2260 }
2261 if (ident != NULL) {
2262 if (PyDict_DelItem(s->markers, ident))
2263 goto bail;
2264 Py_CLEAR(ident);
2265 }
2266 if (s->indent != Py_None) {
2267 /* TODO: DOES NOT RUN */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002268 /*
Brett Cannon8e9757e2010-05-03 23:43:49 +00002269 indent_level -= 1;
2270
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002271 yield '\n' + (' ' * (_indent * _current_indent_level))
2272 */
2273 }
2274 if (PyList_Append(rval, close_array))
2275 goto bail;
2276 Py_DECREF(s_fast);
2277 return 0;
2278
2279bail:
2280 Py_XDECREF(ident);
2281 Py_DECREF(s_fast);
2282 return -1;
2283}
2284
2285static void
2286encoder_dealloc(PyObject *self)
2287{
2288 /* Deallocate Encoder */
2289 encoder_clear(self);
2290 Py_TYPE(self)->tp_free(self);
2291}
2292
2293static int
2294encoder_traverse(PyObject *self, visitproc visit, void *arg)
2295{
2296 PyEncoderObject *s;
2297 assert(PyEncoder_Check(self));
2298 s = (PyEncoderObject *)self;
2299 Py_VISIT(s->markers);
2300 Py_VISIT(s->defaultfn);
2301 Py_VISIT(s->encoder);
2302 Py_VISIT(s->indent);
2303 Py_VISIT(s->key_separator);
2304 Py_VISIT(s->item_separator);
2305 Py_VISIT(s->sort_keys);
2306 Py_VISIT(s->skipkeys);
2307 return 0;
2308}
2309
2310static int
2311encoder_clear(PyObject *self)
2312{
2313 /* Deallocate Encoder */
2314 PyEncoderObject *s;
2315 assert(PyEncoder_Check(self));
2316 s = (PyEncoderObject *)self;
2317 Py_CLEAR(s->markers);
2318 Py_CLEAR(s->defaultfn);
2319 Py_CLEAR(s->encoder);
2320 Py_CLEAR(s->indent);
2321 Py_CLEAR(s->key_separator);
2322 Py_CLEAR(s->item_separator);
2323 Py_CLEAR(s->sort_keys);
2324 Py_CLEAR(s->skipkeys);
2325 return 0;
2326}
2327
2328PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2329
2330static
2331PyTypeObject PyEncoderType = {
2332 PyObject_HEAD_INIT(NULL)
2333 0, /* tp_internal */
2334 "_json.Encoder", /* tp_name */
2335 sizeof(PyEncoderObject), /* tp_basicsize */
2336 0, /* tp_itemsize */
2337 encoder_dealloc, /* tp_dealloc */
2338 0, /* tp_print */
2339 0, /* tp_getattr */
2340 0, /* tp_setattr */
2341 0, /* tp_compare */
2342 0, /* tp_repr */
2343 0, /* tp_as_number */
2344 0, /* tp_as_sequence */
2345 0, /* tp_as_mapping */
2346 0, /* tp_hash */
2347 encoder_call, /* tp_call */
2348 0, /* tp_str */
2349 0, /* tp_getattro */
2350 0, /* tp_setattro */
2351 0, /* tp_as_buffer */
2352 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2353 encoder_doc, /* tp_doc */
2354 encoder_traverse, /* tp_traverse */
2355 encoder_clear, /* tp_clear */
2356 0, /* tp_richcompare */
2357 0, /* tp_weaklistoffset */
2358 0, /* tp_iter */
2359 0, /* tp_iternext */
2360 0, /* tp_methods */
2361 encoder_members, /* tp_members */
2362 0, /* tp_getset */
2363 0, /* tp_base */
2364 0, /* tp_dict */
2365 0, /* tp_descr_get */
2366 0, /* tp_descr_set */
2367 0, /* tp_dictoffset */
2368 encoder_init, /* tp_init */
2369 0, /* tp_alloc */
2370 encoder_new, /* tp_new */
2371 0, /* tp_free */
2372};
2373
2374static PyMethodDef speedups_methods[] = {
2375 {"encode_basestring_ascii",
2376 (PyCFunction)py_encode_basestring_ascii,
2377 METH_O,
2378 pydoc_encode_basestring_ascii},
2379 {"scanstring",
2380 (PyCFunction)py_scanstring,
2381 METH_VARARGS,
2382 pydoc_scanstring},
Brett Cannon4b964f92008-05-05 20:21:38 +00002383 {NULL, NULL, 0, NULL}
2384};
2385
2386PyDoc_STRVAR(module_doc,
2387"json speedups\n");
2388
2389void
2390init_json(void)
2391{
2392 PyObject *m;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002393 PyScannerType.tp_new = PyType_GenericNew;
2394 if (PyType_Ready(&PyScannerType) < 0)
2395 return;
2396 PyEncoderType.tp_new = PyType_GenericNew;
2397 if (PyType_Ready(&PyEncoderType) < 0)
2398 return;
2399 m = Py_InitModule3("_json", speedups_methods, module_doc);
2400 Py_INCREF((PyObject*)&PyScannerType);
2401 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2402 Py_INCREF((PyObject*)&PyEncoderType);
2403 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
Brett Cannon4b964f92008-05-05 20:21:38 +00002404}