/* Snapshot of blob 7a1fabdba151769536a035d8a44a55485606ac80 (source-browser header line). */
Brett Cannon4b964f92008-05-05 20:21:38 +00001#include "Python.h"
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002#include "structmember.h"
3#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
4#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
5#endif
6#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
7typedef int Py_ssize_t;
8#define PY_SSIZE_T_MAX INT_MAX
9#define PY_SSIZE_T_MIN INT_MIN
10#define PyInt_FromSsize_t PyInt_FromLong
11#define PyInt_AsSsize_t PyInt_AsLong
12#endif
13#ifndef Py_IS_FINITE
14#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
15#endif
16
17#ifdef __GNUC__
18#define UNUSED __attribute__((__unused__))
19#else
20#define UNUSED
21#endif
Brett Cannon4b964f92008-05-05 20:21:38 +000022
23#define DEFAULT_ENCODING "utf-8"
Brett Cannon4b964f92008-05-05 20:21:38 +000024
Bob Ippolitod914e3f2009-03-17 23:19:00 +000025#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
26#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
27#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
28#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
29
30static PyTypeObject PyScannerType;
31static PyTypeObject PyEncoderType;
32
33typedef struct _PyScannerObject {
34 PyObject_HEAD
35 PyObject *encoding;
36 PyObject *strict;
37 PyObject *object_hook;
Raymond Hettinger91852ca2009-03-19 19:19:03 +000038 PyObject *pairs_hook;
Bob Ippolitod914e3f2009-03-17 23:19:00 +000039 PyObject *parse_float;
40 PyObject *parse_int;
41 PyObject *parse_constant;
42} PyScannerObject;
43
44static PyMemberDef scanner_members[] = {
45 {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
46 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
47 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
Raymond Hettinger91852ca2009-03-19 19:19:03 +000048 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
Bob Ippolitod914e3f2009-03-17 23:19:00 +000049 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
50 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
51 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
52 {NULL}
53};
54
55typedef struct _PyEncoderObject {
56 PyObject_HEAD
57 PyObject *markers;
58 PyObject *defaultfn;
59 PyObject *encoder;
60 PyObject *indent;
61 PyObject *key_separator;
62 PyObject *item_separator;
63 PyObject *sort_keys;
64 PyObject *skipkeys;
65 int fast_encode;
66 int allow_nan;
67} PyEncoderObject;
68
69static PyMemberDef encoder_members[] = {
70 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
71 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
72 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
73 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
74 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
75 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
76 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
77 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
78 {NULL}
79};
80
81static Py_ssize_t
82ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
83static PyObject *
84ascii_escape_unicode(PyObject *pystr);
85static PyObject *
86ascii_escape_str(PyObject *pystr);
87static PyObject *
88py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
89void init_json(void);
90static PyObject *
91scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
92static PyObject *
93scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
94static PyObject *
95_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
96static PyObject *
97scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
98static int
99scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
100static void
101scanner_dealloc(PyObject *self);
102static int
103scanner_clear(PyObject *self);
104static PyObject *
105encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
106static int
107encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
108static void
109encoder_dealloc(PyObject *self);
110static int
111encoder_clear(PyObject *self);
112static int
113encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
114static int
115encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
116static int
117encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
118static PyObject *
Raymond Hettingera0b8d9a2009-03-19 19:24:43 +0000119_encoded_const(PyObject *obj);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000120static void
121raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
122static PyObject *
123encoder_encode_string(PyEncoderObject *s, PyObject *obj);
124static int
125_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
126static PyObject *
127_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
128static PyObject *
129encoder_encode_float(PyEncoderObject *s, PyObject *obj);
130
/* S_CHAR(c): true when c is printable ASCII (' ' through '~') and is neither
   a backslash nor a double quote, i.e. it can be copied into the escaped
   output unchanged.  The argument is parenthesized so that compound
   expressions expand correctly; note that it is evaluated more than once. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* IS_WHITESPACE(c): true for space, tab, line feed and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* Output bytes produced for one escaped input character: 6 for a single
   "\uXXXX", doubled on wide-unicode builds where one character can become
   a surrogate pair "\uXXXX\uXXXX". */
#define MIN_EXPANSION 6
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif

Bob Ippolitod914e3f2009-03-17 23:19:00 +0000141static int
142_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
143{
144 /* PyObject to Py_ssize_t converter */
145 *size_ptr = PyInt_AsSsize_t(o);
Georg Brandlf71ba952009-05-05 07:48:12 +0000146 if (*size_ptr == -1 && PyErr_Occurred())
147 return 0;
148 return 1;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000149}
150
151static PyObject *
152_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
153{
154 /* Py_ssize_t to PyObject converter */
155 return PyInt_FromSsize_t(*size_ptr);
156}
157
Brett Cannon4b964f92008-05-05 20:21:38 +0000158static Py_ssize_t
159ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
160{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000161 /* Escape unicode code point c to ASCII escape sequences
162 in char *output. output must have at least 12 bytes unused to
163 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Brett Cannon4b964f92008-05-05 20:21:38 +0000164 output[chars++] = '\\';
165 switch (c) {
166 case '\\': output[chars++] = (char)c; break;
167 case '"': output[chars++] = (char)c; break;
168 case '\b': output[chars++] = 'b'; break;
169 case '\f': output[chars++] = 'f'; break;
170 case '\n': output[chars++] = 'n'; break;
171 case '\r': output[chars++] = 'r'; break;
172 case '\t': output[chars++] = 't'; break;
173 default:
174#ifdef Py_UNICODE_WIDE
175 if (c >= 0x10000) {
176 /* UTF-16 surrogate pair */
177 Py_UNICODE v = c - 0x10000;
178 c = 0xd800 | ((v >> 10) & 0x3ff);
179 output[chars++] = 'u';
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000180 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
181 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
182 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
183 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Brett Cannon4b964f92008-05-05 20:21:38 +0000184 c = 0xdc00 | (v & 0x3ff);
185 output[chars++] = '\\';
186 }
187#endif
188 output[chars++] = 'u';
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000189 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
190 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
191 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
192 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Brett Cannon4b964f92008-05-05 20:21:38 +0000193 }
194 return chars;
195}
196
197static PyObject *
198ascii_escape_unicode(PyObject *pystr)
199{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000200 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
Brett Cannon4b964f92008-05-05 20:21:38 +0000201 Py_ssize_t i;
202 Py_ssize_t input_chars;
203 Py_ssize_t output_size;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000204 Py_ssize_t max_output_size;
Brett Cannon4b964f92008-05-05 20:21:38 +0000205 Py_ssize_t chars;
206 PyObject *rval;
207 char *output;
208 Py_UNICODE *input_unicode;
209
210 input_chars = PyUnicode_GET_SIZE(pystr);
211 input_unicode = PyUnicode_AS_UNICODE(pystr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000212
Brett Cannon4b964f92008-05-05 20:21:38 +0000213 /* One char input can be up to 6 chars output, estimate 4 of these */
214 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000215 max_output_size = 2 + (input_chars * MAX_EXPANSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000216 rval = PyString_FromStringAndSize(NULL, output_size);
Brett Cannon4b964f92008-05-05 20:21:38 +0000217 if (rval == NULL) {
218 return NULL;
219 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000220 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000221 chars = 0;
222 output[chars++] = '"';
223 for (i = 0; i < input_chars; i++) {
224 Py_UNICODE c = input_unicode[i];
225 if (S_CHAR(c)) {
226 output[chars++] = (char)c;
227 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000228 else {
Brett Cannon4b964f92008-05-05 20:21:38 +0000229 chars = ascii_escape_char(c, output, chars);
230 }
231 if (output_size - chars < (1 + MAX_EXPANSION)) {
232 /* There's more than four, so let's resize by a lot */
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000233 Py_ssize_t new_output_size = output_size * 2;
Brett Cannon4b964f92008-05-05 20:21:38 +0000234 /* This is an upper bound */
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000235 if (new_output_size > max_output_size) {
236 new_output_size = max_output_size;
Brett Cannon4b964f92008-05-05 20:21:38 +0000237 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000238 /* Make sure that the output size changed before resizing */
239 if (new_output_size != output_size) {
240 output_size = new_output_size;
241 if (_PyString_Resize(&rval, output_size) == -1) {
242 return NULL;
243 }
244 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000245 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000246 }
247 }
248 output[chars++] = '"';
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000249 if (_PyString_Resize(&rval, chars) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000250 return NULL;
251 }
252 return rval;
253}
254
255static PyObject *
256ascii_escape_str(PyObject *pystr)
257{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000258 /* Take a PyString pystr and return a new ASCII-only escaped PyString */
Brett Cannon4b964f92008-05-05 20:21:38 +0000259 Py_ssize_t i;
260 Py_ssize_t input_chars;
261 Py_ssize_t output_size;
262 Py_ssize_t chars;
263 PyObject *rval;
264 char *output;
265 char *input_str;
266
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000267 input_chars = PyString_GET_SIZE(pystr);
268 input_str = PyString_AS_STRING(pystr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000269
270 /* Fast path for a string that's already ASCII */
271 for (i = 0; i < input_chars; i++) {
272 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
273 if (!S_CHAR(c)) {
274 /* If we have to escape something, scan the string for unicode */
275 Py_ssize_t j;
276 for (j = i; j < input_chars; j++) {
277 c = (Py_UNICODE)(unsigned char)input_str[j];
278 if (c > 0x7f) {
279 /* We hit a non-ASCII character, bail to unicode mode */
280 PyObject *uni;
281 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
282 if (uni == NULL) {
283 return NULL;
284 }
285 rval = ascii_escape_unicode(uni);
286 Py_DECREF(uni);
287 return rval;
288 }
289 }
290 break;
291 }
292 }
293
294 if (i == input_chars) {
295 /* Input is already ASCII */
296 output_size = 2 + input_chars;
297 }
298 else {
299 /* One char input can be up to 6 chars output, estimate 4 of these */
300 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
301 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000302 rval = PyString_FromStringAndSize(NULL, output_size);
Brett Cannon4b964f92008-05-05 20:21:38 +0000303 if (rval == NULL) {
304 return NULL;
305 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000306 output = PyString_AS_STRING(rval);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000307 output[0] = '"';
308
309 /* We know that everything up to i is ASCII already */
310 chars = i + 1;
311 memcpy(&output[1], input_str, i);
312
313 for (; i < input_chars; i++) {
314 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
Brett Cannon4b964f92008-05-05 20:21:38 +0000315 if (S_CHAR(c)) {
316 output[chars++] = (char)c;
317 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000318 else {
Brett Cannon4b964f92008-05-05 20:21:38 +0000319 chars = ascii_escape_char(c, output, chars);
320 }
321 /* An ASCII char can't possibly expand to a surrogate! */
322 if (output_size - chars < (1 + MIN_EXPANSION)) {
323 /* There's more than four, so let's resize by a lot */
324 output_size *= 2;
325 if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
326 output_size = 2 + (input_chars * MIN_EXPANSION);
327 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000328 if (_PyString_Resize(&rval, output_size) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000329 return NULL;
330 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000331 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000332 }
333 }
334 output[chars++] = '"';
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000335 if (_PyString_Resize(&rval, chars) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000336 return NULL;
337 }
338 return rval;
339}
340
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000341static void
Brett Cannon4b964f92008-05-05 20:21:38 +0000342raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
343{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000344 /* Use the Python function json.decoder.errmsg to raise a nice
345 looking ValueError exception */
Brett Cannon4b964f92008-05-05 20:21:38 +0000346 static PyObject *errmsg_fn = NULL;
347 PyObject *pymsg;
348 if (errmsg_fn == NULL) {
349 PyObject *decoder = PyImport_ImportModule("json.decoder");
350 if (decoder == NULL)
351 return;
352 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000353 Py_DECREF(decoder);
Brett Cannon4b964f92008-05-05 20:21:38 +0000354 if (errmsg_fn == NULL)
355 return;
Brett Cannon4b964f92008-05-05 20:21:38 +0000356 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000357 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
Benjamin Peterson595e3cb2008-10-16 21:09:28 +0000358 if (pymsg) {
359 PyErr_SetObject(PyExc_ValueError, pymsg);
360 Py_DECREF(pymsg);
361 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000362}
363
364static PyObject *
365join_list_unicode(PyObject *lst)
366{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000367 /* return u''.join(lst) */
368 static PyObject *joinfn = NULL;
369 if (joinfn == NULL) {
370 PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
371 if (ustr == NULL)
372 return NULL;
373
374 joinfn = PyObject_GetAttrString(ustr, "join");
375 Py_DECREF(ustr);
376 if (joinfn == NULL)
377 return NULL;
Brett Cannon4b964f92008-05-05 20:21:38 +0000378 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000379 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
Brett Cannon4b964f92008-05-05 20:21:38 +0000380}
381
382static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000383_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
384 /* return (rval, idx) tuple, stealing reference to rval */
385 PyObject *tpl;
386 PyObject *pyidx;
387 /*
388 steal a reference to rval, returns (rval, idx)
389 */
390 if (rval == NULL) {
391 return NULL;
392 }
393 pyidx = PyInt_FromSsize_t(idx);
394 if (pyidx == NULL) {
395 Py_DECREF(rval);
396 return NULL;
397 }
398 tpl = PyTuple_New(2);
399 if (tpl == NULL) {
400 Py_DECREF(pyidx);
401 Py_DECREF(rval);
402 return NULL;
403 }
404 PyTuple_SET_ITEM(tpl, 0, rval);
405 PyTuple_SET_ITEM(tpl, 1, pyidx);
406 return tpl;
407}
408
409static PyObject *
410scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
411{
412 /* Read the JSON string from PyString pystr.
413 end is the index of the first character after the quote.
414 encoding is the encoding of pystr (must be an ASCII superset)
415 if strict is zero then literal control characters are allowed
416 *next_end_ptr is a return-by-reference index of the character
417 after the end quote
418
419 Return value is a new PyString (if ASCII-only) or PyUnicode
420 */
Brett Cannon4b964f92008-05-05 20:21:38 +0000421 PyObject *rval;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000422 Py_ssize_t len = PyString_GET_SIZE(pystr);
Brett Cannon4b964f92008-05-05 20:21:38 +0000423 Py_ssize_t begin = end - 1;
Brett Cannon8e9757e2010-05-03 23:43:49 +0000424 Py_ssize_t next;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000425 char *buf = PyString_AS_STRING(pystr);
Brett Cannon4b964f92008-05-05 20:21:38 +0000426 PyObject *chunks = PyList_New(0);
427 if (chunks == NULL) {
428 goto bail;
429 }
Bob Ippolitod648f642008-07-19 21:59:50 +0000430 if (end < 0 || len <= end) {
431 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
432 goto bail;
433 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000434 while (1) {
435 /* Find the end of the string or the next escape */
436 Py_UNICODE c = 0;
437 PyObject *chunk = NULL;
438 for (next = end; next < len; next++) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000439 c = (unsigned char)buf[next];
Brett Cannon4b964f92008-05-05 20:21:38 +0000440 if (c == '"' || c == '\\') {
441 break;
442 }
443 else if (strict && c <= 0x1f) {
Bob Ippolitod648f642008-07-19 21:59:50 +0000444 raise_errmsg("Invalid control character at", pystr, next);
Brett Cannon4b964f92008-05-05 20:21:38 +0000445 goto bail;
446 }
447 }
448 if (!(c == '"' || c == '\\')) {
449 raise_errmsg("Unterminated string starting at", pystr, begin);
450 goto bail;
451 }
452 /* Pick up this chunk if it's not zero length */
453 if (next != end) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000454 PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000455 if (strchunk == NULL) {
456 goto bail;
457 }
Barry Warsawfa658272010-11-02 21:03:09 +0000458 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
459 Py_DECREF(strchunk);
460 if (chunk == NULL) {
461 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000462 }
463 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000464 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000465 goto bail;
466 }
467 Py_DECREF(chunk);
468 }
469 next++;
470 if (c == '"') {
471 end = next;
472 break;
473 }
474 if (next == len) {
475 raise_errmsg("Unterminated string starting at", pystr, begin);
476 goto bail;
477 }
478 c = buf[next];
479 if (c != 'u') {
480 /* Non-unicode backslash escapes */
481 end = next + 1;
482 switch (c) {
483 case '"': break;
484 case '\\': break;
485 case '/': break;
486 case 'b': c = '\b'; break;
487 case 'f': c = '\f'; break;
488 case 'n': c = '\n'; break;
489 case 'r': c = '\r'; break;
490 case 't': c = '\t'; break;
491 default: c = 0;
492 }
493 if (c == 0) {
494 raise_errmsg("Invalid \\escape", pystr, end - 2);
495 goto bail;
496 }
497 }
498 else {
499 c = 0;
500 next++;
501 end = next + 4;
502 if (end >= len) {
503 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
504 goto bail;
505 }
506 /* Decode 4 hex digits */
507 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000508 Py_UNICODE digit = buf[next];
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000509 c <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000510 switch (digit) {
511 case '0': case '1': case '2': case '3': case '4':
512 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000513 c |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000514 case 'a': case 'b': case 'c': case 'd': case 'e':
515 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000516 c |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000517 case 'A': case 'B': case 'C': case 'D': case 'E':
518 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000519 c |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000520 default:
521 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
522 goto bail;
523 }
524 }
525#ifdef Py_UNICODE_WIDE
526 /* Surrogate pair */
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000527 if ((c & 0xfc00) == 0xd800) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000528 Py_UNICODE c2 = 0;
529 if (end + 6 >= len) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000530 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
531 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000532 }
533 if (buf[next++] != '\\' || buf[next++] != 'u') {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000534 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
535 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000536 }
537 end += 6;
538 /* Decode 4 hex digits */
539 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000540 Py_UNICODE digit = buf[next];
Antoine Pitrou22ad2452010-10-09 15:28:59 +0000541 c2 <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000542 switch (digit) {
543 case '0': case '1': case '2': case '3': case '4':
544 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000545 c2 |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000546 case 'a': case 'b': case 'c': case 'd': case 'e':
547 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000548 c2 |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000549 case 'A': case 'B': case 'C': case 'D': case 'E':
550 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000551 c2 |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000552 default:
553 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
554 goto bail;
555 }
556 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000557 if ((c2 & 0xfc00) != 0xdc00) {
558 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
559 goto bail;
560 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000561 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
562 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000563 else if ((c & 0xfc00) == 0xdc00) {
564 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
565 goto bail;
566 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000567#endif
568 }
Barry Warsawfa658272010-11-02 21:03:09 +0000569 chunk = PyUnicode_FromUnicode(&c, 1);
570 if (chunk == NULL) {
571 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000572 }
573 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000574 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000575 goto bail;
576 }
577 Py_DECREF(chunk);
578 }
579
Ezio Melottidf8a8f72011-05-04 14:40:53 +0300580 rval = join_list_unicode(chunks);
Brett Cannon4b964f92008-05-05 20:21:38 +0000581 if (rval == NULL) {
582 goto bail;
583 }
Benjamin Peterson336680e2008-10-16 21:48:06 +0000584 Py_CLEAR(chunks);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000585 *next_end_ptr = end;
586 return rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000587bail:
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000588 *next_end_ptr = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000589 Py_XDECREF(chunks);
590 return NULL;
591}
592
593
594static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000595scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Brett Cannon4b964f92008-05-05 20:21:38 +0000596{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000597 /* Read the JSON string from PyUnicode pystr.
598 end is the index of the first character after the quote.
599 if strict is zero then literal control characters are allowed
600 *next_end_ptr is a return-by-reference index of the character
601 after the end quote
602
603 Return value is a new PyUnicode
604 */
Brett Cannon4b964f92008-05-05 20:21:38 +0000605 PyObject *rval;
606 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
607 Py_ssize_t begin = end - 1;
Brett Cannon8e9757e2010-05-03 23:43:49 +0000608 Py_ssize_t next;
Brett Cannon4b964f92008-05-05 20:21:38 +0000609 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
610 PyObject *chunks = PyList_New(0);
611 if (chunks == NULL) {
612 goto bail;
613 }
Bob Ippolitod648f642008-07-19 21:59:50 +0000614 if (end < 0 || len <= end) {
615 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
616 goto bail;
617 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000618 while (1) {
619 /* Find the end of the string or the next escape */
620 Py_UNICODE c = 0;
621 PyObject *chunk = NULL;
622 for (next = end; next < len; next++) {
623 c = buf[next];
624 if (c == '"' || c == '\\') {
625 break;
626 }
627 else if (strict && c <= 0x1f) {
Bob Ippolitod648f642008-07-19 21:59:50 +0000628 raise_errmsg("Invalid control character at", pystr, next);
Brett Cannon4b964f92008-05-05 20:21:38 +0000629 goto bail;
630 }
631 }
632 if (!(c == '"' || c == '\\')) {
633 raise_errmsg("Unterminated string starting at", pystr, begin);
634 goto bail;
635 }
636 /* Pick up this chunk if it's not zero length */
637 if (next != end) {
638 chunk = PyUnicode_FromUnicode(&buf[end], next - end);
639 if (chunk == NULL) {
640 goto bail;
641 }
642 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson87e6ad22008-10-16 21:27:54 +0000643 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000644 goto bail;
645 }
646 Py_DECREF(chunk);
647 }
648 next++;
649 if (c == '"') {
650 end = next;
651 break;
652 }
653 if (next == len) {
654 raise_errmsg("Unterminated string starting at", pystr, begin);
655 goto bail;
656 }
657 c = buf[next];
658 if (c != 'u') {
659 /* Non-unicode backslash escapes */
660 end = next + 1;
661 switch (c) {
662 case '"': break;
663 case '\\': break;
664 case '/': break;
665 case 'b': c = '\b'; break;
666 case 'f': c = '\f'; break;
667 case 'n': c = '\n'; break;
668 case 'r': c = '\r'; break;
669 case 't': c = '\t'; break;
670 default: c = 0;
671 }
672 if (c == 0) {
673 raise_errmsg("Invalid \\escape", pystr, end - 2);
674 goto bail;
675 }
676 }
677 else {
678 c = 0;
679 next++;
680 end = next + 4;
681 if (end >= len) {
682 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
683 goto bail;
684 }
685 /* Decode 4 hex digits */
686 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000687 Py_UNICODE digit = buf[next];
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000688 c <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000689 switch (digit) {
690 case '0': case '1': case '2': case '3': case '4':
691 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000692 c |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000693 case 'a': case 'b': case 'c': case 'd': case 'e':
694 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000695 c |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000696 case 'A': case 'B': case 'C': case 'D': case 'E':
697 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000698 c |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000699 default:
700 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
701 goto bail;
702 }
703 }
704#ifdef Py_UNICODE_WIDE
705 /* Surrogate pair */
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000706 if ((c & 0xfc00) == 0xd800) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000707 Py_UNICODE c2 = 0;
708 if (end + 6 >= len) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000709 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
710 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000711 }
712 if (buf[next++] != '\\' || buf[next++] != 'u') {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000713 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
714 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000715 }
716 end += 6;
717 /* Decode 4 hex digits */
718 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000719 Py_UNICODE digit = buf[next];
Antoine Pitrou22ad2452010-10-09 15:28:59 +0000720 c2 <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000721 switch (digit) {
722 case '0': case '1': case '2': case '3': case '4':
723 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000724 c2 |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000725 case 'a': case 'b': case 'c': case 'd': case 'e':
726 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000727 c2 |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000728 case 'A': case 'B': case 'C': case 'D': case 'E':
729 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000730 c2 |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000731 default:
732 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
733 goto bail;
734 }
735 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000736 if ((c2 & 0xfc00) != 0xdc00) {
737 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
738 goto bail;
739 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000740 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
741 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000742 else if ((c & 0xfc00) == 0xdc00) {
743 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
744 goto bail;
745 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000746#endif
747 }
748 chunk = PyUnicode_FromUnicode(&c, 1);
749 if (chunk == NULL) {
750 goto bail;
751 }
752 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000753 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000754 goto bail;
755 }
756 Py_DECREF(chunk);
757 }
758
759 rval = join_list_unicode(chunks);
760 if (rval == NULL) {
761 goto bail;
762 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000763 Py_DECREF(chunks);
764 *next_end_ptr = end;
765 return rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000766bail:
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000767 *next_end_ptr = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000768 Py_XDECREF(chunks);
769 return NULL;
770}
771
772PyDoc_STRVAR(pydoc_scanstring,
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000773 "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
774 "\n"
775 "Scan the string s for a JSON string. End is the index of the\n"
776 "character in s after the quote that started the JSON string.\n"
777 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
778 "on attempt to decode an invalid string. If strict is False then literal\n"
779 "control characters are allowed in the string.\n"
780 "\n"
781 "Returns a tuple of the decoded string and the index of the character in s\n"
782 "after the end quote."
783);
Brett Cannon4b964f92008-05-05 20:21:38 +0000784
785static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000786py_scanstring(PyObject* self UNUSED, PyObject *args)
Brett Cannon4b964f92008-05-05 20:21:38 +0000787{
788 PyObject *pystr;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000789 PyObject *rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000790 Py_ssize_t end;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000791 Py_ssize_t next_end = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000792 char *encoding = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000793 int strict = 1;
794 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000795 return NULL;
796 }
797 if (encoding == NULL) {
798 encoding = DEFAULT_ENCODING;
799 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000800 if (PyString_Check(pystr)) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000801 rval = scanstring_str(pystr, end, encoding, strict, &next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000802 }
803 else if (PyUnicode_Check(pystr)) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000804 rval = scanstring_unicode(pystr, end, strict, &next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000805 }
806 else {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000807 PyErr_Format(PyExc_TypeError,
808 "first argument must be a string, not %.80s",
Brett Cannon4b964f92008-05-05 20:21:38 +0000809 Py_TYPE(pystr)->tp_name);
810 return NULL;
811 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000812 return _build_rval_index_tuple(rval, next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000813}
814
815PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000816 "encode_basestring_ascii(basestring) -> str\n"
817 "\n"
818 "Return an ASCII-only JSON representation of a Python string"
819);
Brett Cannon4b964f92008-05-05 20:21:38 +0000820
821static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000822py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Brett Cannon4b964f92008-05-05 20:21:38 +0000823{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000824 /* Return an ASCII-only JSON representation of a Python string */
Brett Cannon4b964f92008-05-05 20:21:38 +0000825 /* METH_O */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000826 if (PyString_Check(pystr)) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000827 return ascii_escape_str(pystr);
828 }
829 else if (PyUnicode_Check(pystr)) {
830 return ascii_escape_unicode(pystr);
831 }
832 else {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000833 PyErr_Format(PyExc_TypeError,
834 "first argument must be a string, not %.80s",
Brett Cannon4b964f92008-05-05 20:21:38 +0000835 Py_TYPE(pystr)->tp_name);
836 return NULL;
837 }
838}
839
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000840static void
841scanner_dealloc(PyObject *self)
842{
843 /* Deallocate scanner object */
844 scanner_clear(self);
845 Py_TYPE(self)->tp_free(self);
846}
847
848static int
849scanner_traverse(PyObject *self, visitproc visit, void *arg)
850{
851 PyScannerObject *s;
852 assert(PyScanner_Check(self));
853 s = (PyScannerObject *)self;
854 Py_VISIT(s->encoding);
855 Py_VISIT(s->strict);
856 Py_VISIT(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000857 Py_VISIT(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000858 Py_VISIT(s->parse_float);
859 Py_VISIT(s->parse_int);
860 Py_VISIT(s->parse_constant);
861 return 0;
862}
863
864static int
865scanner_clear(PyObject *self)
866{
867 PyScannerObject *s;
868 assert(PyScanner_Check(self));
869 s = (PyScannerObject *)self;
870 Py_CLEAR(s->encoding);
871 Py_CLEAR(s->strict);
872 Py_CLEAR(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000873 Py_CLEAR(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000874 Py_CLEAR(s->parse_float);
875 Py_CLEAR(s->parse_int);
876 Py_CLEAR(s->parse_constant);
877 return 0;
878}
879
880static PyObject *
881_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
882 /* Read a JSON object from PyString pystr.
883 idx is the index of the first character after the opening curly brace.
884 *next_idx_ptr is a return-by-reference index to the first character after
885 the closing curly brace.
886
887 Returns a new PyObject (usually a dict, but object_hook can change that)
888 */
889 char *str = PyString_AS_STRING(pystr);
890 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000891 PyObject *rval;
892 PyObject *pairs;
893 PyObject *item;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000894 PyObject *key = NULL;
895 PyObject *val = NULL;
896 char *encoding = PyString_AS_STRING(s->encoding);
897 int strict = PyObject_IsTrue(s->strict);
898 Py_ssize_t next_idx;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000899
900 pairs = PyList_New(0);
901 if (pairs == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000902 return NULL;
903
904 /* skip whitespace after { */
905 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
906
907 /* only loop if the object is non-empty */
908 if (idx <= end_idx && str[idx] != '}') {
909 while (idx <= end_idx) {
910 /* read key */
911 if (str[idx] != '"') {
912 raise_errmsg("Expecting property name", pystr, idx);
913 goto bail;
914 }
915 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
916 if (key == NULL)
917 goto bail;
918 idx = next_idx;
919
920 /* skip whitespace between key and : delimiter, read :, skip whitespace */
921 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
922 if (idx > end_idx || str[idx] != ':') {
923 raise_errmsg("Expecting : delimiter", pystr, idx);
924 goto bail;
925 }
926 idx++;
927 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
928
929 /* read any JSON data type */
930 val = scan_once_str(s, pystr, idx, &next_idx);
931 if (val == NULL)
932 goto bail;
933
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000934 item = PyTuple_Pack(2, key, val);
935 if (item == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000936 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000937 Py_CLEAR(key);
938 Py_CLEAR(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000939 if (PyList_Append(pairs, item) == -1) {
940 Py_DECREF(item);
941 goto bail;
942 }
943 Py_DECREF(item);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000944 idx = next_idx;
945
946 /* skip whitespace before } or , */
947 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
948
949 /* bail if the object is closed or we didn't get the , delimiter */
950 if (idx > end_idx) break;
951 if (str[idx] == '}') {
952 break;
953 }
954 else if (str[idx] != ',') {
955 raise_errmsg("Expecting , delimiter", pystr, idx);
956 goto bail;
957 }
958 idx++;
959
960 /* skip whitespace after , delimiter */
961 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
962 }
963 }
964 /* verify that idx < end_idx, str[idx] should be '}' */
965 if (idx > end_idx || str[idx] != '}') {
966 raise_errmsg("Expecting object", pystr, end_idx);
967 goto bail;
968 }
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000969
970 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
971 if (s->pairs_hook != Py_None) {
972 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
973 if (val == NULL)
974 goto bail;
975 Py_DECREF(pairs);
976 *next_idx_ptr = idx + 1;
977 return val;
978 }
979
980 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
981 pairs, NULL);
982 if (rval == NULL)
983 goto bail;
984 Py_CLEAR(pairs);
985
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000986 /* if object_hook is not None: rval = object_hook(rval) */
987 if (s->object_hook != Py_None) {
988 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
989 if (val == NULL)
990 goto bail;
991 Py_DECREF(rval);
992 rval = val;
993 val = NULL;
994 }
995 *next_idx_ptr = idx + 1;
996 return rval;
997bail:
998 Py_XDECREF(key);
999 Py_XDECREF(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001000 Py_XDECREF(pairs);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001001 return NULL;
1002}
1003
1004static PyObject *
1005_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1006 /* Read a JSON object from PyUnicode pystr.
1007 idx is the index of the first character after the opening curly brace.
1008 *next_idx_ptr is a return-by-reference index to the first character after
1009 the closing curly brace.
1010
1011 Returns a new PyObject (usually a dict, but object_hook can change that)
1012 */
1013 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1014 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001015 PyObject *rval;
1016 PyObject *pairs;
1017 PyObject *item;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001018 PyObject *key = NULL;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001019 PyObject *val = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001020 int strict = PyObject_IsTrue(s->strict);
1021 Py_ssize_t next_idx;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001022
1023 pairs = PyList_New(0);
1024 if (pairs == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001025 return NULL;
1026
1027 /* skip whitespace after { */
1028 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1029
1030 /* only loop if the object is non-empty */
1031 if (idx <= end_idx && str[idx] != '}') {
1032 while (idx <= end_idx) {
1033 /* read key */
1034 if (str[idx] != '"') {
1035 raise_errmsg("Expecting property name", pystr, idx);
1036 goto bail;
1037 }
1038 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1039 if (key == NULL)
1040 goto bail;
1041 idx = next_idx;
1042
1043 /* skip whitespace between key and : delimiter, read :, skip whitespace */
1044 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1045 if (idx > end_idx || str[idx] != ':') {
1046 raise_errmsg("Expecting : delimiter", pystr, idx);
1047 goto bail;
1048 }
1049 idx++;
1050 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1051
1052 /* read any JSON term */
1053 val = scan_once_unicode(s, pystr, idx, &next_idx);
1054 if (val == NULL)
1055 goto bail;
1056
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001057 item = PyTuple_Pack(2, key, val);
1058 if (item == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001059 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001060 Py_CLEAR(key);
1061 Py_CLEAR(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001062 if (PyList_Append(pairs, item) == -1) {
1063 Py_DECREF(item);
1064 goto bail;
1065 }
1066 Py_DECREF(item);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001067 idx = next_idx;
1068
1069 /* skip whitespace before } or , */
1070 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1071
1072 /* bail if the object is closed or we didn't get the , delimiter */
1073 if (idx > end_idx) break;
1074 if (str[idx] == '}') {
1075 break;
1076 }
1077 else if (str[idx] != ',') {
1078 raise_errmsg("Expecting , delimiter", pystr, idx);
1079 goto bail;
1080 }
1081 idx++;
1082
1083 /* skip whitespace after , delimiter */
1084 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1085 }
1086 }
1087
1088 /* verify that idx < end_idx, str[idx] should be '}' */
1089 if (idx > end_idx || str[idx] != '}') {
1090 raise_errmsg("Expecting object", pystr, end_idx);
1091 goto bail;
1092 }
1093
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001094 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1095 if (s->pairs_hook != Py_None) {
1096 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1097 if (val == NULL)
1098 goto bail;
1099 Py_DECREF(pairs);
1100 *next_idx_ptr = idx + 1;
1101 return val;
1102 }
1103
1104 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
1105 pairs, NULL);
1106 if (rval == NULL)
1107 goto bail;
1108 Py_CLEAR(pairs);
1109
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001110 /* if object_hook is not None: rval = object_hook(rval) */
1111 if (s->object_hook != Py_None) {
1112 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1113 if (val == NULL)
1114 goto bail;
1115 Py_DECREF(rval);
1116 rval = val;
1117 val = NULL;
1118 }
1119 *next_idx_ptr = idx + 1;
1120 return rval;
1121bail:
1122 Py_XDECREF(key);
1123 Py_XDECREF(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001124 Py_XDECREF(pairs);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001125 return NULL;
1126}
1127
1128static PyObject *
1129_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1130 /* Read a JSON array from PyString pystr.
1131 idx is the index of the first character after the opening brace.
1132 *next_idx_ptr is a return-by-reference index to the first character after
1133 the closing brace.
1134
1135 Returns a new PyList
1136 */
1137 char *str = PyString_AS_STRING(pystr);
1138 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1139 PyObject *val = NULL;
1140 PyObject *rval = PyList_New(0);
1141 Py_ssize_t next_idx;
1142 if (rval == NULL)
1143 return NULL;
1144
1145 /* skip whitespace after [ */
1146 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1147
1148 /* only loop if the array is non-empty */
1149 if (idx <= end_idx && str[idx] != ']') {
1150 while (idx <= end_idx) {
1151
1152 /* read any JSON term and de-tuplefy the (rval, idx) */
1153 val = scan_once_str(s, pystr, idx, &next_idx);
1154 if (val == NULL)
1155 goto bail;
1156
1157 if (PyList_Append(rval, val) == -1)
1158 goto bail;
1159
1160 Py_CLEAR(val);
1161 idx = next_idx;
1162
1163 /* skip whitespace between term and , */
1164 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1165
1166 /* bail if the array is closed or we didn't get the , delimiter */
1167 if (idx > end_idx) break;
1168 if (str[idx] == ']') {
1169 break;
1170 }
1171 else if (str[idx] != ',') {
1172 raise_errmsg("Expecting , delimiter", pystr, idx);
1173 goto bail;
1174 }
1175 idx++;
1176
1177 /* skip whitespace after , */
1178 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1179 }
1180 }
1181
1182 /* verify that idx < end_idx, str[idx] should be ']' */
1183 if (idx > end_idx || str[idx] != ']') {
1184 raise_errmsg("Expecting object", pystr, end_idx);
1185 goto bail;
1186 }
1187 *next_idx_ptr = idx + 1;
1188 return rval;
1189bail:
1190 Py_XDECREF(val);
1191 Py_DECREF(rval);
1192 return NULL;
1193}
1194
1195static PyObject *
1196_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1197 /* Read a JSON array from PyString pystr.
1198 idx is the index of the first character after the opening brace.
1199 *next_idx_ptr is a return-by-reference index to the first character after
1200 the closing brace.
1201
1202 Returns a new PyList
1203 */
1204 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1205 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1206 PyObject *val = NULL;
1207 PyObject *rval = PyList_New(0);
1208 Py_ssize_t next_idx;
1209 if (rval == NULL)
1210 return NULL;
1211
1212 /* skip whitespace after [ */
1213 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1214
1215 /* only loop if the array is non-empty */
1216 if (idx <= end_idx && str[idx] != ']') {
1217 while (idx <= end_idx) {
1218
1219 /* read any JSON term */
1220 val = scan_once_unicode(s, pystr, idx, &next_idx);
1221 if (val == NULL)
1222 goto bail;
1223
1224 if (PyList_Append(rval, val) == -1)
1225 goto bail;
1226
1227 Py_CLEAR(val);
1228 idx = next_idx;
1229
1230 /* skip whitespace between term and , */
1231 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1232
1233 /* bail if the array is closed or we didn't get the , delimiter */
1234 if (idx > end_idx) break;
1235 if (str[idx] == ']') {
1236 break;
1237 }
1238 else if (str[idx] != ',') {
1239 raise_errmsg("Expecting , delimiter", pystr, idx);
1240 goto bail;
1241 }
1242 idx++;
1243
1244 /* skip whitespace after , */
1245 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1246 }
1247 }
1248
1249 /* verify that idx < end_idx, str[idx] should be ']' */
1250 if (idx > end_idx || str[idx] != ']') {
1251 raise_errmsg("Expecting object", pystr, end_idx);
1252 goto bail;
1253 }
1254 *next_idx_ptr = idx + 1;
1255 return rval;
1256bail:
1257 Py_XDECREF(val);
1258 Py_DECREF(rval);
1259 return NULL;
1260}
1261
1262static PyObject *
1263_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1264 /* Read a JSON constant from PyString pystr.
1265 constant is the constant string that was found
1266 ("NaN", "Infinity", "-Infinity").
1267 idx is the index of the first character of the constant
1268 *next_idx_ptr is a return-by-reference index to the first character after
1269 the constant.
1270
1271 Returns the result of parse_constant
1272 */
1273 PyObject *cstr;
1274 PyObject *rval;
1275 /* constant is "NaN", "Infinity", or "-Infinity" */
1276 cstr = PyString_InternFromString(constant);
1277 if (cstr == NULL)
1278 return NULL;
1279
1280 /* rval = parse_constant(constant) */
1281 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1282 idx += PyString_GET_SIZE(cstr);
1283 Py_DECREF(cstr);
1284 *next_idx_ptr = idx;
1285 return rval;
1286}
1287
1288static PyObject *
1289_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1290 /* Read a JSON number from PyString pystr.
1291 idx is the index of the first character of the number
1292 *next_idx_ptr is a return-by-reference index to the first character after
1293 the number.
1294
1295 Returns a new PyObject representation of that number:
1296 PyInt, PyLong, or PyFloat.
1297 May return other types if parse_int or parse_float are set
1298 */
1299 char *str = PyString_AS_STRING(pystr);
1300 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1301 Py_ssize_t idx = start;
1302 int is_float = 0;
1303 PyObject *rval;
1304 PyObject *numstr;
1305
1306 /* read a sign if it's there, make sure it's not the end of the string */
1307 if (str[idx] == '-') {
1308 idx++;
1309 if (idx > end_idx) {
1310 PyErr_SetNone(PyExc_StopIteration);
1311 return NULL;
1312 }
1313 }
1314
1315 /* read as many integer digits as we find as long as it doesn't start with 0 */
1316 if (str[idx] >= '1' && str[idx] <= '9') {
1317 idx++;
1318 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1319 }
1320 /* if it starts with 0 we only expect one integer digit */
1321 else if (str[idx] == '0') {
1322 idx++;
1323 }
1324 /* no integer digits, error */
1325 else {
1326 PyErr_SetNone(PyExc_StopIteration);
1327 return NULL;
1328 }
1329
1330 /* if the next char is '.' followed by a digit then read all float digits */
1331 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1332 is_float = 1;
1333 idx += 2;
1334 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1335 }
1336
1337 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1338 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1339
1340 /* save the index of the 'e' or 'E' just in case we need to backtrack */
1341 Py_ssize_t e_start = idx;
1342 idx++;
1343
1344 /* read an exponent sign if present */
1345 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1346
1347 /* read all digits */
1348 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1349
1350 /* if we got a digit, then parse as float. if not, backtrack */
1351 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1352 is_float = 1;
1353 }
1354 else {
1355 idx = e_start;
1356 }
1357 }
1358
1359 /* copy the section we determined to be a number */
1360 numstr = PyString_FromStringAndSize(&str[start], idx - start);
1361 if (numstr == NULL)
1362 return NULL;
1363 if (is_float) {
1364 /* parse as a float using a fast path if available, otherwise call user defined method */
1365 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1366 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1367 }
1368 else {
Eric Smith129c97d2009-10-28 08:44:37 +00001369 double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1370 NULL, NULL);
1371 if (d == -1.0 && PyErr_Occurred())
1372 return NULL;
1373 rval = PyFloat_FromDouble(d);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001374 }
1375 }
1376 else {
1377 /* parse as an int using a fast path if available, otherwise call user defined method */
1378 if (s->parse_int != (PyObject *)&PyInt_Type) {
1379 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1380 }
1381 else {
1382 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1383 }
1384 }
1385 Py_DECREF(numstr);
1386 *next_idx_ptr = idx;
1387 return rval;
1388}
1389
1390static PyObject *
1391_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1392 /* Read a JSON number from PyUnicode pystr.
1393 idx is the index of the first character of the number
1394 *next_idx_ptr is a return-by-reference index to the first character after
1395 the number.
1396
1397 Returns a new PyObject representation of that number:
1398 PyInt, PyLong, or PyFloat.
1399 May return other types if parse_int or parse_float are set
1400 */
1401 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1402 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1403 Py_ssize_t idx = start;
1404 int is_float = 0;
1405 PyObject *rval;
1406 PyObject *numstr;
1407
1408 /* read a sign if it's there, make sure it's not the end of the string */
1409 if (str[idx] == '-') {
1410 idx++;
1411 if (idx > end_idx) {
1412 PyErr_SetNone(PyExc_StopIteration);
1413 return NULL;
1414 }
1415 }
1416
1417 /* read as many integer digits as we find as long as it doesn't start with 0 */
1418 if (str[idx] >= '1' && str[idx] <= '9') {
1419 idx++;
1420 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1421 }
1422 /* if it starts with 0 we only expect one integer digit */
1423 else if (str[idx] == '0') {
1424 idx++;
1425 }
1426 /* no integer digits, error */
1427 else {
1428 PyErr_SetNone(PyExc_StopIteration);
1429 return NULL;
1430 }
1431
1432 /* if the next char is '.' followed by a digit then read all float digits */
1433 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1434 is_float = 1;
1435 idx += 2;
Bob Ippolito76a982a2009-03-29 22:33:58 +00001436 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001437 }
1438
1439 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1440 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1441 Py_ssize_t e_start = idx;
1442 idx++;
1443
1444 /* read an exponent sign if present */
1445 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1446
1447 /* read all digits */
1448 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1449
1450 /* if we got a digit, then parse as float. if not, backtrack */
1451 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1452 is_float = 1;
1453 }
1454 else {
1455 idx = e_start;
1456 }
1457 }
1458
1459 /* copy the section we determined to be a number */
1460 numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1461 if (numstr == NULL)
1462 return NULL;
1463 if (is_float) {
1464 /* parse as a float using a fast path if available, otherwise call user defined method */
1465 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1466 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1467 }
1468 else {
1469 rval = PyFloat_FromString(numstr, NULL);
1470 }
1471 }
1472 else {
1473 /* no fast path for unicode -> int, just call */
1474 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1475 }
1476 Py_DECREF(numstr);
1477 *next_idx_ptr = idx;
1478 return rval;
1479}
1480
1481static PyObject *
1482scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1483{
1484 /* Read one JSON term (of any kind) from PyString pystr.
1485 idx is the index of the first character of the term
1486 *next_idx_ptr is a return-by-reference index to the first character after
1487 the number.
1488
1489 Returns a new PyObject representation of the term.
1490 */
1491 char *str = PyString_AS_STRING(pystr);
1492 Py_ssize_t length = PyString_GET_SIZE(pystr);
1493 if (idx >= length) {
1494 PyErr_SetNone(PyExc_StopIteration);
1495 return NULL;
1496 }
1497 switch (str[idx]) {
1498 case '"':
1499 /* string */
1500 return scanstring_str(pystr, idx + 1,
1501 PyString_AS_STRING(s->encoding),
1502 PyObject_IsTrue(s->strict),
1503 next_idx_ptr);
1504 case '{':
1505 /* object */
1506 return _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1507 case '[':
1508 /* array */
1509 return _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1510 case 'n':
1511 /* null */
1512 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1513 Py_INCREF(Py_None);
1514 *next_idx_ptr = idx + 4;
1515 return Py_None;
1516 }
1517 break;
1518 case 't':
1519 /* true */
1520 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1521 Py_INCREF(Py_True);
1522 *next_idx_ptr = idx + 4;
1523 return Py_True;
1524 }
1525 break;
1526 case 'f':
1527 /* false */
1528 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1529 Py_INCREF(Py_False);
1530 *next_idx_ptr = idx + 5;
1531 return Py_False;
1532 }
1533 break;
1534 case 'N':
1535 /* NaN */
1536 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1537 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1538 }
1539 break;
1540 case 'I':
1541 /* Infinity */
1542 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1543 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1544 }
1545 break;
1546 case '-':
1547 /* -Infinity */
1548 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1549 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1550 }
1551 break;
1552 }
1553 /* Didn't find a string, object, array, or named constant. Look for a number. */
1554 return _match_number_str(s, pystr, idx, next_idx_ptr);
1555}
1556
1557static PyObject *
1558scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1559{
1560 /* Read one JSON term (of any kind) from PyUnicode pystr.
1561 idx is the index of the first character of the term
1562 *next_idx_ptr is a return-by-reference index to the first character after
1563 the number.
1564
1565 Returns a new PyObject representation of the term.
1566 */
1567 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1568 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1569 if (idx >= length) {
1570 PyErr_SetNone(PyExc_StopIteration);
1571 return NULL;
1572 }
1573 switch (str[idx]) {
1574 case '"':
1575 /* string */
1576 return scanstring_unicode(pystr, idx + 1,
1577 PyObject_IsTrue(s->strict),
1578 next_idx_ptr);
1579 case '{':
1580 /* object */
1581 return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1582 case '[':
1583 /* array */
1584 return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1585 case 'n':
1586 /* null */
1587 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1588 Py_INCREF(Py_None);
1589 *next_idx_ptr = idx + 4;
1590 return Py_None;
1591 }
1592 break;
1593 case 't':
1594 /* true */
1595 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1596 Py_INCREF(Py_True);
1597 *next_idx_ptr = idx + 4;
1598 return Py_True;
1599 }
1600 break;
1601 case 'f':
1602 /* false */
1603 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1604 Py_INCREF(Py_False);
1605 *next_idx_ptr = idx + 5;
1606 return Py_False;
1607 }
1608 break;
1609 case 'N':
1610 /* NaN */
1611 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1612 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1613 }
1614 break;
1615 case 'I':
1616 /* Infinity */
1617 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1618 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1619 }
1620 break;
1621 case '-':
1622 /* -Infinity */
1623 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1624 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1625 }
1626 break;
1627 }
1628 /* Didn't find a string, object, array, or named constant. Look for a number. */
1629 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1630}
1631
1632static PyObject *
1633scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1634{
1635 /* Python callable interface to scan_once_{str,unicode} */
1636 PyObject *pystr;
1637 PyObject *rval;
1638 Py_ssize_t idx;
1639 Py_ssize_t next_idx = -1;
1640 static char *kwlist[] = {"string", "idx", NULL};
1641 PyScannerObject *s;
1642 assert(PyScanner_Check(self));
1643 s = (PyScannerObject *)self;
1644 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1645 return NULL;
1646
1647 if (PyString_Check(pystr)) {
1648 rval = scan_once_str(s, pystr, idx, &next_idx);
1649 }
1650 else if (PyUnicode_Check(pystr)) {
1651 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1652 }
1653 else {
1654 PyErr_Format(PyExc_TypeError,
1655 "first argument must be a string, not %.80s",
1656 Py_TYPE(pystr)->tp_name);
1657 return NULL;
1658 }
1659 return _build_rval_index_tuple(rval, next_idx);
1660}
1661
1662static PyObject *
1663scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1664{
1665 PyScannerObject *s;
1666 s = (PyScannerObject *)type->tp_alloc(type, 0);
1667 if (s != NULL) {
1668 s->encoding = NULL;
1669 s->strict = NULL;
1670 s->object_hook = NULL;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001671 s->pairs_hook = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001672 s->parse_float = NULL;
1673 s->parse_int = NULL;
1674 s->parse_constant = NULL;
1675 }
1676 return (PyObject *)s;
1677}
1678
1679static int
1680scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1681{
1682 /* Initialize Scanner object */
1683 PyObject *ctx;
1684 static char *kwlist[] = {"context", NULL};
1685 PyScannerObject *s;
1686
1687 assert(PyScanner_Check(self));
1688 s = (PyScannerObject *)self;
1689
1690 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1691 return -1;
1692
1693 /* PyString_AS_STRING is used on encoding */
1694 s->encoding = PyObject_GetAttrString(ctx, "encoding");
Antoine Pitrou187177f2009-12-08 15:40:51 +00001695 if (s->encoding == NULL)
1696 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001697 if (s->encoding == Py_None) {
1698 Py_DECREF(Py_None);
1699 s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1700 }
1701 else if (PyUnicode_Check(s->encoding)) {
1702 PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
1703 Py_DECREF(s->encoding);
1704 s->encoding = tmp;
1705 }
1706 if (s->encoding == NULL || !PyString_Check(s->encoding))
1707 goto bail;
1708
1709 /* All of these will fail "gracefully" so we don't need to verify them */
1710 s->strict = PyObject_GetAttrString(ctx, "strict");
1711 if (s->strict == NULL)
1712 goto bail;
1713 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1714 if (s->object_hook == NULL)
1715 goto bail;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001716 s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
Georg Brandld823bdc2011-01-02 14:20:16 +00001717 if (s->pairs_hook == NULL)
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001718 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001719 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1720 if (s->parse_float == NULL)
1721 goto bail;
1722 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1723 if (s->parse_int == NULL)
1724 goto bail;
1725 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1726 if (s->parse_constant == NULL)
1727 goto bail;
1728
1729 return 0;
1730
1731bail:
1732 Py_CLEAR(s->encoding);
1733 Py_CLEAR(s->strict);
1734 Py_CLEAR(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001735 Py_CLEAR(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001736 Py_CLEAR(s->parse_float);
1737 Py_CLEAR(s->parse_int);
1738 Py_CLEAR(s->parse_constant);
1739 return -1;
1740}
1741
1742PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1743
1744static
1745PyTypeObject PyScannerType = {
1746 PyObject_HEAD_INIT(NULL)
1747 0, /* tp_internal */
1748 "_json.Scanner", /* tp_name */
1749 sizeof(PyScannerObject), /* tp_basicsize */
1750 0, /* tp_itemsize */
1751 scanner_dealloc, /* tp_dealloc */
1752 0, /* tp_print */
1753 0, /* tp_getattr */
1754 0, /* tp_setattr */
1755 0, /* tp_compare */
1756 0, /* tp_repr */
1757 0, /* tp_as_number */
1758 0, /* tp_as_sequence */
1759 0, /* tp_as_mapping */
1760 0, /* tp_hash */
1761 scanner_call, /* tp_call */
1762 0, /* tp_str */
1763 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1764 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1765 0, /* tp_as_buffer */
1766 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1767 scanner_doc, /* tp_doc */
1768 scanner_traverse, /* tp_traverse */
1769 scanner_clear, /* tp_clear */
1770 0, /* tp_richcompare */
1771 0, /* tp_weaklistoffset */
1772 0, /* tp_iter */
1773 0, /* tp_iternext */
1774 0, /* tp_methods */
1775 scanner_members, /* tp_members */
1776 0, /* tp_getset */
1777 0, /* tp_base */
1778 0, /* tp_dict */
1779 0, /* tp_descr_get */
1780 0, /* tp_descr_set */
1781 0, /* tp_dictoffset */
1782 scanner_init, /* tp_init */
1783 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1784 scanner_new, /* tp_new */
1785 0,/* PyObject_GC_Del, */ /* tp_free */
1786};
1787
1788static PyObject *
1789encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1790{
1791 PyEncoderObject *s;
1792 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1793 if (s != NULL) {
1794 s->markers = NULL;
1795 s->defaultfn = NULL;
1796 s->encoder = NULL;
1797 s->indent = NULL;
1798 s->key_separator = NULL;
1799 s->item_separator = NULL;
1800 s->sort_keys = NULL;
1801 s->skipkeys = NULL;
1802 }
1803 return (PyObject *)s;
1804}
1805
1806static int
1807encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1808{
1809 /* initialize Encoder object */
1810 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1811
1812 PyEncoderObject *s;
Antoine Pitrou187177f2009-12-08 15:40:51 +00001813 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1814 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001815
1816 assert(PyEncoder_Check(self));
1817 s = (PyEncoderObject *)self;
1818
1819 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
Antoine Pitrou187177f2009-12-08 15:40:51 +00001820 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
1821 &sort_keys, &skipkeys, &allow_nan))
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001822 return -1;
1823
Antoine Pitrou187177f2009-12-08 15:40:51 +00001824 s->markers = markers;
1825 s->defaultfn = defaultfn;
1826 s->encoder = encoder;
1827 s->indent = indent;
1828 s->key_separator = key_separator;
1829 s->item_separator = item_separator;
1830 s->sort_keys = sort_keys;
1831 s->skipkeys = skipkeys;
1832 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
1833 s->allow_nan = PyObject_IsTrue(allow_nan);
1834
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001835 Py_INCREF(s->markers);
1836 Py_INCREF(s->defaultfn);
1837 Py_INCREF(s->encoder);
1838 Py_INCREF(s->indent);
1839 Py_INCREF(s->key_separator);
1840 Py_INCREF(s->item_separator);
1841 Py_INCREF(s->sort_keys);
1842 Py_INCREF(s->skipkeys);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001843 return 0;
1844}
1845
1846static PyObject *
1847encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1848{
1849 /* Python callable interface to encode_listencode_obj */
1850 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1851 PyObject *obj;
1852 PyObject *rval;
1853 Py_ssize_t indent_level;
1854 PyEncoderObject *s;
1855 assert(PyEncoder_Check(self));
1856 s = (PyEncoderObject *)self;
1857 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1858 &obj, _convertPyInt_AsSsize_t, &indent_level))
1859 return NULL;
1860 rval = PyList_New(0);
1861 if (rval == NULL)
1862 return NULL;
1863 if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1864 Py_DECREF(rval);
1865 return NULL;
1866 }
1867 return rval;
1868}
1869
1870static PyObject *
1871_encoded_const(PyObject *obj)
1872{
1873 /* Return the JSON string representation of None, True, False */
1874 if (obj == Py_None) {
1875 static PyObject *s_null = NULL;
1876 if (s_null == NULL) {
1877 s_null = PyString_InternFromString("null");
1878 }
1879 Py_INCREF(s_null);
1880 return s_null;
1881 }
1882 else if (obj == Py_True) {
1883 static PyObject *s_true = NULL;
1884 if (s_true == NULL) {
1885 s_true = PyString_InternFromString("true");
1886 }
1887 Py_INCREF(s_true);
1888 return s_true;
1889 }
1890 else if (obj == Py_False) {
1891 static PyObject *s_false = NULL;
1892 if (s_false == NULL) {
1893 s_false = PyString_InternFromString("false");
1894 }
1895 Py_INCREF(s_false);
1896 return s_false;
1897 }
1898 else {
1899 PyErr_SetString(PyExc_ValueError, "not a const");
1900 return NULL;
1901 }
1902}
1903
1904static PyObject *
1905encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1906{
1907 /* Return the JSON representation of a PyFloat */
1908 double i = PyFloat_AS_DOUBLE(obj);
1909 if (!Py_IS_FINITE(i)) {
1910 if (!s->allow_nan) {
1911 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1912 return NULL;
1913 }
1914 if (i > 0) {
1915 return PyString_FromString("Infinity");
1916 }
1917 else if (i < 0) {
1918 return PyString_FromString("-Infinity");
1919 }
1920 else {
1921 return PyString_FromString("NaN");
1922 }
1923 }
1924 /* Use a better float format here? */
1925 return PyObject_Repr(obj);
1926}
1927
1928static PyObject *
1929encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1930{
1931 /* Return the JSON representation of a string */
1932 if (s->fast_encode)
1933 return py_encode_basestring_ascii(NULL, obj);
1934 else
1935 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1936}
1937
1938static int
1939_steal_list_append(PyObject *lst, PyObject *stolen)
1940{
1941 /* Append stolen and then decrement its reference count */
1942 int rval = PyList_Append(lst, stolen);
1943 Py_DECREF(stolen);
1944 return rval;
1945}
1946
1947static int
1948encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
1949{
1950 /* Encode Python object obj to a JSON term, rval is a PyList */
1951 PyObject *newobj;
1952 int rv;
1953
1954 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1955 PyObject *cstr = _encoded_const(obj);
1956 if (cstr == NULL)
1957 return -1;
1958 return _steal_list_append(rval, cstr);
1959 }
1960 else if (PyString_Check(obj) || PyUnicode_Check(obj))
1961 {
1962 PyObject *encoded = encoder_encode_string(s, obj);
1963 if (encoded == NULL)
1964 return -1;
1965 return _steal_list_append(rval, encoded);
1966 }
1967 else if (PyInt_Check(obj) || PyLong_Check(obj)) {
1968 PyObject *encoded = PyObject_Str(obj);
1969 if (encoded == NULL)
1970 return -1;
1971 return _steal_list_append(rval, encoded);
1972 }
1973 else if (PyFloat_Check(obj)) {
1974 PyObject *encoded = encoder_encode_float(s, obj);
1975 if (encoded == NULL)
1976 return -1;
1977 return _steal_list_append(rval, encoded);
1978 }
1979 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
1980 return encoder_listencode_list(s, rval, obj, indent_level);
1981 }
1982 else if (PyDict_Check(obj)) {
1983 return encoder_listencode_dict(s, rval, obj, indent_level);
1984 }
1985 else {
1986 PyObject *ident = NULL;
1987 if (s->markers != Py_None) {
1988 int has_key;
1989 ident = PyLong_FromVoidPtr(obj);
1990 if (ident == NULL)
1991 return -1;
1992 has_key = PyDict_Contains(s->markers, ident);
1993 if (has_key) {
1994 if (has_key != -1)
1995 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1996 Py_DECREF(ident);
1997 return -1;
1998 }
1999 if (PyDict_SetItem(s->markers, ident, obj)) {
2000 Py_DECREF(ident);
2001 return -1;
2002 }
2003 }
2004 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2005 if (newobj == NULL) {
2006 Py_XDECREF(ident);
2007 return -1;
2008 }
2009 rv = encoder_listencode_obj(s, rval, newobj, indent_level);
2010 Py_DECREF(newobj);
2011 if (rv) {
2012 Py_XDECREF(ident);
2013 return -1;
2014 }
2015 if (ident != NULL) {
2016 if (PyDict_DelItem(s->markers, ident)) {
2017 Py_XDECREF(ident);
2018 return -1;
2019 }
2020 Py_XDECREF(ident);
2021 }
2022 return rv;
2023 }
2024}
2025
2026static int
2027encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2028{
2029 /* Encode Python dict dct a JSON term, rval is a PyList */
2030 static PyObject *open_dict = NULL;
2031 static PyObject *close_dict = NULL;
2032 static PyObject *empty_dict = NULL;
2033 PyObject *kstr = NULL;
2034 PyObject *ident = NULL;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002035 PyObject *key = NULL;
2036 PyObject *value = NULL;
2037 PyObject *it = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002038 int skipkeys;
2039 Py_ssize_t idx;
2040
2041 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
2042 open_dict = PyString_InternFromString("{");
2043 close_dict = PyString_InternFromString("}");
2044 empty_dict = PyString_InternFromString("{}");
2045 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
2046 return -1;
2047 }
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002048 if (Py_SIZE(dct) == 0)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002049 return PyList_Append(rval, empty_dict);
2050
2051 if (s->markers != Py_None) {
2052 int has_key;
2053 ident = PyLong_FromVoidPtr(dct);
2054 if (ident == NULL)
2055 goto bail;
2056 has_key = PyDict_Contains(s->markers, ident);
2057 if (has_key) {
2058 if (has_key != -1)
2059 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2060 goto bail;
2061 }
2062 if (PyDict_SetItem(s->markers, ident, dct)) {
2063 goto bail;
2064 }
2065 }
2066
2067 if (PyList_Append(rval, open_dict))
2068 goto bail;
2069
2070 if (s->indent != Py_None) {
2071 /* TODO: DOES NOT RUN */
2072 indent_level += 1;
2073 /*
2074 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2075 separator = _item_separator + newline_indent
2076 buf += newline_indent
2077 */
2078 }
2079
2080 /* TODO: C speedup not implemented for sort_keys */
2081
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002082 it = PyObject_GetIter(dct);
2083 if (it == NULL)
2084 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002085 skipkeys = PyObject_IsTrue(s->skipkeys);
2086 idx = 0;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002087 while ((key = PyIter_Next(it)) != NULL) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002088 PyObject *encoded;
2089
2090 if (PyString_Check(key) || PyUnicode_Check(key)) {
2091 Py_INCREF(key);
2092 kstr = key;
2093 }
2094 else if (PyFloat_Check(key)) {
2095 kstr = encoder_encode_float(s, key);
2096 if (kstr == NULL)
2097 goto bail;
2098 }
2099 else if (PyInt_Check(key) || PyLong_Check(key)) {
2100 kstr = PyObject_Str(key);
2101 if (kstr == NULL)
2102 goto bail;
2103 }
2104 else if (key == Py_True || key == Py_False || key == Py_None) {
2105 kstr = _encoded_const(key);
2106 if (kstr == NULL)
2107 goto bail;
2108 }
2109 else if (skipkeys) {
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002110 Py_DECREF(key);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002111 continue;
2112 }
2113 else {
2114 /* TODO: include repr of key */
Doug Hellmannf31db932010-07-21 12:36:33 +00002115 PyErr_SetString(PyExc_TypeError, "keys must be a string");
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002116 goto bail;
2117 }
2118
2119 if (idx) {
2120 if (PyList_Append(rval, s->item_separator))
2121 goto bail;
2122 }
2123
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002124 value = PyObject_GetItem(dct, key);
2125 if (value == NULL)
2126 goto bail;
2127
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002128 encoded = encoder_encode_string(s, kstr);
2129 Py_CLEAR(kstr);
2130 if (encoded == NULL)
2131 goto bail;
2132 if (PyList_Append(rval, encoded)) {
2133 Py_DECREF(encoded);
2134 goto bail;
2135 }
2136 Py_DECREF(encoded);
2137 if (PyList_Append(rval, s->key_separator))
2138 goto bail;
2139 if (encoder_listencode_obj(s, rval, value, indent_level))
2140 goto bail;
2141 idx += 1;
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002142 Py_CLEAR(value);
2143 Py_DECREF(key);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002144 }
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002145 if (PyErr_Occurred())
2146 goto bail;
2147 Py_CLEAR(it);
2148
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002149 if (ident != NULL) {
2150 if (PyDict_DelItem(s->markers, ident))
2151 goto bail;
2152 Py_CLEAR(ident);
2153 }
2154 if (s->indent != Py_None) {
2155 /* TODO: DOES NOT RUN */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002156 /*
Brett Cannon8e9757e2010-05-03 23:43:49 +00002157 indent_level -= 1;
2158
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002159 yield '\n' + (' ' * (_indent * _current_indent_level))
2160 */
2161 }
2162 if (PyList_Append(rval, close_dict))
2163 goto bail;
2164 return 0;
2165
2166bail:
Raymond Hettingercbba8d42010-10-30 07:29:44 +00002167 Py_XDECREF(it);
2168 Py_XDECREF(key);
2169 Py_XDECREF(value);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002170 Py_XDECREF(kstr);
2171 Py_XDECREF(ident);
2172 return -1;
2173}
2174
2175
2176static int
2177encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2178{
2179 /* Encode Python list seq to a JSON term, rval is a PyList */
2180 static PyObject *open_array = NULL;
2181 static PyObject *close_array = NULL;
2182 static PyObject *empty_array = NULL;
2183 PyObject *ident = NULL;
2184 PyObject *s_fast = NULL;
2185 Py_ssize_t num_items;
2186 PyObject **seq_items;
2187 Py_ssize_t i;
2188
2189 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2190 open_array = PyString_InternFromString("[");
2191 close_array = PyString_InternFromString("]");
2192 empty_array = PyString_InternFromString("[]");
2193 if (open_array == NULL || close_array == NULL || empty_array == NULL)
2194 return -1;
2195 }
2196 ident = NULL;
2197 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
2198 if (s_fast == NULL)
2199 return -1;
2200 num_items = PySequence_Fast_GET_SIZE(s_fast);
2201 if (num_items == 0) {
2202 Py_DECREF(s_fast);
2203 return PyList_Append(rval, empty_array);
2204 }
2205
2206 if (s->markers != Py_None) {
2207 int has_key;
2208 ident = PyLong_FromVoidPtr(seq);
2209 if (ident == NULL)
2210 goto bail;
2211 has_key = PyDict_Contains(s->markers, ident);
2212 if (has_key) {
2213 if (has_key != -1)
2214 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2215 goto bail;
2216 }
2217 if (PyDict_SetItem(s->markers, ident, seq)) {
2218 goto bail;
2219 }
2220 }
2221
2222 seq_items = PySequence_Fast_ITEMS(s_fast);
2223 if (PyList_Append(rval, open_array))
2224 goto bail;
2225 if (s->indent != Py_None) {
2226 /* TODO: DOES NOT RUN */
2227 indent_level += 1;
2228 /*
2229 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2230 separator = _item_separator + newline_indent
2231 buf += newline_indent
2232 */
2233 }
2234 for (i = 0; i < num_items; i++) {
2235 PyObject *obj = seq_items[i];
2236 if (i) {
2237 if (PyList_Append(rval, s->item_separator))
2238 goto bail;
2239 }
2240 if (encoder_listencode_obj(s, rval, obj, indent_level))
2241 goto bail;
2242 }
2243 if (ident != NULL) {
2244 if (PyDict_DelItem(s->markers, ident))
2245 goto bail;
2246 Py_CLEAR(ident);
2247 }
2248 if (s->indent != Py_None) {
2249 /* TODO: DOES NOT RUN */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002250 /*
Brett Cannon8e9757e2010-05-03 23:43:49 +00002251 indent_level -= 1;
2252
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002253 yield '\n' + (' ' * (_indent * _current_indent_level))
2254 */
2255 }
2256 if (PyList_Append(rval, close_array))
2257 goto bail;
2258 Py_DECREF(s_fast);
2259 return 0;
2260
2261bail:
2262 Py_XDECREF(ident);
2263 Py_DECREF(s_fast);
2264 return -1;
2265}
2266
2267static void
2268encoder_dealloc(PyObject *self)
2269{
2270 /* Deallocate Encoder */
2271 encoder_clear(self);
2272 Py_TYPE(self)->tp_free(self);
2273}
2274
2275static int
2276encoder_traverse(PyObject *self, visitproc visit, void *arg)
2277{
2278 PyEncoderObject *s;
2279 assert(PyEncoder_Check(self));
2280 s = (PyEncoderObject *)self;
2281 Py_VISIT(s->markers);
2282 Py_VISIT(s->defaultfn);
2283 Py_VISIT(s->encoder);
2284 Py_VISIT(s->indent);
2285 Py_VISIT(s->key_separator);
2286 Py_VISIT(s->item_separator);
2287 Py_VISIT(s->sort_keys);
2288 Py_VISIT(s->skipkeys);
2289 return 0;
2290}
2291
2292static int
2293encoder_clear(PyObject *self)
2294{
2295 /* Deallocate Encoder */
2296 PyEncoderObject *s;
2297 assert(PyEncoder_Check(self));
2298 s = (PyEncoderObject *)self;
2299 Py_CLEAR(s->markers);
2300 Py_CLEAR(s->defaultfn);
2301 Py_CLEAR(s->encoder);
2302 Py_CLEAR(s->indent);
2303 Py_CLEAR(s->key_separator);
2304 Py_CLEAR(s->item_separator);
2305 Py_CLEAR(s->sort_keys);
2306 Py_CLEAR(s->skipkeys);
2307 return 0;
2308}
2309
2310PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2311
2312static
2313PyTypeObject PyEncoderType = {
2314 PyObject_HEAD_INIT(NULL)
2315 0, /* tp_internal */
2316 "_json.Encoder", /* tp_name */
2317 sizeof(PyEncoderObject), /* tp_basicsize */
2318 0, /* tp_itemsize */
2319 encoder_dealloc, /* tp_dealloc */
2320 0, /* tp_print */
2321 0, /* tp_getattr */
2322 0, /* tp_setattr */
2323 0, /* tp_compare */
2324 0, /* tp_repr */
2325 0, /* tp_as_number */
2326 0, /* tp_as_sequence */
2327 0, /* tp_as_mapping */
2328 0, /* tp_hash */
2329 encoder_call, /* tp_call */
2330 0, /* tp_str */
2331 0, /* tp_getattro */
2332 0, /* tp_setattro */
2333 0, /* tp_as_buffer */
2334 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2335 encoder_doc, /* tp_doc */
2336 encoder_traverse, /* tp_traverse */
2337 encoder_clear, /* tp_clear */
2338 0, /* tp_richcompare */
2339 0, /* tp_weaklistoffset */
2340 0, /* tp_iter */
2341 0, /* tp_iternext */
2342 0, /* tp_methods */
2343 encoder_members, /* tp_members */
2344 0, /* tp_getset */
2345 0, /* tp_base */
2346 0, /* tp_dict */
2347 0, /* tp_descr_get */
2348 0, /* tp_descr_set */
2349 0, /* tp_dictoffset */
2350 encoder_init, /* tp_init */
2351 0, /* tp_alloc */
2352 encoder_new, /* tp_new */
2353 0, /* tp_free */
2354};
2355
2356static PyMethodDef speedups_methods[] = {
2357 {"encode_basestring_ascii",
2358 (PyCFunction)py_encode_basestring_ascii,
2359 METH_O,
2360 pydoc_encode_basestring_ascii},
2361 {"scanstring",
2362 (PyCFunction)py_scanstring,
2363 METH_VARARGS,
2364 pydoc_scanstring},
Brett Cannon4b964f92008-05-05 20:21:38 +00002365 {NULL, NULL, 0, NULL}
2366};
2367
2368PyDoc_STRVAR(module_doc,
2369"json speedups\n");
2370
2371void
2372init_json(void)
2373{
2374 PyObject *m;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002375 PyScannerType.tp_new = PyType_GenericNew;
2376 if (PyType_Ready(&PyScannerType) < 0)
2377 return;
2378 PyEncoderType.tp_new = PyType_GenericNew;
2379 if (PyType_Ready(&PyEncoderType) < 0)
2380 return;
2381 m = Py_InitModule3("_json", speedups_methods, module_doc);
2382 Py_INCREF((PyObject*)&PyScannerType);
2383 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2384 Py_INCREF((PyObject*)&PyEncoderType);
2385 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
Brett Cannon4b964f92008-05-05 20:21:38 +00002386}