blob: b6599f83dc78c6e9315c84eaf3e443bf556edaa5 [file] [log] [blame]
Brett Cannon4b964f92008-05-05 20:21:38 +00001#include "Python.h"
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002#include "structmember.h"
/* ---- Compatibility shims for older Python headers and other compilers ---- */

/* Supply Py_TYPE() when building against pre-2.6 headers that lack it. */
#if !defined(Py_TYPE) && PY_VERSION_HEX < 0x02060000
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif

/* Supply Py_ssize_t, its limits and its PyInt converters when building
   against pre-2.5 headers that lack them; plain int is used instead. */
#if !defined(PY_SSIZE_T_MIN) && PY_VERSION_HEX < 0x02050000
typedef int Py_ssize_t;
#define PY_SSIZE_T_MAX INT_MAX
#define PY_SSIZE_T_MIN INT_MIN
#define PyInt_FromSsize_t PyInt_FromLong
#define PyInt_AsSsize_t PyInt_AsLong
#endif

/* A value is finite when it is neither an infinity nor a NaN. */
#if !defined(Py_IS_FINITE)
#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
#endif

/* UNUSED tags parameters that are intentionally ignored.  It expands to the
   GCC "unused" attribute where available and to nothing elsewhere. */
#if defined(__GNUC__)
#define UNUSED __attribute__((__unused__))
#else
#define UNUSED
#endif
Brett Cannon4b964f92008-05-05 20:21:38 +000022
23#define DEFAULT_ENCODING "utf-8"
Brett Cannon4b964f92008-05-05 20:21:38 +000024
Bob Ippolitod914e3f2009-03-17 23:19:00 +000025#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
26#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
27#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
28#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
29
30static PyTypeObject PyScannerType;
31static PyTypeObject PyEncoderType;
32
33typedef struct _PyScannerObject {
34 PyObject_HEAD
35 PyObject *encoding;
36 PyObject *strict;
37 PyObject *object_hook;
Raymond Hettinger91852ca2009-03-19 19:19:03 +000038 PyObject *pairs_hook;
Bob Ippolitod914e3f2009-03-17 23:19:00 +000039 PyObject *parse_float;
40 PyObject *parse_int;
41 PyObject *parse_constant;
42} PyScannerObject;
43
44static PyMemberDef scanner_members[] = {
45 {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
46 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
47 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
Raymond Hettinger91852ca2009-03-19 19:19:03 +000048 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
Bob Ippolitod914e3f2009-03-17 23:19:00 +000049 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
50 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
51 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
52 {NULL}
53};
54
55typedef struct _PyEncoderObject {
56 PyObject_HEAD
57 PyObject *markers;
58 PyObject *defaultfn;
59 PyObject *encoder;
60 PyObject *indent;
61 PyObject *key_separator;
62 PyObject *item_separator;
63 PyObject *sort_keys;
64 PyObject *skipkeys;
65 int fast_encode;
66 int allow_nan;
67} PyEncoderObject;
68
69static PyMemberDef encoder_members[] = {
70 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
71 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
72 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
73 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
74 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
75 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
76 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
77 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
78 {NULL}
79};
80
81static Py_ssize_t
82ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
83static PyObject *
84ascii_escape_unicode(PyObject *pystr);
85static PyObject *
86ascii_escape_str(PyObject *pystr);
87static PyObject *
88py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
89void init_json(void);
90static PyObject *
91scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
92static PyObject *
93scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
94static PyObject *
95_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
96static PyObject *
97scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
98static int
99scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
100static void
101scanner_dealloc(PyObject *self);
102static int
103scanner_clear(PyObject *self);
104static PyObject *
105encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
106static int
107encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
108static void
109encoder_dealloc(PyObject *self);
110static int
111encoder_clear(PyObject *self);
112static int
113encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
114static int
115encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
116static int
117encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
118static PyObject *
Raymond Hettingera0b8d9a2009-03-19 19:24:43 +0000119_encoded_const(PyObject *obj);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000120static void
121raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
122static PyObject *
123encoder_encode_string(PyEncoderObject *s, PyObject *obj);
124static int
125_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
126static PyObject *
127_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
128static PyObject *
129encoder_encode_float(PyEncoderObject *s, PyObject *obj);
130
/* S_CHAR(c): true when c can be copied into a JSON string literal as-is,
   i.e. it lies in the printable ASCII range ' ' .. '~' and is neither a
   backslash nor a double quote.  Every use of the argument is parenthesised
   so that operator expressions (e.g. S_CHAR(a | b)) are evaluated as one
   value.  The argument is still evaluated several times, so it must not
   have side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')

/* IS_WHITESPACE(c): true for space, tab, line feed and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* Output bytes produced by one escaped input character.  A "\uXXXX"
   escape is 6 bytes; on wide-Py_UNICODE builds one character can turn into
   a surrogate pair "\uXXXX\uXXXX", i.e. twice as many. */
#define MIN_EXPANSION 6
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
140
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000141static int
142_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
143{
144 /* PyObject to Py_ssize_t converter */
145 *size_ptr = PyInt_AsSsize_t(o);
Georg Brandlf71ba952009-05-05 07:48:12 +0000146 if (*size_ptr == -1 && PyErr_Occurred())
147 return 0;
148 return 1;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000149}
150
151static PyObject *
152_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
153{
154 /* Py_ssize_t to PyObject converter */
155 return PyInt_FromSsize_t(*size_ptr);
156}
157
Brett Cannon4b964f92008-05-05 20:21:38 +0000158static Py_ssize_t
159ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
160{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000161 /* Escape unicode code point c to ASCII escape sequences
162 in char *output. output must have at least 12 bytes unused to
163 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Brett Cannon4b964f92008-05-05 20:21:38 +0000164 output[chars++] = '\\';
165 switch (c) {
166 case '\\': output[chars++] = (char)c; break;
167 case '"': output[chars++] = (char)c; break;
168 case '\b': output[chars++] = 'b'; break;
169 case '\f': output[chars++] = 'f'; break;
170 case '\n': output[chars++] = 'n'; break;
171 case '\r': output[chars++] = 'r'; break;
172 case '\t': output[chars++] = 't'; break;
173 default:
174#ifdef Py_UNICODE_WIDE
175 if (c >= 0x10000) {
176 /* UTF-16 surrogate pair */
177 Py_UNICODE v = c - 0x10000;
178 c = 0xd800 | ((v >> 10) & 0x3ff);
179 output[chars++] = 'u';
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000180 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
181 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
182 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
183 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Brett Cannon4b964f92008-05-05 20:21:38 +0000184 c = 0xdc00 | (v & 0x3ff);
185 output[chars++] = '\\';
186 }
187#endif
188 output[chars++] = 'u';
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000189 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
190 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
191 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
192 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Brett Cannon4b964f92008-05-05 20:21:38 +0000193 }
194 return chars;
195}
196
197static PyObject *
198ascii_escape_unicode(PyObject *pystr)
199{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000200 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
Brett Cannon4b964f92008-05-05 20:21:38 +0000201 Py_ssize_t i;
202 Py_ssize_t input_chars;
203 Py_ssize_t output_size;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000204 Py_ssize_t max_output_size;
Brett Cannon4b964f92008-05-05 20:21:38 +0000205 Py_ssize_t chars;
206 PyObject *rval;
207 char *output;
208 Py_UNICODE *input_unicode;
209
210 input_chars = PyUnicode_GET_SIZE(pystr);
211 input_unicode = PyUnicode_AS_UNICODE(pystr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000212
Brett Cannon4b964f92008-05-05 20:21:38 +0000213 /* One char input can be up to 6 chars output, estimate 4 of these */
214 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000215 max_output_size = 2 + (input_chars * MAX_EXPANSION);
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000216 rval = PyString_FromStringAndSize(NULL, output_size);
Brett Cannon4b964f92008-05-05 20:21:38 +0000217 if (rval == NULL) {
218 return NULL;
219 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000220 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000221 chars = 0;
222 output[chars++] = '"';
223 for (i = 0; i < input_chars; i++) {
224 Py_UNICODE c = input_unicode[i];
225 if (S_CHAR(c)) {
226 output[chars++] = (char)c;
227 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000228 else {
Brett Cannon4b964f92008-05-05 20:21:38 +0000229 chars = ascii_escape_char(c, output, chars);
230 }
231 if (output_size - chars < (1 + MAX_EXPANSION)) {
232 /* There's more than four, so let's resize by a lot */
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000233 Py_ssize_t new_output_size = output_size * 2;
Brett Cannon4b964f92008-05-05 20:21:38 +0000234 /* This is an upper bound */
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000235 if (new_output_size > max_output_size) {
236 new_output_size = max_output_size;
Brett Cannon4b964f92008-05-05 20:21:38 +0000237 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000238 /* Make sure that the output size changed before resizing */
239 if (new_output_size != output_size) {
240 output_size = new_output_size;
241 if (_PyString_Resize(&rval, output_size) == -1) {
242 return NULL;
243 }
244 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000245 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000246 }
247 }
248 output[chars++] = '"';
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000249 if (_PyString_Resize(&rval, chars) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000250 return NULL;
251 }
252 return rval;
253}
254
255static PyObject *
256ascii_escape_str(PyObject *pystr)
257{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000258 /* Take a PyString pystr and return a new ASCII-only escaped PyString */
Brett Cannon4b964f92008-05-05 20:21:38 +0000259 Py_ssize_t i;
260 Py_ssize_t input_chars;
261 Py_ssize_t output_size;
262 Py_ssize_t chars;
263 PyObject *rval;
264 char *output;
265 char *input_str;
266
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000267 input_chars = PyString_GET_SIZE(pystr);
268 input_str = PyString_AS_STRING(pystr);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000269
270 /* Fast path for a string that's already ASCII */
271 for (i = 0; i < input_chars; i++) {
272 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
273 if (!S_CHAR(c)) {
274 /* If we have to escape something, scan the string for unicode */
275 Py_ssize_t j;
276 for (j = i; j < input_chars; j++) {
277 c = (Py_UNICODE)(unsigned char)input_str[j];
278 if (c > 0x7f) {
279 /* We hit a non-ASCII character, bail to unicode mode */
280 PyObject *uni;
281 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
282 if (uni == NULL) {
283 return NULL;
284 }
285 rval = ascii_escape_unicode(uni);
286 Py_DECREF(uni);
287 return rval;
288 }
289 }
290 break;
291 }
292 }
293
294 if (i == input_chars) {
295 /* Input is already ASCII */
296 output_size = 2 + input_chars;
297 }
298 else {
299 /* One char input can be up to 6 chars output, estimate 4 of these */
300 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
301 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000302 rval = PyString_FromStringAndSize(NULL, output_size);
Brett Cannon4b964f92008-05-05 20:21:38 +0000303 if (rval == NULL) {
304 return NULL;
305 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000306 output = PyString_AS_STRING(rval);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000307 output[0] = '"';
308
309 /* We know that everything up to i is ASCII already */
310 chars = i + 1;
311 memcpy(&output[1], input_str, i);
312
313 for (; i < input_chars; i++) {
314 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
Brett Cannon4b964f92008-05-05 20:21:38 +0000315 if (S_CHAR(c)) {
316 output[chars++] = (char)c;
317 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000318 else {
Brett Cannon4b964f92008-05-05 20:21:38 +0000319 chars = ascii_escape_char(c, output, chars);
320 }
321 /* An ASCII char can't possibly expand to a surrogate! */
322 if (output_size - chars < (1 + MIN_EXPANSION)) {
323 /* There's more than four, so let's resize by a lot */
324 output_size *= 2;
325 if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
326 output_size = 2 + (input_chars * MIN_EXPANSION);
327 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000328 if (_PyString_Resize(&rval, output_size) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000329 return NULL;
330 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000331 output = PyString_AS_STRING(rval);
Brett Cannon4b964f92008-05-05 20:21:38 +0000332 }
333 }
334 output[chars++] = '"';
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000335 if (_PyString_Resize(&rval, chars) == -1) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000336 return NULL;
337 }
338 return rval;
339}
340
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000341static void
Brett Cannon4b964f92008-05-05 20:21:38 +0000342raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
343{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000344 /* Use the Python function json.decoder.errmsg to raise a nice
345 looking ValueError exception */
Brett Cannon4b964f92008-05-05 20:21:38 +0000346 static PyObject *errmsg_fn = NULL;
347 PyObject *pymsg;
348 if (errmsg_fn == NULL) {
349 PyObject *decoder = PyImport_ImportModule("json.decoder");
350 if (decoder == NULL)
351 return;
352 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000353 Py_DECREF(decoder);
Brett Cannon4b964f92008-05-05 20:21:38 +0000354 if (errmsg_fn == NULL)
355 return;
Brett Cannon4b964f92008-05-05 20:21:38 +0000356 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000357 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
Benjamin Peterson595e3cb2008-10-16 21:09:28 +0000358 if (pymsg) {
359 PyErr_SetObject(PyExc_ValueError, pymsg);
360 Py_DECREF(pymsg);
361 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000362}
363
364static PyObject *
365join_list_unicode(PyObject *lst)
366{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000367 /* return u''.join(lst) */
368 static PyObject *joinfn = NULL;
369 if (joinfn == NULL) {
370 PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
371 if (ustr == NULL)
372 return NULL;
373
374 joinfn = PyObject_GetAttrString(ustr, "join");
375 Py_DECREF(ustr);
376 if (joinfn == NULL)
377 return NULL;
Brett Cannon4b964f92008-05-05 20:21:38 +0000378 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000379 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
Brett Cannon4b964f92008-05-05 20:21:38 +0000380}
381
382static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000383join_list_string(PyObject *lst)
Brett Cannon4b964f92008-05-05 20:21:38 +0000384{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000385 /* return ''.join(lst) */
386 static PyObject *joinfn = NULL;
387 if (joinfn == NULL) {
388 PyObject *ustr = PyString_FromStringAndSize(NULL, 0);
389 if (ustr == NULL)
390 return NULL;
391
392 joinfn = PyObject_GetAttrString(ustr, "join");
393 Py_DECREF(ustr);
394 if (joinfn == NULL)
395 return NULL;
396 }
397 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
398}
399
400static PyObject *
401_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
402 /* return (rval, idx) tuple, stealing reference to rval */
403 PyObject *tpl;
404 PyObject *pyidx;
405 /*
406 steal a reference to rval, returns (rval, idx)
407 */
408 if (rval == NULL) {
409 return NULL;
410 }
411 pyidx = PyInt_FromSsize_t(idx);
412 if (pyidx == NULL) {
413 Py_DECREF(rval);
414 return NULL;
415 }
416 tpl = PyTuple_New(2);
417 if (tpl == NULL) {
418 Py_DECREF(pyidx);
419 Py_DECREF(rval);
420 return NULL;
421 }
422 PyTuple_SET_ITEM(tpl, 0, rval);
423 PyTuple_SET_ITEM(tpl, 1, pyidx);
424 return tpl;
425}
426
427static PyObject *
428scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
429{
430 /* Read the JSON string from PyString pystr.
431 end is the index of the first character after the quote.
432 encoding is the encoding of pystr (must be an ASCII superset)
433 if strict is zero then literal control characters are allowed
434 *next_end_ptr is a return-by-reference index of the character
435 after the end quote
436
437 Return value is a new PyString (if ASCII-only) or PyUnicode
438 */
Brett Cannon4b964f92008-05-05 20:21:38 +0000439 PyObject *rval;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000440 Py_ssize_t len = PyString_GET_SIZE(pystr);
Brett Cannon4b964f92008-05-05 20:21:38 +0000441 Py_ssize_t begin = end - 1;
Brett Cannon8e9757e2010-05-03 23:43:49 +0000442 Py_ssize_t next;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000443 int has_unicode = 0;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000444 char *buf = PyString_AS_STRING(pystr);
Brett Cannon4b964f92008-05-05 20:21:38 +0000445 PyObject *chunks = PyList_New(0);
446 if (chunks == NULL) {
447 goto bail;
448 }
Bob Ippolitod648f642008-07-19 21:59:50 +0000449 if (end < 0 || len <= end) {
450 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
451 goto bail;
452 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000453 while (1) {
454 /* Find the end of the string or the next escape */
455 Py_UNICODE c = 0;
456 PyObject *chunk = NULL;
457 for (next = end; next < len; next++) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000458 c = (unsigned char)buf[next];
Brett Cannon4b964f92008-05-05 20:21:38 +0000459 if (c == '"' || c == '\\') {
460 break;
461 }
462 else if (strict && c <= 0x1f) {
Bob Ippolitod648f642008-07-19 21:59:50 +0000463 raise_errmsg("Invalid control character at", pystr, next);
Brett Cannon4b964f92008-05-05 20:21:38 +0000464 goto bail;
465 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000466 else if (c > 0x7f) {
467 has_unicode = 1;
468 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000469 }
470 if (!(c == '"' || c == '\\')) {
471 raise_errmsg("Unterminated string starting at", pystr, begin);
472 goto bail;
473 }
474 /* Pick up this chunk if it's not zero length */
475 if (next != end) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000476 PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000477 if (strchunk == NULL) {
478 goto bail;
479 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000480 if (has_unicode) {
481 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
482 Py_DECREF(strchunk);
483 if (chunk == NULL) {
484 goto bail;
485 }
486 }
487 else {
488 chunk = strchunk;
Brett Cannon4b964f92008-05-05 20:21:38 +0000489 }
490 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000491 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000492 goto bail;
493 }
494 Py_DECREF(chunk);
495 }
496 next++;
497 if (c == '"') {
498 end = next;
499 break;
500 }
501 if (next == len) {
502 raise_errmsg("Unterminated string starting at", pystr, begin);
503 goto bail;
504 }
505 c = buf[next];
506 if (c != 'u') {
507 /* Non-unicode backslash escapes */
508 end = next + 1;
509 switch (c) {
510 case '"': break;
511 case '\\': break;
512 case '/': break;
513 case 'b': c = '\b'; break;
514 case 'f': c = '\f'; break;
515 case 'n': c = '\n'; break;
516 case 'r': c = '\r'; break;
517 case 't': c = '\t'; break;
518 default: c = 0;
519 }
520 if (c == 0) {
521 raise_errmsg("Invalid \\escape", pystr, end - 2);
522 goto bail;
523 }
524 }
525 else {
526 c = 0;
527 next++;
528 end = next + 4;
529 if (end >= len) {
530 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
531 goto bail;
532 }
533 /* Decode 4 hex digits */
534 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000535 Py_UNICODE digit = buf[next];
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000536 c <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000537 switch (digit) {
538 case '0': case '1': case '2': case '3': case '4':
539 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000540 c |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000541 case 'a': case 'b': case 'c': case 'd': case 'e':
542 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000543 c |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000544 case 'A': case 'B': case 'C': case 'D': case 'E':
545 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000546 c |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000547 default:
548 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
549 goto bail;
550 }
551 }
552#ifdef Py_UNICODE_WIDE
553 /* Surrogate pair */
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000554 if ((c & 0xfc00) == 0xd800) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000555 Py_UNICODE c2 = 0;
556 if (end + 6 >= len) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000557 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
558 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000559 }
560 if (buf[next++] != '\\' || buf[next++] != 'u') {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000561 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
562 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000563 }
564 end += 6;
565 /* Decode 4 hex digits */
566 for (; next < end; next++) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000567 c2 <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000568 Py_UNICODE digit = buf[next];
569 switch (digit) {
570 case '0': case '1': case '2': case '3': case '4':
571 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000572 c2 |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000573 case 'a': case 'b': case 'c': case 'd': case 'e':
574 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000575 c2 |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000576 case 'A': case 'B': case 'C': case 'D': case 'E':
577 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000578 c2 |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000579 default:
580 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
581 goto bail;
582 }
583 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000584 if ((c2 & 0xfc00) != 0xdc00) {
585 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
586 goto bail;
587 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000588 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
589 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000590 else if ((c & 0xfc00) == 0xdc00) {
591 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
592 goto bail;
593 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000594#endif
595 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000596 if (c > 0x7f) {
597 has_unicode = 1;
598 }
599 if (has_unicode) {
600 chunk = PyUnicode_FromUnicode(&c, 1);
601 if (chunk == NULL) {
602 goto bail;
603 }
604 }
605 else {
606 char c_char = Py_CHARMASK(c);
607 chunk = PyString_FromStringAndSize(&c_char, 1);
608 if (chunk == NULL) {
609 goto bail;
610 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000611 }
612 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000613 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000614 goto bail;
615 }
616 Py_DECREF(chunk);
617 }
618
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000619 rval = join_list_string(chunks);
Brett Cannon4b964f92008-05-05 20:21:38 +0000620 if (rval == NULL) {
621 goto bail;
622 }
Benjamin Peterson336680e2008-10-16 21:48:06 +0000623 Py_CLEAR(chunks);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000624 *next_end_ptr = end;
625 return rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000626bail:
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000627 *next_end_ptr = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000628 Py_XDECREF(chunks);
629 return NULL;
630}
631
632
633static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000634scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Brett Cannon4b964f92008-05-05 20:21:38 +0000635{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000636 /* Read the JSON string from PyUnicode pystr.
637 end is the index of the first character after the quote.
638 if strict is zero then literal control characters are allowed
639 *next_end_ptr is a return-by-reference index of the character
640 after the end quote
641
642 Return value is a new PyUnicode
643 */
Brett Cannon4b964f92008-05-05 20:21:38 +0000644 PyObject *rval;
645 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
646 Py_ssize_t begin = end - 1;
Brett Cannon8e9757e2010-05-03 23:43:49 +0000647 Py_ssize_t next;
Brett Cannon4b964f92008-05-05 20:21:38 +0000648 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
649 PyObject *chunks = PyList_New(0);
650 if (chunks == NULL) {
651 goto bail;
652 }
Bob Ippolitod648f642008-07-19 21:59:50 +0000653 if (end < 0 || len <= end) {
654 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
655 goto bail;
656 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000657 while (1) {
658 /* Find the end of the string or the next escape */
659 Py_UNICODE c = 0;
660 PyObject *chunk = NULL;
661 for (next = end; next < len; next++) {
662 c = buf[next];
663 if (c == '"' || c == '\\') {
664 break;
665 }
666 else if (strict && c <= 0x1f) {
Bob Ippolitod648f642008-07-19 21:59:50 +0000667 raise_errmsg("Invalid control character at", pystr, next);
Brett Cannon4b964f92008-05-05 20:21:38 +0000668 goto bail;
669 }
670 }
671 if (!(c == '"' || c == '\\')) {
672 raise_errmsg("Unterminated string starting at", pystr, begin);
673 goto bail;
674 }
675 /* Pick up this chunk if it's not zero length */
676 if (next != end) {
677 chunk = PyUnicode_FromUnicode(&buf[end], next - end);
678 if (chunk == NULL) {
679 goto bail;
680 }
681 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson87e6ad22008-10-16 21:27:54 +0000682 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000683 goto bail;
684 }
685 Py_DECREF(chunk);
686 }
687 next++;
688 if (c == '"') {
689 end = next;
690 break;
691 }
692 if (next == len) {
693 raise_errmsg("Unterminated string starting at", pystr, begin);
694 goto bail;
695 }
696 c = buf[next];
697 if (c != 'u') {
698 /* Non-unicode backslash escapes */
699 end = next + 1;
700 switch (c) {
701 case '"': break;
702 case '\\': break;
703 case '/': break;
704 case 'b': c = '\b'; break;
705 case 'f': c = '\f'; break;
706 case 'n': c = '\n'; break;
707 case 'r': c = '\r'; break;
708 case 't': c = '\t'; break;
709 default: c = 0;
710 }
711 if (c == 0) {
712 raise_errmsg("Invalid \\escape", pystr, end - 2);
713 goto bail;
714 }
715 }
716 else {
717 c = 0;
718 next++;
719 end = next + 4;
720 if (end >= len) {
721 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
722 goto bail;
723 }
724 /* Decode 4 hex digits */
725 for (; next < end; next++) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000726 Py_UNICODE digit = buf[next];
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000727 c <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000728 switch (digit) {
729 case '0': case '1': case '2': case '3': case '4':
730 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000731 c |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000732 case 'a': case 'b': case 'c': case 'd': case 'e':
733 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000734 c |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000735 case 'A': case 'B': case 'C': case 'D': case 'E':
736 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000737 c |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000738 default:
739 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
740 goto bail;
741 }
742 }
743#ifdef Py_UNICODE_WIDE
744 /* Surrogate pair */
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000745 if ((c & 0xfc00) == 0xd800) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000746 Py_UNICODE c2 = 0;
747 if (end + 6 >= len) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000748 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
749 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000750 }
751 if (buf[next++] != '\\' || buf[next++] != 'u') {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000752 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
753 goto bail;
Brett Cannon4b964f92008-05-05 20:21:38 +0000754 }
755 end += 6;
756 /* Decode 4 hex digits */
757 for (; next < end; next++) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000758 c2 <<= 4;
Brett Cannon4b964f92008-05-05 20:21:38 +0000759 Py_UNICODE digit = buf[next];
760 switch (digit) {
761 case '0': case '1': case '2': case '3': case '4':
762 case '5': case '6': case '7': case '8': case '9':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000763 c2 |= (digit - '0'); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000764 case 'a': case 'b': case 'c': case 'd': case 'e':
765 case 'f':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000766 c2 |= (digit - 'a' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000767 case 'A': case 'B': case 'C': case 'D': case 'E':
768 case 'F':
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000769 c2 |= (digit - 'A' + 10); break;
Brett Cannon4b964f92008-05-05 20:21:38 +0000770 default:
771 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
772 goto bail;
773 }
774 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000775 if ((c2 & 0xfc00) != 0xdc00) {
776 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
777 goto bail;
778 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000779 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
780 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000781 else if ((c & 0xfc00) == 0xdc00) {
782 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
783 goto bail;
784 }
Brett Cannon4b964f92008-05-05 20:21:38 +0000785#endif
786 }
787 chunk = PyUnicode_FromUnicode(&c, 1);
788 if (chunk == NULL) {
789 goto bail;
790 }
791 if (PyList_Append(chunks, chunk)) {
Benjamin Peterson336680e2008-10-16 21:48:06 +0000792 Py_DECREF(chunk);
Brett Cannon4b964f92008-05-05 20:21:38 +0000793 goto bail;
794 }
795 Py_DECREF(chunk);
796 }
797
798 rval = join_list_unicode(chunks);
799 if (rval == NULL) {
800 goto bail;
801 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000802 Py_DECREF(chunks);
803 *next_end_ptr = end;
804 return rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000805bail:
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000806 *next_end_ptr = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000807 Py_XDECREF(chunks);
808 return NULL;
809}
810
811PyDoc_STRVAR(pydoc_scanstring,
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000812 "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
813 "\n"
814 "Scan the string s for a JSON string. End is the index of the\n"
815 "character in s after the quote that started the JSON string.\n"
816 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
817 "on attempt to decode an invalid string. If strict is False then literal\n"
818 "control characters are allowed in the string.\n"
819 "\n"
820 "Returns a tuple of the decoded string and the index of the character in s\n"
821 "after the end quote."
822);
Brett Cannon4b964f92008-05-05 20:21:38 +0000823
824static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000825py_scanstring(PyObject* self UNUSED, PyObject *args)
Brett Cannon4b964f92008-05-05 20:21:38 +0000826{
827 PyObject *pystr;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000828 PyObject *rval;
Brett Cannon4b964f92008-05-05 20:21:38 +0000829 Py_ssize_t end;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000830 Py_ssize_t next_end = -1;
Brett Cannon4b964f92008-05-05 20:21:38 +0000831 char *encoding = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000832 int strict = 1;
833 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000834 return NULL;
835 }
836 if (encoding == NULL) {
837 encoding = DEFAULT_ENCODING;
838 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000839 if (PyString_Check(pystr)) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000840 rval = scanstring_str(pystr, end, encoding, strict, &next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000841 }
842 else if (PyUnicode_Check(pystr)) {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000843 rval = scanstring_unicode(pystr, end, strict, &next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000844 }
845 else {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000846 PyErr_Format(PyExc_TypeError,
847 "first argument must be a string, not %.80s",
Brett Cannon4b964f92008-05-05 20:21:38 +0000848 Py_TYPE(pystr)->tp_name);
849 return NULL;
850 }
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000851 return _build_rval_index_tuple(rval, next_end);
Brett Cannon4b964f92008-05-05 20:21:38 +0000852}
853
854PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000855 "encode_basestring_ascii(basestring) -> str\n"
856 "\n"
857 "Return an ASCII-only JSON representation of a Python string"
858);
Brett Cannon4b964f92008-05-05 20:21:38 +0000859
860static PyObject *
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000861py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Brett Cannon4b964f92008-05-05 20:21:38 +0000862{
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000863 /* Return an ASCII-only JSON representation of a Python string */
Brett Cannon4b964f92008-05-05 20:21:38 +0000864 /* METH_O */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000865 if (PyString_Check(pystr)) {
Brett Cannon4b964f92008-05-05 20:21:38 +0000866 return ascii_escape_str(pystr);
867 }
868 else if (PyUnicode_Check(pystr)) {
869 return ascii_escape_unicode(pystr);
870 }
871 else {
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000872 PyErr_Format(PyExc_TypeError,
873 "first argument must be a string, not %.80s",
Brett Cannon4b964f92008-05-05 20:21:38 +0000874 Py_TYPE(pystr)->tp_name);
875 return NULL;
876 }
877}
878
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000879static void
880scanner_dealloc(PyObject *self)
881{
882 /* Deallocate scanner object */
883 scanner_clear(self);
884 Py_TYPE(self)->tp_free(self);
885}
886
887static int
888scanner_traverse(PyObject *self, visitproc visit, void *arg)
889{
890 PyScannerObject *s;
891 assert(PyScanner_Check(self));
892 s = (PyScannerObject *)self;
893 Py_VISIT(s->encoding);
894 Py_VISIT(s->strict);
895 Py_VISIT(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000896 Py_VISIT(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000897 Py_VISIT(s->parse_float);
898 Py_VISIT(s->parse_int);
899 Py_VISIT(s->parse_constant);
900 return 0;
901}
902
903static int
904scanner_clear(PyObject *self)
905{
906 PyScannerObject *s;
907 assert(PyScanner_Check(self));
908 s = (PyScannerObject *)self;
909 Py_CLEAR(s->encoding);
910 Py_CLEAR(s->strict);
911 Py_CLEAR(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000912 Py_CLEAR(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000913 Py_CLEAR(s->parse_float);
914 Py_CLEAR(s->parse_int);
915 Py_CLEAR(s->parse_constant);
916 return 0;
917}
918
919static PyObject *
920_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
921 /* Read a JSON object from PyString pystr.
922 idx is the index of the first character after the opening curly brace.
923 *next_idx_ptr is a return-by-reference index to the first character after
924 the closing curly brace.
925
926 Returns a new PyObject (usually a dict, but object_hook can change that)
927 */
928 char *str = PyString_AS_STRING(pystr);
929 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000930 PyObject *rval;
931 PyObject *pairs;
932 PyObject *item;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000933 PyObject *key = NULL;
934 PyObject *val = NULL;
935 char *encoding = PyString_AS_STRING(s->encoding);
936 int strict = PyObject_IsTrue(s->strict);
937 Py_ssize_t next_idx;
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000938
939 pairs = PyList_New(0);
940 if (pairs == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000941 return NULL;
942
943 /* skip whitespace after { */
944 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
945
946 /* only loop if the object is non-empty */
947 if (idx <= end_idx && str[idx] != '}') {
948 while (idx <= end_idx) {
949 /* read key */
950 if (str[idx] != '"') {
951 raise_errmsg("Expecting property name", pystr, idx);
952 goto bail;
953 }
954 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
955 if (key == NULL)
956 goto bail;
957 idx = next_idx;
958
959 /* skip whitespace between key and : delimiter, read :, skip whitespace */
960 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
961 if (idx > end_idx || str[idx] != ':') {
962 raise_errmsg("Expecting : delimiter", pystr, idx);
963 goto bail;
964 }
965 idx++;
966 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
967
968 /* read any JSON data type */
969 val = scan_once_str(s, pystr, idx, &next_idx);
970 if (val == NULL)
971 goto bail;
972
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000973 item = PyTuple_Pack(2, key, val);
974 if (item == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000975 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000976 Py_CLEAR(key);
977 Py_CLEAR(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +0000978 if (PyList_Append(pairs, item) == -1) {
979 Py_DECREF(item);
980 goto bail;
981 }
982 Py_DECREF(item);
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000983 idx = next_idx;
984
985 /* skip whitespace before } or , */
986 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
987
988 /* bail if the object is closed or we didn't get the , delimiter */
989 if (idx > end_idx) break;
990 if (str[idx] == '}') {
991 break;
992 }
993 else if (str[idx] != ',') {
994 raise_errmsg("Expecting , delimiter", pystr, idx);
995 goto bail;
996 }
997 idx++;
998
999 /* skip whitespace after , delimiter */
1000 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1001 }
1002 }
1003 /* verify that idx < end_idx, str[idx] should be '}' */
1004 if (idx > end_idx || str[idx] != '}') {
1005 raise_errmsg("Expecting object", pystr, end_idx);
1006 goto bail;
1007 }
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001008
1009 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1010 if (s->pairs_hook != Py_None) {
1011 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1012 if (val == NULL)
1013 goto bail;
1014 Py_DECREF(pairs);
1015 *next_idx_ptr = idx + 1;
1016 return val;
1017 }
1018
1019 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
1020 pairs, NULL);
1021 if (rval == NULL)
1022 goto bail;
1023 Py_CLEAR(pairs);
1024
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001025 /* if object_hook is not None: rval = object_hook(rval) */
1026 if (s->object_hook != Py_None) {
1027 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1028 if (val == NULL)
1029 goto bail;
1030 Py_DECREF(rval);
1031 rval = val;
1032 val = NULL;
1033 }
1034 *next_idx_ptr = idx + 1;
1035 return rval;
1036bail:
1037 Py_XDECREF(key);
1038 Py_XDECREF(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001039 Py_XDECREF(pairs);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001040 return NULL;
1041}
1042
1043static PyObject *
1044_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1045 /* Read a JSON object from PyUnicode pystr.
1046 idx is the index of the first character after the opening curly brace.
1047 *next_idx_ptr is a return-by-reference index to the first character after
1048 the closing curly brace.
1049
1050 Returns a new PyObject (usually a dict, but object_hook can change that)
1051 */
1052 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1053 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001054 PyObject *rval;
1055 PyObject *pairs;
1056 PyObject *item;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001057 PyObject *key = NULL;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001058 PyObject *val = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001059 int strict = PyObject_IsTrue(s->strict);
1060 Py_ssize_t next_idx;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001061
1062 pairs = PyList_New(0);
1063 if (pairs == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001064 return NULL;
1065
1066 /* skip whitespace after { */
1067 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1068
1069 /* only loop if the object is non-empty */
1070 if (idx <= end_idx && str[idx] != '}') {
1071 while (idx <= end_idx) {
1072 /* read key */
1073 if (str[idx] != '"') {
1074 raise_errmsg("Expecting property name", pystr, idx);
1075 goto bail;
1076 }
1077 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1078 if (key == NULL)
1079 goto bail;
1080 idx = next_idx;
1081
1082 /* skip whitespace between key and : delimiter, read :, skip whitespace */
1083 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1084 if (idx > end_idx || str[idx] != ':') {
1085 raise_errmsg("Expecting : delimiter", pystr, idx);
1086 goto bail;
1087 }
1088 idx++;
1089 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1090
1091 /* read any JSON term */
1092 val = scan_once_unicode(s, pystr, idx, &next_idx);
1093 if (val == NULL)
1094 goto bail;
1095
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001096 item = PyTuple_Pack(2, key, val);
1097 if (item == NULL)
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001098 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001099 Py_CLEAR(key);
1100 Py_CLEAR(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001101 if (PyList_Append(pairs, item) == -1) {
1102 Py_DECREF(item);
1103 goto bail;
1104 }
1105 Py_DECREF(item);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001106 idx = next_idx;
1107
1108 /* skip whitespace before } or , */
1109 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1110
1111 /* bail if the object is closed or we didn't get the , delimiter */
1112 if (idx > end_idx) break;
1113 if (str[idx] == '}') {
1114 break;
1115 }
1116 else if (str[idx] != ',') {
1117 raise_errmsg("Expecting , delimiter", pystr, idx);
1118 goto bail;
1119 }
1120 idx++;
1121
1122 /* skip whitespace after , delimiter */
1123 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1124 }
1125 }
1126
1127 /* verify that idx < end_idx, str[idx] should be '}' */
1128 if (idx > end_idx || str[idx] != '}') {
1129 raise_errmsg("Expecting object", pystr, end_idx);
1130 goto bail;
1131 }
1132
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001133 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1134 if (s->pairs_hook != Py_None) {
1135 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1136 if (val == NULL)
1137 goto bail;
1138 Py_DECREF(pairs);
1139 *next_idx_ptr = idx + 1;
1140 return val;
1141 }
1142
1143 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
1144 pairs, NULL);
1145 if (rval == NULL)
1146 goto bail;
1147 Py_CLEAR(pairs);
1148
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001149 /* if object_hook is not None: rval = object_hook(rval) */
1150 if (s->object_hook != Py_None) {
1151 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1152 if (val == NULL)
1153 goto bail;
1154 Py_DECREF(rval);
1155 rval = val;
1156 val = NULL;
1157 }
1158 *next_idx_ptr = idx + 1;
1159 return rval;
1160bail:
1161 Py_XDECREF(key);
1162 Py_XDECREF(val);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001163 Py_XDECREF(pairs);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001164 return NULL;
1165}
1166
1167static PyObject *
1168_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1169 /* Read a JSON array from PyString pystr.
1170 idx is the index of the first character after the opening brace.
1171 *next_idx_ptr is a return-by-reference index to the first character after
1172 the closing brace.
1173
1174 Returns a new PyList
1175 */
1176 char *str = PyString_AS_STRING(pystr);
1177 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1178 PyObject *val = NULL;
1179 PyObject *rval = PyList_New(0);
1180 Py_ssize_t next_idx;
1181 if (rval == NULL)
1182 return NULL;
1183
1184 /* skip whitespace after [ */
1185 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1186
1187 /* only loop if the array is non-empty */
1188 if (idx <= end_idx && str[idx] != ']') {
1189 while (idx <= end_idx) {
1190
1191 /* read any JSON term and de-tuplefy the (rval, idx) */
1192 val = scan_once_str(s, pystr, idx, &next_idx);
1193 if (val == NULL)
1194 goto bail;
1195
1196 if (PyList_Append(rval, val) == -1)
1197 goto bail;
1198
1199 Py_CLEAR(val);
1200 idx = next_idx;
1201
1202 /* skip whitespace between term and , */
1203 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1204
1205 /* bail if the array is closed or we didn't get the , delimiter */
1206 if (idx > end_idx) break;
1207 if (str[idx] == ']') {
1208 break;
1209 }
1210 else if (str[idx] != ',') {
1211 raise_errmsg("Expecting , delimiter", pystr, idx);
1212 goto bail;
1213 }
1214 idx++;
1215
1216 /* skip whitespace after , */
1217 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1218 }
1219 }
1220
1221 /* verify that idx < end_idx, str[idx] should be ']' */
1222 if (idx > end_idx || str[idx] != ']') {
1223 raise_errmsg("Expecting object", pystr, end_idx);
1224 goto bail;
1225 }
1226 *next_idx_ptr = idx + 1;
1227 return rval;
1228bail:
1229 Py_XDECREF(val);
1230 Py_DECREF(rval);
1231 return NULL;
1232}
1233
1234static PyObject *
1235_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1236 /* Read a JSON array from PyString pystr.
1237 idx is the index of the first character after the opening brace.
1238 *next_idx_ptr is a return-by-reference index to the first character after
1239 the closing brace.
1240
1241 Returns a new PyList
1242 */
1243 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1244 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1245 PyObject *val = NULL;
1246 PyObject *rval = PyList_New(0);
1247 Py_ssize_t next_idx;
1248 if (rval == NULL)
1249 return NULL;
1250
1251 /* skip whitespace after [ */
1252 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1253
1254 /* only loop if the array is non-empty */
1255 if (idx <= end_idx && str[idx] != ']') {
1256 while (idx <= end_idx) {
1257
1258 /* read any JSON term */
1259 val = scan_once_unicode(s, pystr, idx, &next_idx);
1260 if (val == NULL)
1261 goto bail;
1262
1263 if (PyList_Append(rval, val) == -1)
1264 goto bail;
1265
1266 Py_CLEAR(val);
1267 idx = next_idx;
1268
1269 /* skip whitespace between term and , */
1270 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1271
1272 /* bail if the array is closed or we didn't get the , delimiter */
1273 if (idx > end_idx) break;
1274 if (str[idx] == ']') {
1275 break;
1276 }
1277 else if (str[idx] != ',') {
1278 raise_errmsg("Expecting , delimiter", pystr, idx);
1279 goto bail;
1280 }
1281 idx++;
1282
1283 /* skip whitespace after , */
1284 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1285 }
1286 }
1287
1288 /* verify that idx < end_idx, str[idx] should be ']' */
1289 if (idx > end_idx || str[idx] != ']') {
1290 raise_errmsg("Expecting object", pystr, end_idx);
1291 goto bail;
1292 }
1293 *next_idx_ptr = idx + 1;
1294 return rval;
1295bail:
1296 Py_XDECREF(val);
1297 Py_DECREF(rval);
1298 return NULL;
1299}
1300
1301static PyObject *
1302_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1303 /* Read a JSON constant from PyString pystr.
1304 constant is the constant string that was found
1305 ("NaN", "Infinity", "-Infinity").
1306 idx is the index of the first character of the constant
1307 *next_idx_ptr is a return-by-reference index to the first character after
1308 the constant.
1309
1310 Returns the result of parse_constant
1311 */
1312 PyObject *cstr;
1313 PyObject *rval;
1314 /* constant is "NaN", "Infinity", or "-Infinity" */
1315 cstr = PyString_InternFromString(constant);
1316 if (cstr == NULL)
1317 return NULL;
1318
1319 /* rval = parse_constant(constant) */
1320 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1321 idx += PyString_GET_SIZE(cstr);
1322 Py_DECREF(cstr);
1323 *next_idx_ptr = idx;
1324 return rval;
1325}
1326
1327static PyObject *
1328_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1329 /* Read a JSON number from PyString pystr.
1330 idx is the index of the first character of the number
1331 *next_idx_ptr is a return-by-reference index to the first character after
1332 the number.
1333
1334 Returns a new PyObject representation of that number:
1335 PyInt, PyLong, or PyFloat.
1336 May return other types if parse_int or parse_float are set
1337 */
1338 char *str = PyString_AS_STRING(pystr);
1339 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1340 Py_ssize_t idx = start;
1341 int is_float = 0;
1342 PyObject *rval;
1343 PyObject *numstr;
1344
1345 /* read a sign if it's there, make sure it's not the end of the string */
1346 if (str[idx] == '-') {
1347 idx++;
1348 if (idx > end_idx) {
1349 PyErr_SetNone(PyExc_StopIteration);
1350 return NULL;
1351 }
1352 }
1353
1354 /* read as many integer digits as we find as long as it doesn't start with 0 */
1355 if (str[idx] >= '1' && str[idx] <= '9') {
1356 idx++;
1357 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1358 }
1359 /* if it starts with 0 we only expect one integer digit */
1360 else if (str[idx] == '0') {
1361 idx++;
1362 }
1363 /* no integer digits, error */
1364 else {
1365 PyErr_SetNone(PyExc_StopIteration);
1366 return NULL;
1367 }
1368
1369 /* if the next char is '.' followed by a digit then read all float digits */
1370 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1371 is_float = 1;
1372 idx += 2;
1373 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1374 }
1375
1376 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1377 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1378
1379 /* save the index of the 'e' or 'E' just in case we need to backtrack */
1380 Py_ssize_t e_start = idx;
1381 idx++;
1382
1383 /* read an exponent sign if present */
1384 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1385
1386 /* read all digits */
1387 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1388
1389 /* if we got a digit, then parse as float. if not, backtrack */
1390 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1391 is_float = 1;
1392 }
1393 else {
1394 idx = e_start;
1395 }
1396 }
1397
1398 /* copy the section we determined to be a number */
1399 numstr = PyString_FromStringAndSize(&str[start], idx - start);
1400 if (numstr == NULL)
1401 return NULL;
1402 if (is_float) {
1403 /* parse as a float using a fast path if available, otherwise call user defined method */
1404 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1405 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1406 }
1407 else {
Eric Smith129c97d2009-10-28 08:44:37 +00001408 double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1409 NULL, NULL);
1410 if (d == -1.0 && PyErr_Occurred())
1411 return NULL;
1412 rval = PyFloat_FromDouble(d);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001413 }
1414 }
1415 else {
1416 /* parse as an int using a fast path if available, otherwise call user defined method */
1417 if (s->parse_int != (PyObject *)&PyInt_Type) {
1418 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1419 }
1420 else {
1421 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1422 }
1423 }
1424 Py_DECREF(numstr);
1425 *next_idx_ptr = idx;
1426 return rval;
1427}
1428
1429static PyObject *
1430_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1431 /* Read a JSON number from PyUnicode pystr.
1432 idx is the index of the first character of the number
1433 *next_idx_ptr is a return-by-reference index to the first character after
1434 the number.
1435
1436 Returns a new PyObject representation of that number:
1437 PyInt, PyLong, or PyFloat.
1438 May return other types if parse_int or parse_float are set
1439 */
1440 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1441 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1442 Py_ssize_t idx = start;
1443 int is_float = 0;
1444 PyObject *rval;
1445 PyObject *numstr;
1446
1447 /* read a sign if it's there, make sure it's not the end of the string */
1448 if (str[idx] == '-') {
1449 idx++;
1450 if (idx > end_idx) {
1451 PyErr_SetNone(PyExc_StopIteration);
1452 return NULL;
1453 }
1454 }
1455
1456 /* read as many integer digits as we find as long as it doesn't start with 0 */
1457 if (str[idx] >= '1' && str[idx] <= '9') {
1458 idx++;
1459 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1460 }
1461 /* if it starts with 0 we only expect one integer digit */
1462 else if (str[idx] == '0') {
1463 idx++;
1464 }
1465 /* no integer digits, error */
1466 else {
1467 PyErr_SetNone(PyExc_StopIteration);
1468 return NULL;
1469 }
1470
1471 /* if the next char is '.' followed by a digit then read all float digits */
1472 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1473 is_float = 1;
1474 idx += 2;
Bob Ippolito76a982a2009-03-29 22:33:58 +00001475 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001476 }
1477
1478 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1479 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1480 Py_ssize_t e_start = idx;
1481 idx++;
1482
1483 /* read an exponent sign if present */
1484 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1485
1486 /* read all digits */
1487 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1488
1489 /* if we got a digit, then parse as float. if not, backtrack */
1490 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1491 is_float = 1;
1492 }
1493 else {
1494 idx = e_start;
1495 }
1496 }
1497
1498 /* copy the section we determined to be a number */
1499 numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1500 if (numstr == NULL)
1501 return NULL;
1502 if (is_float) {
1503 /* parse as a float using a fast path if available, otherwise call user defined method */
1504 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1505 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1506 }
1507 else {
1508 rval = PyFloat_FromString(numstr, NULL);
1509 }
1510 }
1511 else {
1512 /* no fast path for unicode -> int, just call */
1513 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1514 }
1515 Py_DECREF(numstr);
1516 *next_idx_ptr = idx;
1517 return rval;
1518}
1519
1520static PyObject *
1521scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1522{
1523 /* Read one JSON term (of any kind) from PyString pystr.
1524 idx is the index of the first character of the term
1525 *next_idx_ptr is a return-by-reference index to the first character after
1526 the number.
1527
1528 Returns a new PyObject representation of the term.
1529 */
1530 char *str = PyString_AS_STRING(pystr);
1531 Py_ssize_t length = PyString_GET_SIZE(pystr);
1532 if (idx >= length) {
1533 PyErr_SetNone(PyExc_StopIteration);
1534 return NULL;
1535 }
1536 switch (str[idx]) {
1537 case '"':
1538 /* string */
1539 return scanstring_str(pystr, idx + 1,
1540 PyString_AS_STRING(s->encoding),
1541 PyObject_IsTrue(s->strict),
1542 next_idx_ptr);
1543 case '{':
1544 /* object */
1545 return _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1546 case '[':
1547 /* array */
1548 return _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1549 case 'n':
1550 /* null */
1551 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1552 Py_INCREF(Py_None);
1553 *next_idx_ptr = idx + 4;
1554 return Py_None;
1555 }
1556 break;
1557 case 't':
1558 /* true */
1559 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1560 Py_INCREF(Py_True);
1561 *next_idx_ptr = idx + 4;
1562 return Py_True;
1563 }
1564 break;
1565 case 'f':
1566 /* false */
1567 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1568 Py_INCREF(Py_False);
1569 *next_idx_ptr = idx + 5;
1570 return Py_False;
1571 }
1572 break;
1573 case 'N':
1574 /* NaN */
1575 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1576 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1577 }
1578 break;
1579 case 'I':
1580 /* Infinity */
1581 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1582 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1583 }
1584 break;
1585 case '-':
1586 /* -Infinity */
1587 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1588 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1589 }
1590 break;
1591 }
1592 /* Didn't find a string, object, array, or named constant. Look for a number. */
1593 return _match_number_str(s, pystr, idx, next_idx_ptr);
1594}
1595
1596static PyObject *
1597scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1598{
1599 /* Read one JSON term (of any kind) from PyUnicode pystr.
1600 idx is the index of the first character of the term
1601 *next_idx_ptr is a return-by-reference index to the first character after
1602 the number.
1603
1604 Returns a new PyObject representation of the term.
1605 */
1606 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1607 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1608 if (idx >= length) {
1609 PyErr_SetNone(PyExc_StopIteration);
1610 return NULL;
1611 }
1612 switch (str[idx]) {
1613 case '"':
1614 /* string */
1615 return scanstring_unicode(pystr, idx + 1,
1616 PyObject_IsTrue(s->strict),
1617 next_idx_ptr);
1618 case '{':
1619 /* object */
1620 return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1621 case '[':
1622 /* array */
1623 return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1624 case 'n':
1625 /* null */
1626 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1627 Py_INCREF(Py_None);
1628 *next_idx_ptr = idx + 4;
1629 return Py_None;
1630 }
1631 break;
1632 case 't':
1633 /* true */
1634 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1635 Py_INCREF(Py_True);
1636 *next_idx_ptr = idx + 4;
1637 return Py_True;
1638 }
1639 break;
1640 case 'f':
1641 /* false */
1642 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1643 Py_INCREF(Py_False);
1644 *next_idx_ptr = idx + 5;
1645 return Py_False;
1646 }
1647 break;
1648 case 'N':
1649 /* NaN */
1650 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1651 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1652 }
1653 break;
1654 case 'I':
1655 /* Infinity */
1656 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1657 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1658 }
1659 break;
1660 case '-':
1661 /* -Infinity */
1662 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1663 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1664 }
1665 break;
1666 }
1667 /* Didn't find a string, object, array, or named constant. Look for a number. */
1668 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1669}
1670
1671static PyObject *
1672scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1673{
1674 /* Python callable interface to scan_once_{str,unicode} */
1675 PyObject *pystr;
1676 PyObject *rval;
1677 Py_ssize_t idx;
1678 Py_ssize_t next_idx = -1;
1679 static char *kwlist[] = {"string", "idx", NULL};
1680 PyScannerObject *s;
1681 assert(PyScanner_Check(self));
1682 s = (PyScannerObject *)self;
1683 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1684 return NULL;
1685
1686 if (PyString_Check(pystr)) {
1687 rval = scan_once_str(s, pystr, idx, &next_idx);
1688 }
1689 else if (PyUnicode_Check(pystr)) {
1690 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1691 }
1692 else {
1693 PyErr_Format(PyExc_TypeError,
1694 "first argument must be a string, not %.80s",
1695 Py_TYPE(pystr)->tp_name);
1696 return NULL;
1697 }
1698 return _build_rval_index_tuple(rval, next_idx);
1699}
1700
1701static PyObject *
1702scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1703{
1704 PyScannerObject *s;
1705 s = (PyScannerObject *)type->tp_alloc(type, 0);
1706 if (s != NULL) {
1707 s->encoding = NULL;
1708 s->strict = NULL;
1709 s->object_hook = NULL;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001710 s->pairs_hook = NULL;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001711 s->parse_float = NULL;
1712 s->parse_int = NULL;
1713 s->parse_constant = NULL;
1714 }
1715 return (PyObject *)s;
1716}
1717
1718static int
1719scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1720{
1721 /* Initialize Scanner object */
1722 PyObject *ctx;
1723 static char *kwlist[] = {"context", NULL};
1724 PyScannerObject *s;
1725
1726 assert(PyScanner_Check(self));
1727 s = (PyScannerObject *)self;
1728
1729 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1730 return -1;
1731
1732 /* PyString_AS_STRING is used on encoding */
1733 s->encoding = PyObject_GetAttrString(ctx, "encoding");
Antoine Pitrou187177f2009-12-08 15:40:51 +00001734 if (s->encoding == NULL)
1735 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001736 if (s->encoding == Py_None) {
1737 Py_DECREF(Py_None);
1738 s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1739 }
1740 else if (PyUnicode_Check(s->encoding)) {
1741 PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
1742 Py_DECREF(s->encoding);
1743 s->encoding = tmp;
1744 }
1745 if (s->encoding == NULL || !PyString_Check(s->encoding))
1746 goto bail;
1747
1748 /* All of these will fail "gracefully" so we don't need to verify them */
1749 s->strict = PyObject_GetAttrString(ctx, "strict");
1750 if (s->strict == NULL)
1751 goto bail;
1752 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1753 if (s->object_hook == NULL)
1754 goto bail;
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001755 s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1756 if (s->object_hook == NULL)
1757 goto bail;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001758 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1759 if (s->parse_float == NULL)
1760 goto bail;
1761 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1762 if (s->parse_int == NULL)
1763 goto bail;
1764 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1765 if (s->parse_constant == NULL)
1766 goto bail;
1767
1768 return 0;
1769
1770bail:
1771 Py_CLEAR(s->encoding);
1772 Py_CLEAR(s->strict);
1773 Py_CLEAR(s->object_hook);
Raymond Hettinger91852ca2009-03-19 19:19:03 +00001774 Py_CLEAR(s->pairs_hook);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001775 Py_CLEAR(s->parse_float);
1776 Py_CLEAR(s->parse_int);
1777 Py_CLEAR(s->parse_constant);
1778 return -1;
1779}
1780
1781PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1782
1783static
1784PyTypeObject PyScannerType = {
1785 PyObject_HEAD_INIT(NULL)
1786 0, /* tp_internal */
1787 "_json.Scanner", /* tp_name */
1788 sizeof(PyScannerObject), /* tp_basicsize */
1789 0, /* tp_itemsize */
1790 scanner_dealloc, /* tp_dealloc */
1791 0, /* tp_print */
1792 0, /* tp_getattr */
1793 0, /* tp_setattr */
1794 0, /* tp_compare */
1795 0, /* tp_repr */
1796 0, /* tp_as_number */
1797 0, /* tp_as_sequence */
1798 0, /* tp_as_mapping */
1799 0, /* tp_hash */
1800 scanner_call, /* tp_call */
1801 0, /* tp_str */
1802 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1803 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1804 0, /* tp_as_buffer */
1805 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1806 scanner_doc, /* tp_doc */
1807 scanner_traverse, /* tp_traverse */
1808 scanner_clear, /* tp_clear */
1809 0, /* tp_richcompare */
1810 0, /* tp_weaklistoffset */
1811 0, /* tp_iter */
1812 0, /* tp_iternext */
1813 0, /* tp_methods */
1814 scanner_members, /* tp_members */
1815 0, /* tp_getset */
1816 0, /* tp_base */
1817 0, /* tp_dict */
1818 0, /* tp_descr_get */
1819 0, /* tp_descr_set */
1820 0, /* tp_dictoffset */
1821 scanner_init, /* tp_init */
1822 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1823 scanner_new, /* tp_new */
1824 0,/* PyObject_GC_Del, */ /* tp_free */
1825};
1826
1827static PyObject *
1828encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1829{
1830 PyEncoderObject *s;
1831 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1832 if (s != NULL) {
1833 s->markers = NULL;
1834 s->defaultfn = NULL;
1835 s->encoder = NULL;
1836 s->indent = NULL;
1837 s->key_separator = NULL;
1838 s->item_separator = NULL;
1839 s->sort_keys = NULL;
1840 s->skipkeys = NULL;
1841 }
1842 return (PyObject *)s;
1843}
1844
1845static int
1846encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1847{
1848 /* initialize Encoder object */
1849 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1850
1851 PyEncoderObject *s;
Antoine Pitrou187177f2009-12-08 15:40:51 +00001852 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1853 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001854
1855 assert(PyEncoder_Check(self));
1856 s = (PyEncoderObject *)self;
1857
1858 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
Antoine Pitrou187177f2009-12-08 15:40:51 +00001859 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
1860 &sort_keys, &skipkeys, &allow_nan))
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001861 return -1;
1862
Antoine Pitrou187177f2009-12-08 15:40:51 +00001863 s->markers = markers;
1864 s->defaultfn = defaultfn;
1865 s->encoder = encoder;
1866 s->indent = indent;
1867 s->key_separator = key_separator;
1868 s->item_separator = item_separator;
1869 s->sort_keys = sort_keys;
1870 s->skipkeys = skipkeys;
1871 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
1872 s->allow_nan = PyObject_IsTrue(allow_nan);
1873
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001874 Py_INCREF(s->markers);
1875 Py_INCREF(s->defaultfn);
1876 Py_INCREF(s->encoder);
1877 Py_INCREF(s->indent);
1878 Py_INCREF(s->key_separator);
1879 Py_INCREF(s->item_separator);
1880 Py_INCREF(s->sort_keys);
1881 Py_INCREF(s->skipkeys);
Bob Ippolitod914e3f2009-03-17 23:19:00 +00001882 return 0;
1883}
1884
1885static PyObject *
1886encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1887{
1888 /* Python callable interface to encode_listencode_obj */
1889 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1890 PyObject *obj;
1891 PyObject *rval;
1892 Py_ssize_t indent_level;
1893 PyEncoderObject *s;
1894 assert(PyEncoder_Check(self));
1895 s = (PyEncoderObject *)self;
1896 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1897 &obj, _convertPyInt_AsSsize_t, &indent_level))
1898 return NULL;
1899 rval = PyList_New(0);
1900 if (rval == NULL)
1901 return NULL;
1902 if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1903 Py_DECREF(rval);
1904 return NULL;
1905 }
1906 return rval;
1907}
1908
1909static PyObject *
1910_encoded_const(PyObject *obj)
1911{
1912 /* Return the JSON string representation of None, True, False */
1913 if (obj == Py_None) {
1914 static PyObject *s_null = NULL;
1915 if (s_null == NULL) {
1916 s_null = PyString_InternFromString("null");
1917 }
1918 Py_INCREF(s_null);
1919 return s_null;
1920 }
1921 else if (obj == Py_True) {
1922 static PyObject *s_true = NULL;
1923 if (s_true == NULL) {
1924 s_true = PyString_InternFromString("true");
1925 }
1926 Py_INCREF(s_true);
1927 return s_true;
1928 }
1929 else if (obj == Py_False) {
1930 static PyObject *s_false = NULL;
1931 if (s_false == NULL) {
1932 s_false = PyString_InternFromString("false");
1933 }
1934 Py_INCREF(s_false);
1935 return s_false;
1936 }
1937 else {
1938 PyErr_SetString(PyExc_ValueError, "not a const");
1939 return NULL;
1940 }
1941}
1942
1943static PyObject *
1944encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1945{
1946 /* Return the JSON representation of a PyFloat */
1947 double i = PyFloat_AS_DOUBLE(obj);
1948 if (!Py_IS_FINITE(i)) {
1949 if (!s->allow_nan) {
1950 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1951 return NULL;
1952 }
1953 if (i > 0) {
1954 return PyString_FromString("Infinity");
1955 }
1956 else if (i < 0) {
1957 return PyString_FromString("-Infinity");
1958 }
1959 else {
1960 return PyString_FromString("NaN");
1961 }
1962 }
1963 /* Use a better float format here? */
1964 return PyObject_Repr(obj);
1965}
1966
1967static PyObject *
1968encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1969{
1970 /* Return the JSON representation of a string */
1971 if (s->fast_encode)
1972 return py_encode_basestring_ascii(NULL, obj);
1973 else
1974 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1975}
1976
1977static int
1978_steal_list_append(PyObject *lst, PyObject *stolen)
1979{
1980 /* Append stolen and then decrement its reference count */
1981 int rval = PyList_Append(lst, stolen);
1982 Py_DECREF(stolen);
1983 return rval;
1984}
1985
1986static int
1987encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
1988{
1989 /* Encode Python object obj to a JSON term, rval is a PyList */
1990 PyObject *newobj;
1991 int rv;
1992
1993 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1994 PyObject *cstr = _encoded_const(obj);
1995 if (cstr == NULL)
1996 return -1;
1997 return _steal_list_append(rval, cstr);
1998 }
1999 else if (PyString_Check(obj) || PyUnicode_Check(obj))
2000 {
2001 PyObject *encoded = encoder_encode_string(s, obj);
2002 if (encoded == NULL)
2003 return -1;
2004 return _steal_list_append(rval, encoded);
2005 }
2006 else if (PyInt_Check(obj) || PyLong_Check(obj)) {
2007 PyObject *encoded = PyObject_Str(obj);
2008 if (encoded == NULL)
2009 return -1;
2010 return _steal_list_append(rval, encoded);
2011 }
2012 else if (PyFloat_Check(obj)) {
2013 PyObject *encoded = encoder_encode_float(s, obj);
2014 if (encoded == NULL)
2015 return -1;
2016 return _steal_list_append(rval, encoded);
2017 }
2018 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
2019 return encoder_listencode_list(s, rval, obj, indent_level);
2020 }
2021 else if (PyDict_Check(obj)) {
2022 return encoder_listencode_dict(s, rval, obj, indent_level);
2023 }
2024 else {
2025 PyObject *ident = NULL;
2026 if (s->markers != Py_None) {
2027 int has_key;
2028 ident = PyLong_FromVoidPtr(obj);
2029 if (ident == NULL)
2030 return -1;
2031 has_key = PyDict_Contains(s->markers, ident);
2032 if (has_key) {
2033 if (has_key != -1)
2034 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2035 Py_DECREF(ident);
2036 return -1;
2037 }
2038 if (PyDict_SetItem(s->markers, ident, obj)) {
2039 Py_DECREF(ident);
2040 return -1;
2041 }
2042 }
2043 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2044 if (newobj == NULL) {
2045 Py_XDECREF(ident);
2046 return -1;
2047 }
2048 rv = encoder_listencode_obj(s, rval, newobj, indent_level);
2049 Py_DECREF(newobj);
2050 if (rv) {
2051 Py_XDECREF(ident);
2052 return -1;
2053 }
2054 if (ident != NULL) {
2055 if (PyDict_DelItem(s->markers, ident)) {
2056 Py_XDECREF(ident);
2057 return -1;
2058 }
2059 Py_XDECREF(ident);
2060 }
2061 return rv;
2062 }
2063}
2064
2065static int
2066encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2067{
2068 /* Encode Python dict dct a JSON term, rval is a PyList */
2069 static PyObject *open_dict = NULL;
2070 static PyObject *close_dict = NULL;
2071 static PyObject *empty_dict = NULL;
2072 PyObject *kstr = NULL;
2073 PyObject *ident = NULL;
2074 PyObject *key, *value;
2075 Py_ssize_t pos;
2076 int skipkeys;
2077 Py_ssize_t idx;
2078
2079 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
2080 open_dict = PyString_InternFromString("{");
2081 close_dict = PyString_InternFromString("}");
2082 empty_dict = PyString_InternFromString("{}");
2083 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
2084 return -1;
2085 }
2086 if (PyDict_Size(dct) == 0)
2087 return PyList_Append(rval, empty_dict);
2088
2089 if (s->markers != Py_None) {
2090 int has_key;
2091 ident = PyLong_FromVoidPtr(dct);
2092 if (ident == NULL)
2093 goto bail;
2094 has_key = PyDict_Contains(s->markers, ident);
2095 if (has_key) {
2096 if (has_key != -1)
2097 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2098 goto bail;
2099 }
2100 if (PyDict_SetItem(s->markers, ident, dct)) {
2101 goto bail;
2102 }
2103 }
2104
2105 if (PyList_Append(rval, open_dict))
2106 goto bail;
2107
2108 if (s->indent != Py_None) {
2109 /* TODO: DOES NOT RUN */
2110 indent_level += 1;
2111 /*
2112 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2113 separator = _item_separator + newline_indent
2114 buf += newline_indent
2115 */
2116 }
2117
2118 /* TODO: C speedup not implemented for sort_keys */
2119
2120 pos = 0;
2121 skipkeys = PyObject_IsTrue(s->skipkeys);
2122 idx = 0;
2123 while (PyDict_Next(dct, &pos, &key, &value)) {
2124 PyObject *encoded;
2125
2126 if (PyString_Check(key) || PyUnicode_Check(key)) {
2127 Py_INCREF(key);
2128 kstr = key;
2129 }
2130 else if (PyFloat_Check(key)) {
2131 kstr = encoder_encode_float(s, key);
2132 if (kstr == NULL)
2133 goto bail;
2134 }
2135 else if (PyInt_Check(key) || PyLong_Check(key)) {
2136 kstr = PyObject_Str(key);
2137 if (kstr == NULL)
2138 goto bail;
2139 }
2140 else if (key == Py_True || key == Py_False || key == Py_None) {
2141 kstr = _encoded_const(key);
2142 if (kstr == NULL)
2143 goto bail;
2144 }
2145 else if (skipkeys) {
2146 continue;
2147 }
2148 else {
2149 /* TODO: include repr of key */
2150 PyErr_SetString(PyExc_ValueError, "keys must be a string");
2151 goto bail;
2152 }
2153
2154 if (idx) {
2155 if (PyList_Append(rval, s->item_separator))
2156 goto bail;
2157 }
2158
2159 encoded = encoder_encode_string(s, kstr);
2160 Py_CLEAR(kstr);
2161 if (encoded == NULL)
2162 goto bail;
2163 if (PyList_Append(rval, encoded)) {
2164 Py_DECREF(encoded);
2165 goto bail;
2166 }
2167 Py_DECREF(encoded);
2168 if (PyList_Append(rval, s->key_separator))
2169 goto bail;
2170 if (encoder_listencode_obj(s, rval, value, indent_level))
2171 goto bail;
2172 idx += 1;
2173 }
2174 if (ident != NULL) {
2175 if (PyDict_DelItem(s->markers, ident))
2176 goto bail;
2177 Py_CLEAR(ident);
2178 }
2179 if (s->indent != Py_None) {
2180 /* TODO: DOES NOT RUN */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002181 /*
Brett Cannon8e9757e2010-05-03 23:43:49 +00002182 indent_level -= 1;
2183
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002184 yield '\n' + (' ' * (_indent * _current_indent_level))
2185 */
2186 }
2187 if (PyList_Append(rval, close_dict))
2188 goto bail;
2189 return 0;
2190
2191bail:
2192 Py_XDECREF(kstr);
2193 Py_XDECREF(ident);
2194 return -1;
2195}
2196
2197
2198static int
2199encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2200{
2201 /* Encode Python list seq to a JSON term, rval is a PyList */
2202 static PyObject *open_array = NULL;
2203 static PyObject *close_array = NULL;
2204 static PyObject *empty_array = NULL;
2205 PyObject *ident = NULL;
2206 PyObject *s_fast = NULL;
2207 Py_ssize_t num_items;
2208 PyObject **seq_items;
2209 Py_ssize_t i;
2210
2211 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2212 open_array = PyString_InternFromString("[");
2213 close_array = PyString_InternFromString("]");
2214 empty_array = PyString_InternFromString("[]");
2215 if (open_array == NULL || close_array == NULL || empty_array == NULL)
2216 return -1;
2217 }
2218 ident = NULL;
2219 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
2220 if (s_fast == NULL)
2221 return -1;
2222 num_items = PySequence_Fast_GET_SIZE(s_fast);
2223 if (num_items == 0) {
2224 Py_DECREF(s_fast);
2225 return PyList_Append(rval, empty_array);
2226 }
2227
2228 if (s->markers != Py_None) {
2229 int has_key;
2230 ident = PyLong_FromVoidPtr(seq);
2231 if (ident == NULL)
2232 goto bail;
2233 has_key = PyDict_Contains(s->markers, ident);
2234 if (has_key) {
2235 if (has_key != -1)
2236 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2237 goto bail;
2238 }
2239 if (PyDict_SetItem(s->markers, ident, seq)) {
2240 goto bail;
2241 }
2242 }
2243
2244 seq_items = PySequence_Fast_ITEMS(s_fast);
2245 if (PyList_Append(rval, open_array))
2246 goto bail;
2247 if (s->indent != Py_None) {
2248 /* TODO: DOES NOT RUN */
2249 indent_level += 1;
2250 /*
2251 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2252 separator = _item_separator + newline_indent
2253 buf += newline_indent
2254 */
2255 }
2256 for (i = 0; i < num_items; i++) {
2257 PyObject *obj = seq_items[i];
2258 if (i) {
2259 if (PyList_Append(rval, s->item_separator))
2260 goto bail;
2261 }
2262 if (encoder_listencode_obj(s, rval, obj, indent_level))
2263 goto bail;
2264 }
2265 if (ident != NULL) {
2266 if (PyDict_DelItem(s->markers, ident))
2267 goto bail;
2268 Py_CLEAR(ident);
2269 }
2270 if (s->indent != Py_None) {
2271 /* TODO: DOES NOT RUN */
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002272 /*
Brett Cannon8e9757e2010-05-03 23:43:49 +00002273 indent_level -= 1;
2274
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002275 yield '\n' + (' ' * (_indent * _current_indent_level))
2276 */
2277 }
2278 if (PyList_Append(rval, close_array))
2279 goto bail;
2280 Py_DECREF(s_fast);
2281 return 0;
2282
2283bail:
2284 Py_XDECREF(ident);
2285 Py_DECREF(s_fast);
2286 return -1;
2287}
2288
2289static void
2290encoder_dealloc(PyObject *self)
2291{
2292 /* Deallocate Encoder */
2293 encoder_clear(self);
2294 Py_TYPE(self)->tp_free(self);
2295}
2296
2297static int
2298encoder_traverse(PyObject *self, visitproc visit, void *arg)
2299{
2300 PyEncoderObject *s;
2301 assert(PyEncoder_Check(self));
2302 s = (PyEncoderObject *)self;
2303 Py_VISIT(s->markers);
2304 Py_VISIT(s->defaultfn);
2305 Py_VISIT(s->encoder);
2306 Py_VISIT(s->indent);
2307 Py_VISIT(s->key_separator);
2308 Py_VISIT(s->item_separator);
2309 Py_VISIT(s->sort_keys);
2310 Py_VISIT(s->skipkeys);
2311 return 0;
2312}
2313
2314static int
2315encoder_clear(PyObject *self)
2316{
2317 /* Deallocate Encoder */
2318 PyEncoderObject *s;
2319 assert(PyEncoder_Check(self));
2320 s = (PyEncoderObject *)self;
2321 Py_CLEAR(s->markers);
2322 Py_CLEAR(s->defaultfn);
2323 Py_CLEAR(s->encoder);
2324 Py_CLEAR(s->indent);
2325 Py_CLEAR(s->key_separator);
2326 Py_CLEAR(s->item_separator);
2327 Py_CLEAR(s->sort_keys);
2328 Py_CLEAR(s->skipkeys);
2329 return 0;
2330}
2331
2332PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2333
2334static
2335PyTypeObject PyEncoderType = {
2336 PyObject_HEAD_INIT(NULL)
2337 0, /* tp_internal */
2338 "_json.Encoder", /* tp_name */
2339 sizeof(PyEncoderObject), /* tp_basicsize */
2340 0, /* tp_itemsize */
2341 encoder_dealloc, /* tp_dealloc */
2342 0, /* tp_print */
2343 0, /* tp_getattr */
2344 0, /* tp_setattr */
2345 0, /* tp_compare */
2346 0, /* tp_repr */
2347 0, /* tp_as_number */
2348 0, /* tp_as_sequence */
2349 0, /* tp_as_mapping */
2350 0, /* tp_hash */
2351 encoder_call, /* tp_call */
2352 0, /* tp_str */
2353 0, /* tp_getattro */
2354 0, /* tp_setattro */
2355 0, /* tp_as_buffer */
2356 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2357 encoder_doc, /* tp_doc */
2358 encoder_traverse, /* tp_traverse */
2359 encoder_clear, /* tp_clear */
2360 0, /* tp_richcompare */
2361 0, /* tp_weaklistoffset */
2362 0, /* tp_iter */
2363 0, /* tp_iternext */
2364 0, /* tp_methods */
2365 encoder_members, /* tp_members */
2366 0, /* tp_getset */
2367 0, /* tp_base */
2368 0, /* tp_dict */
2369 0, /* tp_descr_get */
2370 0, /* tp_descr_set */
2371 0, /* tp_dictoffset */
2372 encoder_init, /* tp_init */
2373 0, /* tp_alloc */
2374 encoder_new, /* tp_new */
2375 0, /* tp_free */
2376};
2377
2378static PyMethodDef speedups_methods[] = {
2379 {"encode_basestring_ascii",
2380 (PyCFunction)py_encode_basestring_ascii,
2381 METH_O,
2382 pydoc_encode_basestring_ascii},
2383 {"scanstring",
2384 (PyCFunction)py_scanstring,
2385 METH_VARARGS,
2386 pydoc_scanstring},
Brett Cannon4b964f92008-05-05 20:21:38 +00002387 {NULL, NULL, 0, NULL}
2388};
2389
2390PyDoc_STRVAR(module_doc,
2391"json speedups\n");
2392
2393void
2394init_json(void)
2395{
2396 PyObject *m;
Bob Ippolitod914e3f2009-03-17 23:19:00 +00002397 PyScannerType.tp_new = PyType_GenericNew;
2398 if (PyType_Ready(&PyScannerType) < 0)
2399 return;
2400 PyEncoderType.tp_new = PyType_GenericNew;
2401 if (PyType_Ready(&PyEncoderType) < 0)
2402 return;
2403 m = Py_InitModule3("_json", speedups_methods, module_doc);
2404 Py_INCREF((PyObject*)&PyScannerType);
2405 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2406 Py_INCREF((PyObject*)&PyEncoderType);
2407 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
Brett Cannon4b964f92008-05-05 20:21:38 +00002408}