blob: b06ba504a7caa65e3a1b2a1dc0f596d8b8a5489e [file] [log] [blame]
jcgregorio@wpgntai-ubiq72.hot.corp.google.comed132522010-04-19 11:12:52 -07001#include "Python.h"
2#include "structmember.h"
3#if PY_VERSION_HEX < 0x02070000 && !defined(PyOS_string_to_double)
4#define PyOS_string_to_double json_PyOS_string_to_double
5static double
6json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception);
7static double
8json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception) {
9 double x;
10 assert(endptr == NULL);
11 assert(overflow_exception == NULL);
12 PyFPE_START_PROTECT("json_PyOS_string_to_double", return -1.0;)
13 x = PyOS_ascii_atof(s);
14 PyFPE_END_PROTECT(x)
15 return x;
16}
17#endif
/* Python < 2.6 has no Py_TYPE; read the ob_type slot directly instead. */
#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif
/* Python < 2.5 has no Py_ssize_t; alias it to int and route the
   Ssize_t <-> PyInt conversions through the long-based functions. */
#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
typedef int Py_ssize_t;
#define PY_SSIZE_T_MAX INT_MAX
#define PY_SSIZE_T_MIN INT_MIN
#define PyInt_FromSsize_t PyInt_FromLong
#define PyInt_AsSsize_t PyInt_AsLong
#endif
/* Fallback when the headers lack Py_IS_FINITE: finite means neither
   infinite nor NaN. */
#ifndef Py_IS_FINITE
#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
#endif

/* UNUSED tags a parameter as intentionally unused.  It expands to the
   GCC __unused__ attribute under __GNUC__ and to nothing elsewhere. */
#ifdef __GNUC__
#define UNUSED __attribute__((__unused__))
#else
#define UNUSED
#endif
37
/* Encoding substituted by py_scanstring when the caller passes none. */
#define DEFAULT_ENCODING "utf-8"

/* Type tests for the two extension types.  The *_Check forms accept
   subclasses (PyObject_TypeCheck); the *_CheckExact forms compare the
   type pointer directly.  Decimal_Check tests against whatever type
   DecimalTypePtr points to. */
#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
#define Decimal_Check(op) (PyObject_TypeCheck(op, DecimalTypePtr))

/* Forward declarations so the macros above can name the type objects.
   NOTE(review): their initializers and the assignment of DecimalTypePtr
   are not visible in this part of the file. */
static PyTypeObject PyScannerType;
static PyTypeObject PyEncoderType;
static PyTypeObject *DecimalTypePtr;
49
/* Instance layout of the scanner (decoder) type.  Every field is an owned
   reference that scanner_traverse visits and scanner_clear releases. */
typedef struct _PyScannerObject {
    PyObject_HEAD
    PyObject *encoding;       /* read with PyString_AS_STRING when scanning byte strings */
    PyObject *strict;         /* truthiness (PyObject_IsTrue) rejects literal control characters */
    PyObject *object_hook;    /* if not None, called with each parsed dict */
    PyObject *pairs_hook;     /* if not None, called with a list of (key, value) tuples */
    PyObject *parse_float;    /* NOTE(review): not used in this section; presumably a float-literal callback */
    PyObject *parse_int;      /* NOTE(review): not used in this section; presumably an int-literal callback */
    PyObject *parse_constant; /* NOTE(review): not used in this section; presumably a constant-literal callback */
    PyObject *memo;           /* dict mapping key -> key, used to share equal object keys */
} PyScannerObject;
61
/* Read-only attributes exposing PyScannerObject fields.  Note that the
   pairs_hook field is published under the name "object_pairs_hook" and
   that memo is not exposed.  The table ends with a {NULL} sentinel. */
static PyMemberDef scanner_members[] = {
    {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
    {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
    {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
    {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
    {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
    {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
    {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
    {NULL}
};
72
/* Instance layout of the encoder type.  The PyObject* fields are published
   read-only through encoder_members (defaultfn under the name "default");
   the three int fields are internal only.
   NOTE(review): the code that fills in and consumes these fields lies
   outside this section, so their exact meaning should be confirmed there. */
typedef struct _PyEncoderObject {
    PyObject_HEAD
    PyObject *markers;
    PyObject *defaultfn;
    PyObject *encoder;
    PyObject *indent;
    PyObject *key_separator;
    PyObject *item_separator;
    PyObject *sort_keys;
    PyObject *skipkeys;
    PyObject *key_memo;
    int fast_encode;
    int allow_nan;
    int use_decimal;
} PyEncoderObject;
88
/* Read-only attributes exposing the PyObject* fields of PyEncoderObject.
   The defaultfn field is published as "default"; the int flags
   (fast_encode, allow_nan, use_decimal) are not exposed.  The table ends
   with a {NULL} sentinel. */
static PyMemberDef encoder_members[] = {
    {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
    {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
    {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
    {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
    {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
    {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
    {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
    {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
    {"key_memo", T_OBJECT, offsetof(PyEncoderObject, key_memo), READONLY, "key_memo"},
    {NULL}
};
101
/* Forward declarations.  Everything is file-local (static) except
   init_speedups, the only symbol declared with external linkage here. */

/* String escaping (encoder side) */
static Py_ssize_t
ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
static PyObject *
ascii_escape_unicode(PyObject *pystr);
static PyObject *
ascii_escape_str(PyObject *pystr);
static PyObject *
py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
void init_speedups(void);
/* Scanner (decoder side) */
static PyObject *
scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
static PyObject *
scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
static PyObject *
_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
static PyObject *
scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
static int
scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
static void
scanner_dealloc(PyObject *self);
static int
scanner_clear(PyObject *self);
/* Encoder type */
static PyObject *
encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
static int
encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
static void
encoder_dealloc(PyObject *self);
static int
encoder_clear(PyObject *self);
static int
encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
static int
encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
static int
encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
static PyObject *
_encoded_const(PyObject *obj);
/* Shared helpers */
static void
raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
static PyObject *
encoder_encode_string(PyEncoderObject *s, PyObject *obj);
static int
_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
static PyObject *
_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
static PyObject *
encoder_encode_float(PyEncoderObject *s, PyObject *obj);
151
/* S_CHAR(c): true when c may be copied verbatim into an ASCII JSON string,
   i.e. printable ASCII (' ' through '~') other than backslash and double
   quote.  The argument is parenthesized so that expression arguments are
   parsed correctly; it is still evaluated more than once, so it must be
   free of side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* IS_WHITESPACE(c): true for space, tab, newline and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* Worst-case number of output chars produced per input char when escaping:
   one "\uXXXX" escape is 6 chars; on wide-unicode builds a code point
   above 0xFFFF becomes a surrogate pair "\uXXXX\uXXXX", i.e. 12 chars. */
#define MIN_EXPANSION 6
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
161
162static int
163_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
164{
165 /* PyObject to Py_ssize_t converter */
166 *size_ptr = PyInt_AsSsize_t(o);
167 if (*size_ptr == -1 && PyErr_Occurred())
168 return 0;
169 return 1;
170}
171
172static PyObject *
173_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
174{
175 /* Py_ssize_t to PyObject converter */
176 return PyInt_FromSsize_t(*size_ptr);
177}
178
179static Py_ssize_t
180ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
181{
182 /* Escape unicode code point c to ASCII escape sequences
183 in char *output. output must have at least 12 bytes unused to
184 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
185 output[chars++] = '\\';
186 switch (c) {
187 case '\\': output[chars++] = (char)c; break;
188 case '"': output[chars++] = (char)c; break;
189 case '\b': output[chars++] = 'b'; break;
190 case '\f': output[chars++] = 'f'; break;
191 case '\n': output[chars++] = 'n'; break;
192 case '\r': output[chars++] = 'r'; break;
193 case '\t': output[chars++] = 't'; break;
194 default:
195#ifdef Py_UNICODE_WIDE
196 if (c >= 0x10000) {
197 /* UTF-16 surrogate pair */
198 Py_UNICODE v = c - 0x10000;
199 c = 0xd800 | ((v >> 10) & 0x3ff);
200 output[chars++] = 'u';
201 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
202 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
203 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
204 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
205 c = 0xdc00 | (v & 0x3ff);
206 output[chars++] = '\\';
207 }
208#endif
209 output[chars++] = 'u';
210 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
211 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
212 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
213 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
214 }
215 return chars;
216}
217
218static PyObject *
219ascii_escape_unicode(PyObject *pystr)
220{
221 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
222 Py_ssize_t i;
223 Py_ssize_t input_chars;
224 Py_ssize_t output_size;
225 Py_ssize_t max_output_size;
226 Py_ssize_t chars;
227 PyObject *rval;
228 char *output;
229 Py_UNICODE *input_unicode;
230
231 input_chars = PyUnicode_GET_SIZE(pystr);
232 input_unicode = PyUnicode_AS_UNICODE(pystr);
233
234 /* One char input can be up to 6 chars output, estimate 4 of these */
235 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
236 max_output_size = 2 + (input_chars * MAX_EXPANSION);
237 rval = PyString_FromStringAndSize(NULL, output_size);
238 if (rval == NULL) {
239 return NULL;
240 }
241 output = PyString_AS_STRING(rval);
242 chars = 0;
243 output[chars++] = '"';
244 for (i = 0; i < input_chars; i++) {
245 Py_UNICODE c = input_unicode[i];
246 if (S_CHAR(c)) {
247 output[chars++] = (char)c;
248 }
249 else {
250 chars = ascii_escape_char(c, output, chars);
251 }
252 if (output_size - chars < (1 + MAX_EXPANSION)) {
253 /* There's more than four, so let's resize by a lot */
254 Py_ssize_t new_output_size = output_size * 2;
255 /* This is an upper bound */
256 if (new_output_size > max_output_size) {
257 new_output_size = max_output_size;
258 }
259 /* Make sure that the output size changed before resizing */
260 if (new_output_size != output_size) {
261 output_size = new_output_size;
262 if (_PyString_Resize(&rval, output_size) == -1) {
263 return NULL;
264 }
265 output = PyString_AS_STRING(rval);
266 }
267 }
268 }
269 output[chars++] = '"';
270 if (_PyString_Resize(&rval, chars) == -1) {
271 return NULL;
272 }
273 return rval;
274}
275
276static PyObject *
277ascii_escape_str(PyObject *pystr)
278{
279 /* Take a PyString pystr and return a new ASCII-only escaped PyString */
280 Py_ssize_t i;
281 Py_ssize_t input_chars;
282 Py_ssize_t output_size;
283 Py_ssize_t chars;
284 PyObject *rval;
285 char *output;
286 char *input_str;
287
288 input_chars = PyString_GET_SIZE(pystr);
289 input_str = PyString_AS_STRING(pystr);
290
291 /* Fast path for a string that's already ASCII */
292 for (i = 0; i < input_chars; i++) {
293 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
294 if (!S_CHAR(c)) {
295 /* If we have to escape something, scan the string for unicode */
296 Py_ssize_t j;
297 for (j = i; j < input_chars; j++) {
298 c = (Py_UNICODE)(unsigned char)input_str[j];
299 if (c > 0x7f) {
300 /* We hit a non-ASCII character, bail to unicode mode */
301 PyObject *uni;
302 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
303 if (uni == NULL) {
304 return NULL;
305 }
306 rval = ascii_escape_unicode(uni);
307 Py_DECREF(uni);
308 return rval;
309 }
310 }
311 break;
312 }
313 }
314
315 if (i == input_chars) {
316 /* Input is already ASCII */
317 output_size = 2 + input_chars;
318 }
319 else {
320 /* One char input can be up to 6 chars output, estimate 4 of these */
321 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
322 }
323 rval = PyString_FromStringAndSize(NULL, output_size);
324 if (rval == NULL) {
325 return NULL;
326 }
327 output = PyString_AS_STRING(rval);
328 output[0] = '"';
329
330 /* We know that everything up to i is ASCII already */
331 chars = i + 1;
332 memcpy(&output[1], input_str, i);
333
334 for (; i < input_chars; i++) {
335 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
336 if (S_CHAR(c)) {
337 output[chars++] = (char)c;
338 }
339 else {
340 chars = ascii_escape_char(c, output, chars);
341 }
342 /* An ASCII char can't possibly expand to a surrogate! */
343 if (output_size - chars < (1 + MIN_EXPANSION)) {
344 /* There's more than four, so let's resize by a lot */
345 output_size *= 2;
346 if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
347 output_size = 2 + (input_chars * MIN_EXPANSION);
348 }
349 if (_PyString_Resize(&rval, output_size) == -1) {
350 return NULL;
351 }
352 output = PyString_AS_STRING(rval);
353 }
354 }
355 output[chars++] = '"';
356 if (_PyString_Resize(&rval, chars) == -1) {
357 return NULL;
358 }
359 return rval;
360}
361
362static void
363raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
364{
365 /* Use the Python function simplejson.decoder.errmsg to raise a nice
366 looking ValueError exception */
367 static PyObject *JSONDecodeError = NULL;
368 PyObject *exc;
369 if (JSONDecodeError == NULL) {
370 PyObject *decoder = PyImport_ImportModule("simplejson.decoder");
371 if (decoder == NULL)
372 return;
373 JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
374 Py_DECREF(decoder);
375 if (JSONDecodeError == NULL)
376 return;
377 }
378 exc = PyObject_CallFunction(JSONDecodeError, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
379 if (exc) {
380 PyErr_SetObject(JSONDecodeError, exc);
381 Py_DECREF(exc);
382 }
383}
384
385static PyObject *
386join_list_unicode(PyObject *lst)
387{
388 /* return u''.join(lst) */
389 static PyObject *joinfn = NULL;
390 if (joinfn == NULL) {
391 PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
392 if (ustr == NULL)
393 return NULL;
394
395 joinfn = PyObject_GetAttrString(ustr, "join");
396 Py_DECREF(ustr);
397 if (joinfn == NULL)
398 return NULL;
399 }
400 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
401}
402
403static PyObject *
404join_list_string(PyObject *lst)
405{
406 /* return ''.join(lst) */
407 static PyObject *joinfn = NULL;
408 if (joinfn == NULL) {
409 PyObject *ustr = PyString_FromStringAndSize(NULL, 0);
410 if (ustr == NULL)
411 return NULL;
412
413 joinfn = PyObject_GetAttrString(ustr, "join");
414 Py_DECREF(ustr);
415 if (joinfn == NULL)
416 return NULL;
417 }
418 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
419}
420
421static PyObject *
422_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
423 /* return (rval, idx) tuple, stealing reference to rval */
424 PyObject *tpl;
425 PyObject *pyidx;
426 /*
427 steal a reference to rval, returns (rval, idx)
428 */
429 if (rval == NULL) {
430 return NULL;
431 }
432 pyidx = PyInt_FromSsize_t(idx);
433 if (pyidx == NULL) {
434 Py_DECREF(rval);
435 return NULL;
436 }
437 tpl = PyTuple_New(2);
438 if (tpl == NULL) {
439 Py_DECREF(pyidx);
440 Py_DECREF(rval);
441 return NULL;
442 }
443 PyTuple_SET_ITEM(tpl, 0, rval);
444 PyTuple_SET_ITEM(tpl, 1, pyidx);
445 return tpl;
446}
447
/* APPEND_OLD_CHUNK: flush the pending `chunk` into the `chunks` list.
   If chunk is non-NULL, the list is created on first use, chunk is
   appended and then cleared.  Any failure jumps to the `bail` label.
   The macro relies on the enclosing function providing locals named
   chunk and chunks and a bail label.  It expands to a bare if-statement
   and is written at call sites without a trailing semicolon. */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        if (chunks == NULL) { \
            chunks = PyList_New(0); \
            if (chunks == NULL) { \
                goto bail; \
            } \
        } \
        if (PyList_Append(chunks, chunk)) { \
            goto bail; \
        } \
        Py_CLEAR(chunk); \
    }
461
462static PyObject *
463scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
464{
465 /* Read the JSON string from PyString pystr.
466 end is the index of the first character after the quote.
467 encoding is the encoding of pystr (must be an ASCII superset)
468 if strict is zero then literal control characters are allowed
469 *next_end_ptr is a return-by-reference index of the character
470 after the end quote
471
472 Return value is a new PyString (if ASCII-only) or PyUnicode
473 */
474 PyObject *rval;
475 Py_ssize_t len = PyString_GET_SIZE(pystr);
476 Py_ssize_t begin = end - 1;
477 Py_ssize_t next = begin;
478 int has_unicode = 0;
479 char *buf = PyString_AS_STRING(pystr);
480 PyObject *chunks = NULL;
481 PyObject *chunk = NULL;
482
483 if (end < 0 || len <= end) {
484 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
485 goto bail;
486 }
487 while (1) {
488 /* Find the end of the string or the next escape */
489 Py_UNICODE c = 0;
490 for (next = end; next < len; next++) {
491 c = (unsigned char)buf[next];
492 if (c == '"' || c == '\\') {
493 break;
494 }
495 else if (strict && c <= 0x1f) {
496 raise_errmsg("Invalid control character at", pystr, next);
497 goto bail;
498 }
499 else if (c > 0x7f) {
500 has_unicode = 1;
501 }
502 }
503 if (!(c == '"' || c == '\\')) {
504 raise_errmsg("Unterminated string starting at", pystr, begin);
505 goto bail;
506 }
507 /* Pick up this chunk if it's not zero length */
508 if (next != end) {
509 PyObject *strchunk;
510 APPEND_OLD_CHUNK
511 strchunk = PyString_FromStringAndSize(&buf[end], next - end);
512 if (strchunk == NULL) {
513 goto bail;
514 }
515 if (has_unicode) {
516 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
517 Py_DECREF(strchunk);
518 if (chunk == NULL) {
519 goto bail;
520 }
521 }
522 else {
523 chunk = strchunk;
524 }
525 }
526 next++;
527 if (c == '"') {
528 end = next;
529 break;
530 }
531 if (next == len) {
532 raise_errmsg("Unterminated string starting at", pystr, begin);
533 goto bail;
534 }
535 c = buf[next];
536 if (c != 'u') {
537 /* Non-unicode backslash escapes */
538 end = next + 1;
539 switch (c) {
540 case '"': break;
541 case '\\': break;
542 case '/': break;
543 case 'b': c = '\b'; break;
544 case 'f': c = '\f'; break;
545 case 'n': c = '\n'; break;
546 case 'r': c = '\r'; break;
547 case 't': c = '\t'; break;
548 default: c = 0;
549 }
550 if (c == 0) {
551 raise_errmsg("Invalid \\escape", pystr, end - 2);
552 goto bail;
553 }
554 }
555 else {
556 c = 0;
557 next++;
558 end = next + 4;
559 if (end >= len) {
560 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
561 goto bail;
562 }
563 /* Decode 4 hex digits */
564 for (; next < end; next++) {
565 Py_UNICODE digit = buf[next];
566 c <<= 4;
567 switch (digit) {
568 case '0': case '1': case '2': case '3': case '4':
569 case '5': case '6': case '7': case '8': case '9':
570 c |= (digit - '0'); break;
571 case 'a': case 'b': case 'c': case 'd': case 'e':
572 case 'f':
573 c |= (digit - 'a' + 10); break;
574 case 'A': case 'B': case 'C': case 'D': case 'E':
575 case 'F':
576 c |= (digit - 'A' + 10); break;
577 default:
578 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
579 goto bail;
580 }
581 }
582#ifdef Py_UNICODE_WIDE
583 /* Surrogate pair */
584 if ((c & 0xfc00) == 0xd800) {
585 Py_UNICODE c2 = 0;
586 if (end + 6 >= len) {
587 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
588 goto bail;
589 }
590 if (buf[next++] != '\\' || buf[next++] != 'u') {
591 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
592 goto bail;
593 }
594 end += 6;
595 /* Decode 4 hex digits */
596 for (; next < end; next++) {
597 c2 <<= 4;
598 Py_UNICODE digit = buf[next];
599 switch (digit) {
600 case '0': case '1': case '2': case '3': case '4':
601 case '5': case '6': case '7': case '8': case '9':
602 c2 |= (digit - '0'); break;
603 case 'a': case 'b': case 'c': case 'd': case 'e':
604 case 'f':
605 c2 |= (digit - 'a' + 10); break;
606 case 'A': case 'B': case 'C': case 'D': case 'E':
607 case 'F':
608 c2 |= (digit - 'A' + 10); break;
609 default:
610 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
611 goto bail;
612 }
613 }
614 if ((c2 & 0xfc00) != 0xdc00) {
615 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
616 goto bail;
617 }
618 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
619 }
620 else if ((c & 0xfc00) == 0xdc00) {
621 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
622 goto bail;
623 }
624#endif
625 }
626 if (c > 0x7f) {
627 has_unicode = 1;
628 }
629 APPEND_OLD_CHUNK
630 if (has_unicode) {
631 chunk = PyUnicode_FromUnicode(&c, 1);
632 if (chunk == NULL) {
633 goto bail;
634 }
635 }
636 else {
637 char c_char = Py_CHARMASK(c);
638 chunk = PyString_FromStringAndSize(&c_char, 1);
639 if (chunk == NULL) {
640 goto bail;
641 }
642 }
643 }
644
645 if (chunks == NULL) {
646 if (chunk != NULL)
647 rval = chunk;
648 else
649 rval = PyString_FromStringAndSize("", 0);
650 }
651 else {
652 APPEND_OLD_CHUNK
653 rval = join_list_string(chunks);
654 if (rval == NULL) {
655 goto bail;
656 }
657 Py_CLEAR(chunks);
658 }
659
660 *next_end_ptr = end;
661 return rval;
662bail:
663 *next_end_ptr = -1;
664 Py_XDECREF(chunk);
665 Py_XDECREF(chunks);
666 return NULL;
667}
668
669
670static PyObject *
671scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
672{
673 /* Read the JSON string from PyUnicode pystr.
674 end is the index of the first character after the quote.
675 if strict is zero then literal control characters are allowed
676 *next_end_ptr is a return-by-reference index of the character
677 after the end quote
678
679 Return value is a new PyUnicode
680 */
681 PyObject *rval;
682 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
683 Py_ssize_t begin = end - 1;
684 Py_ssize_t next = begin;
685 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
686 PyObject *chunks = NULL;
687 PyObject *chunk = NULL;
688
689 if (end < 0 || len <= end) {
690 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
691 goto bail;
692 }
693 while (1) {
694 /* Find the end of the string or the next escape */
695 Py_UNICODE c = 0;
696 for (next = end; next < len; next++) {
697 c = buf[next];
698 if (c == '"' || c == '\\') {
699 break;
700 }
701 else if (strict && c <= 0x1f) {
702 raise_errmsg("Invalid control character at", pystr, next);
703 goto bail;
704 }
705 }
706 if (!(c == '"' || c == '\\')) {
707 raise_errmsg("Unterminated string starting at", pystr, begin);
708 goto bail;
709 }
710 /* Pick up this chunk if it's not zero length */
711 if (next != end) {
712 APPEND_OLD_CHUNK
713 chunk = PyUnicode_FromUnicode(&buf[end], next - end);
714 if (chunk == NULL) {
715 goto bail;
716 }
717 }
718 next++;
719 if (c == '"') {
720 end = next;
721 break;
722 }
723 if (next == len) {
724 raise_errmsg("Unterminated string starting at", pystr, begin);
725 goto bail;
726 }
727 c = buf[next];
728 if (c != 'u') {
729 /* Non-unicode backslash escapes */
730 end = next + 1;
731 switch (c) {
732 case '"': break;
733 case '\\': break;
734 case '/': break;
735 case 'b': c = '\b'; break;
736 case 'f': c = '\f'; break;
737 case 'n': c = '\n'; break;
738 case 'r': c = '\r'; break;
739 case 't': c = '\t'; break;
740 default: c = 0;
741 }
742 if (c == 0) {
743 raise_errmsg("Invalid \\escape", pystr, end - 2);
744 goto bail;
745 }
746 }
747 else {
748 c = 0;
749 next++;
750 end = next + 4;
751 if (end >= len) {
752 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
753 goto bail;
754 }
755 /* Decode 4 hex digits */
756 for (; next < end; next++) {
757 Py_UNICODE digit = buf[next];
758 c <<= 4;
759 switch (digit) {
760 case '0': case '1': case '2': case '3': case '4':
761 case '5': case '6': case '7': case '8': case '9':
762 c |= (digit - '0'); break;
763 case 'a': case 'b': case 'c': case 'd': case 'e':
764 case 'f':
765 c |= (digit - 'a' + 10); break;
766 case 'A': case 'B': case 'C': case 'D': case 'E':
767 case 'F':
768 c |= (digit - 'A' + 10); break;
769 default:
770 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
771 goto bail;
772 }
773 }
774#ifdef Py_UNICODE_WIDE
775 /* Surrogate pair */
776 if ((c & 0xfc00) == 0xd800) {
777 Py_UNICODE c2 = 0;
778 if (end + 6 >= len) {
779 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
780 goto bail;
781 }
782 if (buf[next++] != '\\' || buf[next++] != 'u') {
783 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
784 goto bail;
785 }
786 end += 6;
787 /* Decode 4 hex digits */
788 for (; next < end; next++) {
789 c2 <<= 4;
790 Py_UNICODE digit = buf[next];
791 switch (digit) {
792 case '0': case '1': case '2': case '3': case '4':
793 case '5': case '6': case '7': case '8': case '9':
794 c2 |= (digit - '0'); break;
795 case 'a': case 'b': case 'c': case 'd': case 'e':
796 case 'f':
797 c2 |= (digit - 'a' + 10); break;
798 case 'A': case 'B': case 'C': case 'D': case 'E':
799 case 'F':
800 c2 |= (digit - 'A' + 10); break;
801 default:
802 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
803 goto bail;
804 }
805 }
806 if ((c2 & 0xfc00) != 0xdc00) {
807 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
808 goto bail;
809 }
810 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
811 }
812 else if ((c & 0xfc00) == 0xdc00) {
813 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
814 goto bail;
815 }
816#endif
817 }
818 APPEND_OLD_CHUNK
819 chunk = PyUnicode_FromUnicode(&c, 1);
820 if (chunk == NULL) {
821 goto bail;
822 }
823 }
824
825 if (chunks == NULL) {
826 if (chunk != NULL)
827 rval = chunk;
828 else
829 rval = PyUnicode_FromUnicode(NULL, 0);
830 }
831 else {
832 APPEND_OLD_CHUNK
833 rval = join_list_unicode(chunks);
834 if (rval == NULL) {
835 goto bail;
836 }
837 Py_CLEAR(chunks);
838 }
839 *next_end_ptr = end;
840 return rval;
841bail:
842 *next_end_ptr = -1;
843 Py_XDECREF(chunk);
844 Py_XDECREF(chunks);
845 return NULL;
846}
847
/* Docstring for py_scanstring.
   NOTE(review): the signature line shows encoding as required, but
   py_scanstring parses its arguments with "OO&|zi", so both encoding and
   strict are optional; the text also refers to the first argument as "s". */
PyDoc_STRVAR(pydoc_scanstring,
    "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
    "\n"
    "Scan the string s for a JSON string. End is the index of the\n"
    "character in s after the quote that started the JSON string.\n"
    "Unescapes all valid JSON string escape sequences and raises ValueError\n"
    "on attempt to decode an invalid string. If strict is False then literal\n"
    "control characters are allowed in the string.\n"
    "\n"
    "Returns a tuple of the decoded string and the index of the character in s\n"
    "after the end quote."
);
860
861static PyObject *
862py_scanstring(PyObject* self UNUSED, PyObject *args)
863{
864 PyObject *pystr;
865 PyObject *rval;
866 Py_ssize_t end;
867 Py_ssize_t next_end = -1;
868 char *encoding = NULL;
869 int strict = 1;
870 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
871 return NULL;
872 }
873 if (encoding == NULL) {
874 encoding = DEFAULT_ENCODING;
875 }
876 if (PyString_Check(pystr)) {
877 rval = scanstring_str(pystr, end, encoding, strict, &next_end);
878 }
879 else if (PyUnicode_Check(pystr)) {
880 rval = scanstring_unicode(pystr, end, strict, &next_end);
881 }
882 else {
883 PyErr_Format(PyExc_TypeError,
884 "first argument must be a string, not %.80s",
885 Py_TYPE(pystr)->tp_name);
886 return NULL;
887 }
888 return _build_rval_index_tuple(rval, next_end);
889}
890
/* Docstring for py_encode_basestring_ascii.
   NOTE(review): the method table that attaches it is outside this section. */
PyDoc_STRVAR(pydoc_encode_basestring_ascii,
    "encode_basestring_ascii(basestring) -> str\n"
    "\n"
    "Return an ASCII-only JSON representation of a Python string"
);
896
897static PyObject *
898py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
899{
900 /* Return an ASCII-only JSON representation of a Python string */
901 /* METH_O */
902 if (PyString_Check(pystr)) {
903 return ascii_escape_str(pystr);
904 }
905 else if (PyUnicode_Check(pystr)) {
906 return ascii_escape_unicode(pystr);
907 }
908 else {
909 PyErr_Format(PyExc_TypeError,
910 "first argument must be a string, not %.80s",
911 Py_TYPE(pystr)->tp_name);
912 return NULL;
913 }
914}
915
916static void
917scanner_dealloc(PyObject *self)
918{
919 /* Deallocate scanner object */
920 scanner_clear(self);
921 Py_TYPE(self)->tp_free(self);
922}
923
924static int
925scanner_traverse(PyObject *self, visitproc visit, void *arg)
926{
927 PyScannerObject *s;
928 assert(PyScanner_Check(self));
929 s = (PyScannerObject *)self;
930 Py_VISIT(s->encoding);
931 Py_VISIT(s->strict);
932 Py_VISIT(s->object_hook);
933 Py_VISIT(s->pairs_hook);
934 Py_VISIT(s->parse_float);
935 Py_VISIT(s->parse_int);
936 Py_VISIT(s->parse_constant);
937 Py_VISIT(s->memo);
938 return 0;
939}
940
941static int
942scanner_clear(PyObject *self)
943{
944 PyScannerObject *s;
945 assert(PyScanner_Check(self));
946 s = (PyScannerObject *)self;
947 Py_CLEAR(s->encoding);
948 Py_CLEAR(s->strict);
949 Py_CLEAR(s->object_hook);
950 Py_CLEAR(s->pairs_hook);
951 Py_CLEAR(s->parse_float);
952 Py_CLEAR(s->parse_int);
953 Py_CLEAR(s->parse_constant);
954 Py_CLEAR(s->memo);
955 return 0;
956}
957
958static PyObject *
959_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
960 /* Read a JSON object from PyString pystr.
961 idx is the index of the first character after the opening curly brace.
962 *next_idx_ptr is a return-by-reference index to the first character after
963 the closing curly brace.
964
965 Returns a new PyObject (usually a dict, but object_hook or
966 object_pairs_hook can change that)
967 */
968 char *str = PyString_AS_STRING(pystr);
969 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
970 PyObject *rval = NULL;
971 PyObject *pairs = NULL;
972 PyObject *item;
973 PyObject *key = NULL;
974 PyObject *val = NULL;
975 char *encoding = PyString_AS_STRING(s->encoding);
976 int strict = PyObject_IsTrue(s->strict);
977 int has_pairs_hook = (s->pairs_hook != Py_None);
978 Py_ssize_t next_idx;
979 if (has_pairs_hook) {
980 pairs = PyList_New(0);
981 if (pairs == NULL)
982 return NULL;
983 }
984 else {
985 rval = PyDict_New();
986 if (rval == NULL)
987 return NULL;
988 }
989
990 /* skip whitespace after { */
991 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
992
993 /* only loop if the object is non-empty */
994 if (idx <= end_idx && str[idx] != '}') {
995 while (idx <= end_idx) {
996 PyObject *memokey;
997
998 /* read key */
999 if (str[idx] != '"') {
1000 raise_errmsg("Expecting property name", pystr, idx);
1001 goto bail;
1002 }
1003 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
1004 if (key == NULL)
1005 goto bail;
1006 memokey = PyDict_GetItem(s->memo, key);
1007 if (memokey != NULL) {
1008 Py_INCREF(memokey);
1009 Py_DECREF(key);
1010 key = memokey;
1011 }
1012 else {
1013 if (PyDict_SetItem(s->memo, key, key) < 0)
1014 goto bail;
1015 }
1016 idx = next_idx;
1017
1018 /* skip whitespace between key and : delimiter, read :, skip whitespace */
1019 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1020 if (idx > end_idx || str[idx] != ':') {
1021 raise_errmsg("Expecting : delimiter", pystr, idx);
1022 goto bail;
1023 }
1024 idx++;
1025 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1026
1027 /* read any JSON data type */
1028 val = scan_once_str(s, pystr, idx, &next_idx);
1029 if (val == NULL)
1030 goto bail;
1031
1032 if (has_pairs_hook) {
1033 item = PyTuple_Pack(2, key, val);
1034 if (item == NULL)
1035 goto bail;
1036 Py_CLEAR(key);
1037 Py_CLEAR(val);
1038 if (PyList_Append(pairs, item) == -1) {
1039 Py_DECREF(item);
1040 goto bail;
1041 }
1042 Py_DECREF(item);
1043 }
1044 else {
1045 if (PyDict_SetItem(rval, key, val) < 0)
1046 goto bail;
1047 Py_CLEAR(key);
1048 Py_CLEAR(val);
1049 }
1050 idx = next_idx;
1051
1052 /* skip whitespace before } or , */
1053 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1054
1055 /* bail if the object is closed or we didn't get the , delimiter */
1056 if (idx > end_idx) break;
1057 if (str[idx] == '}') {
1058 break;
1059 }
1060 else if (str[idx] != ',') {
1061 raise_errmsg("Expecting , delimiter", pystr, idx);
1062 goto bail;
1063 }
1064 idx++;
1065
1066 /* skip whitespace after , delimiter */
1067 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1068 }
1069 }
1070 /* verify that idx < end_idx, str[idx] should be '}' */
1071 if (idx > end_idx || str[idx] != '}') {
1072 raise_errmsg("Expecting object", pystr, end_idx);
1073 goto bail;
1074 }
1075
1076 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1077 if (s->pairs_hook != Py_None) {
1078 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1079 if (val == NULL)
1080 goto bail;
1081 Py_DECREF(pairs);
1082 *next_idx_ptr = idx + 1;
1083 return val;
1084 }
1085
1086 /* if object_hook is not None: rval = object_hook(rval) */
1087 if (s->object_hook != Py_None) {
1088 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1089 if (val == NULL)
1090 goto bail;
1091 Py_DECREF(rval);
1092 rval = val;
1093 val = NULL;
1094 }
1095 *next_idx_ptr = idx + 1;
1096 return rval;
1097bail:
1098 Py_XDECREF(rval);
1099 Py_XDECREF(key);
1100 Py_XDECREF(val);
1101 Py_XDECREF(pairs);
1102 return NULL;
1103}
1104
1105static PyObject *
1106_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1107 /* Read a JSON object from PyUnicode pystr.
1108 idx is the index of the first character after the opening curly brace.
1109 *next_idx_ptr is a return-by-reference index to the first character after
1110 the closing curly brace.
1111
1112 Returns a new PyObject (usually a dict, but object_hook can change that)
1113 */
1114 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1115 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1116 PyObject *rval = NULL;
1117 PyObject *pairs = NULL;
1118 PyObject *item;
1119 PyObject *key = NULL;
1120 PyObject *val = NULL;
1121 int strict = PyObject_IsTrue(s->strict);
1122 int has_pairs_hook = (s->pairs_hook != Py_None);
1123 Py_ssize_t next_idx;
1124
1125 if (has_pairs_hook) {
1126 pairs = PyList_New(0);
1127 if (pairs == NULL)
1128 return NULL;
1129 }
1130 else {
1131 rval = PyDict_New();
1132 if (rval == NULL)
1133 return NULL;
1134 }
1135
1136 /* skip whitespace after { */
1137 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1138
1139 /* only loop if the object is non-empty */
1140 if (idx <= end_idx && str[idx] != '}') {
1141 while (idx <= end_idx) {
1142 PyObject *memokey;
1143
1144 /* read key */
1145 if (str[idx] != '"') {
1146 raise_errmsg("Expecting property name", pystr, idx);
1147 goto bail;
1148 }
1149 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1150 if (key == NULL)
1151 goto bail;
1152 memokey = PyDict_GetItem(s->memo, key);
1153 if (memokey != NULL) {
1154 Py_INCREF(memokey);
1155 Py_DECREF(key);
1156 key = memokey;
1157 }
1158 else {
1159 if (PyDict_SetItem(s->memo, key, key) < 0)
1160 goto bail;
1161 }
1162 idx = next_idx;
1163
1164 /* skip whitespace between key and : delimiter, read :, skip whitespace */
1165 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1166 if (idx > end_idx || str[idx] != ':') {
1167 raise_errmsg("Expecting : delimiter", pystr, idx);
1168 goto bail;
1169 }
1170 idx++;
1171 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1172
1173 /* read any JSON term */
1174 val = scan_once_unicode(s, pystr, idx, &next_idx);
1175 if (val == NULL)
1176 goto bail;
1177
1178 if (has_pairs_hook) {
1179 item = PyTuple_Pack(2, key, val);
1180 if (item == NULL)
1181 goto bail;
1182 Py_CLEAR(key);
1183 Py_CLEAR(val);
1184 if (PyList_Append(pairs, item) == -1) {
1185 Py_DECREF(item);
1186 goto bail;
1187 }
1188 Py_DECREF(item);
1189 }
1190 else {
1191 if (PyDict_SetItem(rval, key, val) < 0)
1192 goto bail;
1193 Py_CLEAR(key);
1194 Py_CLEAR(val);
1195 }
1196 idx = next_idx;
1197
1198 /* skip whitespace before } or , */
1199 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1200
1201 /* bail if the object is closed or we didn't get the , delimiter */
1202 if (idx > end_idx) break;
1203 if (str[idx] == '}') {
1204 break;
1205 }
1206 else if (str[idx] != ',') {
1207 raise_errmsg("Expecting , delimiter", pystr, idx);
1208 goto bail;
1209 }
1210 idx++;
1211
1212 /* skip whitespace after , delimiter */
1213 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1214 }
1215 }
1216
1217 /* verify that idx < end_idx, str[idx] should be '}' */
1218 if (idx > end_idx || str[idx] != '}') {
1219 raise_errmsg("Expecting object", pystr, end_idx);
1220 goto bail;
1221 }
1222
1223 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1224 if (s->pairs_hook != Py_None) {
1225 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1226 if (val == NULL)
1227 goto bail;
1228 Py_DECREF(pairs);
1229 *next_idx_ptr = idx + 1;
1230 return val;
1231 }
1232
1233 /* if object_hook is not None: rval = object_hook(rval) */
1234 if (s->object_hook != Py_None) {
1235 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1236 if (val == NULL)
1237 goto bail;
1238 Py_DECREF(rval);
1239 rval = val;
1240 val = NULL;
1241 }
1242 *next_idx_ptr = idx + 1;
1243 return rval;
1244bail:
1245 Py_XDECREF(rval);
1246 Py_XDECREF(key);
1247 Py_XDECREF(val);
1248 Py_XDECREF(pairs);
1249 return NULL;
1250}
1251
1252static PyObject *
1253_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1254 /* Read a JSON array from PyString pystr.
1255 idx is the index of the first character after the opening brace.
1256 *next_idx_ptr is a return-by-reference index to the first character after
1257 the closing brace.
1258
1259 Returns a new PyList
1260 */
1261 char *str = PyString_AS_STRING(pystr);
1262 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1263 PyObject *val = NULL;
1264 PyObject *rval = PyList_New(0);
1265 Py_ssize_t next_idx;
1266 if (rval == NULL)
1267 return NULL;
1268
1269 /* skip whitespace after [ */
1270 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1271
1272 /* only loop if the array is non-empty */
1273 if (idx <= end_idx && str[idx] != ']') {
1274 while (idx <= end_idx) {
1275
1276 /* read any JSON term and de-tuplefy the (rval, idx) */
1277 val = scan_once_str(s, pystr, idx, &next_idx);
1278 if (val == NULL) {
1279 if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
1280 PyErr_Clear();
1281 raise_errmsg("Expecting object", pystr, idx);
1282 }
1283 goto bail;
1284 }
1285
1286 if (PyList_Append(rval, val) == -1)
1287 goto bail;
1288
1289 Py_CLEAR(val);
1290 idx = next_idx;
1291
1292 /* skip whitespace between term and , */
1293 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1294
1295 /* bail if the array is closed or we didn't get the , delimiter */
1296 if (idx > end_idx) break;
1297 if (str[idx] == ']') {
1298 break;
1299 }
1300 else if (str[idx] != ',') {
1301 raise_errmsg("Expecting , delimiter", pystr, idx);
1302 goto bail;
1303 }
1304 idx++;
1305
1306 /* skip whitespace after , */
1307 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1308 }
1309 }
1310
1311 /* verify that idx < end_idx, str[idx] should be ']' */
1312 if (idx > end_idx || str[idx] != ']') {
1313 raise_errmsg("Expecting object", pystr, end_idx);
1314 goto bail;
1315 }
1316 *next_idx_ptr = idx + 1;
1317 return rval;
1318bail:
1319 Py_XDECREF(val);
1320 Py_DECREF(rval);
1321 return NULL;
1322}
1323
1324static PyObject *
1325_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1326 /* Read a JSON array from PyString pystr.
1327 idx is the index of the first character after the opening brace.
1328 *next_idx_ptr is a return-by-reference index to the first character after
1329 the closing brace.
1330
1331 Returns a new PyList
1332 */
1333 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1334 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1335 PyObject *val = NULL;
1336 PyObject *rval = PyList_New(0);
1337 Py_ssize_t next_idx;
1338 if (rval == NULL)
1339 return NULL;
1340
1341 /* skip whitespace after [ */
1342 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1343
1344 /* only loop if the array is non-empty */
1345 if (idx <= end_idx && str[idx] != ']') {
1346 while (idx <= end_idx) {
1347
1348 /* read any JSON term */
1349 val = scan_once_unicode(s, pystr, idx, &next_idx);
1350 if (val == NULL) {
1351 if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
1352 PyErr_Clear();
1353 raise_errmsg("Expecting object", pystr, idx);
1354 }
1355 goto bail;
1356 }
1357
1358 if (PyList_Append(rval, val) == -1)
1359 goto bail;
1360
1361 Py_CLEAR(val);
1362 idx = next_idx;
1363
1364 /* skip whitespace between term and , */
1365 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1366
1367 /* bail if the array is closed or we didn't get the , delimiter */
1368 if (idx > end_idx) break;
1369 if (str[idx] == ']') {
1370 break;
1371 }
1372 else if (str[idx] != ',') {
1373 raise_errmsg("Expecting , delimiter", pystr, idx);
1374 goto bail;
1375 }
1376 idx++;
1377
1378 /* skip whitespace after , */
1379 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1380 }
1381 }
1382
1383 /* verify that idx < end_idx, str[idx] should be ']' */
1384 if (idx > end_idx || str[idx] != ']') {
1385 raise_errmsg("Expecting object", pystr, end_idx);
1386 goto bail;
1387 }
1388 *next_idx_ptr = idx + 1;
1389 return rval;
1390bail:
1391 Py_XDECREF(val);
1392 Py_DECREF(rval);
1393 return NULL;
1394}
1395
1396static PyObject *
1397_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1398 /* Read a JSON constant from PyString pystr.
1399 constant is the constant string that was found
1400 ("NaN", "Infinity", "-Infinity").
1401 idx is the index of the first character of the constant
1402 *next_idx_ptr is a return-by-reference index to the first character after
1403 the constant.
1404
1405 Returns the result of parse_constant
1406 */
1407 PyObject *cstr;
1408 PyObject *rval;
1409 /* constant is "NaN", "Infinity", or "-Infinity" */
1410 cstr = PyString_InternFromString(constant);
1411 if (cstr == NULL)
1412 return NULL;
1413
1414 /* rval = parse_constant(constant) */
1415 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1416 idx += PyString_GET_SIZE(cstr);
1417 Py_DECREF(cstr);
1418 *next_idx_ptr = idx;
1419 return rval;
1420}
1421
1422static PyObject *
1423_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1424 /* Read a JSON number from PyString pystr.
1425 idx is the index of the first character of the number
1426 *next_idx_ptr is a return-by-reference index to the first character after
1427 the number.
1428
1429 Returns a new PyObject representation of that number:
1430 PyInt, PyLong, or PyFloat.
1431 May return other types if parse_int or parse_float are set
1432 */
1433 char *str = PyString_AS_STRING(pystr);
1434 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1435 Py_ssize_t idx = start;
1436 int is_float = 0;
1437 PyObject *rval;
1438 PyObject *numstr;
1439
1440 /* read a sign if it's there, make sure it's not the end of the string */
1441 if (str[idx] == '-') {
1442 idx++;
1443 if (idx > end_idx) {
1444 PyErr_SetNone(PyExc_StopIteration);
1445 return NULL;
1446 }
1447 }
1448
1449 /* read as many integer digits as we find as long as it doesn't start with 0 */
1450 if (str[idx] >= '1' && str[idx] <= '9') {
1451 idx++;
1452 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1453 }
1454 /* if it starts with 0 we only expect one integer digit */
1455 else if (str[idx] == '0') {
1456 idx++;
1457 }
1458 /* no integer digits, error */
1459 else {
1460 PyErr_SetNone(PyExc_StopIteration);
1461 return NULL;
1462 }
1463
1464 /* if the next char is '.' followed by a digit then read all float digits */
1465 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1466 is_float = 1;
1467 idx += 2;
1468 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1469 }
1470
1471 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1472 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1473
1474 /* save the index of the 'e' or 'E' just in case we need to backtrack */
1475 Py_ssize_t e_start = idx;
1476 idx++;
1477
1478 /* read an exponent sign if present */
1479 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1480
1481 /* read all digits */
1482 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1483
1484 /* if we got a digit, then parse as float. if not, backtrack */
1485 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1486 is_float = 1;
1487 }
1488 else {
1489 idx = e_start;
1490 }
1491 }
1492
1493 /* copy the section we determined to be a number */
1494 numstr = PyString_FromStringAndSize(&str[start], idx - start);
1495 if (numstr == NULL)
1496 return NULL;
1497 if (is_float) {
1498 /* parse as a float using a fast path if available, otherwise call user defined method */
1499 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1500 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1501 }
1502 else {
1503 /* rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); */
1504 double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1505 NULL, NULL);
1506 if (d == -1.0 && PyErr_Occurred())
1507 return NULL;
1508 rval = PyFloat_FromDouble(d);
1509 }
1510 }
1511 else {
1512 /* parse as an int using a fast path if available, otherwise call user defined method */
1513 if (s->parse_int != (PyObject *)&PyInt_Type) {
1514 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1515 }
1516 else {
1517 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1518 }
1519 }
1520 Py_DECREF(numstr);
1521 *next_idx_ptr = idx;
1522 return rval;
1523}
1524
1525static PyObject *
1526_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1527 /* Read a JSON number from PyUnicode pystr.
1528 idx is the index of the first character of the number
1529 *next_idx_ptr is a return-by-reference index to the first character after
1530 the number.
1531
1532 Returns a new PyObject representation of that number:
1533 PyInt, PyLong, or PyFloat.
1534 May return other types if parse_int or parse_float are set
1535 */
1536 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1537 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1538 Py_ssize_t idx = start;
1539 int is_float = 0;
1540 PyObject *rval;
1541 PyObject *numstr;
1542
1543 /* read a sign if it's there, make sure it's not the end of the string */
1544 if (str[idx] == '-') {
1545 idx++;
1546 if (idx > end_idx) {
1547 PyErr_SetNone(PyExc_StopIteration);
1548 return NULL;
1549 }
1550 }
1551
1552 /* read as many integer digits as we find as long as it doesn't start with 0 */
1553 if (str[idx] >= '1' && str[idx] <= '9') {
1554 idx++;
1555 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1556 }
1557 /* if it starts with 0 we only expect one integer digit */
1558 else if (str[idx] == '0') {
1559 idx++;
1560 }
1561 /* no integer digits, error */
1562 else {
1563 PyErr_SetNone(PyExc_StopIteration);
1564 return NULL;
1565 }
1566
1567 /* if the next char is '.' followed by a digit then read all float digits */
1568 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1569 is_float = 1;
1570 idx += 2;
1571 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1572 }
1573
1574 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1575 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1576 Py_ssize_t e_start = idx;
1577 idx++;
1578
1579 /* read an exponent sign if present */
1580 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1581
1582 /* read all digits */
1583 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1584
1585 /* if we got a digit, then parse as float. if not, backtrack */
1586 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1587 is_float = 1;
1588 }
1589 else {
1590 idx = e_start;
1591 }
1592 }
1593
1594 /* copy the section we determined to be a number */
1595 numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1596 if (numstr == NULL)
1597 return NULL;
1598 if (is_float) {
1599 /* parse as a float using a fast path if available, otherwise call user defined method */
1600 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1601 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1602 }
1603 else {
1604 rval = PyFloat_FromString(numstr, NULL);
1605 }
1606 }
1607 else {
1608 /* no fast path for unicode -> int, just call */
1609 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1610 }
1611 Py_DECREF(numstr);
1612 *next_idx_ptr = idx;
1613 return rval;
1614}
1615
1616static PyObject *
1617scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1618{
1619 /* Read one JSON term (of any kind) from PyString pystr.
1620 idx is the index of the first character of the term
1621 *next_idx_ptr is a return-by-reference index to the first character after
1622 the number.
1623
1624 Returns a new PyObject representation of the term.
1625 */
1626 char *str = PyString_AS_STRING(pystr);
1627 Py_ssize_t length = PyString_GET_SIZE(pystr);
1628 if (idx >= length) {
1629 PyErr_SetNone(PyExc_StopIteration);
1630 return NULL;
1631 }
1632 switch (str[idx]) {
1633 case '"':
1634 /* string */
1635 return scanstring_str(pystr, idx + 1,
1636 PyString_AS_STRING(s->encoding),
1637 PyObject_IsTrue(s->strict),
1638 next_idx_ptr);
1639 case '{':
1640 /* object */
1641 return _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1642 case '[':
1643 /* array */
1644 return _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1645 case 'n':
1646 /* null */
1647 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1648 Py_INCREF(Py_None);
1649 *next_idx_ptr = idx + 4;
1650 return Py_None;
1651 }
1652 break;
1653 case 't':
1654 /* true */
1655 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1656 Py_INCREF(Py_True);
1657 *next_idx_ptr = idx + 4;
1658 return Py_True;
1659 }
1660 break;
1661 case 'f':
1662 /* false */
1663 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1664 Py_INCREF(Py_False);
1665 *next_idx_ptr = idx + 5;
1666 return Py_False;
1667 }
1668 break;
1669 case 'N':
1670 /* NaN */
1671 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1672 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1673 }
1674 break;
1675 case 'I':
1676 /* Infinity */
1677 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1678 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1679 }
1680 break;
1681 case '-':
1682 /* -Infinity */
1683 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1684 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1685 }
1686 break;
1687 }
1688 /* Didn't find a string, object, array, or named constant. Look for a number. */
1689 return _match_number_str(s, pystr, idx, next_idx_ptr);
1690}
1691
1692static PyObject *
1693scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1694{
1695 /* Read one JSON term (of any kind) from PyUnicode pystr.
1696 idx is the index of the first character of the term
1697 *next_idx_ptr is a return-by-reference index to the first character after
1698 the number.
1699
1700 Returns a new PyObject representation of the term.
1701 */
1702 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1703 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1704 if (idx >= length) {
1705 PyErr_SetNone(PyExc_StopIteration);
1706 return NULL;
1707 }
1708 switch (str[idx]) {
1709 case '"':
1710 /* string */
1711 return scanstring_unicode(pystr, idx + 1,
1712 PyObject_IsTrue(s->strict),
1713 next_idx_ptr);
1714 case '{':
1715 /* object */
1716 return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1717 case '[':
1718 /* array */
1719 return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1720 case 'n':
1721 /* null */
1722 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1723 Py_INCREF(Py_None);
1724 *next_idx_ptr = idx + 4;
1725 return Py_None;
1726 }
1727 break;
1728 case 't':
1729 /* true */
1730 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1731 Py_INCREF(Py_True);
1732 *next_idx_ptr = idx + 4;
1733 return Py_True;
1734 }
1735 break;
1736 case 'f':
1737 /* false */
1738 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1739 Py_INCREF(Py_False);
1740 *next_idx_ptr = idx + 5;
1741 return Py_False;
1742 }
1743 break;
1744 case 'N':
1745 /* NaN */
1746 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1747 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1748 }
1749 break;
1750 case 'I':
1751 /* Infinity */
1752 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1753 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1754 }
1755 break;
1756 case '-':
1757 /* -Infinity */
1758 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1759 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1760 }
1761 break;
1762 }
1763 /* Didn't find a string, object, array, or named constant. Look for a number. */
1764 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1765}
1766
1767static PyObject *
1768scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1769{
1770 /* Python callable interface to scan_once_{str,unicode} */
1771 PyObject *pystr;
1772 PyObject *rval;
1773 Py_ssize_t idx;
1774 Py_ssize_t next_idx = -1;
1775 static char *kwlist[] = {"string", "idx", NULL};
1776 PyScannerObject *s;
1777 assert(PyScanner_Check(self));
1778 s = (PyScannerObject *)self;
1779 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1780 return NULL;
1781
1782 if (PyString_Check(pystr)) {
1783 rval = scan_once_str(s, pystr, idx, &next_idx);
1784 }
1785 else if (PyUnicode_Check(pystr)) {
1786 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1787 }
1788 else {
1789 PyErr_Format(PyExc_TypeError,
1790 "first argument must be a string, not %.80s",
1791 Py_TYPE(pystr)->tp_name);
1792 return NULL;
1793 }
1794 PyDict_Clear(s->memo);
1795 return _build_rval_index_tuple(rval, next_idx);
1796}
1797
1798static PyObject *
1799scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1800{
1801 PyScannerObject *s;
1802 s = (PyScannerObject *)type->tp_alloc(type, 0);
1803 if (s != NULL) {
1804 s->encoding = NULL;
1805 s->strict = NULL;
1806 s->object_hook = NULL;
1807 s->pairs_hook = NULL;
1808 s->parse_float = NULL;
1809 s->parse_int = NULL;
1810 s->parse_constant = NULL;
1811 }
1812 return (PyObject *)s;
1813}
1814
1815static int
1816scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1817{
1818 /* Initialize Scanner object */
1819 PyObject *ctx;
1820 static char *kwlist[] = {"context", NULL};
1821 PyScannerObject *s;
1822
1823 assert(PyScanner_Check(self));
1824 s = (PyScannerObject *)self;
1825
1826 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1827 return -1;
1828
1829 if (s->memo == NULL) {
1830 s->memo = PyDict_New();
1831 if (s->memo == NULL)
1832 goto bail;
1833 }
1834
1835 /* PyString_AS_STRING is used on encoding */
1836 s->encoding = PyObject_GetAttrString(ctx, "encoding");
1837 if (s->encoding == NULL)
1838 goto bail;
1839 if (s->encoding == Py_None) {
1840 Py_DECREF(Py_None);
1841 s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1842 }
1843 else if (PyUnicode_Check(s->encoding)) {
1844 PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
1845 Py_DECREF(s->encoding);
1846 s->encoding = tmp;
1847 }
1848 if (s->encoding == NULL || !PyString_Check(s->encoding))
1849 goto bail;
1850
1851 /* All of these will fail "gracefully" so we don't need to verify them */
1852 s->strict = PyObject_GetAttrString(ctx, "strict");
1853 if (s->strict == NULL)
1854 goto bail;
1855 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1856 if (s->object_hook == NULL)
1857 goto bail;
1858 s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1859 if (s->pairs_hook == NULL)
1860 goto bail;
1861 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1862 if (s->parse_float == NULL)
1863 goto bail;
1864 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1865 if (s->parse_int == NULL)
1866 goto bail;
1867 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1868 if (s->parse_constant == NULL)
1869 goto bail;
1870
1871 return 0;
1872
1873bail:
1874 Py_CLEAR(s->encoding);
1875 Py_CLEAR(s->strict);
1876 Py_CLEAR(s->object_hook);
1877 Py_CLEAR(s->pairs_hook);
1878 Py_CLEAR(s->parse_float);
1879 Py_CLEAR(s->parse_int);
1880 Py_CLEAR(s->parse_constant);
1881 return -1;
1882}
1883
1884PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1885
1886static
1887PyTypeObject PyScannerType = {
1888 PyObject_HEAD_INIT(NULL)
1889 0, /* tp_internal */
1890 "simplejson._speedups.Scanner", /* tp_name */
1891 sizeof(PyScannerObject), /* tp_basicsize */
1892 0, /* tp_itemsize */
1893 scanner_dealloc, /* tp_dealloc */
1894 0, /* tp_print */
1895 0, /* tp_getattr */
1896 0, /* tp_setattr */
1897 0, /* tp_compare */
1898 0, /* tp_repr */
1899 0, /* tp_as_number */
1900 0, /* tp_as_sequence */
1901 0, /* tp_as_mapping */
1902 0, /* tp_hash */
1903 scanner_call, /* tp_call */
1904 0, /* tp_str */
1905 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1906 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1907 0, /* tp_as_buffer */
1908 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1909 scanner_doc, /* tp_doc */
1910 scanner_traverse, /* tp_traverse */
1911 scanner_clear, /* tp_clear */
1912 0, /* tp_richcompare */
1913 0, /* tp_weaklistoffset */
1914 0, /* tp_iter */
1915 0, /* tp_iternext */
1916 0, /* tp_methods */
1917 scanner_members, /* tp_members */
1918 0, /* tp_getset */
1919 0, /* tp_base */
1920 0, /* tp_dict */
1921 0, /* tp_descr_get */
1922 0, /* tp_descr_set */
1923 0, /* tp_dictoffset */
1924 scanner_init, /* tp_init */
1925 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1926 scanner_new, /* tp_new */
1927 0,/* PyObject_GC_Del, */ /* tp_free */
1928};
1929
1930static PyObject *
1931encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1932{
1933 PyEncoderObject *s;
1934 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1935 if (s != NULL) {
1936 s->markers = NULL;
1937 s->defaultfn = NULL;
1938 s->encoder = NULL;
1939 s->indent = NULL;
1940 s->key_separator = NULL;
1941 s->item_separator = NULL;
1942 s->sort_keys = NULL;
1943 s->skipkeys = NULL;
1944 s->key_memo = NULL;
1945 }
1946 return (PyObject *)s;
1947}
1948
1949static int
1950encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1951{
1952 /* initialize Encoder object */
1953 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "key_memo", "use_decimal", NULL};
1954
1955 PyEncoderObject *s;
1956 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1957 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo, *use_decimal;
1958
1959 assert(PyEncoder_Check(self));
1960 s = (PyEncoderObject *)self;
1961
1962 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOO:make_encoder", kwlist,
1963 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
1964 &sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal))
1965 return -1;
1966
1967 s->markers = markers;
1968 s->defaultfn = defaultfn;
1969 s->encoder = encoder;
1970 s->indent = indent;
1971 s->key_separator = key_separator;
1972 s->item_separator = item_separator;
1973 s->sort_keys = sort_keys;
1974 s->skipkeys = skipkeys;
1975 s->key_memo = key_memo;
1976 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
1977 s->allow_nan = PyObject_IsTrue(allow_nan);
1978 s->use_decimal = PyObject_IsTrue(use_decimal);
1979
1980 Py_INCREF(s->markers);
1981 Py_INCREF(s->defaultfn);
1982 Py_INCREF(s->encoder);
1983 Py_INCREF(s->indent);
1984 Py_INCREF(s->key_separator);
1985 Py_INCREF(s->item_separator);
1986 Py_INCREF(s->sort_keys);
1987 Py_INCREF(s->skipkeys);
1988 Py_INCREF(s->key_memo);
1989 return 0;
1990}
1991
1992static PyObject *
1993encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1994{
1995 /* Python callable interface to encode_listencode_obj */
1996 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1997 PyObject *obj;
1998 PyObject *rval;
1999 Py_ssize_t indent_level;
2000 PyEncoderObject *s;
2001 assert(PyEncoder_Check(self));
2002 s = (PyEncoderObject *)self;
2003 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
2004 &obj, _convertPyInt_AsSsize_t, &indent_level))
2005 return NULL;
2006 rval = PyList_New(0);
2007 if (rval == NULL)
2008 return NULL;
2009 if (encoder_listencode_obj(s, rval, obj, indent_level)) {
2010 Py_DECREF(rval);
2011 return NULL;
2012 }
2013 return rval;
2014}
2015
2016static PyObject *
2017_encoded_const(PyObject *obj)
2018{
2019 /* Return the JSON string representation of None, True, False */
2020 if (obj == Py_None) {
2021 static PyObject *s_null = NULL;
2022 if (s_null == NULL) {
2023 s_null = PyString_InternFromString("null");
2024 }
2025 Py_INCREF(s_null);
2026 return s_null;
2027 }
2028 else if (obj == Py_True) {
2029 static PyObject *s_true = NULL;
2030 if (s_true == NULL) {
2031 s_true = PyString_InternFromString("true");
2032 }
2033 Py_INCREF(s_true);
2034 return s_true;
2035 }
2036 else if (obj == Py_False) {
2037 static PyObject *s_false = NULL;
2038 if (s_false == NULL) {
2039 s_false = PyString_InternFromString("false");
2040 }
2041 Py_INCREF(s_false);
2042 return s_false;
2043 }
2044 else {
2045 PyErr_SetString(PyExc_ValueError, "not a const");
2046 return NULL;
2047 }
2048}
2049
2050static PyObject *
2051encoder_encode_float(PyEncoderObject *s, PyObject *obj)
2052{
2053 /* Return the JSON representation of a PyFloat */
2054 double i = PyFloat_AS_DOUBLE(obj);
2055 if (!Py_IS_FINITE(i)) {
2056 if (!s->allow_nan) {
2057 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
2058 return NULL;
2059 }
2060 if (i > 0) {
2061 return PyString_FromString("Infinity");
2062 }
2063 else if (i < 0) {
2064 return PyString_FromString("-Infinity");
2065 }
2066 else {
2067 return PyString_FromString("NaN");
2068 }
2069 }
2070 /* Use a better float format here? */
2071 return PyObject_Repr(obj);
2072}
2073
2074static PyObject *
2075encoder_encode_string(PyEncoderObject *s, PyObject *obj)
2076{
2077 /* Return the JSON representation of a string */
2078 if (s->fast_encode)
2079 return py_encode_basestring_ascii(NULL, obj);
2080 else
2081 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
2082}
2083
2084static int
2085_steal_list_append(PyObject *lst, PyObject *stolen)
2086{
2087 /* Append stolen and then decrement its reference count */
2088 int rval = PyList_Append(lst, stolen);
2089 Py_DECREF(stolen);
2090 return rval;
2091}
2092
2093static int
2094encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
2095{
2096 /* Encode Python object obj to a JSON term, rval is a PyList */
2097 PyObject *newobj;
2098 int rv;
2099
2100 if (obj == Py_None || obj == Py_True || obj == Py_False) {
2101 PyObject *cstr = _encoded_const(obj);
2102 if (cstr == NULL)
2103 return -1;
2104 return _steal_list_append(rval, cstr);
2105 }
2106 else if (PyString_Check(obj) || PyUnicode_Check(obj))
2107 {
2108 PyObject *encoded = encoder_encode_string(s, obj);
2109 if (encoded == NULL)
2110 return -1;
2111 return _steal_list_append(rval, encoded);
2112 }
2113 else if (PyInt_Check(obj) || PyLong_Check(obj)) {
2114 PyObject *encoded = PyObject_Str(obj);
2115 if (encoded == NULL)
2116 return -1;
2117 return _steal_list_append(rval, encoded);
2118 }
2119 else if (PyFloat_Check(obj)) {
2120 PyObject *encoded = encoder_encode_float(s, obj);
2121 if (encoded == NULL)
2122 return -1;
2123 return _steal_list_append(rval, encoded);
2124 }
2125 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
2126 return encoder_listencode_list(s, rval, obj, indent_level);
2127 }
2128 else if (PyDict_Check(obj)) {
2129 return encoder_listencode_dict(s, rval, obj, indent_level);
2130 }
2131 else if (s->use_decimal && Decimal_Check(obj)) {
2132 PyObject *encoded = PyObject_Str(obj);
2133 if (encoded == NULL)
2134 return -1;
2135 return _steal_list_append(rval, encoded);
2136 }
2137 else {
2138 PyObject *ident = NULL;
2139 if (s->markers != Py_None) {
2140 int has_key;
2141 ident = PyLong_FromVoidPtr(obj);
2142 if (ident == NULL)
2143 return -1;
2144 has_key = PyDict_Contains(s->markers, ident);
2145 if (has_key) {
2146 if (has_key != -1)
2147 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2148 Py_DECREF(ident);
2149 return -1;
2150 }
2151 if (PyDict_SetItem(s->markers, ident, obj)) {
2152 Py_DECREF(ident);
2153 return -1;
2154 }
2155 }
2156 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2157 if (newobj == NULL) {
2158 Py_XDECREF(ident);
2159 return -1;
2160 }
2161 rv = encoder_listencode_obj(s, rval, newobj, indent_level);
2162 Py_DECREF(newobj);
2163 if (rv) {
2164 Py_XDECREF(ident);
2165 return -1;
2166 }
2167 if (ident != NULL) {
2168 if (PyDict_DelItem(s->markers, ident)) {
2169 Py_XDECREF(ident);
2170 return -1;
2171 }
2172 Py_XDECREF(ident);
2173 }
2174 return rv;
2175 }
2176}
2177
2178static int
2179encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2180{
2181 /* Encode Python dict dct a JSON term, rval is a PyList */
2182 static PyObject *open_dict = NULL;
2183 static PyObject *close_dict = NULL;
2184 static PyObject *empty_dict = NULL;
2185 static PyObject *iteritems = NULL;
2186 PyObject *kstr = NULL;
2187 PyObject *ident = NULL;
2188 PyObject *key, *value;
2189 PyObject *iter = NULL;
2190 PyObject *item = NULL;
2191 PyObject *encoded = NULL;
2192 int skipkeys;
2193 Py_ssize_t idx;
2194
2195 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL) {
2196 open_dict = PyString_InternFromString("{");
2197 close_dict = PyString_InternFromString("}");
2198 empty_dict = PyString_InternFromString("{}");
2199 iteritems = PyString_InternFromString("iteritems");
2200 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL)
2201 return -1;
2202 }
2203 if (PyDict_Size(dct) == 0)
2204 return PyList_Append(rval, empty_dict);
2205
2206 if (s->markers != Py_None) {
2207 int has_key;
2208 ident = PyLong_FromVoidPtr(dct);
2209 if (ident == NULL)
2210 goto bail;
2211 has_key = PyDict_Contains(s->markers, ident);
2212 if (has_key) {
2213 if (has_key != -1)
2214 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2215 goto bail;
2216 }
2217 if (PyDict_SetItem(s->markers, ident, dct)) {
2218 goto bail;
2219 }
2220 }
2221
2222 if (PyList_Append(rval, open_dict))
2223 goto bail;
2224
2225 if (s->indent != Py_None) {
2226 /* TODO: DOES NOT RUN */
2227 indent_level += 1;
2228 /*
2229 newline_indent = '\n' + (_indent * _current_indent_level)
2230 separator = _item_separator + newline_indent
2231 buf += newline_indent
2232 */
2233 }
2234
2235 /* TODO: C speedup not implemented for sort_keys */
2236
2237 skipkeys = PyObject_IsTrue(s->skipkeys);
2238 idx = 0;
2239 iter = PyObject_CallMethodObjArgs(dct, iteritems, NULL);
2240 if (iter == NULL)
2241 goto bail;
2242 while ((item = PyIter_Next(iter))) {
2243
2244 key = PyTuple_GetItem(item, 0);
2245 if (key == NULL)
2246 goto bail;
2247 value = PyTuple_GetItem(item, 1);
2248 if (value == NULL)
2249 goto bail;
2250
2251 encoded = PyDict_GetItem(s->key_memo, key);
2252 if (encoded != NULL) {
2253 Py_INCREF(encoded);
2254 }
2255 else if (PyString_Check(key) || PyUnicode_Check(key)) {
2256 Py_INCREF(key);
2257 kstr = key;
2258 }
2259 else if (PyFloat_Check(key)) {
2260 kstr = encoder_encode_float(s, key);
2261 if (kstr == NULL)
2262 goto bail;
2263 }
2264 else if (PyInt_Check(key) || PyLong_Check(key)) {
2265 kstr = PyObject_Str(key);
2266 if (kstr == NULL)
2267 goto bail;
2268 }
2269 else if (key == Py_True || key == Py_False || key == Py_None) {
2270 kstr = _encoded_const(key);
2271 if (kstr == NULL)
2272 goto bail;
2273 }
2274 else if (skipkeys) {
2275 Py_DECREF(item);
2276 continue;
2277 }
2278 else {
2279 /* TODO: include repr of key */
2280 PyErr_SetString(PyExc_ValueError, "keys must be a string");
2281 goto bail;
2282 }
2283
2284 if (idx) {
2285 if (PyList_Append(rval, s->item_separator))
2286 goto bail;
2287 }
2288
2289 if (encoded == NULL) {
2290 encoded = encoder_encode_string(s, kstr);
2291 Py_CLEAR(kstr);
2292 if (encoded == NULL)
2293 goto bail;
2294 if (PyDict_SetItem(s->key_memo, key, encoded))
2295 goto bail;
2296 }
2297 if (PyList_Append(rval, encoded)) {
2298 goto bail;
2299 }
2300 Py_CLEAR(encoded);
2301 if (PyList_Append(rval, s->key_separator))
2302 goto bail;
2303 if (encoder_listencode_obj(s, rval, value, indent_level))
2304 goto bail;
2305 Py_CLEAR(item);
2306 idx += 1;
2307 }
2308 Py_CLEAR(iter);
2309 if (PyErr_Occurred())
2310 goto bail;
2311 if (ident != NULL) {
2312 if (PyDict_DelItem(s->markers, ident))
2313 goto bail;
2314 Py_CLEAR(ident);
2315 }
2316 if (s->indent != Py_None) {
2317 /* TODO: DOES NOT RUN */
2318 indent_level -= 1;
2319 /*
2320 yield '\n' + (_indent * _current_indent_level)
2321 */
2322 }
2323 if (PyList_Append(rval, close_dict))
2324 goto bail;
2325 return 0;
2326
2327bail:
2328 Py_XDECREF(encoded);
2329 Py_XDECREF(item);
2330 Py_XDECREF(iter);
2331 Py_XDECREF(kstr);
2332 Py_XDECREF(ident);
2333 return -1;
2334}
2335
2336
2337static int
2338encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2339{
2340 /* Encode Python list seq to a JSON term, rval is a PyList */
2341 static PyObject *open_array = NULL;
2342 static PyObject *close_array = NULL;
2343 static PyObject *empty_array = NULL;
2344 PyObject *ident = NULL;
2345 PyObject *iter = NULL;
2346 PyObject *obj = NULL;
2347 int is_true;
2348 int i = 0;
2349
2350 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2351 open_array = PyString_InternFromString("[");
2352 close_array = PyString_InternFromString("]");
2353 empty_array = PyString_InternFromString("[]");
2354 if (open_array == NULL || close_array == NULL || empty_array == NULL)
2355 return -1;
2356 }
2357 ident = NULL;
2358 is_true = PyObject_IsTrue(seq);
2359 if (is_true == -1)
2360 return -1;
2361 else if (is_true == 0)
2362 return PyList_Append(rval, empty_array);
2363
2364 if (s->markers != Py_None) {
2365 int has_key;
2366 ident = PyLong_FromVoidPtr(seq);
2367 if (ident == NULL)
2368 goto bail;
2369 has_key = PyDict_Contains(s->markers, ident);
2370 if (has_key) {
2371 if (has_key != -1)
2372 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2373 goto bail;
2374 }
2375 if (PyDict_SetItem(s->markers, ident, seq)) {
2376 goto bail;
2377 }
2378 }
2379
2380 iter = PyObject_GetIter(seq);
2381 if (iter == NULL)
2382 goto bail;
2383
2384 if (PyList_Append(rval, open_array))
2385 goto bail;
2386 if (s->indent != Py_None) {
2387 /* TODO: DOES NOT RUN */
2388 indent_level += 1;
2389 /*
2390 newline_indent = '\n' + (_indent * _current_indent_level)
2391 separator = _item_separator + newline_indent
2392 buf += newline_indent
2393 */
2394 }
2395 while ((obj = PyIter_Next(iter))) {
2396 if (i) {
2397 if (PyList_Append(rval, s->item_separator))
2398 goto bail;
2399 }
2400 if (encoder_listencode_obj(s, rval, obj, indent_level))
2401 goto bail;
2402 i++;
2403 Py_CLEAR(obj);
2404 }
2405 Py_CLEAR(iter);
2406 if (PyErr_Occurred())
2407 goto bail;
2408 if (ident != NULL) {
2409 if (PyDict_DelItem(s->markers, ident))
2410 goto bail;
2411 Py_CLEAR(ident);
2412 }
2413 if (s->indent != Py_None) {
2414 /* TODO: DOES NOT RUN */
2415 indent_level -= 1;
2416 /*
2417 yield '\n' + (_indent * _current_indent_level)
2418 */
2419 }
2420 if (PyList_Append(rval, close_array))
2421 goto bail;
2422 return 0;
2423
2424bail:
2425 Py_XDECREF(obj);
2426 Py_XDECREF(iter);
2427 Py_XDECREF(ident);
2428 return -1;
2429}
2430
2431static void
2432encoder_dealloc(PyObject *self)
2433{
2434 /* Deallocate Encoder */
2435 encoder_clear(self);
2436 Py_TYPE(self)->tp_free(self);
2437}
2438
2439static int
2440encoder_traverse(PyObject *self, visitproc visit, void *arg)
2441{
2442 PyEncoderObject *s;
2443 assert(PyEncoder_Check(self));
2444 s = (PyEncoderObject *)self;
2445 Py_VISIT(s->markers);
2446 Py_VISIT(s->defaultfn);
2447 Py_VISIT(s->encoder);
2448 Py_VISIT(s->indent);
2449 Py_VISIT(s->key_separator);
2450 Py_VISIT(s->item_separator);
2451 Py_VISIT(s->sort_keys);
2452 Py_VISIT(s->skipkeys);
2453 Py_VISIT(s->key_memo);
2454 return 0;
2455}
2456
2457static int
2458encoder_clear(PyObject *self)
2459{
2460 /* Deallocate Encoder */
2461 PyEncoderObject *s;
2462 assert(PyEncoder_Check(self));
2463 s = (PyEncoderObject *)self;
2464 Py_CLEAR(s->markers);
2465 Py_CLEAR(s->defaultfn);
2466 Py_CLEAR(s->encoder);
2467 Py_CLEAR(s->indent);
2468 Py_CLEAR(s->key_separator);
2469 Py_CLEAR(s->item_separator);
2470 Py_CLEAR(s->sort_keys);
2471 Py_CLEAR(s->skipkeys);
2472 Py_CLEAR(s->key_memo);
2473 return 0;
2474}
2475
/* Docstring installed as the Encoder type's tp_doc. */
PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");

/* Type object for the C encoder, published by the module initialiser under
   the name "make_encoder".  Instances are callable (tp_call) and take part
   in cyclic garbage collection (Py_TPFLAGS_HAVE_GC with tp_traverse and
   tp_clear).  Slots are positional, so their order must not change.
   NOTE(review): init_speedups assigns PyType_GenericNew to tp_new before
   PyType_Ready, replacing the encoder_new entry below -- confirm which of
   the two is intended. */
static
PyTypeObject PyEncoderType = {
    PyObject_HEAD_INIT(NULL)
    0,                    /* tp_internal */
    "simplejson._speedups.Encoder",       /* tp_name */
    sizeof(PyEncoderObject), /* tp_basicsize */
    0,                    /* tp_itemsize */
    encoder_dealloc, /* tp_dealloc */
    0,                    /* tp_print */
    0,                    /* tp_getattr */
    0,                    /* tp_setattr */
    0,                    /* tp_compare */
    0,                    /* tp_repr */
    0,                    /* tp_as_number */
    0,                    /* tp_as_sequence */
    0,                    /* tp_as_mapping */
    0,                    /* tp_hash */
    encoder_call,         /* tp_call */
    0,                    /* tp_str */
    0,                    /* tp_getattro */
    0,                    /* tp_setattro */
    0,                    /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
    encoder_doc,          /* tp_doc */
    encoder_traverse,     /* tp_traverse */
    encoder_clear,        /* tp_clear */
    0,                    /* tp_richcompare */
    0,                    /* tp_weaklistoffset */
    0,                    /* tp_iter */
    0,                    /* tp_iternext */
    0,                    /* tp_methods */
    encoder_members,      /* tp_members */
    0,                    /* tp_getset */
    0,                    /* tp_base */
    0,                    /* tp_dict */
    0,                    /* tp_descr_get */
    0,                    /* tp_descr_set */
    0,                    /* tp_dictoffset */
    encoder_init,         /* tp_init */
    0,                    /* tp_alloc */
    encoder_new,          /* tp_new */
    0,                    /* tp_free */
};
2521
/* Module-level functions exported by _speedups.  encode_basestring_ascii
   takes exactly one argument (METH_O); scanstring takes a positional
   argument tuple (METH_VARARGS).  The all-NULL entry terminates the table. */
static PyMethodDef speedups_methods[] = {
    {"encode_basestring_ascii",
        (PyCFunction)py_encode_basestring_ascii,
        METH_O,
        pydoc_encode_basestring_ascii},
    {"scanstring",
        (PyCFunction)py_scanstring,
        METH_VARARGS,
        pydoc_scanstring},
    {NULL, NULL, 0, NULL}
};

/* Docstring for the _speedups module itself. */
PyDoc_STRVAR(module_doc,
"simplejson speedups\n");
2536
2537void
2538init_speedups(void)
2539{
2540 PyObject *m, *decimal;
2541 PyScannerType.tp_new = PyType_GenericNew;
2542 if (PyType_Ready(&PyScannerType) < 0)
2543 return;
2544 PyEncoderType.tp_new = PyType_GenericNew;
2545 if (PyType_Ready(&PyEncoderType) < 0)
2546 return;
2547
2548 decimal = PyImport_ImportModule("decimal");
2549 if (decimal == NULL)
2550 return;
2551 DecimalTypePtr = (PyTypeObject*)PyObject_GetAttrString(decimal, "Decimal");
2552 Py_DECREF(decimal);
2553 if (DecimalTypePtr == NULL)
2554 return;
2555
2556 m = Py_InitModule3("_speedups", speedups_methods, module_doc);
2557 Py_INCREF((PyObject*)&PyScannerType);
2558 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2559 Py_INCREF((PyObject*)&PyEncoderType);
2560 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
2561}