blob: 5bd52cb78988e6f7855936f8800cef747cd65078 [file] [log] [blame]
Christian Heimes90540002008-05-08 14:29:10 +00001#include "Python.h"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00002#include "structmember.h"
/* Compatibility definitions for Python headers that lack newer names. */
#if !defined(Py_TYPE) && PY_VERSION_HEX < 0x02060000
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif

#if !defined(PY_SSIZE_T_MIN) && PY_VERSION_HEX < 0x02050000
/* No Py_ssize_t here: fall back to plain int and the long-based int API. */
typedef int Py_ssize_t;
#define PY_SSIZE_T_MAX INT_MAX
#define PY_SSIZE_T_MIN INT_MIN
#define PyInt_FromSsize_t PyInt_FromLong
#define PyInt_AsSsize_t PyInt_AsLong
#endif

#if !defined(Py_IS_FINITE)
#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
#endif

/* UNUSED tags a parameter as deliberately unused: a GCC attribute when
   __GNUC__ is defined, nothing otherwise. */
#if defined(__GNUC__)
#define UNUSED __attribute__((__unused__))
#else
#define UNUSED
#endif

22
23#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
24#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
25#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
26#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
27
28static PyTypeObject PyScannerType;
29static PyTypeObject PyEncoderType;
30
31typedef struct _PyScannerObject {
32 PyObject_HEAD
33 PyObject *strict;
34 PyObject *object_hook;
35 PyObject *object_pairs_hook;
36 PyObject *parse_float;
37 PyObject *parse_int;
38 PyObject *parse_constant;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +000039 PyObject *memo;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000040} PyScannerObject;
41
42static PyMemberDef scanner_members[] = {
43 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
44 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
45 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
46 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
47 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
48 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
49 {NULL}
50};
51
52typedef struct _PyEncoderObject {
53 PyObject_HEAD
54 PyObject *markers;
55 PyObject *defaultfn;
56 PyObject *encoder;
57 PyObject *indent;
58 PyObject *key_separator;
59 PyObject *item_separator;
60 PyObject *sort_keys;
61 PyObject *skipkeys;
62 int fast_encode;
63 int allow_nan;
64} PyEncoderObject;
65
66static PyMemberDef encoder_members[] = {
67 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
68 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
69 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
70 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
71 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
72 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
73 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
74 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
75 {NULL}
76};
77
78static PyObject *
79ascii_escape_unicode(PyObject *pystr);
80static PyObject *
81py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
82void init_json(void);
83static PyObject *
84scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
85static PyObject *
86_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
87static PyObject *
88scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
89static int
90scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
91static void
92scanner_dealloc(PyObject *self);
93static int
94scanner_clear(PyObject *self);
95static PyObject *
96encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
97static int
98encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
99static void
100encoder_dealloc(PyObject *self);
101static int
102encoder_clear(PyObject *self);
103static int
104encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
105static int
106encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
107static int
108encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
109static PyObject *
Hirokazu Yamamotofecf5d12009-05-02 15:55:19 +0000110_encoded_const(PyObject *obj);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000111static void
112raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
113static PyObject *
114encoder_encode_string(PyEncoderObject *s, PyObject *obj);
115static int
116_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
117static PyObject *
118_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
119static PyObject *
120encoder_encode_float(PyEncoderObject *s, PyObject *obj);
121
/* S_CHAR(c): true when c can be copied into ASCII JSON output unescaped,
   i.e. it lies in ' '..'~' and is neither a backslash nor a double quote.
   The argument is parenthesized so that compound expressions expand
   correctly; note that it is still evaluated more than once. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* IS_WHITESPACE(c): true for space, tab, newline and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* Output characters needed for one escaped input character: "\uXXXX" is 6;
   on wide-unicode builds a surrogate pair "\uXXXX\uXXXX" needs twice that. */
#define MIN_EXPANSION 6
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif

131
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000132static int
133_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000134{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000135 /* PyObject to Py_ssize_t converter */
136 *size_ptr = PyLong_AsSsize_t(o);
Georg Brandl59682052009-05-05 07:52:05 +0000137 if (*size_ptr == -1 && PyErr_Occurred())
138 return 0;
139 return 1;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000140}
141
142static PyObject *
143_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
144{
145 /* Py_ssize_t to PyObject converter */
146 return PyLong_FromSsize_t(*size_ptr);
147}
148
149static Py_ssize_t
150ascii_escape_unichar(Py_UNICODE c, Py_UNICODE *output, Py_ssize_t chars)
151{
152 /* Escape unicode code point c to ASCII escape sequences
153 in char *output. output must have at least 12 bytes unused to
154 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
Christian Heimes90540002008-05-08 14:29:10 +0000155 output[chars++] = '\\';
156 switch (c) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000157 case '\\': output[chars++] = c; break;
158 case '"': output[chars++] = c; break;
Christian Heimes90540002008-05-08 14:29:10 +0000159 case '\b': output[chars++] = 'b'; break;
160 case '\f': output[chars++] = 'f'; break;
161 case '\n': output[chars++] = 'n'; break;
162 case '\r': output[chars++] = 'r'; break;
163 case '\t': output[chars++] = 't'; break;
164 default:
165#ifdef Py_UNICODE_WIDE
166 if (c >= 0x10000) {
167 /* UTF-16 surrogate pair */
168 Py_UNICODE v = c - 0x10000;
169 c = 0xd800 | ((v >> 10) & 0x3ff);
170 output[chars++] = 'u';
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000171 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
172 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
173 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
174 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000175 c = 0xdc00 | (v & 0x3ff);
176 output[chars++] = '\\';
177 }
178#endif
179 output[chars++] = 'u';
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000180 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
181 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
182 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
183 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
Christian Heimes90540002008-05-08 14:29:10 +0000184 }
185 return chars;
186}
187
188static PyObject *
189ascii_escape_unicode(PyObject *pystr)
190{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000191 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Christian Heimes90540002008-05-08 14:29:10 +0000192 Py_ssize_t i;
193 Py_ssize_t input_chars;
194 Py_ssize_t output_size;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000195 Py_ssize_t max_output_size;
Christian Heimes90540002008-05-08 14:29:10 +0000196 Py_ssize_t chars;
197 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000198 Py_UNICODE *output;
Christian Heimes90540002008-05-08 14:29:10 +0000199 Py_UNICODE *input_unicode;
200
201 input_chars = PyUnicode_GET_SIZE(pystr);
202 input_unicode = PyUnicode_AS_UNICODE(pystr);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000203
Christian Heimes90540002008-05-08 14:29:10 +0000204 /* One char input can be up to 6 chars output, estimate 4 of these */
205 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000206 max_output_size = 2 + (input_chars * MAX_EXPANSION);
207 rval = PyUnicode_FromStringAndSize(NULL, output_size);
Christian Heimes90540002008-05-08 14:29:10 +0000208 if (rval == NULL) {
209 return NULL;
210 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000211 output = PyUnicode_AS_UNICODE(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000212 chars = 0;
213 output[chars++] = '"';
214 for (i = 0; i < input_chars; i++) {
215 Py_UNICODE c = input_unicode[i];
216 if (S_CHAR(c)) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000217 output[chars++] = c;
Christian Heimes90540002008-05-08 14:29:10 +0000218 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000219 else {
220 chars = ascii_escape_unichar(c, output, chars);
Christian Heimes90540002008-05-08 14:29:10 +0000221 }
222 if (output_size - chars < (1 + MAX_EXPANSION)) {
223 /* There's more than four, so let's resize by a lot */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000224 Py_ssize_t new_output_size = output_size * 2;
Christian Heimes90540002008-05-08 14:29:10 +0000225 /* This is an upper bound */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000226 if (new_output_size > max_output_size) {
227 new_output_size = max_output_size;
Christian Heimes90540002008-05-08 14:29:10 +0000228 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000229 /* Make sure that the output size changed before resizing */
230 if (new_output_size != output_size) {
231 output_size = new_output_size;
232 if (PyUnicode_Resize(&rval, output_size) == -1) {
233 return NULL;
234 }
235 output = PyUnicode_AS_UNICODE(rval);
Christian Heimes90540002008-05-08 14:29:10 +0000236 }
Christian Heimes90540002008-05-08 14:29:10 +0000237 }
238 }
239 output[chars++] = '"';
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000240 if (PyUnicode_Resize(&rval, chars) == -1) {
Christian Heimes90540002008-05-08 14:29:10 +0000241 return NULL;
242 }
243 return rval;
244}
245
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000246static void
Christian Heimes90540002008-05-08 14:29:10 +0000247raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
248{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000249 /* Use the Python function json.decoder.errmsg to raise a nice
250 looking ValueError exception */
Christian Heimes90540002008-05-08 14:29:10 +0000251 static PyObject *errmsg_fn = NULL;
252 PyObject *pymsg;
253 if (errmsg_fn == NULL) {
254 PyObject *decoder = PyImport_ImportModule("json.decoder");
255 if (decoder == NULL)
256 return;
257 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000258 Py_DECREF(decoder);
Christian Heimes90540002008-05-08 14:29:10 +0000259 if (errmsg_fn == NULL)
260 return;
Christian Heimes90540002008-05-08 14:29:10 +0000261 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000262 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
Benjamin Petersona13d4752008-10-16 21:17:24 +0000263 if (pymsg) {
264 PyErr_SetObject(PyExc_ValueError, pymsg);
265 Py_DECREF(pymsg);
266 }
Christian Heimes90540002008-05-08 14:29:10 +0000267}
268
269static PyObject *
270join_list_unicode(PyObject *lst)
271{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000272 /* return u''.join(lst) */
273 static PyObject *sep = NULL;
274 if (sep == NULL) {
275 sep = PyUnicode_FromStringAndSize("", 0);
276 if (sep == NULL)
277 return NULL;
Christian Heimes90540002008-05-08 14:29:10 +0000278 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000279 return PyUnicode_Join(sep, lst);
280}
281
282static PyObject *
283_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
284 /* return (rval, idx) tuple, stealing reference to rval */
285 PyObject *tpl;
286 PyObject *pyidx;
287 /*
288 steal a reference to rval, returns (rval, idx)
289 */
290 if (rval == NULL) {
Christian Heimes90540002008-05-08 14:29:10 +0000291 return NULL;
292 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000293 pyidx = PyLong_FromSsize_t(idx);
294 if (pyidx == NULL) {
295 Py_DECREF(rval);
296 return NULL;
297 }
298 tpl = PyTuple_New(2);
299 if (tpl == NULL) {
300 Py_DECREF(pyidx);
301 Py_DECREF(rval);
302 return NULL;
303 }
304 PyTuple_SET_ITEM(tpl, 0, rval);
305 PyTuple_SET_ITEM(tpl, 1, pyidx);
306 return tpl;
Christian Heimes90540002008-05-08 14:29:10 +0000307}
308
/* APPEND_OLD_CHUNK: if a pending `chunk` exists, append it to the `chunks`
   list (creating the list on first use) and reset `chunk` to NULL.

   Expects in the enclosing scope: PyObject *chunk, PyObject *chunks, and a
   `bail` label that releases both with Py_XDECREF.  On append failure
   `chunk` is released AND set to NULL before jumping, so that the cleanup
   at `bail` does not drop the same reference a second time.

   The macro expands to a bare `if` block and is written without a trailing
   semicolon at its use sites. */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        if (chunks == NULL) { \
            chunks = PyList_New(0); \
            if (chunks == NULL) { \
                goto bail; \
            } \
        } \
        if (PyList_Append(chunks, chunk)) { \
            Py_CLEAR(chunk); \
            goto bail; \
        } \
        Py_CLEAR(chunk); \
    }

323
Christian Heimes90540002008-05-08 14:29:10 +0000324static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000325scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
Christian Heimes90540002008-05-08 14:29:10 +0000326{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000327 /* Read the JSON string from PyUnicode pystr.
328 end is the index of the first character after the quote.
329 if strict is zero then literal control characters are allowed
330 *next_end_ptr is a return-by-reference index of the character
331 after the end quote
Christian Heimes90540002008-05-08 14:29:10 +0000332
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000333 Return value is a new PyUnicode
334 */
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000335 PyObject *rval = NULL;
Christian Heimes90540002008-05-08 14:29:10 +0000336 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
337 Py_ssize_t begin = end - 1;
338 Py_ssize_t next = begin;
339 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000340 PyObject *chunks = NULL;
341 PyObject *chunk = NULL;
342
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000343 if (end < 0 || len <= end) {
344 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
345 goto bail;
346 }
Christian Heimes90540002008-05-08 14:29:10 +0000347 while (1) {
348 /* Find the end of the string or the next escape */
349 Py_UNICODE c = 0;
Christian Heimes90540002008-05-08 14:29:10 +0000350 for (next = end; next < len; next++) {
351 c = buf[next];
352 if (c == '"' || c == '\\') {
353 break;
354 }
355 else if (strict && c <= 0x1f) {
Benjamin Peterson7af6eec2008-07-19 22:26:35 +0000356 raise_errmsg("Invalid control character at", pystr, next);
Christian Heimes90540002008-05-08 14:29:10 +0000357 goto bail;
358 }
359 }
360 if (!(c == '"' || c == '\\')) {
361 raise_errmsg("Unterminated string starting at", pystr, begin);
362 goto bail;
363 }
364 /* Pick up this chunk if it's not zero length */
365 if (next != end) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000366 APPEND_OLD_CHUNK
Christian Heimes90540002008-05-08 14:29:10 +0000367 chunk = PyUnicode_FromUnicode(&buf[end], next - end);
368 if (chunk == NULL) {
369 goto bail;
370 }
Christian Heimes90540002008-05-08 14:29:10 +0000371 }
372 next++;
373 if (c == '"') {
374 end = next;
375 break;
376 }
377 if (next == len) {
378 raise_errmsg("Unterminated string starting at", pystr, begin);
379 goto bail;
380 }
381 c = buf[next];
382 if (c != 'u') {
383 /* Non-unicode backslash escapes */
384 end = next + 1;
385 switch (c) {
386 case '"': break;
387 case '\\': break;
388 case '/': break;
389 case 'b': c = '\b'; break;
390 case 'f': c = '\f'; break;
391 case 'n': c = '\n'; break;
392 case 'r': c = '\r'; break;
393 case 't': c = '\t'; break;
394 default: c = 0;
395 }
396 if (c == 0) {
397 raise_errmsg("Invalid \\escape", pystr, end - 2);
398 goto bail;
399 }
400 }
401 else {
402 c = 0;
403 next++;
404 end = next + 4;
405 if (end >= len) {
406 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
407 goto bail;
408 }
409 /* Decode 4 hex digits */
410 for (; next < end; next++) {
Christian Heimes90540002008-05-08 14:29:10 +0000411 Py_UNICODE digit = buf[next];
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000412 c <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000413 switch (digit) {
414 case '0': case '1': case '2': case '3': case '4':
415 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000416 c |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000417 case 'a': case 'b': case 'c': case 'd': case 'e':
418 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000419 c |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000420 case 'A': case 'B': case 'C': case 'D': case 'E':
421 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000422 c |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000423 default:
424 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
425 goto bail;
426 }
427 }
428#ifdef Py_UNICODE_WIDE
429 /* Surrogate pair */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000430 if ((c & 0xfc00) == 0xd800) {
Christian Heimes90540002008-05-08 14:29:10 +0000431 Py_UNICODE c2 = 0;
432 if (end + 6 >= len) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000433 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
434 goto bail;
Christian Heimes90540002008-05-08 14:29:10 +0000435 }
436 if (buf[next++] != '\\' || buf[next++] != 'u') {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000437 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
438 goto bail;
Christian Heimes90540002008-05-08 14:29:10 +0000439 }
440 end += 6;
441 /* Decode 4 hex digits */
442 for (; next < end; next++) {
Christian Heimes90540002008-05-08 14:29:10 +0000443 Py_UNICODE digit = buf[next];
Antoine Pitrou5b0e9e82010-10-09 15:24:28 +0000444 c2 <<= 4;
Christian Heimes90540002008-05-08 14:29:10 +0000445 switch (digit) {
446 case '0': case '1': case '2': case '3': case '4':
447 case '5': case '6': case '7': case '8': case '9':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000448 c2 |= (digit - '0'); break;
Christian Heimes90540002008-05-08 14:29:10 +0000449 case 'a': case 'b': case 'c': case 'd': case 'e':
450 case 'f':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000451 c2 |= (digit - 'a' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000452 case 'A': case 'B': case 'C': case 'D': case 'E':
453 case 'F':
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000454 c2 |= (digit - 'A' + 10); break;
Christian Heimes90540002008-05-08 14:29:10 +0000455 default:
456 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
457 goto bail;
458 }
459 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000460 if ((c2 & 0xfc00) != 0xdc00) {
461 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
462 goto bail;
463 }
Christian Heimes90540002008-05-08 14:29:10 +0000464 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
465 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000466 else if ((c & 0xfc00) == 0xdc00) {
467 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
468 goto bail;
469 }
Christian Heimes90540002008-05-08 14:29:10 +0000470#endif
471 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000472 APPEND_OLD_CHUNK
Christian Heimes90540002008-05-08 14:29:10 +0000473 chunk = PyUnicode_FromUnicode(&c, 1);
474 if (chunk == NULL) {
475 goto bail;
476 }
Christian Heimes90540002008-05-08 14:29:10 +0000477 }
478
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000479 if (chunks == NULL) {
480 if (chunk != NULL)
481 rval = chunk;
482 else
483 rval = PyUnicode_FromStringAndSize("", 0);
Christian Heimes90540002008-05-08 14:29:10 +0000484 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000485 else {
486 APPEND_OLD_CHUNK
487 rval = join_list_unicode(chunks);
488 if (rval == NULL) {
489 goto bail;
490 }
491 Py_CLEAR(chunks);
492 }
493
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000494 *next_end_ptr = end;
495 return rval;
Christian Heimes90540002008-05-08 14:29:10 +0000496bail:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000497 *next_end_ptr = -1;
Christian Heimes90540002008-05-08 14:29:10 +0000498 Py_XDECREF(chunks);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000499 Py_XDECREF(chunk);
Christian Heimes90540002008-05-08 14:29:10 +0000500 return NULL;
501}
502
503PyDoc_STRVAR(pydoc_scanstring,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000504 "scanstring(string, end, strict=True) -> (string, end)\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000505 "\n"
506 "Scan the string s for a JSON string. End is the index of the\n"
507 "character in s after the quote that started the JSON string.\n"
508 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
509 "on attempt to decode an invalid string. If strict is False then literal\n"
510 "control characters are allowed in the string.\n"
511 "\n"
512 "Returns a tuple of the decoded string and the index of the character in s\n"
513 "after the end quote."
514);
Christian Heimes90540002008-05-08 14:29:10 +0000515
516static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000517py_scanstring(PyObject* self UNUSED, PyObject *args)
Christian Heimes90540002008-05-08 14:29:10 +0000518{
519 PyObject *pystr;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000520 PyObject *rval;
Christian Heimes90540002008-05-08 14:29:10 +0000521 Py_ssize_t end;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000522 Py_ssize_t next_end = -1;
523 int strict = 1;
524 if (!PyArg_ParseTuple(args, "OO&|i:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &strict)) {
Christian Heimes90540002008-05-08 14:29:10 +0000525 return NULL;
526 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000527 if (PyUnicode_Check(pystr)) {
528 rval = scanstring_unicode(pystr, end, strict, &next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000529 }
530 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000531 PyErr_Format(PyExc_TypeError,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000532 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000533 Py_TYPE(pystr)->tp_name);
534 return NULL;
535 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000536 return _build_rval_index_tuple(rval, next_end);
Christian Heimes90540002008-05-08 14:29:10 +0000537}
538
539PyDoc_STRVAR(pydoc_encode_basestring_ascii,
Georg Brandlc8284cf2010-08-02 20:16:18 +0000540 "encode_basestring_ascii(string) -> string\n"
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000541 "\n"
542 "Return an ASCII-only JSON representation of a Python string"
543);
Christian Heimes90540002008-05-08 14:29:10 +0000544
545static PyObject *
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000546py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
Christian Heimes90540002008-05-08 14:29:10 +0000547{
548 PyObject *rval;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000549 /* Return an ASCII-only JSON representation of a Python string */
Christian Heimes90540002008-05-08 14:29:10 +0000550 /* METH_O */
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000551 if (PyUnicode_Check(pystr)) {
Christian Heimes90540002008-05-08 14:29:10 +0000552 rval = ascii_escape_unicode(pystr);
553 }
554 else {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000555 PyErr_Format(PyExc_TypeError,
556 "first argument must be a string, not %.80s",
Christian Heimes90540002008-05-08 14:29:10 +0000557 Py_TYPE(pystr)->tp_name);
558 return NULL;
559 }
Christian Heimes90540002008-05-08 14:29:10 +0000560 return rval;
561}
562
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000563static void
564scanner_dealloc(PyObject *self)
565{
566 /* Deallocate scanner object */
567 scanner_clear(self);
568 Py_TYPE(self)->tp_free(self);
569}
570
571static int
572scanner_traverse(PyObject *self, visitproc visit, void *arg)
573{
574 PyScannerObject *s;
575 assert(PyScanner_Check(self));
576 s = (PyScannerObject *)self;
577 Py_VISIT(s->strict);
578 Py_VISIT(s->object_hook);
579 Py_VISIT(s->object_pairs_hook);
580 Py_VISIT(s->parse_float);
581 Py_VISIT(s->parse_int);
582 Py_VISIT(s->parse_constant);
583 return 0;
584}
585
586static int
587scanner_clear(PyObject *self)
588{
589 PyScannerObject *s;
590 assert(PyScanner_Check(self));
591 s = (PyScannerObject *)self;
592 Py_CLEAR(s->strict);
593 Py_CLEAR(s->object_hook);
594 Py_CLEAR(s->object_pairs_hook);
595 Py_CLEAR(s->parse_float);
596 Py_CLEAR(s->parse_int);
597 Py_CLEAR(s->parse_constant);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000598 Py_CLEAR(s->memo);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000599 return 0;
600}
601
602static PyObject *
603_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
604 /* Read a JSON object from PyUnicode pystr.
605 idx is the index of the first character after the opening curly brace.
606 *next_idx_ptr is a return-by-reference index to the first character after
607 the closing curly brace.
608
609 Returns a new PyObject (usually a dict, but object_hook can change that)
610 */
611 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
612 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
613 PyObject *val = NULL;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000614 PyObject *rval = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000615 PyObject *key = NULL;
616 int strict = PyObject_IsTrue(s->strict);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000617 int has_pairs_hook = (s->object_pairs_hook != Py_None);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000618 Py_ssize_t next_idx;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000619
620 if (has_pairs_hook)
621 rval = PyList_New(0);
622 else
623 rval = PyDict_New();
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000624 if (rval == NULL)
625 return NULL;
626
627 /* skip whitespace after { */
628 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
629
630 /* only loop if the object is non-empty */
631 if (idx <= end_idx && str[idx] != '}') {
632 while (idx <= end_idx) {
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000633 PyObject *memokey;
634
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000635 /* read key */
636 if (str[idx] != '"') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200637 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000638 goto bail;
639 }
640 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
641 if (key == NULL)
642 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000643 memokey = PyDict_GetItem(s->memo, key);
644 if (memokey != NULL) {
645 Py_INCREF(memokey);
646 Py_DECREF(key);
647 key = memokey;
648 }
649 else {
650 if (PyDict_SetItem(s->memo, key, key) < 0)
651 goto bail;
652 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000653 idx = next_idx;
654
655 /* skip whitespace between key and : delimiter, read :, skip whitespace */
656 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
657 if (idx > end_idx || str[idx] != ':') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200658 raise_errmsg("Expecting ':' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000659 goto bail;
660 }
661 idx++;
662 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
663
664 /* read any JSON term */
665 val = scan_once_unicode(s, pystr, idx, &next_idx);
666 if (val == NULL)
667 goto bail;
668
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000669 if (has_pairs_hook) {
670 PyObject *item = PyTuple_Pack(2, key, val);
671 if (item == NULL)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000672 goto bail;
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000673 Py_CLEAR(key);
674 Py_CLEAR(val);
675 if (PyList_Append(rval, item) == -1) {
676 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000677 goto bail;
678 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000679 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000680 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000681 else {
682 if (PyDict_SetItem(rval, key, val) < 0)
683 goto bail;
684 Py_CLEAR(key);
685 Py_CLEAR(val);
686 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000687 idx = next_idx;
688
689 /* skip whitespace before } or , */
690 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
691
692 /* bail if the object is closed or we didn't get the , delimiter */
693 if (idx > end_idx) break;
694 if (str[idx] == '}') {
695 break;
696 }
697 else if (str[idx] != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200698 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000699 goto bail;
700 }
701 idx++;
702
703 /* skip whitespace after , delimiter */
704 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
705 }
706 }
707
708 /* verify that idx < end_idx, str[idx] should be '}' */
709 if (idx > end_idx || str[idx] != '}') {
710 raise_errmsg("Expecting object", pystr, end_idx);
711 goto bail;
712 }
713
714 *next_idx_ptr = idx + 1;
715
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000716 if (has_pairs_hook) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000717 val = PyObject_CallFunctionObjArgs(s->object_pairs_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000718 Py_DECREF(rval);
719 return val;
720 }
721
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000722 /* if object_hook is not None: rval = object_hook(rval) */
723 if (s->object_hook != Py_None) {
724 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000725 Py_DECREF(rval);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000726 return val;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000727 }
728 return rval;
729bail:
730 Py_XDECREF(key);
731 Py_XDECREF(val);
Antoine Pitrou7d6e0762010-09-04 20:16:53 +0000732 Py_XDECREF(rval);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000733 return NULL;
734}
735
736static PyObject *
737_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
738 /* Read a JSON array from PyString pystr.
739 idx is the index of the first character after the opening brace.
740 *next_idx_ptr is a return-by-reference index to the first character after
741 the closing brace.
742
743 Returns a new PyList
744 */
745 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
746 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
747 PyObject *val = NULL;
748 PyObject *rval = PyList_New(0);
749 Py_ssize_t next_idx;
750 if (rval == NULL)
751 return NULL;
752
753 /* skip whitespace after [ */
754 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
755
756 /* only loop if the array is non-empty */
757 if (idx <= end_idx && str[idx] != ']') {
758 while (idx <= end_idx) {
759
760 /* read any JSON term */
761 val = scan_once_unicode(s, pystr, idx, &next_idx);
762 if (val == NULL)
763 goto bail;
764
765 if (PyList_Append(rval, val) == -1)
766 goto bail;
767
768 Py_CLEAR(val);
769 idx = next_idx;
770
771 /* skip whitespace between term and , */
772 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
773
774 /* bail if the array is closed or we didn't get the , delimiter */
775 if (idx > end_idx) break;
776 if (str[idx] == ']') {
777 break;
778 }
779 else if (str[idx] != ',') {
Antoine Pitrou2d24e942012-06-29 01:58:26 +0200780 raise_errmsg("Expecting ',' delimiter", pystr, idx);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000781 goto bail;
782 }
783 idx++;
784
785 /* skip whitespace after , */
786 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
787 }
788 }
789
790 /* verify that idx < end_idx, str[idx] should be ']' */
791 if (idx > end_idx || str[idx] != ']') {
792 raise_errmsg("Expecting object", pystr, end_idx);
793 goto bail;
794 }
795 *next_idx_ptr = idx + 1;
796 return rval;
797bail:
798 Py_XDECREF(val);
799 Py_DECREF(rval);
800 return NULL;
801}
802
803static PyObject *
804_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
805 /* Read a JSON constant from PyString pystr.
806 constant is the constant string that was found
807 ("NaN", "Infinity", "-Infinity").
808 idx is the index of the first character of the constant
809 *next_idx_ptr is a return-by-reference index to the first character after
810 the constant.
811
812 Returns the result of parse_constant
813 */
814 PyObject *cstr;
815 PyObject *rval;
816 /* constant is "NaN", "Infinity", or "-Infinity" */
817 cstr = PyUnicode_InternFromString(constant);
818 if (cstr == NULL)
819 return NULL;
820
821 /* rval = parse_constant(constant) */
822 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
823 idx += PyUnicode_GET_SIZE(cstr);
824 Py_DECREF(cstr);
825 *next_idx_ptr = idx;
826 return rval;
827}
828
829static PyObject *
830_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
831 /* Read a JSON number from PyUnicode pystr.
832 idx is the index of the first character of the number
833 *next_idx_ptr is a return-by-reference index to the first character after
834 the number.
835
836 Returns a new PyObject representation of that number:
837 PyInt, PyLong, or PyFloat.
838 May return other types if parse_int or parse_float are set
839 */
840 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
841 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
842 Py_ssize_t idx = start;
843 int is_float = 0;
844 PyObject *rval;
845 PyObject *numstr;
846
847 /* read a sign if it's there, make sure it's not the end of the string */
848 if (str[idx] == '-') {
849 idx++;
850 if (idx > end_idx) {
851 PyErr_SetNone(PyExc_StopIteration);
852 return NULL;
853 }
854 }
855
856 /* read as many integer digits as we find as long as it doesn't start with 0 */
857 if (str[idx] >= '1' && str[idx] <= '9') {
858 idx++;
859 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
860 }
861 /* if it starts with 0 we only expect one integer digit */
862 else if (str[idx] == '0') {
863 idx++;
864 }
865 /* no integer digits, error */
866 else {
867 PyErr_SetNone(PyExc_StopIteration);
868 return NULL;
869 }
870
871 /* if the next char is '.' followed by a digit then read all float digits */
872 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
873 is_float = 1;
874 idx += 2;
875 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
876 }
877
878 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
879 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
880 Py_ssize_t e_start = idx;
881 idx++;
882
883 /* read an exponent sign if present */
884 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
885
886 /* read all digits */
887 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
888
889 /* if we got a digit, then parse as float. if not, backtrack */
890 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
891 is_float = 1;
892 }
893 else {
894 idx = e_start;
895 }
896 }
897
898 /* copy the section we determined to be a number */
899 numstr = PyUnicode_FromUnicode(&str[start], idx - start);
900 if (numstr == NULL)
901 return NULL;
902 if (is_float) {
903 /* parse as a float using a fast path if available, otherwise call user defined method */
904 if (s->parse_float != (PyObject *)&PyFloat_Type) {
905 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
906 }
907 else {
908 rval = PyFloat_FromString(numstr);
909 }
910 }
911 else {
912 /* no fast path for unicode -> int, just call */
913 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
914 }
915 Py_DECREF(numstr);
916 *next_idx_ptr = idx;
917 return rval;
918}
919
920static PyObject *
921scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
922{
923 /* Read one JSON term (of any kind) from PyUnicode pystr.
924 idx is the index of the first character of the term
925 *next_idx_ptr is a return-by-reference index to the first character after
926 the number.
927
928 Returns a new PyObject representation of the term.
929 */
Ezio Melotti362b9512011-05-07 17:58:09 +0300930 PyObject *res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000931 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
932 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
Benjamin Peterson99b5afa2014-04-13 22:10:38 -0400933 if (idx < 0)
934 /* Compatibility with Python version. */
935 idx += length;
936 if (idx < 0 || idx >= length) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000937 PyErr_SetNone(PyExc_StopIteration);
938 return NULL;
939 }
940 switch (str[idx]) {
941 case '"':
942 /* string */
943 return scanstring_unicode(pystr, idx + 1,
944 PyObject_IsTrue(s->strict),
945 next_idx_ptr);
946 case '{':
947 /* object */
Ezio Melotti362b9512011-05-07 17:58:09 +0300948 if (Py_EnterRecursiveCall(" while decoding a JSON object "
949 "from a unicode string"))
950 return NULL;
951 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
952 Py_LeaveRecursiveCall();
953 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000954 case '[':
955 /* array */
Ezio Melotti362b9512011-05-07 17:58:09 +0300956 if (Py_EnterRecursiveCall(" while decoding a JSON array "
957 "from a unicode string"))
958 return NULL;
959 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
960 Py_LeaveRecursiveCall();
961 return res;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000962 case 'n':
963 /* null */
964 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
965 Py_INCREF(Py_None);
966 *next_idx_ptr = idx + 4;
967 return Py_None;
968 }
969 break;
970 case 't':
971 /* true */
972 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
973 Py_INCREF(Py_True);
974 *next_idx_ptr = idx + 4;
975 return Py_True;
976 }
977 break;
978 case 'f':
979 /* false */
980 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
981 Py_INCREF(Py_False);
982 *next_idx_ptr = idx + 5;
983 return Py_False;
984 }
985 break;
986 case 'N':
987 /* NaN */
988 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
989 return _parse_constant(s, "NaN", idx, next_idx_ptr);
990 }
991 break;
992 case 'I':
993 /* Infinity */
994 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
995 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
996 }
997 break;
998 case '-':
999 /* -Infinity */
1000 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1001 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1002 }
1003 break;
1004 }
1005 /* Didn't find a string, object, array, or named constant. Look for a number. */
1006 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1007}
1008
1009static PyObject *
1010scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1011{
1012 /* Python callable interface to scan_once_{str,unicode} */
1013 PyObject *pystr;
1014 PyObject *rval;
1015 Py_ssize_t idx;
1016 Py_ssize_t next_idx = -1;
1017 static char *kwlist[] = {"string", "idx", NULL};
1018 PyScannerObject *s;
1019 assert(PyScanner_Check(self));
1020 s = (PyScannerObject *)self;
1021 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1022 return NULL;
1023
1024 if (PyUnicode_Check(pystr)) {
1025 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1026 }
1027 else {
1028 PyErr_Format(PyExc_TypeError,
1029 "first argument must be a string, not %.80s",
1030 Py_TYPE(pystr)->tp_name);
1031 return NULL;
1032 }
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001033 PyDict_Clear(s->memo);
1034 if (rval == NULL)
1035 return NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001036 return _build_rval_index_tuple(rval, next_idx);
1037}
1038
1039static PyObject *
1040scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1041{
1042 PyScannerObject *s;
1043 s = (PyScannerObject *)type->tp_alloc(type, 0);
1044 if (s != NULL) {
1045 s->strict = NULL;
1046 s->object_hook = NULL;
1047 s->object_pairs_hook = NULL;
1048 s->parse_float = NULL;
1049 s->parse_int = NULL;
1050 s->parse_constant = NULL;
1051 }
1052 return (PyObject *)s;
1053}
1054
1055static int
1056scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1057{
1058 /* Initialize Scanner object */
1059 PyObject *ctx;
1060 static char *kwlist[] = {"context", NULL};
1061 PyScannerObject *s;
1062
1063 assert(PyScanner_Check(self));
1064 s = (PyScannerObject *)self;
1065
1066 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1067 return -1;
1068
Antoine Pitrou7d6e0762010-09-04 20:16:53 +00001069 if (s->memo == NULL) {
1070 s->memo = PyDict_New();
1071 if (s->memo == NULL)
1072 goto bail;
1073 }
1074
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001075 /* All of these will fail "gracefully" so we don't need to verify them */
1076 s->strict = PyObject_GetAttrString(ctx, "strict");
1077 if (s->strict == NULL)
1078 goto bail;
1079 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1080 if (s->object_hook == NULL)
1081 goto bail;
1082 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1083 if (s->object_pairs_hook == NULL)
1084 goto bail;
1085 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1086 if (s->parse_float == NULL)
1087 goto bail;
1088 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1089 if (s->parse_int == NULL)
1090 goto bail;
1091 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1092 if (s->parse_constant == NULL)
1093 goto bail;
1094
1095 return 0;
1096
1097bail:
1098 Py_CLEAR(s->strict);
1099 Py_CLEAR(s->object_hook);
1100 Py_CLEAR(s->object_pairs_hook);
1101 Py_CLEAR(s->parse_float);
1102 Py_CLEAR(s->parse_int);
1103 Py_CLEAR(s->parse_constant);
1104 return -1;
1105}
1106
1107PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1108
/* Type object for "_json.Scanner".
   The initializer is positional, so the order of the entries is significant;
   the trailing comment on each line names the slot being filled.
   Instances are callable (tp_call = scanner_call), take part in cyclic GC
   (Py_TPFLAGS_HAVE_GC together with scanner_traverse / scanner_clear) and
   expose the fields listed in scanner_members.
   NOTE: PyInit__json assigns PyType_GenericNew to tp_new before calling
   PyType_Ready, so the scanner_new entry below is replaced at module init.
   The slots left as 0 with a commented-out name are presumably filled in by
   inheritance during PyType_Ready - confirm against the C API docs. */
1109static
1110PyTypeObject PyScannerType = {
1111 PyVarObject_HEAD_INIT(NULL, 0)
1112 "_json.Scanner", /* tp_name */
1113 sizeof(PyScannerObject), /* tp_basicsize */
1114 0, /* tp_itemsize */
1115 scanner_dealloc, /* tp_dealloc */
1116 0, /* tp_print */
1117 0, /* tp_getattr */
1118 0, /* tp_setattr */
1119 0, /* tp_compare */
1120 0, /* tp_repr */
1121 0, /* tp_as_number */
1122 0, /* tp_as_sequence */
1123 0, /* tp_as_mapping */
1124 0, /* tp_hash */
1125 scanner_call, /* tp_call */
1126 0, /* tp_str */
1127 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1128 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1129 0, /* tp_as_buffer */
1130 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1131 scanner_doc, /* tp_doc */
1132 scanner_traverse, /* tp_traverse */
1133 scanner_clear, /* tp_clear */
1134 0, /* tp_richcompare */
1135 0, /* tp_weaklistoffset */
1136 0, /* tp_iter */
1137 0, /* tp_iternext */
1138 0, /* tp_methods */
1139 scanner_members, /* tp_members */
1140 0, /* tp_getset */
1141 0, /* tp_base */
1142 0, /* tp_dict */
1143 0, /* tp_descr_get */
1144 0, /* tp_descr_set */
1145 0, /* tp_dictoffset */
1146 scanner_init, /* tp_init */
1147 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1148 scanner_new, /* tp_new */
1149 0,/* PyObject_GC_Del, */ /* tp_free */
1150};
1151
1152static PyObject *
1153encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1154{
1155 PyEncoderObject *s;
1156 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1157 if (s != NULL) {
1158 s->markers = NULL;
1159 s->defaultfn = NULL;
1160 s->encoder = NULL;
1161 s->indent = NULL;
1162 s->key_separator = NULL;
1163 s->item_separator = NULL;
1164 s->sort_keys = NULL;
1165 s->skipkeys = NULL;
1166 }
1167 return (PyObject *)s;
1168}
1169
1170static int
1171encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1172{
1173 /* initialize Encoder object */
1174 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1175
1176 PyEncoderObject *s;
Antoine Pitrou781eba72009-12-08 15:57:31 +00001177 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1178 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001179
1180 assert(PyEncoder_Check(self));
1181 s = (PyEncoderObject *)self;
1182
1183 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
Antoine Pitrou781eba72009-12-08 15:57:31 +00001184 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
1185 &sort_keys, &skipkeys, &allow_nan))
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001186 return -1;
1187
Antoine Pitrou781eba72009-12-08 15:57:31 +00001188 s->markers = markers;
1189 s->defaultfn = defaultfn;
1190 s->encoder = encoder;
1191 s->indent = indent;
1192 s->key_separator = key_separator;
1193 s->item_separator = item_separator;
1194 s->sort_keys = sort_keys;
1195 s->skipkeys = skipkeys;
1196 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
1197 s->allow_nan = PyObject_IsTrue(allow_nan);
1198
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001199 Py_INCREF(s->markers);
1200 Py_INCREF(s->defaultfn);
1201 Py_INCREF(s->encoder);
1202 Py_INCREF(s->indent);
1203 Py_INCREF(s->key_separator);
1204 Py_INCREF(s->item_separator);
1205 Py_INCREF(s->sort_keys);
1206 Py_INCREF(s->skipkeys);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001207 return 0;
1208}
1209
1210static PyObject *
1211encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1212{
1213 /* Python callable interface to encode_listencode_obj */
1214 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1215 PyObject *obj;
1216 PyObject *rval;
1217 Py_ssize_t indent_level;
1218 PyEncoderObject *s;
1219 assert(PyEncoder_Check(self));
1220 s = (PyEncoderObject *)self;
1221 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1222 &obj, _convertPyInt_AsSsize_t, &indent_level))
1223 return NULL;
1224 rval = PyList_New(0);
1225 if (rval == NULL)
1226 return NULL;
1227 if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1228 Py_DECREF(rval);
1229 return NULL;
1230 }
1231 return rval;
1232}
1233
1234static PyObject *
1235_encoded_const(PyObject *obj)
1236{
1237 /* Return the JSON string representation of None, True, False */
1238 if (obj == Py_None) {
1239 static PyObject *s_null = NULL;
1240 if (s_null == NULL) {
1241 s_null = PyUnicode_InternFromString("null");
1242 }
1243 Py_INCREF(s_null);
1244 return s_null;
1245 }
1246 else if (obj == Py_True) {
1247 static PyObject *s_true = NULL;
1248 if (s_true == NULL) {
1249 s_true = PyUnicode_InternFromString("true");
1250 }
1251 Py_INCREF(s_true);
1252 return s_true;
1253 }
1254 else if (obj == Py_False) {
1255 static PyObject *s_false = NULL;
1256 if (s_false == NULL) {
1257 s_false = PyUnicode_InternFromString("false");
1258 }
1259 Py_INCREF(s_false);
1260 return s_false;
1261 }
1262 else {
1263 PyErr_SetString(PyExc_ValueError, "not a const");
1264 return NULL;
1265 }
1266}
1267
1268static PyObject *
1269encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1270{
1271 /* Return the JSON representation of a PyFloat */
1272 double i = PyFloat_AS_DOUBLE(obj);
1273 if (!Py_IS_FINITE(i)) {
1274 if (!s->allow_nan) {
1275 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1276 return NULL;
1277 }
1278 if (i > 0) {
1279 return PyUnicode_FromString("Infinity");
1280 }
1281 else if (i < 0) {
1282 return PyUnicode_FromString("-Infinity");
1283 }
1284 else {
1285 return PyUnicode_FromString("NaN");
1286 }
1287 }
1288 /* Use a better float format here? */
1289 return PyObject_Repr(obj);
1290}
1291
1292static PyObject *
1293encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1294{
1295 /* Return the JSON representation of a string */
1296 if (s->fast_encode)
1297 return py_encode_basestring_ascii(NULL, obj);
1298 else
1299 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1300}
1301
1302static int
1303_steal_list_append(PyObject *lst, PyObject *stolen)
1304{
1305 /* Append stolen and then decrement its reference count */
1306 int rval = PyList_Append(lst, stolen);
1307 Py_DECREF(stolen);
1308 return rval;
1309}
1310
1311static int
1312encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
1313{
1314 /* Encode Python object obj to a JSON term, rval is a PyList */
1315 PyObject *newobj;
1316 int rv;
1317
1318 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1319 PyObject *cstr = _encoded_const(obj);
1320 if (cstr == NULL)
1321 return -1;
1322 return _steal_list_append(rval, cstr);
1323 }
1324 else if (PyUnicode_Check(obj))
1325 {
1326 PyObject *encoded = encoder_encode_string(s, obj);
1327 if (encoded == NULL)
1328 return -1;
1329 return _steal_list_append(rval, encoded);
1330 }
1331 else if (PyLong_Check(obj)) {
1332 PyObject *encoded = PyObject_Str(obj);
1333 if (encoded == NULL)
1334 return -1;
1335 return _steal_list_append(rval, encoded);
1336 }
1337 else if (PyFloat_Check(obj)) {
1338 PyObject *encoded = encoder_encode_float(s, obj);
1339 if (encoded == NULL)
1340 return -1;
1341 return _steal_list_append(rval, encoded);
1342 }
1343 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001344 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1345 return -1;
1346 rv = encoder_listencode_list(s, rval, obj, indent_level);
1347 Py_LeaveRecursiveCall();
1348 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001349 }
1350 else if (PyDict_Check(obj)) {
Ezio Melotti13672652011-05-11 01:02:56 +03001351 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1352 return -1;
1353 rv = encoder_listencode_dict(s, rval, obj, indent_level);
1354 Py_LeaveRecursiveCall();
1355 return rv;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001356 }
1357 else {
1358 PyObject *ident = NULL;
1359 if (s->markers != Py_None) {
1360 int has_key;
1361 ident = PyLong_FromVoidPtr(obj);
1362 if (ident == NULL)
1363 return -1;
1364 has_key = PyDict_Contains(s->markers, ident);
1365 if (has_key) {
1366 if (has_key != -1)
1367 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1368 Py_DECREF(ident);
1369 return -1;
1370 }
1371 if (PyDict_SetItem(s->markers, ident, obj)) {
1372 Py_DECREF(ident);
1373 return -1;
1374 }
1375 }
1376 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
1377 if (newobj == NULL) {
1378 Py_XDECREF(ident);
1379 return -1;
1380 }
Ezio Melotti13672652011-05-11 01:02:56 +03001381
1382 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1383 return -1;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001384 rv = encoder_listencode_obj(s, rval, newobj, indent_level);
Ezio Melotti13672652011-05-11 01:02:56 +03001385 Py_LeaveRecursiveCall();
1386
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001387 Py_DECREF(newobj);
1388 if (rv) {
1389 Py_XDECREF(ident);
1390 return -1;
1391 }
1392 if (ident != NULL) {
1393 if (PyDict_DelItem(s->markers, ident)) {
1394 Py_XDECREF(ident);
1395 return -1;
1396 }
1397 Py_XDECREF(ident);
1398 }
1399 return rv;
1400 }
1401}
1402
1403static int
1404encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
1405{
1406 /* Encode Python dict dct a JSON term, rval is a PyList */
1407 static PyObject *open_dict = NULL;
1408 static PyObject *close_dict = NULL;
1409 static PyObject *empty_dict = NULL;
1410 PyObject *kstr = NULL;
1411 PyObject *ident = NULL;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001412 PyObject *it = NULL;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001413 PyObject *items;
1414 PyObject *item = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001415 int skipkeys;
1416 Py_ssize_t idx;
1417
1418 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1419 open_dict = PyUnicode_InternFromString("{");
1420 close_dict = PyUnicode_InternFromString("}");
1421 empty_dict = PyUnicode_InternFromString("{}");
1422 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1423 return -1;
1424 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001425 if (Py_SIZE(dct) == 0)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001426 return PyList_Append(rval, empty_dict);
1427
1428 if (s->markers != Py_None) {
1429 int has_key;
1430 ident = PyLong_FromVoidPtr(dct);
1431 if (ident == NULL)
1432 goto bail;
1433 has_key = PyDict_Contains(s->markers, ident);
1434 if (has_key) {
1435 if (has_key != -1)
1436 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1437 goto bail;
1438 }
1439 if (PyDict_SetItem(s->markers, ident, dct)) {
1440 goto bail;
1441 }
1442 }
1443
1444 if (PyList_Append(rval, open_dict))
1445 goto bail;
1446
1447 if (s->indent != Py_None) {
1448 /* TODO: DOES NOT RUN */
1449 indent_level += 1;
1450 /*
1451 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1452 separator = _item_separator + newline_indent
1453 buf += newline_indent
1454 */
1455 }
1456
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001457 if (PyObject_IsTrue(s->sort_keys)) {
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001458 /* First sort the keys then replace them with (key, value) tuples. */
1459 Py_ssize_t i, nitems;
1460 items = PyMapping_Keys(dct);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001461 if (items == NULL)
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001462 goto bail;
1463 if (!PyList_Check(items)) {
1464 PyErr_SetString(PyExc_ValueError, "keys must return list");
1465 goto bail;
1466 }
1467 if (PyList_Sort(items) < 0)
1468 goto bail;
1469 nitems = PyList_GET_SIZE(items);
1470 for (i = 0; i < nitems; i++) {
1471 PyObject *key, *value;
1472 key = PyList_GET_ITEM(items, i);
1473 value = PyDict_GetItem(dct, key);
1474 item = PyTuple_Pack(2, key, value);
1475 if (item == NULL)
1476 goto bail;
1477 PyList_SET_ITEM(items, i, item);
1478 Py_DECREF(key);
1479 }
1480 }
1481 else {
1482 items = PyMapping_Items(dct);
1483 }
1484 if (items == NULL)
Raymond Hettinger491a4cb2009-05-27 11:19:02 +00001485 goto bail;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001486 it = PyObject_GetIter(items);
Antoine Pitrou2397dd52010-11-04 16:51:32 +00001487 Py_DECREF(items);
1488 if (it == NULL)
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001489 goto bail;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001490 skipkeys = PyObject_IsTrue(s->skipkeys);
1491 idx = 0;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001492 while ((item = PyIter_Next(it)) != NULL) {
1493 PyObject *encoded, *key, *value;
1494 if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
1495 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1496 goto bail;
1497 }
1498 key = PyTuple_GET_ITEM(item, 0);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001499 if (PyUnicode_Check(key)) {
1500 Py_INCREF(key);
1501 kstr = key;
1502 }
1503 else if (PyFloat_Check(key)) {
1504 kstr = encoder_encode_float(s, key);
1505 if (kstr == NULL)
1506 goto bail;
1507 }
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001508 else if (key == Py_True || key == Py_False || key == Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001509 /* This must come before the PyLong_Check because
1510 True and False are also 1 and 0.*/
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001511 kstr = _encoded_const(key);
1512 if (kstr == NULL)
1513 goto bail;
1514 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001515 else if (PyLong_Check(key)) {
1516 kstr = PyObject_Str(key);
1517 if (kstr == NULL)
1518 goto bail;
1519 }
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001520 else if (skipkeys) {
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001521 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001522 continue;
1523 }
1524 else {
1525 /* TODO: include repr of key */
Doug Hellmann1c524752010-07-21 12:29:04 +00001526 PyErr_SetString(PyExc_TypeError, "keys must be a string");
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001527 goto bail;
1528 }
1529
1530 if (idx) {
1531 if (PyList_Append(rval, s->item_separator))
1532 goto bail;
1533 }
1534
1535 encoded = encoder_encode_string(s, kstr);
1536 Py_CLEAR(kstr);
1537 if (encoded == NULL)
1538 goto bail;
1539 if (PyList_Append(rval, encoded)) {
1540 Py_DECREF(encoded);
1541 goto bail;
1542 }
1543 Py_DECREF(encoded);
1544 if (PyList_Append(rval, s->key_separator))
1545 goto bail;
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001546
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001547 value = PyTuple_GET_ITEM(item, 1);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001548 if (encoder_listencode_obj(s, rval, value, indent_level))
1549 goto bail;
1550 idx += 1;
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001551 Py_DECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001552 }
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001553 if (PyErr_Occurred())
1554 goto bail;
1555 Py_CLEAR(it);
1556
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001557 if (ident != NULL) {
1558 if (PyDict_DelItem(s->markers, ident))
1559 goto bail;
1560 Py_CLEAR(ident);
1561 }
1562 if (s->indent != Py_None) {
1563 /* TODO: DOES NOT RUN */
1564 indent_level -= 1;
1565 /*
1566 yield '\n' + (' ' * (_indent * _current_indent_level))
1567 */
1568 }
1569 if (PyList_Append(rval, close_dict))
1570 goto bail;
1571 return 0;
1572
1573bail:
Raymond Hettingerc8d952d2009-05-27 06:50:31 +00001574 Py_XDECREF(it);
Raymond Hettingerbcf6f922009-05-27 09:58:34 +00001575 Py_XDECREF(item);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001576 Py_XDECREF(kstr);
1577 Py_XDECREF(ident);
1578 return -1;
1579}
1580
1581
1582static int
1583encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
1584{
1585 /* Encode Python list seq to a JSON term, rval is a PyList */
1586 static PyObject *open_array = NULL;
1587 static PyObject *close_array = NULL;
1588 static PyObject *empty_array = NULL;
1589 PyObject *ident = NULL;
1590 PyObject *s_fast = NULL;
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001591 Py_ssize_t i;
1592
1593 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1594 open_array = PyUnicode_InternFromString("[");
1595 close_array = PyUnicode_InternFromString("]");
1596 empty_array = PyUnicode_InternFromString("[]");
1597 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1598 return -1;
1599 }
1600 ident = NULL;
1601 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1602 if (s_fast == NULL)
1603 return -1;
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001604 if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001605 Py_DECREF(s_fast);
1606 return PyList_Append(rval, empty_array);
1607 }
1608
1609 if (s->markers != Py_None) {
1610 int has_key;
1611 ident = PyLong_FromVoidPtr(seq);
1612 if (ident == NULL)
1613 goto bail;
1614 has_key = PyDict_Contains(s->markers, ident);
1615 if (has_key) {
1616 if (has_key != -1)
1617 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1618 goto bail;
1619 }
1620 if (PyDict_SetItem(s->markers, ident, seq)) {
1621 goto bail;
1622 }
1623 }
1624
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001625 if (PyList_Append(rval, open_array))
1626 goto bail;
1627 if (s->indent != Py_None) {
1628 /* TODO: DOES NOT RUN */
1629 indent_level += 1;
1630 /*
1631 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1632 separator = _item_separator + newline_indent
1633 buf += newline_indent
1634 */
1635 }
Antoine Pitrou9f69e792012-11-01 19:52:06 +01001636 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1637 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001638 if (i) {
1639 if (PyList_Append(rval, s->item_separator))
1640 goto bail;
1641 }
1642 if (encoder_listencode_obj(s, rval, obj, indent_level))
1643 goto bail;
1644 }
1645 if (ident != NULL) {
1646 if (PyDict_DelItem(s->markers, ident))
1647 goto bail;
1648 Py_CLEAR(ident);
1649 }
1650 if (s->indent != Py_None) {
1651 /* TODO: DOES NOT RUN */
1652 indent_level -= 1;
1653 /*
1654 yield '\n' + (' ' * (_indent * _current_indent_level))
1655 */
1656 }
1657 if (PyList_Append(rval, close_array))
1658 goto bail;
1659 Py_DECREF(s_fast);
1660 return 0;
1661
1662bail:
1663 Py_XDECREF(ident);
1664 Py_DECREF(s_fast);
1665 return -1;
1666}
1667
1668static void
1669encoder_dealloc(PyObject *self)
1670{
1671 /* Deallocate Encoder */
1672 encoder_clear(self);
1673 Py_TYPE(self)->tp_free(self);
1674}
1675
1676static int
1677encoder_traverse(PyObject *self, visitproc visit, void *arg)
1678{
1679 PyEncoderObject *s;
1680 assert(PyEncoder_Check(self));
1681 s = (PyEncoderObject *)self;
1682 Py_VISIT(s->markers);
1683 Py_VISIT(s->defaultfn);
1684 Py_VISIT(s->encoder);
1685 Py_VISIT(s->indent);
1686 Py_VISIT(s->key_separator);
1687 Py_VISIT(s->item_separator);
1688 Py_VISIT(s->sort_keys);
1689 Py_VISIT(s->skipkeys);
1690 return 0;
1691}
1692
1693static int
1694encoder_clear(PyObject *self)
1695{
1696 /* Deallocate Encoder */
1697 PyEncoderObject *s;
1698 assert(PyEncoder_Check(self));
1699 s = (PyEncoderObject *)self;
1700 Py_CLEAR(s->markers);
1701 Py_CLEAR(s->defaultfn);
1702 Py_CLEAR(s->encoder);
1703 Py_CLEAR(s->indent);
1704 Py_CLEAR(s->key_separator);
1705 Py_CLEAR(s->item_separator);
1706 Py_CLEAR(s->sort_keys);
1707 Py_CLEAR(s->skipkeys);
1708 return 0;
1709}
1710
1711PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1712
/* Type object for "_json.Encoder".
   The initializer is positional, so the order of the entries is significant;
   the trailing comment on each line names the slot being filled.
   Instances are callable (tp_call = encoder_call), take part in cyclic GC
   (Py_TPFLAGS_HAVE_GC together with encoder_traverse / encoder_clear) and
   expose the fields listed in encoder_members.
   NOTE: PyInit__json assigns PyType_GenericNew to tp_new before calling
   PyType_Ready, so the encoder_new entry below is replaced at module init. */
1713static
1714PyTypeObject PyEncoderType = {
1715 PyVarObject_HEAD_INIT(NULL, 0)
1716 "_json.Encoder", /* tp_name */
1717 sizeof(PyEncoderObject), /* tp_basicsize */
1718 0, /* tp_itemsize */
1719 encoder_dealloc, /* tp_dealloc */
1720 0, /* tp_print */
1721 0, /* tp_getattr */
1722 0, /* tp_setattr */
1723 0, /* tp_compare */
1724 0, /* tp_repr */
1725 0, /* tp_as_number */
1726 0, /* tp_as_sequence */
1727 0, /* tp_as_mapping */
1728 0, /* tp_hash */
1729 encoder_call, /* tp_call */
1730 0, /* tp_str */
1731 0, /* tp_getattro */
1732 0, /* tp_setattro */
1733 0, /* tp_as_buffer */
1734 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1735 encoder_doc, /* tp_doc */
1736 encoder_traverse, /* tp_traverse */
1737 encoder_clear, /* tp_clear */
1738 0, /* tp_richcompare */
1739 0, /* tp_weaklistoffset */
1740 0, /* tp_iter */
1741 0, /* tp_iternext */
1742 0, /* tp_methods */
1743 encoder_members, /* tp_members */
1744 0, /* tp_getset */
1745 0, /* tp_base */
1746 0, /* tp_dict */
1747 0, /* tp_descr_get */
1748 0, /* tp_descr_set */
1749 0, /* tp_dictoffset */
1750 encoder_init, /* tp_init */
1751 0, /* tp_alloc */
1752 encoder_new, /* tp_new */
1753 0, /* tp_free */
1754};
1755
/* Module-level function table for _json, referenced from jsonmodule.
   Each entry is {python name, C function, calling-convention flag,
   docstring}:
     encode_basestring_ascii - METH_O, a single object argument
     scanstring              - METH_VARARGS, a tuple of positional arguments
   The all-NULL entry terminates the table. */
1756static PyMethodDef speedups_methods[] = {
1757 {"encode_basestring_ascii",
1758 (PyCFunction)py_encode_basestring_ascii,
1759 METH_O,
1760 pydoc_encode_basestring_ascii},
1761 {"scanstring",
1762 (PyCFunction)py_scanstring,
1763 METH_VARARGS,
1764 pydoc_scanstring},
Christian Heimes90540002008-05-08 14:29:10 +00001765 {NULL, NULL, 0, NULL}
1766};
1767
/* Docstring of the _json module. */
1768PyDoc_STRVAR(module_doc,
1769"json speedups\n");
1770
/* Module definition passed to PyModule_Create in PyInit__json.
   Positional initializer: after the header come the module name "_json",
   the docstring module_doc, a size of -1, and the function table
   speedups_methods; the four remaining entries are left NULL.
   NOTE(review): per the PyModuleDef documentation the -1 is m_size, meaning
   the module keeps no per-module state - confirm against the C API docs. */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001771static struct PyModuleDef jsonmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001772 PyModuleDef_HEAD_INIT,
1773 "_json",
1774 module_doc,
1775 -1,
1776 speedups_methods,
1777 NULL,
1778 NULL,
1779 NULL,
1780 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001781};
1782
1783PyObject*
1784PyInit__json(void)
Christian Heimes90540002008-05-08 14:29:10 +00001785{
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00001786 PyObject *m = PyModule_Create(&jsonmodule);
1787 if (!m)
1788 return NULL;
1789 PyScannerType.tp_new = PyType_GenericNew;
1790 if (PyType_Ready(&PyScannerType) < 0)
1791 goto fail;
1792 PyEncoderType.tp_new = PyType_GenericNew;
1793 if (PyType_Ready(&PyEncoderType) < 0)
1794 goto fail;
1795 Py_INCREF((PyObject*)&PyScannerType);
1796 if (PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType) < 0) {
1797 Py_DECREF((PyObject*)&PyScannerType);
1798 goto fail;
1799 }
1800 Py_INCREF((PyObject*)&PyEncoderType);
1801 if (PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType) < 0) {
1802 Py_DECREF((PyObject*)&PyEncoderType);
1803 goto fail;
1804 }
1805 return m;
1806 fail:
1807 Py_DECREF(m);
1808 return NULL;
Christian Heimes90540002008-05-08 14:29:10 +00001809}