| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 1 | #include "Python.h" | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 2 | #include "structmember.h" | 
 /* Compatibility shims for older Python headers:
  *  - Py_TYPE is supplied when building against Python < 2.6 that lacks it.
  *  - For Python < 2.5 without PY_SSIZE_T_MIN, Py_ssize_t is a plain int and
  *    the Ssize_t int converters are aliased to their long counterparts.
  *  - Py_IS_FINITE is derived from Py_IS_INFINITY / Py_IS_NAN when absent.
  */
 | 3 | #if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE) | 
 | 4 | #define Py_TYPE(ob)     (((PyObject*)(ob))->ob_type) | 
 | 5 | #endif | 
 | 6 | #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) | 
 | 7 | typedef int Py_ssize_t; | 
 | 8 | #define PY_SSIZE_T_MAX INT_MAX | 
 | 9 | #define PY_SSIZE_T_MIN INT_MIN | 
 | 10 | #define PyInt_FromSsize_t PyInt_FromLong | 
 | 11 | #define PyInt_AsSsize_t PyInt_AsLong | 
 | 12 | #endif | 
 | 13 | #ifndef Py_IS_FINITE | 
 | 14 | #define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X)) | 
 | 15 | #endif | 
 | 16 |  | 
 /* UNUSED tags a parameter as intentionally unused under GCC and expands to
  * nothing on other compilers.
  * DEFAULT_ENCODING is the encoding name literal "utf-8".
  * Py{Scanner,Encoder}_Check accept the type or a subtype (PyObject_TypeCheck);
  * the _CheckExact variants compare the type pointer directly.
  * The two PyTypeObject declarations below are tentative; their initialized
  * definitions are expected later in the file (not visible in this chunk).
  */
 | 17 | #ifdef __GNUC__ | 
 | 18 | #define UNUSED __attribute__((__unused__)) | 
 | 19 | #else | 
 | 20 | #define UNUSED | 
 | 21 | #endif | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 22 |  | 
 | 23 | #define DEFAULT_ENCODING "utf-8" | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 24 |  | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 25 | #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType) | 
 | 26 | #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType) | 
 | 27 | #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) | 
 | 28 | #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType) | 
 | 29 |  | 
 | 30 | static PyTypeObject PyScannerType; | 
 | 31 | static PyTypeObject PyEncoderType; | 
 | 32 |  | 
 /* Instance layout of the scanner type (PyScannerType).
  * Every field after the object header is a PyObject pointer; each one is
  * published as a read-only attribute through scanner_members.
  * NOTE(review): the exact meaning of each hook/option is set by code outside
  * this chunk -- confirm against scanner_init before relying on it.
  */
 | 33 | typedef struct _PyScannerObject { | 
 | 34 |     PyObject_HEAD | 
 | 35 |     PyObject *encoding; | 
 | 36 |     PyObject *strict; | 
 | 37 |     PyObject *object_hook; | 
| Raymond Hettinger | 91852ca | 2009-03-19 19:19:03 +0000 | [diff] [blame] | 38 |     PyObject *pairs_hook; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 39 |     PyObject *parse_float; | 
 | 40 |     PyObject *parse_int; | 
 | 41 |     PyObject *parse_constant; | 
 | 42 | } PyScannerObject; | 
 | 43 |  | 
 /* Attribute table for PyScannerObject: every entry is a READONLY T_OBJECT
  * member located via offsetof. The attribute name matches the field name
  * except for pairs_hook, which is published as "object_pairs_hook".
  * The {NULL} entry terminates the table.
  */
 | 44 | static PyMemberDef scanner_members[] = { | 
 | 45 |     {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"}, | 
 | 46 |     {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"}, | 
 | 47 |     {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, | 
| Raymond Hettinger | 91852ca | 2009-03-19 19:19:03 +0000 | [diff] [blame] | 48 |     {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"}, | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 49 |     {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, | 
 | 50 |     {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"}, | 
 | 51 |     {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"}, | 
 | 52 |     {NULL} | 
 | 53 | }; | 
 | 54 |  | 
 /* Instance layout of the encoder type (PyEncoderType).
  * The PyObject pointer fields are published as read-only attributes through
  * encoder_members. fast_encode and allow_nan are plain C ints and have no
  * entry in that table.
  * NOTE(review): how each field is populated is decided by code outside this
  * chunk -- confirm against encoder_init.
  */
 | 55 | typedef struct _PyEncoderObject { | 
 | 56 |     PyObject_HEAD | 
 | 57 |     PyObject *markers; | 
 | 58 |     PyObject *defaultfn; | 
 | 59 |     PyObject *encoder; | 
 | 60 |     PyObject *indent; | 
 | 61 |     PyObject *key_separator; | 
 | 62 |     PyObject *item_separator; | 
 | 63 |     PyObject *sort_keys; | 
 | 64 |     PyObject *skipkeys; | 
 | 65 |     int fast_encode; | 
 | 66 |     int allow_nan; | 
 | 67 | } PyEncoderObject; | 
 | 68 |  | 
 /* Attribute table for PyEncoderObject: every entry is a READONLY T_OBJECT
  * member located via offsetof. The attribute name matches the field name
  * except for defaultfn, which is published as "default".
  * The {NULL} entry terminates the table.
  */
 | 69 | static PyMemberDef encoder_members[] = { | 
 | 70 |     {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"}, | 
 | 71 |     {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"}, | 
 | 72 |     {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"}, | 
 | 73 |     {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"}, | 
 | 74 |     {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"}, | 
 | 75 |     {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"}, | 
 | 76 |     {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"}, | 
 | 77 |     {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"}, | 
 | 78 |     {NULL} | 
 | 79 | }; | 
 | 80 |  | 
 /* Forward declarations so the functions below can reference one another
  * regardless of definition order. Everything is file-local (static) except
  * init_json, which is declared with external linkage.
  */
 | 81 | static Py_ssize_t | 
 | 82 | ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars); | 
 | 83 | static PyObject * | 
 | 84 | ascii_escape_unicode(PyObject *pystr); | 
 | 85 | static PyObject * | 
 | 86 | ascii_escape_str(PyObject *pystr); | 
 | 87 | static PyObject * | 
 | 88 | py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr); | 
 | 89 | void init_json(void); | 
 | 90 | static PyObject * | 
 | 91 | scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); | 
 | 92 | static PyObject * | 
 | 93 | scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); | 
 | 94 | static PyObject * | 
 | 95 | _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx); | 
 | 96 | static PyObject * | 
 | 97 | scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds); | 
 | 98 | static int | 
 | 99 | scanner_init(PyObject *self, PyObject *args, PyObject *kwds); | 
 | 100 | static void | 
 | 101 | scanner_dealloc(PyObject *self); | 
 | 102 | static int | 
 | 103 | scanner_clear(PyObject *self); | 
 | 104 | static PyObject * | 
 | 105 | encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds); | 
 | 106 | static int | 
 | 107 | encoder_init(PyObject *self, PyObject *args, PyObject *kwds); | 
 | 108 | static void | 
 | 109 | encoder_dealloc(PyObject *self); | 
 | 110 | static int | 
 | 111 | encoder_clear(PyObject *self); | 
 | 112 | static int | 
 | 113 | encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level); | 
 | 114 | static int | 
 | 115 | encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level); | 
 | 116 | static int | 
 | 117 | encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level); | 
 | 118 | static PyObject * | 
| Raymond Hettinger | a0b8d9a | 2009-03-19 19:24:43 +0000 | [diff] [blame] | 119 | _encoded_const(PyObject *obj); | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 120 | static void | 
 | 121 | raise_errmsg(char *msg, PyObject *s, Py_ssize_t end); | 
 | 122 | static PyObject * | 
 | 123 | encoder_encode_string(PyEncoderObject *s, PyObject *obj); | 
 | 124 | static int | 
 | 125 | _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr); | 
 | 126 | static PyObject * | 
 | 127 | _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr); | 
 | 128 | static PyObject * | 
 | 129 | encoder_encode_float(PyEncoderObject *s, PyObject *obj); | 
 | 130 |  | 
 | 131 | #define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"') | 
 | 132 | #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r')) | 
 | 133 |  | 
 | 134 | #define MIN_EXPANSION 6 | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 135 | #ifdef Py_UNICODE_WIDE | 
 | 136 | #define MAX_EXPANSION (2 * MIN_EXPANSION) | 
 | 137 | #else | 
 | 138 | #define MAX_EXPANSION MIN_EXPANSION | 
 | 139 | #endif | 
 | 140 |  | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 141 | static int | 
 | 142 | _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr) | 
 | 143 | { | 
 | 144 |     /* PyObject to Py_ssize_t converter */ | 
 | 145 |     *size_ptr = PyInt_AsSsize_t(o); | 
| Georg Brandl | f71ba95 | 2009-05-05 07:48:12 +0000 | [diff] [blame] | 146 |     if (*size_ptr == -1 && PyErr_Occurred()) | 
 | 147 |         return 0; | 
 | 148 |     return 1; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 149 | } | 
 | 150 |  | 
 | 151 | static PyObject * | 
 | 152 | _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) | 
 | 153 | { | 
 | 154 |     /* Py_ssize_t to PyObject converter */ | 
 | 155 |     return PyInt_FromSsize_t(*size_ptr); | 
 | 156 | } | 
 | 157 |  | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 158 | static Py_ssize_t | 
 | 159 | ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) | 
 | 160 | { | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 161 |     /* Escape unicode code point c to ASCII escape sequences | 
 | 162 |     in char *output. output must have at least 12 bytes unused to | 
 | 163 |     accommodate an escaped surrogate pair "\uXXXX\uXXXX" */ | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 164 |     output[chars++] = '\\'; | 
 | 165 |     switch (c) { | 
 | 166 |         case '\\': output[chars++] = (char)c; break; | 
 | 167 |         case '"': output[chars++] = (char)c; break; | 
 | 168 |         case '\b': output[chars++] = 'b'; break; | 
 | 169 |         case '\f': output[chars++] = 'f'; break; | 
 | 170 |         case '\n': output[chars++] = 'n'; break; | 
 | 171 |         case '\r': output[chars++] = 'r'; break; | 
 | 172 |         case '\t': output[chars++] = 't'; break; | 
 | 173 |         default: | 
 | 174 | #ifdef Py_UNICODE_WIDE | 
 | 175 |             if (c >= 0x10000) { | 
 | 176 |                 /* UTF-16 surrogate pair */ | 
 | 177 |                 Py_UNICODE v = c - 0x10000; | 
 | 178 |                 c = 0xd800 | ((v >> 10) & 0x3ff); | 
 | 179 |                 output[chars++] = 'u'; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 180 |                 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; | 
 | 181 |                 output[chars++] = "0123456789abcdef"[(c >>  8) & 0xf]; | 
 | 182 |                 output[chars++] = "0123456789abcdef"[(c >>  4) & 0xf]; | 
 | 183 |                 output[chars++] = "0123456789abcdef"[(c      ) & 0xf]; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 184 |                 c = 0xdc00 | (v & 0x3ff); | 
 | 185 |                 output[chars++] = '\\'; | 
 | 186 |             } | 
 | 187 | #endif | 
 | 188 |             output[chars++] = 'u'; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 189 |             output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; | 
 | 190 |             output[chars++] = "0123456789abcdef"[(c >>  8) & 0xf]; | 
 | 191 |             output[chars++] = "0123456789abcdef"[(c >>  4) & 0xf]; | 
 | 192 |             output[chars++] = "0123456789abcdef"[(c      ) & 0xf]; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 193 |     } | 
 | 194 |     return chars; | 
 | 195 | } | 
 | 196 |  | 
 | 197 | static PyObject * | 
 | 198 | ascii_escape_unicode(PyObject *pystr) | 
 | 199 | { | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 200 |     /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */ | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 201 |     Py_ssize_t i; | 
 | 202 |     Py_ssize_t input_chars; | 
 | 203 |     Py_ssize_t output_size; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 204 |     Py_ssize_t max_output_size; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 205 |     Py_ssize_t chars; | 
 | 206 |     PyObject *rval; | 
 | 207 |     char *output; | 
 | 208 |     Py_UNICODE *input_unicode; | 
 | 209 |  | 
 | 210 |     input_chars = PyUnicode_GET_SIZE(pystr); | 
 | 211 |     input_unicode = PyUnicode_AS_UNICODE(pystr); | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 212 |  | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 213 |     /* One char input can be up to 6 chars output, estimate 4 of these */ | 
 | 214 |     output_size = 2 + (MIN_EXPANSION * 4) + input_chars; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 215 |     max_output_size = 2 + (input_chars * MAX_EXPANSION); | 
| Gregory P. Smith | dd96db6 | 2008-06-09 04:58:54 +0000 | [diff] [blame] | 216 |     rval = PyString_FromStringAndSize(NULL, output_size); | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 217 |     if (rval == NULL) { | 
 | 218 |         return NULL; | 
 | 219 |     } | 
| Gregory P. Smith | dd96db6 | 2008-06-09 04:58:54 +0000 | [diff] [blame] | 220 |     output = PyString_AS_STRING(rval); | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 221 |     chars = 0; | 
 | 222 |     output[chars++] = '"'; | 
 | 223 |     for (i = 0; i < input_chars; i++) { | 
 | 224 |         Py_UNICODE c = input_unicode[i]; | 
 | 225 |         if (S_CHAR(c)) { | 
 | 226 |             output[chars++] = (char)c; | 
 | 227 |         } | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 228 |         else { | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 229 |             chars = ascii_escape_char(c, output, chars); | 
 | 230 |         } | 
 | 231 |         if (output_size - chars < (1 + MAX_EXPANSION)) { | 
 | 232 |             /* There's more than four, so let's resize by a lot */ | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 233 |             Py_ssize_t new_output_size = output_size * 2; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 234 |             /* This is an upper bound */ | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 235 |             if (new_output_size > max_output_size) { | 
 | 236 |                 new_output_size = max_output_size; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 237 |             } | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 238 |             /* Make sure that the output size changed before resizing */ | 
 | 239 |             if (new_output_size != output_size) { | 
 | 240 |                 output_size = new_output_size; | 
 | 241 |                 if (_PyString_Resize(&rval, output_size) == -1) { | 
 | 242 |                     return NULL; | 
 | 243 |                 } | 
 | 244 |                 output = PyString_AS_STRING(rval); | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 245 |             } | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 246 |         } | 
 | 247 |     } | 
 | 248 |     output[chars++] = '"'; | 
| Gregory P. Smith | dd96db6 | 2008-06-09 04:58:54 +0000 | [diff] [blame] | 249 |     if (_PyString_Resize(&rval, chars) == -1) { | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 250 |         return NULL; | 
 | 251 |     } | 
 | 252 |     return rval; | 
 | 253 | } | 
 | 254 |  | 
 | 255 | static PyObject * | 
 | 256 | ascii_escape_str(PyObject *pystr) | 
 | 257 | { | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 258 |     /* Take a PyString pystr and return a new ASCII-only escaped PyString */ | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 259 |     Py_ssize_t i; | 
 | 260 |     Py_ssize_t input_chars; | 
 | 261 |     Py_ssize_t output_size; | 
 | 262 |     Py_ssize_t chars; | 
 | 263 |     PyObject *rval; | 
 | 264 |     char *output; | 
 | 265 |     char *input_str; | 
 | 266 |  | 
| Gregory P. Smith | dd96db6 | 2008-06-09 04:58:54 +0000 | [diff] [blame] | 267 |     input_chars = PyString_GET_SIZE(pystr); | 
 | 268 |     input_str = PyString_AS_STRING(pystr); | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 269 |  | 
 | 270 |     /* Fast path for a string that's already ASCII */ | 
 | 271 |     for (i = 0; i < input_chars; i++) { | 
 | 272 |         Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; | 
 | 273 |         if (!S_CHAR(c)) { | 
 | 274 |             /* If we have to escape something, scan the string for unicode */ | 
 | 275 |             Py_ssize_t j; | 
 | 276 |             for (j = i; j < input_chars; j++) { | 
 | 277 |                 c = (Py_UNICODE)(unsigned char)input_str[j]; | 
 | 278 |                 if (c > 0x7f) { | 
 | 279 |                     /* We hit a non-ASCII character, bail to unicode mode */ | 
 | 280 |                     PyObject *uni; | 
 | 281 |                     uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); | 
 | 282 |                     if (uni == NULL) { | 
 | 283 |                         return NULL; | 
 | 284 |                     } | 
 | 285 |                     rval = ascii_escape_unicode(uni); | 
 | 286 |                     Py_DECREF(uni); | 
 | 287 |                     return rval; | 
 | 288 |                 } | 
 | 289 |             } | 
 | 290 |             break; | 
 | 291 |         } | 
 | 292 |     } | 
 | 293 |  | 
 | 294 |     if (i == input_chars) { | 
 | 295 |         /* Input is already ASCII */ | 
 | 296 |         output_size = 2 + input_chars; | 
 | 297 |     } | 
 | 298 |     else { | 
 | 299 |         /* One char input can be up to 6 chars output, estimate 4 of these */ | 
 | 300 |         output_size = 2 + (MIN_EXPANSION * 4) + input_chars; | 
 | 301 |     } | 
| Gregory P. Smith | dd96db6 | 2008-06-09 04:58:54 +0000 | [diff] [blame] | 302 |     rval = PyString_FromStringAndSize(NULL, output_size); | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 303 |     if (rval == NULL) { | 
 | 304 |         return NULL; | 
 | 305 |     } | 
| Gregory P. Smith | dd96db6 | 2008-06-09 04:58:54 +0000 | [diff] [blame] | 306 |     output = PyString_AS_STRING(rval); | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 307 |     output[0] = '"'; | 
 | 308 |  | 
 | 309 |     /* We know that everything up to i is ASCII already */ | 
 | 310 |     chars = i + 1; | 
 | 311 |     memcpy(&output[1], input_str, i); | 
 | 312 |  | 
 | 313 |     for (; i < input_chars; i++) { | 
 | 314 |         Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 315 |         if (S_CHAR(c)) { | 
 | 316 |             output[chars++] = (char)c; | 
 | 317 |         } | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 318 |         else { | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 319 |             chars = ascii_escape_char(c, output, chars); | 
 | 320 |         } | 
 | 321 |         /* An ASCII char can't possibly expand to a surrogate! */ | 
 | 322 |         if (output_size - chars < (1 + MIN_EXPANSION)) { | 
 | 323 |             /* There's more than four, so let's resize by a lot */ | 
 | 324 |             output_size *= 2; | 
 | 325 |             if (output_size > 2 + (input_chars * MIN_EXPANSION)) { | 
 | 326 |                 output_size = 2 + (input_chars * MIN_EXPANSION); | 
 | 327 |             } | 
| Gregory P. Smith | dd96db6 | 2008-06-09 04:58:54 +0000 | [diff] [blame] | 328 |             if (_PyString_Resize(&rval, output_size) == -1) { | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 329 |                 return NULL; | 
 | 330 |             } | 
| Gregory P. Smith | dd96db6 | 2008-06-09 04:58:54 +0000 | [diff] [blame] | 331 |             output = PyString_AS_STRING(rval); | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 332 |         } | 
 | 333 |     } | 
 | 334 |     output[chars++] = '"'; | 
| Gregory P. Smith | dd96db6 | 2008-06-09 04:58:54 +0000 | [diff] [blame] | 335 |     if (_PyString_Resize(&rval, chars) == -1) { | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 336 |         return NULL; | 
 | 337 |     } | 
 | 338 |     return rval; | 
 | 339 | } | 
 | 340 |  | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 341 | static void | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 342 | raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) | 
 | 343 | { | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 344 |     /* Use the Python function json.decoder.errmsg to raise a nice | 
 | 345 |     looking ValueError exception */ | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 346 |     static PyObject *errmsg_fn = NULL; | 
 | 347 |     PyObject *pymsg; | 
 | 348 |     if (errmsg_fn == NULL) { | 
 | 349 |         PyObject *decoder = PyImport_ImportModule("json.decoder"); | 
 | 350 |         if (decoder == NULL) | 
 | 351 |             return; | 
 | 352 |         errmsg_fn = PyObject_GetAttrString(decoder, "errmsg"); | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 353 |         Py_DECREF(decoder); | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 354 |         if (errmsg_fn == NULL) | 
 | 355 |             return; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 356 |     } | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 357 |     pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end); | 
| Benjamin Peterson | 595e3cb | 2008-10-16 21:09:28 +0000 | [diff] [blame] | 358 |     if (pymsg) { | 
 | 359 |         PyErr_SetObject(PyExc_ValueError, pymsg); | 
 | 360 |         Py_DECREF(pymsg); | 
 | 361 |     } | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 362 | } | 
 | 363 |  | 
 | 364 | static PyObject * | 
 | 365 | join_list_unicode(PyObject *lst) | 
 | 366 | { | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 367 |     /* return u''.join(lst) */ | 
 | 368 |     static PyObject *joinfn = NULL; | 
 | 369 |     if (joinfn == NULL) { | 
 | 370 |         PyObject *ustr = PyUnicode_FromUnicode(NULL, 0); | 
 | 371 |         if (ustr == NULL) | 
 | 372 |             return NULL; | 
 | 373 |  | 
 | 374 |         joinfn = PyObject_GetAttrString(ustr, "join"); | 
 | 375 |         Py_DECREF(ustr); | 
 | 376 |         if (joinfn == NULL) | 
 | 377 |             return NULL; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 378 |     } | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 379 |     return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 380 | } | 
 | 381 |  | 
 | 382 | static PyObject * | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 383 | _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { | 
 | 384 |     /* return (rval, idx) tuple, stealing reference to rval */ | 
 | 385 |     PyObject *tpl; | 
 | 386 |     PyObject *pyidx; | 
 | 387 |     /* | 
 | 388 |     steal a reference to rval, returns (rval, idx) | 
 | 389 |     */ | 
 | 390 |     if (rval == NULL) { | 
 | 391 |         return NULL; | 
 | 392 |     } | 
 | 393 |     pyidx = PyInt_FromSsize_t(idx); | 
 | 394 |     if (pyidx == NULL) { | 
 | 395 |         Py_DECREF(rval); | 
 | 396 |         return NULL; | 
 | 397 |     } | 
 | 398 |     tpl = PyTuple_New(2); | 
 | 399 |     if (tpl == NULL) { | 
 | 400 |         Py_DECREF(pyidx); | 
 | 401 |         Py_DECREF(rval); | 
 | 402 |         return NULL; | 
 | 403 |     } | 
 | 404 |     PyTuple_SET_ITEM(tpl, 0, rval); | 
 | 405 |     PyTuple_SET_ITEM(tpl, 1, pyidx); | 
 | 406 |     return tpl; | 
 | 407 | } | 
 | 408 |  | 
 | 409 | static PyObject * | 
 | 410 | scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr) | 
 | 411 | { | 
 | 412 |     /* Read the JSON string from PyString pystr. | 
 | 413 |     end is the index of the first character after the quote. | 
 | 414 |     encoding is the encoding of pystr (must be an ASCII superset) | 
 | 415 |     if strict is zero then literal control characters are allowed | 
 | 416 |     *next_end_ptr is a return-by-reference index of the character | 
 | 417 |         after the end quote | 
 | 418 |  | 
 | 419 |     Return value is a new PyString (if ASCII-only) or PyUnicode | 
 | 420 |     */ | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 421 |     PyObject *rval; | 
| Gregory P. Smith | dd96db6 | 2008-06-09 04:58:54 +0000 | [diff] [blame] | 422 |     Py_ssize_t len = PyString_GET_SIZE(pystr); | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 423 |     Py_ssize_t begin = end - 1; | 
| Brett Cannon | 8e9757e | 2010-05-03 23:43:49 +0000 | [diff] [blame] | 424 |     Py_ssize_t next; | 
| Gregory P. Smith | dd96db6 | 2008-06-09 04:58:54 +0000 | [diff] [blame] | 425 |     char *buf = PyString_AS_STRING(pystr); | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 426 |     PyObject *chunks = PyList_New(0); | 
 | 427 |     if (chunks == NULL) { | 
 | 428 |         goto bail; | 
 | 429 |     } | 
| Bob Ippolito | d648f64 | 2008-07-19 21:59:50 +0000 | [diff] [blame] | 430 |     if (end < 0 || len <= end) { | 
 | 431 |         PyErr_SetString(PyExc_ValueError, "end is out of bounds"); | 
 | 432 |         goto bail; | 
 | 433 |     } | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 434 |     while (1) { | 
 | 435 |         /* Find the end of the string or the next escape */ | 
 | 436 |         Py_UNICODE c = 0; | 
 | 437 |         PyObject *chunk = NULL; | 
 | 438 |         for (next = end; next < len; next++) { | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 439 |             c = (unsigned char)buf[next]; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 440 |             if (c == '"' || c == '\\') { | 
 | 441 |                 break; | 
 | 442 |             } | 
 | 443 |             else if (strict && c <= 0x1f) { | 
| Bob Ippolito | d648f64 | 2008-07-19 21:59:50 +0000 | [diff] [blame] | 444 |                 raise_errmsg("Invalid control character at", pystr, next); | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 445 |                 goto bail; | 
 | 446 |             } | 
 | 447 |         } | 
 | 448 |         if (!(c == '"' || c == '\\')) { | 
 | 449 |             raise_errmsg("Unterminated string starting at", pystr, begin); | 
 | 450 |             goto bail; | 
 | 451 |         } | 
 | 452 |         /* Pick up this chunk if it's not zero length */ | 
 | 453 |         if (next != end) { | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 454 |             PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end); | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 455 |             if (strchunk == NULL) { | 
 | 456 |                 goto bail; | 
 | 457 |             } | 
| Barry Warsaw | fa65827 | 2010-11-02 21:03:09 +0000 | [diff] [blame] | 458 |             chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL); | 
 | 459 |             Py_DECREF(strchunk); | 
 | 460 |             if (chunk == NULL) { | 
 | 461 |                 goto bail; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 462 |             } | 
 | 463 |             if (PyList_Append(chunks, chunk)) { | 
| Benjamin Peterson | 336680e | 2008-10-16 21:48:06 +0000 | [diff] [blame] | 464 |                 Py_DECREF(chunk); | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 465 |                 goto bail; | 
 | 466 |             } | 
 | 467 |             Py_DECREF(chunk); | 
 | 468 |         } | 
 | 469 |         next++; | 
 | 470 |         if (c == '"') { | 
 | 471 |             end = next; | 
 | 472 |             break; | 
 | 473 |         } | 
 | 474 |         if (next == len) { | 
 | 475 |             raise_errmsg("Unterminated string starting at", pystr, begin); | 
 | 476 |             goto bail; | 
 | 477 |         } | 
 | 478 |         c = buf[next]; | 
 | 479 |         if (c != 'u') { | 
 | 480 |             /* Non-unicode backslash escapes */ | 
 | 481 |             end = next + 1; | 
 | 482 |             switch (c) { | 
 | 483 |                 case '"': break; | 
 | 484 |                 case '\\': break; | 
 | 485 |                 case '/': break; | 
 | 486 |                 case 'b': c = '\b'; break; | 
 | 487 |                 case 'f': c = '\f'; break; | 
 | 488 |                 case 'n': c = '\n'; break; | 
 | 489 |                 case 'r': c = '\r'; break; | 
 | 490 |                 case 't': c = '\t'; break; | 
 | 491 |                 default: c = 0; | 
 | 492 |             } | 
 | 493 |             if (c == 0) { | 
 | 494 |                 raise_errmsg("Invalid \\escape", pystr, end - 2); | 
 | 495 |                 goto bail; | 
 | 496 |             } | 
 | 497 |         } | 
 | 498 |         else { | 
 | 499 |             c = 0; | 
 | 500 |             next++; | 
 | 501 |             end = next + 4; | 
 | 502 |             if (end >= len) { | 
 | 503 |                 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); | 
 | 504 |                 goto bail; | 
 | 505 |             } | 
 | 506 |             /* Decode 4 hex digits */ | 
 | 507 |             for (; next < end; next++) { | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 508 |                 Py_UNICODE digit = buf[next]; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 509 |                 c <<= 4; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 510 |                 switch (digit) { | 
 | 511 |                     case '0': case '1': case '2': case '3': case '4': | 
 | 512 |                     case '5': case '6': case '7': case '8': case '9': | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 513 |                         c |= (digit - '0'); break; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 514 |                     case 'a': case 'b': case 'c': case 'd': case 'e': | 
 | 515 |                     case 'f': | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 516 |                         c |= (digit - 'a' + 10); break; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 517 |                     case 'A': case 'B': case 'C': case 'D': case 'E': | 
 | 518 |                     case 'F': | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 519 |                         c |= (digit - 'A' + 10); break; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 520 |                     default: | 
 | 521 |                         raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); | 
 | 522 |                         goto bail; | 
 | 523 |                 } | 
 | 524 |             } | 
 | 525 | #ifdef Py_UNICODE_WIDE | 
 | 526 |             /* Surrogate pair */ | 
| Serhiy Storchaka | dafda9b | 2013-11-26 21:25:15 +0200 | [diff] [blame] | 527 |             if ((c & 0xfc00) == 0xd800 && end + 6 < len && | 
 | 528 |                 buf[next++] == '\\' && | 
 | 529 |                 buf[next++] == 'u') { | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 530 |                 Py_UNICODE c2 = 0; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 531 |                 end += 6; | 
 | 532 |                 /* Decode 4 hex digits */ | 
 | 533 |                 for (; next < end; next++) { | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 534 |                     Py_UNICODE digit = buf[next]; | 
| Antoine Pitrou | 22ad245 | 2010-10-09 15:28:59 +0000 | [diff] [blame] | 535 |                     c2 <<= 4; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 536 |                     switch (digit) { | 
 | 537 |                         case '0': case '1': case '2': case '3': case '4': | 
 | 538 |                         case '5': case '6': case '7': case '8': case '9': | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 539 |                             c2 |= (digit - '0'); break; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 540 |                         case 'a': case 'b': case 'c': case 'd': case 'e': | 
 | 541 |                         case 'f': | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 542 |                             c2 |= (digit - 'a' + 10); break; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 543 |                         case 'A': case 'B': case 'C': case 'D': case 'E': | 
 | 544 |                         case 'F': | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 545 |                             c2 |= (digit - 'A' + 10); break; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 546 |                         default: | 
 | 547 |                             raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); | 
 | 548 |                             goto bail; | 
 | 549 |                     } | 
 | 550 |                 } | 
| Serhiy Storchaka | dafda9b | 2013-11-26 21:25:15 +0200 | [diff] [blame] | 551 |                 if ((c2 & 0xfc00) == 0xdc00) | 
 | 552 |                     c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); | 
 | 553 |                 else | 
 | 554 |                     end -= 6; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 555 |             } | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 556 | #endif | 
 | 557 |         } | 
| Barry Warsaw | fa65827 | 2010-11-02 21:03:09 +0000 | [diff] [blame] | 558 |         chunk = PyUnicode_FromUnicode(&c, 1); | 
 | 559 |         if (chunk == NULL) { | 
 | 560 |             goto bail; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 561 |         } | 
 | 562 |         if (PyList_Append(chunks, chunk)) { | 
| Benjamin Peterson | 336680e | 2008-10-16 21:48:06 +0000 | [diff] [blame] | 563 |             Py_DECREF(chunk); | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 564 |             goto bail; | 
 | 565 |         } | 
 | 566 |         Py_DECREF(chunk); | 
 | 567 |     } | 
 | 568 |  | 
| Ezio Melotti | df8a8f7 | 2011-05-04 14:40:53 +0300 | [diff] [blame] | 569 |     rval = join_list_unicode(chunks); | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 570 |     if (rval == NULL) { | 
 | 571 |         goto bail; | 
 | 572 |     } | 
| Benjamin Peterson | 336680e | 2008-10-16 21:48:06 +0000 | [diff] [blame] | 573 |     Py_CLEAR(chunks); | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 574 |     *next_end_ptr = end; | 
 | 575 |     return rval; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 576 | bail: | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 577 |     *next_end_ptr = -1; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 578 |     Py_XDECREF(chunks); | 
 | 579 |     return NULL; | 
 | 580 | } | 
 | 581 |  | 
 | 582 |  | 
 | 583 | static PyObject * | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 584 | scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 585 | { | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 586 |     /* Read the JSON string from PyUnicode pystr. | 
 | 587 |     end is the index of the first character after the quote. | 
 | 588 |     if strict is zero then literal control characters are allowed | 
 | 589 |     *next_end_ptr is a return-by-reference index of the character | 
 | 590 |         after the end quote | 
 | 591 |  | 
 | 592 |     Return value is a new PyUnicode | 
 | 593 |     */ | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 594 |     PyObject *rval; | 
 | 595 |     Py_ssize_t len = PyUnicode_GET_SIZE(pystr); | 
 | 596 |     Py_ssize_t begin = end - 1; | 
| Brett Cannon | 8e9757e | 2010-05-03 23:43:49 +0000 | [diff] [blame] | 597 |     Py_ssize_t next; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 598 |     const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); | 
 | 599 |     PyObject *chunks = PyList_New(0); | 
 | 600 |     if (chunks == NULL) { | 
 | 601 |         goto bail; | 
 | 602 |     } | 
| Bob Ippolito | d648f64 | 2008-07-19 21:59:50 +0000 | [diff] [blame] | 603 |     if (end < 0 || len <= end) { | 
 | 604 |         PyErr_SetString(PyExc_ValueError, "end is out of bounds"); | 
 | 605 |         goto bail; | 
 | 606 |     } | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 607 |     while (1) { | 
 | 608 |         /* Find the end of the string or the next escape */ | 
 | 609 |         Py_UNICODE c = 0; | 
 | 610 |         PyObject *chunk = NULL; | 
 | 611 |         for (next = end; next < len; next++) { | 
 | 612 |             c = buf[next]; | 
 | 613 |             if (c == '"' || c == '\\') { | 
 | 614 |                 break; | 
 | 615 |             } | 
 | 616 |             else if (strict && c <= 0x1f) { | 
| Bob Ippolito | d648f64 | 2008-07-19 21:59:50 +0000 | [diff] [blame] | 617 |                 raise_errmsg("Invalid control character at", pystr, next); | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 618 |                 goto bail; | 
 | 619 |             } | 
 | 620 |         } | 
 | 621 |         if (!(c == '"' || c == '\\')) { | 
 | 622 |             raise_errmsg("Unterminated string starting at", pystr, begin); | 
 | 623 |             goto bail; | 
 | 624 |         } | 
 | 625 |         /* Pick up this chunk if it's not zero length */ | 
 | 626 |         if (next != end) { | 
 | 627 |             chunk = PyUnicode_FromUnicode(&buf[end], next - end); | 
 | 628 |             if (chunk == NULL) { | 
 | 629 |                 goto bail; | 
 | 630 |             } | 
 | 631 |             if (PyList_Append(chunks, chunk)) { | 
| Benjamin Peterson | 87e6ad2 | 2008-10-16 21:27:54 +0000 | [diff] [blame] | 632 |                 Py_DECREF(chunk); | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 633 |                 goto bail; | 
 | 634 |             } | 
 | 635 |             Py_DECREF(chunk); | 
 | 636 |         } | 
 | 637 |         next++; | 
 | 638 |         if (c == '"') { | 
 | 639 |             end = next; | 
 | 640 |             break; | 
 | 641 |         } | 
 | 642 |         if (next == len) { | 
 | 643 |             raise_errmsg("Unterminated string starting at", pystr, begin); | 
 | 644 |             goto bail; | 
 | 645 |         } | 
 | 646 |         c = buf[next]; | 
 | 647 |         if (c != 'u') { | 
 | 648 |             /* Non-unicode backslash escapes */ | 
 | 649 |             end = next + 1; | 
 | 650 |             switch (c) { | 
 | 651 |                 case '"': break; | 
 | 652 |                 case '\\': break; | 
 | 653 |                 case '/': break; | 
 | 654 |                 case 'b': c = '\b'; break; | 
 | 655 |                 case 'f': c = '\f'; break; | 
 | 656 |                 case 'n': c = '\n'; break; | 
 | 657 |                 case 'r': c = '\r'; break; | 
 | 658 |                 case 't': c = '\t'; break; | 
 | 659 |                 default: c = 0; | 
 | 660 |             } | 
 | 661 |             if (c == 0) { | 
 | 662 |                 raise_errmsg("Invalid \\escape", pystr, end - 2); | 
 | 663 |                 goto bail; | 
 | 664 |             } | 
 | 665 |         } | 
 | 666 |         else { | 
 | 667 |             c = 0; | 
 | 668 |             next++; | 
 | 669 |             end = next + 4; | 
 | 670 |             if (end >= len) { | 
 | 671 |                 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); | 
 | 672 |                 goto bail; | 
 | 673 |             } | 
 | 674 |             /* Decode 4 hex digits */ | 
 | 675 |             for (; next < end; next++) { | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 676 |                 Py_UNICODE digit = buf[next]; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 677 |                 c <<= 4; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 678 |                 switch (digit) { | 
 | 679 |                     case '0': case '1': case '2': case '3': case '4': | 
 | 680 |                     case '5': case '6': case '7': case '8': case '9': | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 681 |                         c |= (digit - '0'); break; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 682 |                     case 'a': case 'b': case 'c': case 'd': case 'e': | 
 | 683 |                     case 'f': | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 684 |                         c |= (digit - 'a' + 10); break; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 685 |                     case 'A': case 'B': case 'C': case 'D': case 'E': | 
 | 686 |                     case 'F': | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 687 |                         c |= (digit - 'A' + 10); break; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 688 |                     default: | 
 | 689 |                         raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); | 
 | 690 |                         goto bail; | 
 | 691 |                 } | 
 | 692 |             } | 
 | 693 | #ifdef Py_UNICODE_WIDE | 
 | 694 |             /* Surrogate pair */ | 
| Serhiy Storchaka | dafda9b | 2013-11-26 21:25:15 +0200 | [diff] [blame] | 695 |             if ((c & 0xfc00) == 0xd800 && end + 6 < len && | 
 | 696 |                 buf[next++] == '\\' && buf[next++] == 'u') { | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 697 |                 Py_UNICODE c2 = 0; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 698 |                 end += 6; | 
 | 699 |                 /* Decode 4 hex digits */ | 
 | 700 |                 for (; next < end; next++) { | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 701 |                     Py_UNICODE digit = buf[next]; | 
| Antoine Pitrou | 22ad245 | 2010-10-09 15:28:59 +0000 | [diff] [blame] | 702 |                     c2 <<= 4; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 703 |                     switch (digit) { | 
 | 704 |                         case '0': case '1': case '2': case '3': case '4': | 
 | 705 |                         case '5': case '6': case '7': case '8': case '9': | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 706 |                             c2 |= (digit - '0'); break; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 707 |                         case 'a': case 'b': case 'c': case 'd': case 'e': | 
 | 708 |                         case 'f': | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 709 |                             c2 |= (digit - 'a' + 10); break; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 710 |                         case 'A': case 'B': case 'C': case 'D': case 'E': | 
 | 711 |                         case 'F': | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 712 |                             c2 |= (digit - 'A' + 10); break; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 713 |                         default: | 
 | 714 |                             raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); | 
 | 715 |                             goto bail; | 
 | 716 |                     } | 
 | 717 |                 } | 
| Serhiy Storchaka | dafda9b | 2013-11-26 21:25:15 +0200 | [diff] [blame] | 718 |                 if ((c2 & 0xfc00) == 0xdc00) | 
 | 719 |                     c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); | 
 | 720 |                 else | 
 | 721 |                     end -= 6; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 722 |             } | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 723 | #endif | 
 | 724 |         } | 
 | 725 |         chunk = PyUnicode_FromUnicode(&c, 1); | 
 | 726 |         if (chunk == NULL) { | 
 | 727 |             goto bail; | 
 | 728 |         } | 
 | 729 |         if (PyList_Append(chunks, chunk)) { | 
| Benjamin Peterson | 336680e | 2008-10-16 21:48:06 +0000 | [diff] [blame] | 730 |             Py_DECREF(chunk); | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 731 |             goto bail; | 
 | 732 |         } | 
 | 733 |         Py_DECREF(chunk); | 
 | 734 |     } | 
 | 735 |  | 
 | 736 |     rval = join_list_unicode(chunks); | 
 | 737 |     if (rval == NULL) { | 
 | 738 |         goto bail; | 
 | 739 |     } | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 740 |     Py_DECREF(chunks); | 
 | 741 |     *next_end_ptr = end; | 
 | 742 |     return rval; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 743 | bail: | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 744 |     *next_end_ptr = -1; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 745 |     Py_XDECREF(chunks); | 
 | 746 |     return NULL; | 
 | 747 | } | 
 | 748 |  | 
 | 749 | PyDoc_STRVAR(pydoc_scanstring, | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 750 |     "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n" | 
 | 751 |     "\n" | 
 | 752 |     "Scan the string s for a JSON string. End is the index of the\n" | 
 | 753 |     "character in s after the quote that started the JSON string.\n" | 
 | 754 |     "Unescapes all valid JSON string escape sequences and raises ValueError\n" | 
 | 755 |     "on attempt to decode an invalid string. If strict is False then literal\n" | 
 | 756 |     "control characters are allowed in the string.\n" | 
 | 757 |     "\n" | 
 | 758 |     "Returns a tuple of the decoded string and the index of the character in s\n" | 
 | 759 |     "after the end quote." | 
 | 760 | ); | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 761 |  | 
 | 762 | static PyObject * | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 763 | py_scanstring(PyObject* self UNUSED, PyObject *args) | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 764 | { | 
 | 765 |     PyObject *pystr; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 766 |     PyObject *rval; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 767 |     Py_ssize_t end; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 768 |     Py_ssize_t next_end = -1; | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 769 |     char *encoding = NULL; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 770 |     int strict = 1; | 
 | 771 |     if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) { | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 772 |         return NULL; | 
 | 773 |     } | 
 | 774 |     if (encoding == NULL) { | 
 | 775 |         encoding = DEFAULT_ENCODING; | 
 | 776 |     } | 
| Gregory P. Smith | dd96db6 | 2008-06-09 04:58:54 +0000 | [diff] [blame] | 777 |     if (PyString_Check(pystr)) { | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 778 |         rval = scanstring_str(pystr, end, encoding, strict, &next_end); | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 779 |     } | 
 | 780 |     else if (PyUnicode_Check(pystr)) { | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 781 |         rval = scanstring_unicode(pystr, end, strict, &next_end); | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 782 |     } | 
 | 783 |     else { | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 784 |         PyErr_Format(PyExc_TypeError, | 
 | 785 |                      "first argument must be a string, not %.80s", | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 786 |                      Py_TYPE(pystr)->tp_name); | 
 | 787 |         return NULL; | 
 | 788 |     } | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 789 |     return _build_rval_index_tuple(rval, next_end); | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 790 | } | 
 | 791 |  | 
 | 792 | PyDoc_STRVAR(pydoc_encode_basestring_ascii, | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 793 |     "encode_basestring_ascii(basestring) -> str\n" | 
 | 794 |     "\n" | 
 | 795 |     "Return an ASCII-only JSON representation of a Python string" | 
 | 796 | ); | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 797 |  | 
 | 798 | static PyObject * | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 799 | py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr) | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 800 | { | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 801 |     /* Return an ASCII-only JSON representation of a Python string */ | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 802 |     /* METH_O */ | 
| Gregory P. Smith | dd96db6 | 2008-06-09 04:58:54 +0000 | [diff] [blame] | 803 |     if (PyString_Check(pystr)) { | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 804 |         return ascii_escape_str(pystr); | 
 | 805 |     } | 
 | 806 |     else if (PyUnicode_Check(pystr)) { | 
 | 807 |         return ascii_escape_unicode(pystr); | 
 | 808 |     } | 
 | 809 |     else { | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 810 |         PyErr_Format(PyExc_TypeError, | 
 | 811 |                      "first argument must be a string, not %.80s", | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 812 |                      Py_TYPE(pystr)->tp_name); | 
 | 813 |         return NULL; | 
 | 814 |     } | 
 | 815 | } | 
 | 816 |  | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 817 | static void | 
 | 818 | scanner_dealloc(PyObject *self) | 
 | 819 | { | 
 | 820 |     /* Deallocate scanner object */ | 
 | 821 |     scanner_clear(self); | 
 | 822 |     Py_TYPE(self)->tp_free(self); | 
 | 823 | } | 
 | 824 |  | 
 | 825 | static int | 
 | 826 | scanner_traverse(PyObject *self, visitproc visit, void *arg) | 
 | 827 | { | 
 | 828 |     PyScannerObject *s; | 
 | 829 |     assert(PyScanner_Check(self)); | 
 | 830 |     s = (PyScannerObject *)self; | 
 | 831 |     Py_VISIT(s->encoding); | 
 | 832 |     Py_VISIT(s->strict); | 
 | 833 |     Py_VISIT(s->object_hook); | 
| Raymond Hettinger | 91852ca | 2009-03-19 19:19:03 +0000 | [diff] [blame] | 834 |     Py_VISIT(s->pairs_hook); | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 835 |     Py_VISIT(s->parse_float); | 
 | 836 |     Py_VISIT(s->parse_int); | 
 | 837 |     Py_VISIT(s->parse_constant); | 
 | 838 |     return 0; | 
 | 839 | } | 
 | 840 |  | 
 | 841 | static int | 
 | 842 | scanner_clear(PyObject *self) | 
 | 843 | { | 
 | 844 |     PyScannerObject *s; | 
 | 845 |     assert(PyScanner_Check(self)); | 
 | 846 |     s = (PyScannerObject *)self; | 
 | 847 |     Py_CLEAR(s->encoding); | 
 | 848 |     Py_CLEAR(s->strict); | 
 | 849 |     Py_CLEAR(s->object_hook); | 
| Raymond Hettinger | 91852ca | 2009-03-19 19:19:03 +0000 | [diff] [blame] | 850 |     Py_CLEAR(s->pairs_hook); | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 851 |     Py_CLEAR(s->parse_float); | 
 | 852 |     Py_CLEAR(s->parse_int); | 
 | 853 |     Py_CLEAR(s->parse_constant); | 
 | 854 |     return 0; | 
 | 855 | } | 
 | 856 |  | 
 | 857 | static PyObject * | 
 | 858 | _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { | 
 | 859 |     /* Read a JSON object from PyString pystr. | 
 | 860 |     idx is the index of the first character after the opening curly brace. | 
 | 861 |     *next_idx_ptr is a return-by-reference index to the first character after | 
 | 862 |         the closing curly brace. | 
 | 863 |  | 
 | 864 |     Returns a new PyObject (usually a dict, but object_hook can change that) | 
 | 865 |     */ | 
 | 866 |     char *str = PyString_AS_STRING(pystr); | 
 | 867 |     Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; | 
| Raymond Hettinger | 91852ca | 2009-03-19 19:19:03 +0000 | [diff] [blame] | 868 |     PyObject *rval; | 
 | 869 |     PyObject *pairs; | 
 | 870 |     PyObject *item; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 871 |     PyObject *key = NULL; | 
 | 872 |     PyObject *val = NULL; | 
 | 873 |     char *encoding = PyString_AS_STRING(s->encoding); | 
 | 874 |     int strict = PyObject_IsTrue(s->strict); | 
 | 875 |     Py_ssize_t next_idx; | 
| Raymond Hettinger | 91852ca | 2009-03-19 19:19:03 +0000 | [diff] [blame] | 876 |  | 
 | 877 |     pairs = PyList_New(0); | 
 | 878 |     if (pairs == NULL) | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 879 |         return NULL; | 
 | 880 |  | 
 | 881 |     /* skip whitespace after { */ | 
 | 882 |     while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | 
 | 883 |  | 
 | 884 |     /* only loop if the object is non-empty */ | 
 | 885 |     if (idx <= end_idx && str[idx] != '}') { | 
 | 886 |         while (idx <= end_idx) { | 
 | 887 |             /* read key */ | 
 | 888 |             if (str[idx] != '"') { | 
 | 889 |                 raise_errmsg("Expecting property name", pystr, idx); | 
 | 890 |                 goto bail; | 
 | 891 |             } | 
 | 892 |             key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); | 
 | 893 |             if (key == NULL) | 
 | 894 |                 goto bail; | 
 | 895 |             idx = next_idx; | 
 | 896 |  | 
 | 897 |             /* skip whitespace between key and : delimiter, read :, skip whitespace */ | 
 | 898 |             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | 
 | 899 |             if (idx > end_idx || str[idx] != ':') { | 
 | 900 |                 raise_errmsg("Expecting : delimiter", pystr, idx); | 
 | 901 |                 goto bail; | 
 | 902 |             } | 
 | 903 |             idx++; | 
 | 904 |             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | 
 | 905 |  | 
 | 906 |             /* read any JSON data type */ | 
 | 907 |             val = scan_once_str(s, pystr, idx, &next_idx); | 
 | 908 |             if (val == NULL) | 
 | 909 |                 goto bail; | 
 | 910 |  | 
| Raymond Hettinger | 91852ca | 2009-03-19 19:19:03 +0000 | [diff] [blame] | 911 |             item = PyTuple_Pack(2, key, val); | 
 | 912 |             if (item == NULL) | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 913 |                 goto bail; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 914 |             Py_CLEAR(key); | 
 | 915 |             Py_CLEAR(val); | 
| Raymond Hettinger | 91852ca | 2009-03-19 19:19:03 +0000 | [diff] [blame] | 916 |             if (PyList_Append(pairs, item) == -1) { | 
 | 917 |                 Py_DECREF(item); | 
 | 918 |                 goto bail; | 
 | 919 |             } | 
 | 920 |             Py_DECREF(item); | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 921 |             idx = next_idx; | 
 | 922 |  | 
 | 923 |             /* skip whitespace before } or , */ | 
 | 924 |             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | 
 | 925 |  | 
 | 926 |             /* bail if the object is closed or we didn't get the , delimiter */ | 
 | 927 |             if (idx > end_idx) break; | 
 | 928 |             if (str[idx] == '}') { | 
 | 929 |                 break; | 
 | 930 |             } | 
 | 931 |             else if (str[idx] != ',') { | 
 | 932 |                 raise_errmsg("Expecting , delimiter", pystr, idx); | 
 | 933 |                 goto bail; | 
 | 934 |             } | 
 | 935 |             idx++; | 
 | 936 |  | 
 | 937 |             /* skip whitespace after , delimiter */ | 
 | 938 |             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | 
 | 939 |         } | 
 | 940 |     } | 
 | 941 |     /* verify that idx < end_idx, str[idx] should be '}' */ | 
 | 942 |     if (idx > end_idx || str[idx] != '}') { | 
 | 943 |         raise_errmsg("Expecting object", pystr, end_idx); | 
 | 944 |         goto bail; | 
 | 945 |     } | 
| Raymond Hettinger | 91852ca | 2009-03-19 19:19:03 +0000 | [diff] [blame] | 946 |  | 
 | 947 |     /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */ | 
 | 948 |     if (s->pairs_hook != Py_None) { | 
 | 949 |         val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL); | 
 | 950 |         if (val == NULL) | 
 | 951 |             goto bail; | 
 | 952 |         Py_DECREF(pairs); | 
 | 953 |         *next_idx_ptr = idx + 1; | 
 | 954 |         return val; | 
 | 955 |     } | 
 | 956 |  | 
 | 957 |     rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),  | 
 | 958 |                                          pairs, NULL); | 
 | 959 |     if (rval == NULL) | 
 | 960 |         goto bail; | 
 | 961 |     Py_CLEAR(pairs); | 
 | 962 |  | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 963 |     /* if object_hook is not None: rval = object_hook(rval) */ | 
 | 964 |     if (s->object_hook != Py_None) { | 
 | 965 |         val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); | 
 | 966 |         if (val == NULL) | 
 | 967 |             goto bail; | 
 | 968 |         Py_DECREF(rval); | 
 | 969 |         rval = val; | 
 | 970 |         val = NULL; | 
 | 971 |     } | 
 | 972 |     *next_idx_ptr = idx + 1; | 
 | 973 |     return rval; | 
 | 974 | bail: | 
 | 975 |     Py_XDECREF(key); | 
 | 976 |     Py_XDECREF(val); | 
| Raymond Hettinger | 91852ca | 2009-03-19 19:19:03 +0000 | [diff] [blame] | 977 |     Py_XDECREF(pairs); | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 978 |     return NULL; | 
 | 979 | } | 
 | 980 |  | 
 | 981 | static PyObject * | 
 | 982 | _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { | 
 | 983 |     /* Read a JSON object from PyUnicode pystr. | 
 | 984 |     idx is the index of the first character after the opening curly brace. | 
 | 985 |     *next_idx_ptr is a return-by-reference index to the first character after | 
 | 986 |         the closing curly brace. | 
 | 987 |  | 
 | 988 |     Returns a new PyObject (usually a dict, but object_hook can change that) | 
 | 989 |     */ | 
 | 990 |     Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); | 
 | 991 |     Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; | 
| Raymond Hettinger | 91852ca | 2009-03-19 19:19:03 +0000 | [diff] [blame] | 992 |     PyObject *rval; | 
 | 993 |     PyObject *pairs; | 
 | 994 |     PyObject *item; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 995 |     PyObject *key = NULL; | 
| Raymond Hettinger | 91852ca | 2009-03-19 19:19:03 +0000 | [diff] [blame] | 996 |     PyObject *val = NULL; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 997 |     int strict = PyObject_IsTrue(s->strict); | 
 | 998 |     Py_ssize_t next_idx; | 
| Raymond Hettinger | 91852ca | 2009-03-19 19:19:03 +0000 | [diff] [blame] | 999 |  | 
 | 1000 |     pairs = PyList_New(0); | 
 | 1001 |     if (pairs == NULL) | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1002 |         return NULL; | 
 | 1003 |  | 
 | 1004 |     /* skip whitespace after { */ | 
 | 1005 |     while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | 
 | 1006 |  | 
 | 1007 |     /* only loop if the object is non-empty */ | 
 | 1008 |     if (idx <= end_idx && str[idx] != '}') { | 
 | 1009 |         while (idx <= end_idx) { | 
 | 1010 |             /* read key */ | 
 | 1011 |             if (str[idx] != '"') { | 
| Antoine Pitrou | d9a5137 | 2012-06-29 01:58:26 +0200 | [diff] [blame] | 1012 |                 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx); | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1013 |                 goto bail; | 
 | 1014 |             } | 
 | 1015 |             key = scanstring_unicode(pystr, idx + 1, strict, &next_idx); | 
 | 1016 |             if (key == NULL) | 
 | 1017 |                 goto bail; | 
 | 1018 |             idx = next_idx; | 
 | 1019 |  | 
 | 1020 |             /* skip whitespace between key and : delimiter, read :, skip whitespace */ | 
 | 1021 |             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | 
 | 1022 |             if (idx > end_idx || str[idx] != ':') { | 
| Antoine Pitrou | d9a5137 | 2012-06-29 01:58:26 +0200 | [diff] [blame] | 1023 |                 raise_errmsg("Expecting ':' delimiter", pystr, idx); | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1024 |                 goto bail; | 
 | 1025 |             } | 
 | 1026 |             idx++; | 
 | 1027 |             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | 
 | 1028 |  | 
 | 1029 |             /* read any JSON term */ | 
 | 1030 |             val = scan_once_unicode(s, pystr, idx, &next_idx); | 
 | 1031 |             if (val == NULL) | 
 | 1032 |                 goto bail; | 
 | 1033 |  | 
| Raymond Hettinger | 91852ca | 2009-03-19 19:19:03 +0000 | [diff] [blame] | 1034 |             item = PyTuple_Pack(2, key, val); | 
 | 1035 |             if (item == NULL) | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1036 |                 goto bail; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1037 |             Py_CLEAR(key); | 
 | 1038 |             Py_CLEAR(val); | 
| Raymond Hettinger | 91852ca | 2009-03-19 19:19:03 +0000 | [diff] [blame] | 1039 |             if (PyList_Append(pairs, item) == -1) { | 
 | 1040 |                 Py_DECREF(item); | 
 | 1041 |                 goto bail; | 
 | 1042 |             } | 
 | 1043 |             Py_DECREF(item); | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1044 |             idx = next_idx; | 
 | 1045 |  | 
 | 1046 |             /* skip whitespace before } or , */ | 
 | 1047 |             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | 
 | 1048 |  | 
 | 1049 |             /* bail if the object is closed or we didn't get the , delimiter */ | 
 | 1050 |             if (idx > end_idx) break; | 
 | 1051 |             if (str[idx] == '}') { | 
 | 1052 |                 break; | 
 | 1053 |             } | 
 | 1054 |             else if (str[idx] != ',') { | 
| Antoine Pitrou | d9a5137 | 2012-06-29 01:58:26 +0200 | [diff] [blame] | 1055 |                 raise_errmsg("Expecting ',' delimiter", pystr, idx); | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1056 |                 goto bail; | 
 | 1057 |             } | 
 | 1058 |             idx++; | 
 | 1059 |  | 
 | 1060 |             /* skip whitespace after , delimiter */ | 
 | 1061 |             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | 
 | 1062 |         } | 
 | 1063 |     } | 
 | 1064 |  | 
 | 1065 |     /* verify that idx < end_idx, str[idx] should be '}' */ | 
 | 1066 |     if (idx > end_idx || str[idx] != '}') { | 
 | 1067 |         raise_errmsg("Expecting object", pystr, end_idx); | 
 | 1068 |         goto bail; | 
 | 1069 |     } | 
 | 1070 |  | 
| Raymond Hettinger | 91852ca | 2009-03-19 19:19:03 +0000 | [diff] [blame] | 1071 |     /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */ | 
 | 1072 |     if (s->pairs_hook != Py_None) { | 
 | 1073 |         val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL); | 
 | 1074 |         if (val == NULL) | 
 | 1075 |             goto bail; | 
 | 1076 |         Py_DECREF(pairs); | 
 | 1077 |         *next_idx_ptr = idx + 1; | 
 | 1078 |         return val; | 
 | 1079 |     } | 
 | 1080 |  | 
 | 1081 |     rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),  | 
 | 1082 |                                          pairs, NULL); | 
 | 1083 |     if (rval == NULL) | 
 | 1084 |         goto bail; | 
 | 1085 |     Py_CLEAR(pairs); | 
 | 1086 |  | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1087 |     /* if object_hook is not None: rval = object_hook(rval) */ | 
 | 1088 |     if (s->object_hook != Py_None) { | 
 | 1089 |         val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); | 
 | 1090 |         if (val == NULL) | 
 | 1091 |             goto bail; | 
 | 1092 |         Py_DECREF(rval); | 
 | 1093 |         rval = val; | 
 | 1094 |         val = NULL; | 
 | 1095 |     } | 
 | 1096 |     *next_idx_ptr = idx + 1; | 
 | 1097 |     return rval; | 
 | 1098 | bail: | 
 | 1099 |     Py_XDECREF(key); | 
 | 1100 |     Py_XDECREF(val); | 
| Raymond Hettinger | 91852ca | 2009-03-19 19:19:03 +0000 | [diff] [blame] | 1101 |     Py_XDECREF(pairs); | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1102 |     return NULL; | 
 | 1103 | } | 
 | 1104 |  | 
 | 1105 | static PyObject * | 
 | 1106 | _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { | 
 | 1107 |     /* Read a JSON array from PyString pystr. | 
 | 1108 |     idx is the index of the first character after the opening brace. | 
 | 1109 |     *next_idx_ptr is a return-by-reference index to the first character after | 
 | 1110 |         the closing brace. | 
 | 1111 |  | 
 | 1112 |     Returns a new PyList | 
 | 1113 |     */ | 
 | 1114 |     char *str = PyString_AS_STRING(pystr); | 
 | 1115 |     Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; | 
 | 1116 |     PyObject *val = NULL; | 
 | 1117 |     PyObject *rval = PyList_New(0); | 
 | 1118 |     Py_ssize_t next_idx; | 
 | 1119 |     if (rval == NULL) | 
 | 1120 |         return NULL; | 
 | 1121 |  | 
 | 1122 |     /* skip whitespace after [ */ | 
 | 1123 |     while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | 
 | 1124 |  | 
 | 1125 |     /* only loop if the array is non-empty */ | 
 | 1126 |     if (idx <= end_idx && str[idx] != ']') { | 
 | 1127 |         while (idx <= end_idx) { | 
 | 1128 |  | 
 | 1129 |             /* read any JSON term and de-tuplefy the (rval, idx) */ | 
 | 1130 |             val = scan_once_str(s, pystr, idx, &next_idx); | 
 | 1131 |             if (val == NULL) | 
 | 1132 |                 goto bail; | 
 | 1133 |  | 
 | 1134 |             if (PyList_Append(rval, val) == -1) | 
 | 1135 |                 goto bail; | 
 | 1136 |  | 
 | 1137 |             Py_CLEAR(val); | 
 | 1138 |             idx = next_idx; | 
 | 1139 |  | 
 | 1140 |             /* skip whitespace between term and , */ | 
 | 1141 |             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | 
 | 1142 |  | 
 | 1143 |             /* bail if the array is closed or we didn't get the , delimiter */ | 
 | 1144 |             if (idx > end_idx) break; | 
 | 1145 |             if (str[idx] == ']') { | 
 | 1146 |                 break; | 
 | 1147 |             } | 
 | 1148 |             else if (str[idx] != ',') { | 
 | 1149 |                 raise_errmsg("Expecting , delimiter", pystr, idx); | 
 | 1150 |                 goto bail; | 
 | 1151 |             } | 
 | 1152 |             idx++; | 
 | 1153 |  | 
 | 1154 |             /* skip whitespace after , */ | 
 | 1155 |             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | 
 | 1156 |         } | 
 | 1157 |     } | 
 | 1158 |  | 
 | 1159 |     /* verify that idx < end_idx, str[idx] should be ']' */ | 
 | 1160 |     if (idx > end_idx || str[idx] != ']') { | 
 | 1161 |         raise_errmsg("Expecting object", pystr, end_idx); | 
 | 1162 |         goto bail; | 
 | 1163 |     } | 
 | 1164 |     *next_idx_ptr = idx + 1; | 
 | 1165 |     return rval; | 
 | 1166 | bail: | 
 | 1167 |     Py_XDECREF(val); | 
 | 1168 |     Py_DECREF(rval); | 
 | 1169 |     return NULL; | 
 | 1170 | } | 
 | 1171 |  | 
 | 1172 | static PyObject * | 
 | 1173 | _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { | 
 | 1174 |     /* Read a JSON array from PyString pystr. | 
 | 1175 |     idx is the index of the first character after the opening brace. | 
 | 1176 |     *next_idx_ptr is a return-by-reference index to the first character after | 
 | 1177 |         the closing brace. | 
 | 1178 |  | 
 | 1179 |     Returns a new PyList | 
 | 1180 |     */ | 
 | 1181 |     Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); | 
 | 1182 |     Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; | 
 | 1183 |     PyObject *val = NULL; | 
 | 1184 |     PyObject *rval = PyList_New(0); | 
 | 1185 |     Py_ssize_t next_idx; | 
 | 1186 |     if (rval == NULL) | 
 | 1187 |         return NULL; | 
 | 1188 |  | 
 | 1189 |     /* skip whitespace after [ */ | 
 | 1190 |     while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | 
 | 1191 |  | 
 | 1192 |     /* only loop if the array is non-empty */ | 
 | 1193 |     if (idx <= end_idx && str[idx] != ']') { | 
 | 1194 |         while (idx <= end_idx) { | 
 | 1195 |  | 
 | 1196 |             /* read any JSON term  */ | 
 | 1197 |             val = scan_once_unicode(s, pystr, idx, &next_idx); | 
 | 1198 |             if (val == NULL) | 
 | 1199 |                 goto bail; | 
 | 1200 |  | 
 | 1201 |             if (PyList_Append(rval, val) == -1) | 
 | 1202 |                 goto bail; | 
 | 1203 |  | 
 | 1204 |             Py_CLEAR(val); | 
 | 1205 |             idx = next_idx; | 
 | 1206 |  | 
 | 1207 |             /* skip whitespace between term and , */ | 
 | 1208 |             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | 
 | 1209 |  | 
 | 1210 |             /* bail if the array is closed or we didn't get the , delimiter */ | 
 | 1211 |             if (idx > end_idx) break; | 
 | 1212 |             if (str[idx] == ']') { | 
 | 1213 |                 break; | 
 | 1214 |             } | 
 | 1215 |             else if (str[idx] != ',') { | 
| Antoine Pitrou | d9a5137 | 2012-06-29 01:58:26 +0200 | [diff] [blame] | 1216 |                 raise_errmsg("Expecting ',' delimiter", pystr, idx); | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1217 |                 goto bail; | 
 | 1218 |             } | 
 | 1219 |             idx++; | 
 | 1220 |  | 
 | 1221 |             /* skip whitespace after , */ | 
 | 1222 |             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | 
 | 1223 |         } | 
 | 1224 |     } | 
 | 1225 |  | 
 | 1226 |     /* verify that idx < end_idx, str[idx] should be ']' */ | 
 | 1227 |     if (idx > end_idx || str[idx] != ']') { | 
 | 1228 |         raise_errmsg("Expecting object", pystr, end_idx); | 
 | 1229 |         goto bail; | 
 | 1230 |     } | 
 | 1231 |     *next_idx_ptr = idx + 1; | 
 | 1232 |     return rval; | 
 | 1233 | bail: | 
 | 1234 |     Py_XDECREF(val); | 
 | 1235 |     Py_DECREF(rval); | 
 | 1236 |     return NULL; | 
 | 1237 | } | 
 | 1238 |  | 
 | 1239 | static PyObject * | 
 | 1240 | _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { | 
 | 1241 |     /* Read a JSON constant from PyString pystr. | 
 | 1242 |     constant is the constant string that was found | 
 | 1243 |         ("NaN", "Infinity", "-Infinity"). | 
 | 1244 |     idx is the index of the first character of the constant | 
 | 1245 |     *next_idx_ptr is a return-by-reference index to the first character after | 
 | 1246 |         the constant. | 
 | 1247 |  | 
 | 1248 |     Returns the result of parse_constant | 
 | 1249 |     */ | 
 | 1250 |     PyObject *cstr; | 
 | 1251 |     PyObject *rval; | 
 | 1252 |     /* constant is "NaN", "Infinity", or "-Infinity" */ | 
 | 1253 |     cstr = PyString_InternFromString(constant); | 
 | 1254 |     if (cstr == NULL) | 
 | 1255 |         return NULL; | 
 | 1256 |  | 
 | 1257 |     /* rval = parse_constant(constant) */ | 
 | 1258 |     rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); | 
 | 1259 |     idx += PyString_GET_SIZE(cstr); | 
 | 1260 |     Py_DECREF(cstr); | 
 | 1261 |     *next_idx_ptr = idx; | 
 | 1262 |     return rval; | 
 | 1263 | } | 
 | 1264 |  | 
 | 1265 | static PyObject * | 
 | 1266 | _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { | 
 | 1267 |     /* Read a JSON number from PyString pystr. | 
 | 1268 |     idx is the index of the first character of the number | 
 | 1269 |     *next_idx_ptr is a return-by-reference index to the first character after | 
 | 1270 |         the number. | 
 | 1271 |  | 
 | 1272 |     Returns a new PyObject representation of that number: | 
 | 1273 |         PyInt, PyLong, or PyFloat. | 
 | 1274 |         May return other types if parse_int or parse_float are set | 
 | 1275 |     */ | 
 | 1276 |     char *str = PyString_AS_STRING(pystr); | 
 | 1277 |     Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; | 
 | 1278 |     Py_ssize_t idx = start; | 
 | 1279 |     int is_float = 0; | 
 | 1280 |     PyObject *rval; | 
 | 1281 |     PyObject *numstr; | 
 | 1282 |  | 
 | 1283 |     /* read a sign if it's there, make sure it's not the end of the string */ | 
 | 1284 |     if (str[idx] == '-') { | 
 | 1285 |         idx++; | 
 | 1286 |         if (idx > end_idx) { | 
 | 1287 |             PyErr_SetNone(PyExc_StopIteration); | 
 | 1288 |             return NULL; | 
 | 1289 |         } | 
 | 1290 |     } | 
 | 1291 |  | 
 | 1292 |     /* read as many integer digits as we find as long as it doesn't start with 0 */ | 
 | 1293 |     if (str[idx] >= '1' && str[idx] <= '9') { | 
 | 1294 |         idx++; | 
 | 1295 |         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; | 
 | 1296 |     } | 
 | 1297 |     /* if it starts with 0 we only expect one integer digit */ | 
 | 1298 |     else if (str[idx] == '0') { | 
 | 1299 |         idx++; | 
 | 1300 |     } | 
 | 1301 |     /* no integer digits, error */ | 
 | 1302 |     else { | 
 | 1303 |         PyErr_SetNone(PyExc_StopIteration); | 
 | 1304 |         return NULL; | 
 | 1305 |     } | 
 | 1306 |  | 
 | 1307 |     /* if the next char is '.' followed by a digit then read all float digits */ | 
 | 1308 |     if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { | 
 | 1309 |         is_float = 1; | 
 | 1310 |         idx += 2; | 
 | 1311 |         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; | 
 | 1312 |     } | 
 | 1313 |  | 
 | 1314 |     /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ | 
 | 1315 |     if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { | 
 | 1316 |  | 
 | 1317 |         /* save the index of the 'e' or 'E' just in case we need to backtrack */ | 
 | 1318 |         Py_ssize_t e_start = idx; | 
 | 1319 |         idx++; | 
 | 1320 |  | 
 | 1321 |         /* read an exponent sign if present */ | 
 | 1322 |         if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; | 
 | 1323 |  | 
 | 1324 |         /* read all digits */ | 
 | 1325 |         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; | 
 | 1326 |  | 
 | 1327 |         /* if we got a digit, then parse as float. if not, backtrack */ | 
 | 1328 |         if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { | 
 | 1329 |             is_float = 1; | 
 | 1330 |         } | 
 | 1331 |         else { | 
 | 1332 |             idx = e_start; | 
 | 1333 |         } | 
 | 1334 |     } | 
 | 1335 |  | 
 | 1336 |     /* copy the section we determined to be a number */ | 
 | 1337 |     numstr = PyString_FromStringAndSize(&str[start], idx - start); | 
 | 1338 |     if (numstr == NULL) | 
 | 1339 |         return NULL; | 
 | 1340 |     if (is_float) { | 
 | 1341 |         /* parse as a float using a fast path if available, otherwise call user defined method */ | 
 | 1342 |         if (s->parse_float != (PyObject *)&PyFloat_Type) { | 
 | 1343 |             rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); | 
 | 1344 |         } | 
 | 1345 |         else { | 
| Eric Smith | 129c97d | 2009-10-28 08:44:37 +0000 | [diff] [blame] | 1346 |             double d = PyOS_string_to_double(PyString_AS_STRING(numstr), | 
 | 1347 |                                              NULL, NULL); | 
 | 1348 |             if (d == -1.0 && PyErr_Occurred()) | 
 | 1349 |                 return NULL; | 
 | 1350 |             rval = PyFloat_FromDouble(d); | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1351 |         } | 
 | 1352 |     } | 
 | 1353 |     else { | 
 | 1354 |         /* parse as an int using a fast path if available, otherwise call user defined method */ | 
 | 1355 |         if (s->parse_int != (PyObject *)&PyInt_Type) { | 
 | 1356 |             rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); | 
 | 1357 |         } | 
 | 1358 |         else { | 
 | 1359 |             rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10); | 
 | 1360 |         } | 
 | 1361 |     } | 
 | 1362 |     Py_DECREF(numstr); | 
 | 1363 |     *next_idx_ptr = idx; | 
 | 1364 |     return rval; | 
 | 1365 | } | 
 | 1366 |  | 
 | 1367 | static PyObject * | 
 | 1368 | _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { | 
 | 1369 |     /* Read a JSON number from PyUnicode pystr. | 
 | 1370 |     idx is the index of the first character of the number | 
 | 1371 |     *next_idx_ptr is a return-by-reference index to the first character after | 
 | 1372 |         the number. | 
 | 1373 |  | 
 | 1374 |     Returns a new PyObject representation of that number: | 
 | 1375 |         PyInt, PyLong, or PyFloat. | 
 | 1376 |         May return other types if parse_int or parse_float are set | 
 | 1377 |     */ | 
 | 1378 |     Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); | 
 | 1379 |     Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; | 
 | 1380 |     Py_ssize_t idx = start; | 
 | 1381 |     int is_float = 0; | 
 | 1382 |     PyObject *rval; | 
 | 1383 |     PyObject *numstr; | 
 | 1384 |  | 
 | 1385 |     /* read a sign if it's there, make sure it's not the end of the string */ | 
 | 1386 |     if (str[idx] == '-') { | 
 | 1387 |         idx++; | 
 | 1388 |         if (idx > end_idx) { | 
 | 1389 |             PyErr_SetNone(PyExc_StopIteration); | 
 | 1390 |             return NULL; | 
 | 1391 |         } | 
 | 1392 |     } | 
 | 1393 |  | 
 | 1394 |     /* read as many integer digits as we find as long as it doesn't start with 0 */ | 
 | 1395 |     if (str[idx] >= '1' && str[idx] <= '9') { | 
 | 1396 |         idx++; | 
 | 1397 |         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; | 
 | 1398 |     } | 
 | 1399 |     /* if it starts with 0 we only expect one integer digit */ | 
 | 1400 |     else if (str[idx] == '0') { | 
 | 1401 |         idx++; | 
 | 1402 |     } | 
 | 1403 |     /* no integer digits, error */ | 
 | 1404 |     else { | 
 | 1405 |         PyErr_SetNone(PyExc_StopIteration); | 
 | 1406 |         return NULL; | 
 | 1407 |     } | 
 | 1408 |  | 
 | 1409 |     /* if the next char is '.' followed by a digit then read all float digits */ | 
 | 1410 |     if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { | 
 | 1411 |         is_float = 1; | 
 | 1412 |         idx += 2; | 
| Bob Ippolito | 76a982a | 2009-03-29 22:33:58 +0000 | [diff] [blame] | 1413 |         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1414 |     } | 
 | 1415 |  | 
 | 1416 |     /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ | 
 | 1417 |     if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { | 
 | 1418 |         Py_ssize_t e_start = idx; | 
 | 1419 |         idx++; | 
 | 1420 |  | 
 | 1421 |         /* read an exponent sign if present */ | 
 | 1422 |         if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; | 
 | 1423 |  | 
 | 1424 |         /* read all digits */ | 
 | 1425 |         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; | 
 | 1426 |  | 
 | 1427 |         /* if we got a digit, then parse as float. if not, backtrack */ | 
 | 1428 |         if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { | 
 | 1429 |             is_float = 1; | 
 | 1430 |         } | 
 | 1431 |         else { | 
 | 1432 |             idx = e_start; | 
 | 1433 |         } | 
 | 1434 |     } | 
 | 1435 |  | 
 | 1436 |     /* copy the section we determined to be a number */ | 
 | 1437 |     numstr = PyUnicode_FromUnicode(&str[start], idx - start); | 
 | 1438 |     if (numstr == NULL) | 
 | 1439 |         return NULL; | 
 | 1440 |     if (is_float) { | 
 | 1441 |         /* parse as a float using a fast path if available, otherwise call user defined method */ | 
 | 1442 |         if (s->parse_float != (PyObject *)&PyFloat_Type) { | 
 | 1443 |             rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); | 
 | 1444 |         } | 
 | 1445 |         else { | 
 | 1446 |             rval = PyFloat_FromString(numstr, NULL); | 
 | 1447 |         } | 
 | 1448 |     } | 
 | 1449 |     else { | 
 | 1450 |         /* no fast path for unicode -> int, just call */ | 
 | 1451 |         rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); | 
 | 1452 |     } | 
 | 1453 |     Py_DECREF(numstr); | 
 | 1454 |     *next_idx_ptr = idx; | 
 | 1455 |     return rval; | 
 | 1456 | } | 
 | 1457 |  | 
 | 1458 | static PyObject * | 
 | 1459 | scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) | 
 | 1460 | { | 
 | 1461 |     /* Read one JSON term (of any kind) from PyString pystr. | 
 | 1462 |     idx is the index of the first character of the term | 
 | 1463 |     *next_idx_ptr is a return-by-reference index to the first character after | 
 | 1464 |         the number. | 
 | 1465 |  | 
 | 1466 |     Returns a new PyObject representation of the term. | 
 | 1467 |     */ | 
| Ezio Melotti | cec4649 | 2011-05-07 17:40:23 +0300 | [diff] [blame] | 1468 |     PyObject *res; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1469 |     char *str = PyString_AS_STRING(pystr); | 
 | 1470 |     Py_ssize_t length = PyString_GET_SIZE(pystr); | 
| Benjamin Peterson | 3e5d87b | 2014-04-14 11:43:09 -0400 | [diff] [blame] | 1471 |     if (idx < 0) { | 
 | 1472 |         PyErr_SetString(PyExc_ValueError, "idx cannot be negative"); | 
 | 1473 |         return NULL; | 
 | 1474 |     } | 
 | 1475 |     if (idx >= length) { | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1476 |         PyErr_SetNone(PyExc_StopIteration); | 
 | 1477 |         return NULL; | 
 | 1478 |     } | 
 | 1479 |     switch (str[idx]) { | 
 | 1480 |         case '"': | 
 | 1481 |             /* string */ | 
 | 1482 |             return scanstring_str(pystr, idx + 1, | 
 | 1483 |                 PyString_AS_STRING(s->encoding), | 
 | 1484 |                 PyObject_IsTrue(s->strict), | 
 | 1485 |                 next_idx_ptr); | 
 | 1486 |         case '{': | 
 | 1487 |             /* object */ | 
| Ezio Melotti | cec4649 | 2011-05-07 17:40:23 +0300 | [diff] [blame] | 1488 |             if (Py_EnterRecursiveCall(" while decoding a JSON object " | 
 | 1489 |                                       "from a byte string")) | 
 | 1490 |                 return NULL; | 
 | 1491 |             res = _parse_object_str(s, pystr, idx + 1, next_idx_ptr); | 
 | 1492 |             Py_LeaveRecursiveCall(); | 
 | 1493 |             return res; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1494 |         case '[': | 
 | 1495 |             /* array */ | 
| Ezio Melotti | cec4649 | 2011-05-07 17:40:23 +0300 | [diff] [blame] | 1496 |             if (Py_EnterRecursiveCall(" while decoding a JSON array " | 
 | 1497 |                                       "from a byte string")) | 
 | 1498 |                 return NULL; | 
 | 1499 |             res = _parse_array_str(s, pystr, idx + 1, next_idx_ptr); | 
 | 1500 |             Py_LeaveRecursiveCall(); | 
 | 1501 |             return res; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1502 |         case 'n': | 
 | 1503 |             /* null */ | 
 | 1504 |             if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { | 
 | 1505 |                 Py_INCREF(Py_None); | 
 | 1506 |                 *next_idx_ptr = idx + 4; | 
 | 1507 |                 return Py_None; | 
 | 1508 |             } | 
 | 1509 |             break; | 
 | 1510 |         case 't': | 
 | 1511 |             /* true */ | 
 | 1512 |             if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { | 
 | 1513 |                 Py_INCREF(Py_True); | 
 | 1514 |                 *next_idx_ptr = idx + 4; | 
 | 1515 |                 return Py_True; | 
 | 1516 |             } | 
 | 1517 |             break; | 
 | 1518 |         case 'f': | 
 | 1519 |             /* false */ | 
 | 1520 |             if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { | 
 | 1521 |                 Py_INCREF(Py_False); | 
 | 1522 |                 *next_idx_ptr = idx + 5; | 
 | 1523 |                 return Py_False; | 
 | 1524 |             } | 
 | 1525 |             break; | 
 | 1526 |         case 'N': | 
 | 1527 |             /* NaN */ | 
 | 1528 |             if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { | 
 | 1529 |                 return _parse_constant(s, "NaN", idx, next_idx_ptr); | 
 | 1530 |             } | 
 | 1531 |             break; | 
 | 1532 |         case 'I': | 
 | 1533 |             /* Infinity */ | 
 | 1534 |             if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { | 
 | 1535 |                 return _parse_constant(s, "Infinity", idx, next_idx_ptr); | 
 | 1536 |             } | 
 | 1537 |             break; | 
 | 1538 |         case '-': | 
 | 1539 |             /* -Infinity */ | 
 | 1540 |             if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { | 
 | 1541 |                 return _parse_constant(s, "-Infinity", idx, next_idx_ptr); | 
 | 1542 |             } | 
 | 1543 |             break; | 
 | 1544 |     } | 
 | 1545 |     /* Didn't find a string, object, array, or named constant. Look for a number. */ | 
 | 1546 |     return _match_number_str(s, pystr, idx, next_idx_ptr); | 
 | 1547 | } | 
 | 1548 |  | 
 | 1549 | static PyObject * | 
 | 1550 | scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) | 
 | 1551 | { | 
 | 1552 |     /* Read one JSON term (of any kind) from PyUnicode pystr. | 
 | 1553 |     idx is the index of the first character of the term | 
 | 1554 |     *next_idx_ptr is a return-by-reference index to the first character after | 
 | 1555 |         the number. | 
 | 1556 |  | 
 | 1557 |     Returns a new PyObject representation of the term. | 
 | 1558 |     */ | 
| Ezio Melotti | cec4649 | 2011-05-07 17:40:23 +0300 | [diff] [blame] | 1559 |     PyObject *res; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1560 |     Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); | 
 | 1561 |     Py_ssize_t length = PyUnicode_GET_SIZE(pystr); | 
| Benjamin Peterson | 3e5d87b | 2014-04-14 11:43:09 -0400 | [diff] [blame] | 1562 |     if (idx < 0) { | 
 | 1563 |         PyErr_SetString(PyExc_ValueError, "idx cannot be negative"); | 
 | 1564 |         return NULL; | 
 | 1565 |     } | 
 | 1566 |     if (idx >= length) { | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1567 |         PyErr_SetNone(PyExc_StopIteration); | 
 | 1568 |         return NULL; | 
 | 1569 |     } | 
 | 1570 |     switch (str[idx]) { | 
 | 1571 |         case '"': | 
 | 1572 |             /* string */ | 
 | 1573 |             return scanstring_unicode(pystr, idx + 1, | 
 | 1574 |                 PyObject_IsTrue(s->strict), | 
 | 1575 |                 next_idx_ptr); | 
 | 1576 |         case '{': | 
 | 1577 |             /* object */ | 
| Ezio Melotti | cec4649 | 2011-05-07 17:40:23 +0300 | [diff] [blame] | 1578 |             if (Py_EnterRecursiveCall(" while decoding a JSON object " | 
 | 1579 |                                       "from a unicode string")) | 
 | 1580 |                 return NULL; | 
 | 1581 |             res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); | 
 | 1582 |             Py_LeaveRecursiveCall(); | 
 | 1583 |             return res; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1584 |         case '[': | 
 | 1585 |             /* array */ | 
| Ezio Melotti | cec4649 | 2011-05-07 17:40:23 +0300 | [diff] [blame] | 1586 |             if (Py_EnterRecursiveCall(" while decoding a JSON array " | 
 | 1587 |                                       "from a unicode string")) | 
 | 1588 |                 return NULL; | 
 | 1589 |             res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); | 
 | 1590 |             Py_LeaveRecursiveCall(); | 
 | 1591 |             return res; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1592 |         case 'n': | 
 | 1593 |             /* null */ | 
 | 1594 |             if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { | 
 | 1595 |                 Py_INCREF(Py_None); | 
 | 1596 |                 *next_idx_ptr = idx + 4; | 
 | 1597 |                 return Py_None; | 
 | 1598 |             } | 
 | 1599 |             break; | 
 | 1600 |         case 't': | 
 | 1601 |             /* true */ | 
 | 1602 |             if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { | 
 | 1603 |                 Py_INCREF(Py_True); | 
 | 1604 |                 *next_idx_ptr = idx + 4; | 
 | 1605 |                 return Py_True; | 
 | 1606 |             } | 
 | 1607 |             break; | 
 | 1608 |         case 'f': | 
 | 1609 |             /* false */ | 
 | 1610 |             if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { | 
 | 1611 |                 Py_INCREF(Py_False); | 
 | 1612 |                 *next_idx_ptr = idx + 5; | 
 | 1613 |                 return Py_False; | 
 | 1614 |             } | 
 | 1615 |             break; | 
 | 1616 |         case 'N': | 
 | 1617 |             /* NaN */ | 
 | 1618 |             if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { | 
 | 1619 |                 return _parse_constant(s, "NaN", idx, next_idx_ptr); | 
 | 1620 |             } | 
 | 1621 |             break; | 
 | 1622 |         case 'I': | 
 | 1623 |             /* Infinity */ | 
 | 1624 |             if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { | 
 | 1625 |                 return _parse_constant(s, "Infinity", idx, next_idx_ptr); | 
 | 1626 |             } | 
 | 1627 |             break; | 
 | 1628 |         case '-': | 
 | 1629 |             /* -Infinity */ | 
 | 1630 |             if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { | 
 | 1631 |                 return _parse_constant(s, "-Infinity", idx, next_idx_ptr); | 
 | 1632 |             } | 
 | 1633 |             break; | 
 | 1634 |     } | 
 | 1635 |     /* Didn't find a string, object, array, or named constant. Look for a number. */ | 
 | 1636 |     return _match_number_unicode(s, pystr, idx, next_idx_ptr); | 
 | 1637 | } | 
 | 1638 |  | 
 | 1639 | static PyObject * | 
 | 1640 | scanner_call(PyObject *self, PyObject *args, PyObject *kwds) | 
 | 1641 | { | 
 | 1642 |     /* Python callable interface to scan_once_{str,unicode} */ | 
 | 1643 |     PyObject *pystr; | 
 | 1644 |     PyObject *rval; | 
 | 1645 |     Py_ssize_t idx; | 
 | 1646 |     Py_ssize_t next_idx = -1; | 
 | 1647 |     static char *kwlist[] = {"string", "idx", NULL}; | 
 | 1648 |     PyScannerObject *s; | 
 | 1649 |     assert(PyScanner_Check(self)); | 
 | 1650 |     s = (PyScannerObject *)self; | 
 | 1651 |     if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) | 
 | 1652 |         return NULL; | 
 | 1653 |  | 
 | 1654 |     if (PyString_Check(pystr)) { | 
 | 1655 |         rval = scan_once_str(s, pystr, idx, &next_idx); | 
 | 1656 |     } | 
 | 1657 |     else if (PyUnicode_Check(pystr)) { | 
 | 1658 |         rval = scan_once_unicode(s, pystr, idx, &next_idx); | 
 | 1659 |     } | 
 | 1660 |     else { | 
 | 1661 |         PyErr_Format(PyExc_TypeError, | 
 | 1662 |                  "first argument must be a string, not %.80s", | 
 | 1663 |                  Py_TYPE(pystr)->tp_name); | 
 | 1664 |         return NULL; | 
 | 1665 |     } | 
 | 1666 |     return _build_rval_index_tuple(rval, next_idx); | 
 | 1667 | } | 
 | 1668 |  | 
 | 1669 | static PyObject * | 
 | 1670 | scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) | 
 | 1671 | { | 
 | 1672 |     PyScannerObject *s; | 
 | 1673 |     s = (PyScannerObject *)type->tp_alloc(type, 0); | 
 | 1674 |     if (s != NULL) { | 
 | 1675 |         s->encoding = NULL; | 
 | 1676 |         s->strict = NULL; | 
 | 1677 |         s->object_hook = NULL; | 
| Raymond Hettinger | 91852ca | 2009-03-19 19:19:03 +0000 | [diff] [blame] | 1678 |         s->pairs_hook = NULL; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1679 |         s->parse_float = NULL; | 
 | 1680 |         s->parse_int = NULL; | 
 | 1681 |         s->parse_constant = NULL; | 
 | 1682 |     } | 
 | 1683 |     return (PyObject *)s; | 
 | 1684 | } | 
 | 1685 |  | 
 | 1686 | static int | 
 | 1687 | scanner_init(PyObject *self, PyObject *args, PyObject *kwds) | 
 | 1688 | { | 
 | 1689 |     /* Initialize Scanner object */ | 
 | 1690 |     PyObject *ctx; | 
 | 1691 |     static char *kwlist[] = {"context", NULL}; | 
 | 1692 |     PyScannerObject *s; | 
 | 1693 |  | 
 | 1694 |     assert(PyScanner_Check(self)); | 
 | 1695 |     s = (PyScannerObject *)self; | 
 | 1696 |  | 
 | 1697 |     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx)) | 
 | 1698 |         return -1; | 
 | 1699 |  | 
 | 1700 |     /* PyString_AS_STRING is used on encoding */ | 
 | 1701 |     s->encoding = PyObject_GetAttrString(ctx, "encoding"); | 
| Antoine Pitrou | 187177f | 2009-12-08 15:40:51 +0000 | [diff] [blame] | 1702 |     if (s->encoding == NULL) | 
 | 1703 |         goto bail; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1704 |     if (s->encoding == Py_None) { | 
 | 1705 |         Py_DECREF(Py_None); | 
 | 1706 |         s->encoding = PyString_InternFromString(DEFAULT_ENCODING); | 
 | 1707 |     } | 
 | 1708 |     else if (PyUnicode_Check(s->encoding)) { | 
 | 1709 |         PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL); | 
 | 1710 |         Py_DECREF(s->encoding); | 
 | 1711 |         s->encoding = tmp; | 
 | 1712 |     } | 
| Amaury Forgeot d'Arc | dee76e6 | 2012-01-13 22:53:25 +0100 | [diff] [blame] | 1713 |     if (s->encoding == NULL) | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1714 |         goto bail; | 
| Amaury Forgeot d'Arc | dee76e6 | 2012-01-13 22:53:25 +0100 | [diff] [blame] | 1715 |     if (!PyString_Check(s->encoding)) { | 
 | 1716 | 	PyErr_Format(PyExc_TypeError, | 
 | 1717 | 		     "encoding must be a string, not %.80s", | 
 | 1718 | 		     Py_TYPE(s->encoding)->tp_name); | 
 | 1719 | 	goto bail; | 
 | 1720 |     } | 
 | 1721 |         | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1722 |  | 
 | 1723 |     /* All of these will fail "gracefully" so we don't need to verify them */ | 
 | 1724 |     s->strict = PyObject_GetAttrString(ctx, "strict"); | 
 | 1725 |     if (s->strict == NULL) | 
 | 1726 |         goto bail; | 
 | 1727 |     s->object_hook = PyObject_GetAttrString(ctx, "object_hook"); | 
 | 1728 |     if (s->object_hook == NULL) | 
 | 1729 |         goto bail; | 
| Raymond Hettinger | 91852ca | 2009-03-19 19:19:03 +0000 | [diff] [blame] | 1730 |     s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook"); | 
| Georg Brandl | d823bdc | 2011-01-02 14:20:16 +0000 | [diff] [blame] | 1731 |     if (s->pairs_hook == NULL) | 
| Raymond Hettinger | 91852ca | 2009-03-19 19:19:03 +0000 | [diff] [blame] | 1732 |         goto bail; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1733 |     s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); | 
 | 1734 |     if (s->parse_float == NULL) | 
 | 1735 |         goto bail; | 
 | 1736 |     s->parse_int = PyObject_GetAttrString(ctx, "parse_int"); | 
 | 1737 |     if (s->parse_int == NULL) | 
 | 1738 |         goto bail; | 
 | 1739 |     s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant"); | 
 | 1740 |     if (s->parse_constant == NULL) | 
 | 1741 |         goto bail; | 
 | 1742 |  | 
 | 1743 |     return 0; | 
 | 1744 |  | 
 | 1745 | bail: | 
 | 1746 |     Py_CLEAR(s->encoding); | 
 | 1747 |     Py_CLEAR(s->strict); | 
 | 1748 |     Py_CLEAR(s->object_hook); | 
| Raymond Hettinger | 91852ca | 2009-03-19 19:19:03 +0000 | [diff] [blame] | 1749 |     Py_CLEAR(s->pairs_hook); | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1750 |     Py_CLEAR(s->parse_float); | 
 | 1751 |     Py_CLEAR(s->parse_int); | 
 | 1752 |     Py_CLEAR(s->parse_constant); | 
 | 1753 |     return -1; | 
 | 1754 | } | 
 | 1755 |  | 
 | 1756 | PyDoc_STRVAR(scanner_doc, "JSON scanner object"); | 
 | 1757 |  | 
 | 1758 | static | 
 | 1759 | PyTypeObject PyScannerType = { | 
 | 1760 |     PyObject_HEAD_INIT(NULL) | 
 | 1761 |     0,                    /* tp_internal */ | 
 | 1762 |     "_json.Scanner",       /* tp_name */ | 
 | 1763 |     sizeof(PyScannerObject), /* tp_basicsize */ | 
 | 1764 |     0,                    /* tp_itemsize */ | 
 | 1765 |     scanner_dealloc, /* tp_dealloc */ | 
 | 1766 |     0,                    /* tp_print */ | 
 | 1767 |     0,                    /* tp_getattr */ | 
 | 1768 |     0,                    /* tp_setattr */ | 
 | 1769 |     0,                    /* tp_compare */ | 
 | 1770 |     0,                    /* tp_repr */ | 
 | 1771 |     0,                    /* tp_as_number */ | 
 | 1772 |     0,                    /* tp_as_sequence */ | 
 | 1773 |     0,                    /* tp_as_mapping */ | 
 | 1774 |     0,                    /* tp_hash */ | 
 | 1775 |     scanner_call,         /* tp_call */ | 
 | 1776 |     0,                    /* tp_str */ | 
 | 1777 |     0,/* PyObject_GenericGetAttr, */                    /* tp_getattro */ | 
 | 1778 |     0,/* PyObject_GenericSetAttr, */                    /* tp_setattro */ | 
 | 1779 |     0,                    /* tp_as_buffer */ | 
 | 1780 |     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */ | 
 | 1781 |     scanner_doc,          /* tp_doc */ | 
 | 1782 |     scanner_traverse,                    /* tp_traverse */ | 
 | 1783 |     scanner_clear,                    /* tp_clear */ | 
 | 1784 |     0,                    /* tp_richcompare */ | 
 | 1785 |     0,                    /* tp_weaklistoffset */ | 
 | 1786 |     0,                    /* tp_iter */ | 
 | 1787 |     0,                    /* tp_iternext */ | 
 | 1788 |     0,                    /* tp_methods */ | 
 | 1789 |     scanner_members,                    /* tp_members */ | 
 | 1790 |     0,                    /* tp_getset */ | 
 | 1791 |     0,                    /* tp_base */ | 
 | 1792 |     0,                    /* tp_dict */ | 
 | 1793 |     0,                    /* tp_descr_get */ | 
 | 1794 |     0,                    /* tp_descr_set */ | 
 | 1795 |     0,                    /* tp_dictoffset */ | 
 | 1796 |     scanner_init,                    /* tp_init */ | 
 | 1797 |     0,/* PyType_GenericAlloc, */        /* tp_alloc */ | 
 | 1798 |     scanner_new,          /* tp_new */ | 
 | 1799 |     0,/* PyObject_GC_Del, */              /* tp_free */ | 
 | 1800 | }; | 
 | 1801 |  | 
 | 1802 | static PyObject * | 
 | 1803 | encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) | 
 | 1804 | { | 
 | 1805 |     PyEncoderObject *s; | 
 | 1806 |     s = (PyEncoderObject *)type->tp_alloc(type, 0); | 
 | 1807 |     if (s != NULL) { | 
 | 1808 |         s->markers = NULL; | 
 | 1809 |         s->defaultfn = NULL; | 
 | 1810 |         s->encoder = NULL; | 
 | 1811 |         s->indent = NULL; | 
 | 1812 |         s->key_separator = NULL; | 
 | 1813 |         s->item_separator = NULL; | 
 | 1814 |         s->sort_keys = NULL; | 
 | 1815 |         s->skipkeys = NULL; | 
 | 1816 |     } | 
 | 1817 |     return (PyObject *)s; | 
 | 1818 | } | 
 | 1819 |  | 
 | 1820 | static int | 
 | 1821 | encoder_init(PyObject *self, PyObject *args, PyObject *kwds) | 
 | 1822 | { | 
 | 1823 |     /* initialize Encoder object */ | 
 | 1824 |     static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL}; | 
 | 1825 |  | 
 | 1826 |     PyEncoderObject *s; | 
| Antoine Pitrou | 187177f | 2009-12-08 15:40:51 +0000 | [diff] [blame] | 1827 |     PyObject *markers, *defaultfn, *encoder, *indent, *key_separator; | 
 | 1828 |     PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1829 |  | 
 | 1830 |     assert(PyEncoder_Check(self)); | 
 | 1831 |     s = (PyEncoderObject *)self; | 
 | 1832 |  | 
 | 1833 |     if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist, | 
| Antoine Pitrou | 187177f | 2009-12-08 15:40:51 +0000 | [diff] [blame] | 1834 |         &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator, | 
 | 1835 |         &sort_keys, &skipkeys, &allow_nan)) | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1836 |         return -1; | 
 | 1837 |  | 
| Antoine Pitrou | 187177f | 2009-12-08 15:40:51 +0000 | [diff] [blame] | 1838 |     s->markers = markers; | 
 | 1839 |     s->defaultfn = defaultfn; | 
 | 1840 |     s->encoder = encoder; | 
 | 1841 |     s->indent = indent; | 
 | 1842 |     s->key_separator = key_separator; | 
 | 1843 |     s->item_separator = item_separator; | 
 | 1844 |     s->sort_keys = sort_keys; | 
 | 1845 |     s->skipkeys = skipkeys; | 
 | 1846 |     s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii); | 
 | 1847 |     s->allow_nan = PyObject_IsTrue(allow_nan); | 
 | 1848 |  | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1849 |     Py_INCREF(s->markers); | 
 | 1850 |     Py_INCREF(s->defaultfn); | 
 | 1851 |     Py_INCREF(s->encoder); | 
 | 1852 |     Py_INCREF(s->indent); | 
 | 1853 |     Py_INCREF(s->key_separator); | 
 | 1854 |     Py_INCREF(s->item_separator); | 
 | 1855 |     Py_INCREF(s->sort_keys); | 
 | 1856 |     Py_INCREF(s->skipkeys); | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1857 |     return 0; | 
 | 1858 | } | 
 | 1859 |  | 
 | 1860 | static PyObject * | 
 | 1861 | encoder_call(PyObject *self, PyObject *args, PyObject *kwds) | 
 | 1862 | { | 
 | 1863 |     /* Python callable interface to encode_listencode_obj */ | 
 | 1864 |     static char *kwlist[] = {"obj", "_current_indent_level", NULL}; | 
 | 1865 |     PyObject *obj; | 
 | 1866 |     PyObject *rval; | 
 | 1867 |     Py_ssize_t indent_level; | 
 | 1868 |     PyEncoderObject *s; | 
 | 1869 |     assert(PyEncoder_Check(self)); | 
 | 1870 |     s = (PyEncoderObject *)self; | 
 | 1871 |     if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, | 
 | 1872 |         &obj, _convertPyInt_AsSsize_t, &indent_level)) | 
 | 1873 |         return NULL; | 
 | 1874 |     rval = PyList_New(0); | 
 | 1875 |     if (rval == NULL) | 
 | 1876 |         return NULL; | 
 | 1877 |     if (encoder_listencode_obj(s, rval, obj, indent_level)) { | 
 | 1878 |         Py_DECREF(rval); | 
 | 1879 |         return NULL; | 
 | 1880 |     } | 
 | 1881 |     return rval; | 
 | 1882 | } | 
 | 1883 |  | 
 | 1884 | static PyObject * | 
 | 1885 | _encoded_const(PyObject *obj) | 
 | 1886 | { | 
 | 1887 |     /* Return the JSON string representation of None, True, False */ | 
 | 1888 |     if (obj == Py_None) { | 
 | 1889 |         static PyObject *s_null = NULL; | 
 | 1890 |         if (s_null == NULL) { | 
 | 1891 |             s_null = PyString_InternFromString("null"); | 
 | 1892 |         } | 
 | 1893 |         Py_INCREF(s_null); | 
 | 1894 |         return s_null; | 
 | 1895 |     } | 
 | 1896 |     else if (obj == Py_True) { | 
 | 1897 |         static PyObject *s_true = NULL; | 
 | 1898 |         if (s_true == NULL) { | 
 | 1899 |             s_true = PyString_InternFromString("true"); | 
 | 1900 |         } | 
 | 1901 |         Py_INCREF(s_true); | 
 | 1902 |         return s_true; | 
 | 1903 |     } | 
 | 1904 |     else if (obj == Py_False) { | 
 | 1905 |         static PyObject *s_false = NULL; | 
 | 1906 |         if (s_false == NULL) { | 
 | 1907 |             s_false = PyString_InternFromString("false"); | 
 | 1908 |         } | 
 | 1909 |         Py_INCREF(s_false); | 
 | 1910 |         return s_false; | 
 | 1911 |     } | 
 | 1912 |     else { | 
 | 1913 |         PyErr_SetString(PyExc_ValueError, "not a const"); | 
 | 1914 |         return NULL; | 
 | 1915 |     } | 
 | 1916 | } | 
 | 1917 |  | 
 | 1918 | static PyObject * | 
 | 1919 | encoder_encode_float(PyEncoderObject *s, PyObject *obj) | 
 | 1920 | { | 
 | 1921 |     /* Return the JSON representation of a PyFloat */ | 
 | 1922 |     double i = PyFloat_AS_DOUBLE(obj); | 
 | 1923 |     if (!Py_IS_FINITE(i)) { | 
 | 1924 |         if (!s->allow_nan) { | 
 | 1925 |             PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant"); | 
 | 1926 |             return NULL; | 
 | 1927 |         } | 
 | 1928 |         if (i > 0) { | 
 | 1929 |             return PyString_FromString("Infinity"); | 
 | 1930 |         } | 
 | 1931 |         else if (i < 0) { | 
 | 1932 |             return PyString_FromString("-Infinity"); | 
 | 1933 |         } | 
 | 1934 |         else { | 
 | 1935 |             return PyString_FromString("NaN"); | 
 | 1936 |         } | 
 | 1937 |     } | 
 | 1938 |     /* Use a better float format here? */ | 
 | 1939 |     return PyObject_Repr(obj); | 
 | 1940 | } | 
 | 1941 |  | 
 | 1942 | static PyObject * | 
 | 1943 | encoder_encode_string(PyEncoderObject *s, PyObject *obj) | 
 | 1944 | { | 
 | 1945 |     /* Return the JSON representation of a string */ | 
 | 1946 |     if (s->fast_encode) | 
 | 1947 |         return py_encode_basestring_ascii(NULL, obj); | 
 | 1948 |     else | 
 | 1949 |         return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL); | 
 | 1950 | } | 
 | 1951 |  | 
 | 1952 | static int | 
 | 1953 | _steal_list_append(PyObject *lst, PyObject *stolen) | 
 | 1954 | { | 
 | 1955 |     /* Append stolen and then decrement its reference count */ | 
 | 1956 |     int rval = PyList_Append(lst, stolen); | 
 | 1957 |     Py_DECREF(stolen); | 
 | 1958 |     return rval; | 
 | 1959 | } | 
 | 1960 |  | 
 | 1961 | static int | 
 | 1962 | encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level) | 
 | 1963 | { | 
 | 1964 |     /* Encode Python object obj to a JSON term, rval is a PyList */ | 
 | 1965 |     PyObject *newobj; | 
 | 1966 |     int rv; | 
 | 1967 |  | 
 | 1968 |     if (obj == Py_None || obj == Py_True || obj == Py_False) { | 
 | 1969 |         PyObject *cstr = _encoded_const(obj); | 
 | 1970 |         if (cstr == NULL) | 
 | 1971 |             return -1; | 
 | 1972 |         return _steal_list_append(rval, cstr); | 
 | 1973 |     } | 
 | 1974 |     else if (PyString_Check(obj) || PyUnicode_Check(obj)) | 
 | 1975 |     { | 
 | 1976 |         PyObject *encoded = encoder_encode_string(s, obj); | 
 | 1977 |         if (encoded == NULL) | 
 | 1978 |             return -1; | 
 | 1979 |         return _steal_list_append(rval, encoded); | 
 | 1980 |     } | 
 | 1981 |     else if (PyInt_Check(obj) || PyLong_Check(obj)) { | 
 | 1982 |         PyObject *encoded = PyObject_Str(obj); | 
 | 1983 |         if (encoded == NULL) | 
 | 1984 |             return -1; | 
 | 1985 |         return _steal_list_append(rval, encoded); | 
 | 1986 |     } | 
 | 1987 |     else if (PyFloat_Check(obj)) { | 
 | 1988 |         PyObject *encoded = encoder_encode_float(s, obj); | 
 | 1989 |         if (encoded == NULL) | 
 | 1990 |             return -1; | 
 | 1991 |         return _steal_list_append(rval, encoded); | 
 | 1992 |     } | 
 | 1993 |     else if (PyList_Check(obj) || PyTuple_Check(obj)) { | 
| Ezio Melotti | c1ec7b5 | 2011-05-11 00:55:35 +0300 | [diff] [blame] | 1994 |         if (Py_EnterRecursiveCall(" while encoding a JSON object")) | 
 | 1995 |             return -1; | 
 | 1996 |         rv = encoder_listencode_list(s, rval, obj, indent_level); | 
 | 1997 |         Py_LeaveRecursiveCall(); | 
 | 1998 |         return rv; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1999 |     } | 
 | 2000 |     else if (PyDict_Check(obj)) { | 
| Ezio Melotti | c1ec7b5 | 2011-05-11 00:55:35 +0300 | [diff] [blame] | 2001 |         if (Py_EnterRecursiveCall(" while encoding a JSON object")) | 
 | 2002 |             return -1; | 
 | 2003 |         rv = encoder_listencode_dict(s, rval, obj, indent_level); | 
 | 2004 |         Py_LeaveRecursiveCall(); | 
 | 2005 |         return rv; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 2006 |     } | 
 | 2007 |     else { | 
 | 2008 |         PyObject *ident = NULL; | 
 | 2009 |         if (s->markers != Py_None) { | 
 | 2010 |             int has_key; | 
 | 2011 |             ident = PyLong_FromVoidPtr(obj); | 
 | 2012 |             if (ident == NULL) | 
 | 2013 |                 return -1; | 
 | 2014 |             has_key = PyDict_Contains(s->markers, ident); | 
 | 2015 |             if (has_key) { | 
 | 2016 |                 if (has_key != -1) | 
 | 2017 |                     PyErr_SetString(PyExc_ValueError, "Circular reference detected"); | 
 | 2018 |                 Py_DECREF(ident); | 
 | 2019 |                 return -1; | 
 | 2020 |             } | 
 | 2021 |             if (PyDict_SetItem(s->markers, ident, obj)) { | 
 | 2022 |                 Py_DECREF(ident); | 
 | 2023 |                 return -1; | 
 | 2024 |             } | 
 | 2025 |         } | 
 | 2026 |         newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); | 
 | 2027 |         if (newobj == NULL) { | 
 | 2028 |             Py_XDECREF(ident); | 
 | 2029 |             return -1; | 
 | 2030 |         } | 
| Ezio Melotti | c1ec7b5 | 2011-05-11 00:55:35 +0300 | [diff] [blame] | 2031 |  | 
 | 2032 |         if (Py_EnterRecursiveCall(" while encoding a JSON object")) | 
 | 2033 |             return -1; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 2034 |         rv = encoder_listencode_obj(s, rval, newobj, indent_level); | 
| Ezio Melotti | c1ec7b5 | 2011-05-11 00:55:35 +0300 | [diff] [blame] | 2035 |         Py_LeaveRecursiveCall(); | 
 | 2036 |  | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 2037 |         Py_DECREF(newobj); | 
 | 2038 |         if (rv) { | 
 | 2039 |             Py_XDECREF(ident); | 
 | 2040 |             return -1; | 
 | 2041 |         } | 
 | 2042 |         if (ident != NULL) { | 
 | 2043 |             if (PyDict_DelItem(s->markers, ident)) { | 
 | 2044 |                 Py_XDECREF(ident); | 
 | 2045 |                 return -1; | 
 | 2046 |             } | 
 | 2047 |             Py_XDECREF(ident); | 
 | 2048 |         } | 
 | 2049 |         return rv; | 
 | 2050 |     } | 
 | 2051 | } | 
 | 2052 |  | 
 | 2053 | static int | 
 | 2054 | encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level) | 
 | 2055 | { | 
 | 2056 |     /* Encode Python dict dct a JSON term, rval is a PyList */ | 
 | 2057 |     static PyObject *open_dict = NULL; | 
 | 2058 |     static PyObject *close_dict = NULL; | 
 | 2059 |     static PyObject *empty_dict = NULL; | 
 | 2060 |     PyObject *kstr = NULL; | 
 | 2061 |     PyObject *ident = NULL; | 
| Raymond Hettinger | cbba8d4 | 2010-10-30 07:29:44 +0000 | [diff] [blame] | 2062 |     PyObject *key = NULL; | 
 | 2063 |     PyObject *value = NULL; | 
 | 2064 |     PyObject *it = NULL; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 2065 |     int skipkeys; | 
 | 2066 |     Py_ssize_t idx; | 
 | 2067 |  | 
 | 2068 |     if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { | 
 | 2069 |         open_dict = PyString_InternFromString("{"); | 
 | 2070 |         close_dict = PyString_InternFromString("}"); | 
 | 2071 |         empty_dict = PyString_InternFromString("{}"); | 
 | 2072 |         if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) | 
 | 2073 |             return -1; | 
 | 2074 |     } | 
| Raymond Hettinger | cbba8d4 | 2010-10-30 07:29:44 +0000 | [diff] [blame] | 2075 |     if (Py_SIZE(dct) == 0) | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 2076 |         return PyList_Append(rval, empty_dict); | 
 | 2077 |  | 
 | 2078 |     if (s->markers != Py_None) { | 
 | 2079 |         int has_key; | 
 | 2080 |         ident = PyLong_FromVoidPtr(dct); | 
 | 2081 |         if (ident == NULL) | 
 | 2082 |             goto bail; | 
 | 2083 |         has_key = PyDict_Contains(s->markers, ident); | 
 | 2084 |         if (has_key) { | 
 | 2085 |             if (has_key != -1) | 
 | 2086 |                 PyErr_SetString(PyExc_ValueError, "Circular reference detected"); | 
 | 2087 |             goto bail; | 
 | 2088 |         } | 
 | 2089 |         if (PyDict_SetItem(s->markers, ident, dct)) { | 
 | 2090 |             goto bail; | 
 | 2091 |         } | 
 | 2092 |     } | 
 | 2093 |  | 
 | 2094 |     if (PyList_Append(rval, open_dict)) | 
 | 2095 |         goto bail; | 
 | 2096 |  | 
 | 2097 |     if (s->indent != Py_None) { | 
 | 2098 |         /* TODO: DOES NOT RUN */ | 
 | 2099 |         indent_level += 1; | 
 | 2100 |         /* | 
 | 2101 |             newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) | 
 | 2102 |             separator = _item_separator + newline_indent | 
 | 2103 |             buf += newline_indent | 
 | 2104 |         */ | 
 | 2105 |     } | 
 | 2106 |  | 
 | 2107 |     /* TODO: C speedup not implemented for sort_keys */ | 
 | 2108 |  | 
| Raymond Hettinger | cbba8d4 | 2010-10-30 07:29:44 +0000 | [diff] [blame] | 2109 |     it = PyObject_GetIter(dct); | 
 | 2110 |     if (it == NULL) | 
 | 2111 |         goto bail; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 2112 |     skipkeys = PyObject_IsTrue(s->skipkeys); | 
 | 2113 |     idx = 0; | 
| Raymond Hettinger | cbba8d4 | 2010-10-30 07:29:44 +0000 | [diff] [blame] | 2114 |     while ((key = PyIter_Next(it)) != NULL) { | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 2115 |         PyObject *encoded; | 
 | 2116 |  | 
 | 2117 |         if (PyString_Check(key) || PyUnicode_Check(key)) { | 
 | 2118 |             Py_INCREF(key); | 
 | 2119 |             kstr = key; | 
 | 2120 |         } | 
 | 2121 |         else if (PyFloat_Check(key)) { | 
 | 2122 |             kstr = encoder_encode_float(s, key); | 
 | 2123 |             if (kstr == NULL) | 
 | 2124 |                 goto bail; | 
 | 2125 |         } | 
 | 2126 |         else if (PyInt_Check(key) || PyLong_Check(key)) { | 
 | 2127 |             kstr = PyObject_Str(key); | 
 | 2128 |             if (kstr == NULL) | 
 | 2129 |                 goto bail; | 
 | 2130 |         } | 
 | 2131 |         else if (key == Py_True || key == Py_False || key == Py_None) { | 
 | 2132 |             kstr = _encoded_const(key); | 
 | 2133 |             if (kstr == NULL) | 
 | 2134 |                 goto bail; | 
 | 2135 |         } | 
 | 2136 |         else if (skipkeys) { | 
| Raymond Hettinger | cbba8d4 | 2010-10-30 07:29:44 +0000 | [diff] [blame] | 2137 |             Py_DECREF(key); | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 2138 |             continue; | 
 | 2139 |         } | 
 | 2140 |         else { | 
 | 2141 |             /* TODO: include repr of key */ | 
| Doug Hellmann | f31db93 | 2010-07-21 12:36:33 +0000 | [diff] [blame] | 2142 |             PyErr_SetString(PyExc_TypeError, "keys must be a string"); | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 2143 |             goto bail; | 
 | 2144 |         } | 
 | 2145 |  | 
 | 2146 |         if (idx) { | 
 | 2147 |             if (PyList_Append(rval, s->item_separator)) | 
 | 2148 |                 goto bail; | 
 | 2149 |         } | 
 | 2150 |  | 
| Raymond Hettinger | cbba8d4 | 2010-10-30 07:29:44 +0000 | [diff] [blame] | 2151 |         value = PyObject_GetItem(dct, key); | 
 | 2152 |         if (value == NULL) | 
 | 2153 |             goto bail; | 
 | 2154 |  | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 2155 |         encoded = encoder_encode_string(s, kstr); | 
 | 2156 |         Py_CLEAR(kstr); | 
 | 2157 |         if (encoded == NULL) | 
 | 2158 |             goto bail; | 
 | 2159 |         if (PyList_Append(rval, encoded)) { | 
 | 2160 |             Py_DECREF(encoded); | 
 | 2161 |             goto bail; | 
 | 2162 |         } | 
 | 2163 |         Py_DECREF(encoded); | 
 | 2164 |         if (PyList_Append(rval, s->key_separator)) | 
 | 2165 |             goto bail; | 
 | 2166 |         if (encoder_listencode_obj(s, rval, value, indent_level)) | 
 | 2167 |             goto bail; | 
 | 2168 |         idx += 1; | 
| Raymond Hettinger | cbba8d4 | 2010-10-30 07:29:44 +0000 | [diff] [blame] | 2169 |         Py_CLEAR(value); | 
 | 2170 |         Py_DECREF(key); | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 2171 |     } | 
| Raymond Hettinger | cbba8d4 | 2010-10-30 07:29:44 +0000 | [diff] [blame] | 2172 |     if (PyErr_Occurred()) | 
 | 2173 |         goto bail; | 
 | 2174 |     Py_CLEAR(it); | 
 | 2175 |  | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 2176 |     if (ident != NULL) { | 
 | 2177 |         if (PyDict_DelItem(s->markers, ident)) | 
 | 2178 |             goto bail; | 
 | 2179 |         Py_CLEAR(ident); | 
 | 2180 |     } | 
 | 2181 |     if (s->indent != Py_None) { | 
 | 2182 |         /* TODO: DOES NOT RUN */ | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 2183 |         /* | 
| Brett Cannon | 8e9757e | 2010-05-03 23:43:49 +0000 | [diff] [blame] | 2184 |             indent_level -= 1; | 
 | 2185 |  | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 2186 |             yield '\n' + (' ' * (_indent * _current_indent_level)) | 
 | 2187 |         */ | 
 | 2188 |     } | 
 | 2189 |     if (PyList_Append(rval, close_dict)) | 
 | 2190 |         goto bail; | 
 | 2191 |     return 0; | 
 | 2192 |  | 
 | 2193 | bail: | 
| Raymond Hettinger | cbba8d4 | 2010-10-30 07:29:44 +0000 | [diff] [blame] | 2194 |     Py_XDECREF(it); | 
 | 2195 |     Py_XDECREF(key); | 
 | 2196 |     Py_XDECREF(value); | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 2197 |     Py_XDECREF(kstr); | 
 | 2198 |     Py_XDECREF(ident); | 
 | 2199 |     return -1; | 
 | 2200 | } | 
 | 2201 |  | 
 | 2202 |  | 
 | 2203 | static int | 
 | 2204 | encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level) | 
 | 2205 | { | 
 | 2206 |     /* Encode Python list seq to a JSON term, rval is a PyList */ | 
 | 2207 |     static PyObject *open_array = NULL; | 
 | 2208 |     static PyObject *close_array = NULL; | 
 | 2209 |     static PyObject *empty_array = NULL; | 
 | 2210 |     PyObject *ident = NULL; | 
 | 2211 |     PyObject *s_fast = NULL; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 2212 |     Py_ssize_t i; | 
 | 2213 |  | 
 | 2214 |     if (open_array == NULL || close_array == NULL || empty_array == NULL) { | 
 | 2215 |         open_array = PyString_InternFromString("["); | 
 | 2216 |         close_array = PyString_InternFromString("]"); | 
 | 2217 |         empty_array = PyString_InternFromString("[]"); | 
 | 2218 |         if (open_array == NULL || close_array == NULL || empty_array == NULL) | 
 | 2219 |             return -1; | 
 | 2220 |     } | 
 | 2221 |     ident = NULL; | 
 | 2222 |     s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence"); | 
 | 2223 |     if (s_fast == NULL) | 
 | 2224 |         return -1; | 
| Antoine Pitrou | e9e35c3 | 2012-11-01 20:07:40 +0100 | [diff] [blame] | 2225 |     if (PySequence_Fast_GET_SIZE(s_fast) == 0) { | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 2226 |         Py_DECREF(s_fast); | 
 | 2227 |         return PyList_Append(rval, empty_array); | 
 | 2228 |     } | 
 | 2229 |  | 
 | 2230 |     if (s->markers != Py_None) { | 
 | 2231 |         int has_key; | 
 | 2232 |         ident = PyLong_FromVoidPtr(seq); | 
 | 2233 |         if (ident == NULL) | 
 | 2234 |             goto bail; | 
 | 2235 |         has_key = PyDict_Contains(s->markers, ident); | 
 | 2236 |         if (has_key) { | 
 | 2237 |             if (has_key != -1) | 
 | 2238 |                 PyErr_SetString(PyExc_ValueError, "Circular reference detected"); | 
 | 2239 |             goto bail; | 
 | 2240 |         } | 
 | 2241 |         if (PyDict_SetItem(s->markers, ident, seq)) { | 
 | 2242 |             goto bail; | 
 | 2243 |         } | 
 | 2244 |     } | 
 | 2245 |  | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 2246 |     if (PyList_Append(rval, open_array)) | 
 | 2247 |         goto bail; | 
 | 2248 |     if (s->indent != Py_None) { | 
 | 2249 |         /* TODO: DOES NOT RUN */ | 
 | 2250 |         indent_level += 1; | 
 | 2251 |         /* | 
 | 2252 |             newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) | 
 | 2253 |             separator = _item_separator + newline_indent | 
 | 2254 |             buf += newline_indent | 
 | 2255 |         */ | 
 | 2256 |     } | 
| Antoine Pitrou | e9e35c3 | 2012-11-01 20:07:40 +0100 | [diff] [blame] | 2257 |     for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) { | 
 | 2258 |         PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i); | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 2259 |         if (i) { | 
 | 2260 |             if (PyList_Append(rval, s->item_separator)) | 
 | 2261 |                 goto bail; | 
 | 2262 |         } | 
 | 2263 |         if (encoder_listencode_obj(s, rval, obj, indent_level)) | 
 | 2264 |             goto bail; | 
 | 2265 |     } | 
 | 2266 |     if (ident != NULL) { | 
 | 2267 |         if (PyDict_DelItem(s->markers, ident)) | 
 | 2268 |             goto bail; | 
 | 2269 |         Py_CLEAR(ident); | 
 | 2270 |     } | 
 | 2271 |     if (s->indent != Py_None) { | 
 | 2272 |         /* TODO: DOES NOT RUN */ | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 2273 |         /* | 
| Brett Cannon | 8e9757e | 2010-05-03 23:43:49 +0000 | [diff] [blame] | 2274 |             indent_level -= 1; | 
 | 2275 |  | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 2276 |             yield '\n' + (' ' * (_indent * _current_indent_level)) | 
 | 2277 |         */ | 
 | 2278 |     } | 
 | 2279 |     if (PyList_Append(rval, close_array)) | 
 | 2280 |         goto bail; | 
 | 2281 |     Py_DECREF(s_fast); | 
 | 2282 |     return 0; | 
 | 2283 |  | 
 | 2284 | bail: | 
 | 2285 |     Py_XDECREF(ident); | 
 | 2286 |     Py_DECREF(s_fast); | 
 | 2287 |     return -1; | 
 | 2288 | } | 
 | 2289 |  | 
 | 2290 | static void | 
 | 2291 | encoder_dealloc(PyObject *self) | 
 | 2292 | { | 
 | 2293 |     /* Deallocate Encoder */ | 
 | 2294 |     encoder_clear(self); | 
 | 2295 |     Py_TYPE(self)->tp_free(self); | 
 | 2296 | } | 
 | 2297 |  | 
 | 2298 | static int | 
 | 2299 | encoder_traverse(PyObject *self, visitproc visit, void *arg) | 
 | 2300 | { | 
 | 2301 |     PyEncoderObject *s; | 
 | 2302 |     assert(PyEncoder_Check(self)); | 
 | 2303 |     s = (PyEncoderObject *)self; | 
 | 2304 |     Py_VISIT(s->markers); | 
 | 2305 |     Py_VISIT(s->defaultfn); | 
 | 2306 |     Py_VISIT(s->encoder); | 
 | 2307 |     Py_VISIT(s->indent); | 
 | 2308 |     Py_VISIT(s->key_separator); | 
 | 2309 |     Py_VISIT(s->item_separator); | 
 | 2310 |     Py_VISIT(s->sort_keys); | 
 | 2311 |     Py_VISIT(s->skipkeys); | 
 | 2312 |     return 0; | 
 | 2313 | } | 
 | 2314 |  | 
 | 2315 | static int | 
 | 2316 | encoder_clear(PyObject *self) | 
 | 2317 | { | 
 | 2318 |     /* Deallocate Encoder */ | 
 | 2319 |     PyEncoderObject *s; | 
 | 2320 |     assert(PyEncoder_Check(self)); | 
 | 2321 |     s = (PyEncoderObject *)self; | 
 | 2322 |     Py_CLEAR(s->markers); | 
 | 2323 |     Py_CLEAR(s->defaultfn); | 
 | 2324 |     Py_CLEAR(s->encoder); | 
 | 2325 |     Py_CLEAR(s->indent); | 
 | 2326 |     Py_CLEAR(s->key_separator); | 
 | 2327 |     Py_CLEAR(s->item_separator); | 
 | 2328 |     Py_CLEAR(s->sort_keys); | 
 | 2329 |     Py_CLEAR(s->skipkeys); | 
 | 2330 |     return 0; | 
 | 2331 | } | 
 | 2332 |  | 
/* Docstring installed in PyEncoderType.tp_doc. */
PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");

/*
 * Type object for "_json.Encoder"; init_json publishes it on the module
 * under the name "make_encoder".
 *
 * Instances are callable (tp_call) and take part in cyclic garbage
 * collection (Py_TPFLAGS_HAVE_GC together with tp_traverse / tp_clear).
 *
 * The initializer is positional, so entries must stay in PyTypeObject slot
 * order.
 *
 * NOTE: init_json assigns PyType_GenericNew to tp_new before calling
 * PyType_Ready, which replaces the encoder_new entry listed below.
 */
static
PyTypeObject PyEncoderType = {
    PyObject_HEAD_INIT(NULL)
    0,                    /* ob_size */
    "_json.Encoder",       /* tp_name */
    sizeof(PyEncoderObject), /* tp_basicsize */
    0,                    /* tp_itemsize */
    encoder_dealloc, /* tp_dealloc */
    0,                    /* tp_print */
    0,                    /* tp_getattr */
    0,                    /* tp_setattr */
    0,                    /* tp_compare */
    0,                    /* tp_repr */
    0,                    /* tp_as_number */
    0,                    /* tp_as_sequence */
    0,                    /* tp_as_mapping */
    0,                    /* tp_hash */
    encoder_call,         /* tp_call */
    0,                    /* tp_str */
    0,                    /* tp_getattro */
    0,                    /* tp_setattro */
    0,                    /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
    encoder_doc,          /* tp_doc */
    encoder_traverse,     /* tp_traverse */
    encoder_clear,        /* tp_clear */
    0,                    /* tp_richcompare */
    0,                    /* tp_weaklistoffset */
    0,                    /* tp_iter */
    0,                    /* tp_iternext */
    0,                    /* tp_methods */
    encoder_members,      /* tp_members */
    0,                    /* tp_getset */
    0,                    /* tp_base */
    0,                    /* tp_dict */
    0,                    /* tp_descr_get */
    0,                    /* tp_descr_set */
    0,                    /* tp_dictoffset */
    encoder_init,         /* tp_init */
    0,                    /* tp_alloc */
    encoder_new,          /* tp_new */
    0,                    /* tp_free */
};
 | 2378 |  | 
/*
 * Module-level function table handed to Py_InitModule3 in init_json.
 * Each entry is {Python-visible name, C implementation, calling convention,
 * docstring}.
 */
static PyMethodDef speedups_methods[] = {
    /* METH_O: called with exactly one object argument. */
    {"encode_basestring_ascii",
        (PyCFunction)py_encode_basestring_ascii,
        METH_O,
        pydoc_encode_basestring_ascii},
    /* METH_VARARGS: called with a tuple of positional arguments. */
    {"scanstring",
        (PyCFunction)py_scanstring,
        METH_VARARGS,
        pydoc_scanstring},
    /* Sentinel entry terminating the table. */
    {NULL, NULL, 0, NULL}
};
 | 2390 |  | 
 | 2391 | PyDoc_STRVAR(module_doc, | 
 | 2392 | "json speedups\n"); | 
 | 2393 |  | 
 | 2394 | void | 
 | 2395 | init_json(void) | 
 | 2396 | { | 
 | 2397 |     PyObject *m; | 
| Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 2398 |     PyScannerType.tp_new = PyType_GenericNew; | 
 | 2399 |     if (PyType_Ready(&PyScannerType) < 0) | 
 | 2400 |         return; | 
 | 2401 |     PyEncoderType.tp_new = PyType_GenericNew; | 
 | 2402 |     if (PyType_Ready(&PyEncoderType) < 0) | 
 | 2403 |         return; | 
 | 2404 |     m = Py_InitModule3("_json", speedups_methods, module_doc); | 
 | 2405 |     Py_INCREF((PyObject*)&PyScannerType); | 
 | 2406 |     PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); | 
 | 2407 |     Py_INCREF((PyObject*)&PyEncoderType); | 
 | 2408 |     PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); | 
| Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 2409 | } |