Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 1 | /* |
| 2 | string_format.h -- implementation of string.format(). |
| 3 | |
| 4 | It uses the Objects/stringlib conventions, so that it can be |
| 5 | compiled for both unicode and string objects. |
| 6 | */ |
| 7 | |
| 8 | |
Eric Smith | 8fd3eba | 2008-02-17 19:48:00 +0000 | [diff] [blame] | 9 | /* Defines for Python 2.6 compatibility */ |
| 10 | #if PY_VERSION_HEX < 0x03000000 |
| 11 | #define PyLong_FromSsize_t _PyLong_FromSsize_t |
| 12 | #endif |
| 13 | |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 14 | /* Defines for more efficiently reallocating the string buffer */ |
| 15 | #define INITIAL_SIZE_INCREMENT 100 |
| 16 | #define SIZE_MULTIPLIER 2 |
| 17 | #define MAX_SIZE_INCREMENT 3200 |
| 18 | |
| 19 | |
| 20 | /************************************************************************/ |
| 21 | /*********** Global data structures and forward declarations *********/ |
| 22 | /************************************************************************/ |
| 23 | |
| 24 | /* |
| 25 | A SubString consists of the characters between two string or |
| 26 | unicode pointers. |
| 27 | */ |
| 28 | typedef struct { |
| 29 | STRINGLIB_CHAR *ptr; |
| 30 | STRINGLIB_CHAR *end; |
| 31 | } SubString; |
| 32 | |
| 33 | |
| 34 | /* forward declaration for recursion */ |
| 35 | static PyObject * |
| 36 | build_string(SubString *input, PyObject *args, PyObject *kwargs, |
Eric Smith | 45c0787 | 2007-09-05 02:02:43 +0000 | [diff] [blame] | 37 | int recursion_depth); |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 38 | |
| 39 | |
| 40 | |
| 41 | /************************************************************************/ |
| 42 | /************************** Utility functions ************************/ |
| 43 | /************************************************************************/ |
| 44 | |
| 45 | /* fill in a SubString from a pointer and length */ |
| 46 | Py_LOCAL_INLINE(void) |
| 47 | SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len) |
| 48 | { |
| 49 | str->ptr = p; |
| 50 | if (p == NULL) |
| 51 | str->end = NULL; |
| 52 | else |
| 53 | str->end = str->ptr + len; |
| 54 | } |
| 55 | |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 56 | /* return a new string. if str->ptr is NULL, return None */ |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 57 | Py_LOCAL_INLINE(PyObject *) |
| 58 | SubString_new_object(SubString *str) |
| 59 | { |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 60 | if (str->ptr == NULL) { |
| 61 | Py_INCREF(Py_None); |
| 62 | return Py_None; |
| 63 | } |
| 64 | return STRINGLIB_NEW(str->ptr, str->end - str->ptr); |
| 65 | } |
| 66 | |
| 67 | /* return a new string. if str->ptr is NULL, return None */ |
| 68 | Py_LOCAL_INLINE(PyObject *) |
| 69 | SubString_new_object_or_empty(SubString *str) |
| 70 | { |
| 71 | if (str->ptr == NULL) { |
| 72 | return STRINGLIB_NEW(NULL, 0); |
| 73 | } |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 74 | return STRINGLIB_NEW(str->ptr, str->end - str->ptr); |
| 75 | } |
| 76 | |
| 77 | /************************************************************************/ |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 78 | /*********** Output string management functions ****************/ |
| 79 | /************************************************************************/ |
| 80 | |
| 81 | typedef struct { |
| 82 | STRINGLIB_CHAR *ptr; |
| 83 | STRINGLIB_CHAR *end; |
| 84 | PyObject *obj; |
| 85 | Py_ssize_t size_increment; |
| 86 | } OutputString; |
| 87 | |
| 88 | /* initialize an OutputString object, reserving size characters */ |
| 89 | static int |
| 90 | output_initialize(OutputString *output, Py_ssize_t size) |
| 91 | { |
| 92 | output->obj = STRINGLIB_NEW(NULL, size); |
| 93 | if (output->obj == NULL) |
| 94 | return 0; |
| 95 | |
| 96 | output->ptr = STRINGLIB_STR(output->obj); |
| 97 | output->end = STRINGLIB_LEN(output->obj) + output->ptr; |
| 98 | output->size_increment = INITIAL_SIZE_INCREMENT; |
| 99 | |
| 100 | return 1; |
| 101 | } |
| 102 | |
| 103 | /* |
| 104 | output_extend reallocates the output string buffer. |
| 105 | It returns a status: 0 for a failed reallocation, |
| 106 | 1 for success. |
| 107 | */ |
| 108 | |
| 109 | static int |
| 110 | output_extend(OutputString *output, Py_ssize_t count) |
| 111 | { |
| 112 | STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj); |
| 113 | Py_ssize_t curlen = output->ptr - startptr; |
| 114 | Py_ssize_t maxlen = curlen + count + output->size_increment; |
| 115 | |
| 116 | if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0) |
| 117 | return 0; |
| 118 | startptr = STRINGLIB_STR(output->obj); |
| 119 | output->ptr = startptr + curlen; |
| 120 | output->end = startptr + maxlen; |
| 121 | if (output->size_increment < MAX_SIZE_INCREMENT) |
| 122 | output->size_increment *= SIZE_MULTIPLIER; |
| 123 | return 1; |
| 124 | } |
| 125 | |
| 126 | /* |
| 127 | output_data dumps characters into our output string |
| 128 | buffer. |
| 129 | |
| 130 | In some cases, it has to reallocate the string. |
| 131 | |
| 132 | It returns a status: 0 for a failed reallocation, |
| 133 | 1 for success. |
| 134 | */ |
| 135 | static int |
| 136 | output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count) |
| 137 | { |
| 138 | if ((count > output->end - output->ptr) && !output_extend(output, count)) |
| 139 | return 0; |
| 140 | memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR)); |
| 141 | output->ptr += count; |
| 142 | return 1; |
| 143 | } |
| 144 | |
| 145 | /************************************************************************/ |
| 146 | /*********** Format string parsing -- integers and identifiers *********/ |
| 147 | /************************************************************************/ |
| 148 | |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 149 | static Py_ssize_t |
| 150 | get_integer(const SubString *str) |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 151 | { |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 152 | Py_ssize_t accumulator = 0; |
| 153 | Py_ssize_t digitval; |
| 154 | Py_ssize_t oldaccumulator; |
| 155 | STRINGLIB_CHAR *p; |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 156 | |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 157 | /* empty string is an error */ |
| 158 | if (str->ptr >= str->end) |
| 159 | return -1; |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 160 | |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 161 | for (p = str->ptr; p < str->end; p++) { |
| 162 | digitval = STRINGLIB_TODECIMAL(*p); |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 163 | if (digitval < 0) |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 164 | return -1; |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 165 | /* |
| 166 | This trick was copied from old Unicode format code. It's cute, |
| 167 | but would really suck on an old machine with a slow divide |
| 168 | implementation. Fortunately, in the normal case we do not |
| 169 | expect too many digits. |
| 170 | */ |
| 171 | oldaccumulator = accumulator; |
| 172 | accumulator *= 10; |
| 173 | if ((accumulator+10)/10 != oldaccumulator+1) { |
| 174 | PyErr_Format(PyExc_ValueError, |
| 175 | "Too many decimal digits in format string"); |
| 176 | return -1; |
| 177 | } |
| 178 | accumulator += digitval; |
| 179 | } |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 180 | return accumulator; |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 181 | } |
| 182 | |
| 183 | /************************************************************************/ |
| 184 | /******** Functions to get field objects and specification strings ******/ |
| 185 | /************************************************************************/ |
| 186 | |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 187 | /* do the equivalent of obj.name */ |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 188 | static PyObject * |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 189 | getattr(PyObject *obj, SubString *name) |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 190 | { |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 191 | PyObject *newobj; |
Eric Smith | 7a6dd29 | 2007-08-27 23:30:47 +0000 | [diff] [blame] | 192 | PyObject *str = SubString_new_object(name); |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 193 | if (str == NULL) |
| 194 | return NULL; |
| 195 | newobj = PyObject_GetAttr(obj, str); |
| 196 | Py_DECREF(str); |
| 197 | return newobj; |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 198 | } |
| 199 | |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 200 | /* do the equivalent of obj[idx], where obj is a sequence */ |
| 201 | static PyObject * |
| 202 | getitem_sequence(PyObject *obj, Py_ssize_t idx) |
| 203 | { |
| 204 | return PySequence_GetItem(obj, idx); |
| 205 | } |
| 206 | |
| 207 | /* do the equivalent of obj[idx], where obj is not a sequence */ |
| 208 | static PyObject * |
| 209 | getitem_idx(PyObject *obj, Py_ssize_t idx) |
| 210 | { |
| 211 | PyObject *newobj; |
Christian Heimes | 217cfd1 | 2007-12-02 14:31:20 +0000 | [diff] [blame] | 212 | PyObject *idx_obj = PyLong_FromSsize_t(idx); |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 213 | if (idx_obj == NULL) |
| 214 | return NULL; |
| 215 | newobj = PyObject_GetItem(obj, idx_obj); |
| 216 | Py_DECREF(idx_obj); |
| 217 | return newobj; |
| 218 | } |
| 219 | |
| 220 | /* do the equivalent of obj[name] */ |
| 221 | static PyObject * |
| 222 | getitem_str(PyObject *obj, SubString *name) |
| 223 | { |
| 224 | PyObject *newobj; |
Eric Smith | 7a6dd29 | 2007-08-27 23:30:47 +0000 | [diff] [blame] | 225 | PyObject *str = SubString_new_object(name); |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 226 | if (str == NULL) |
| 227 | return NULL; |
| 228 | newobj = PyObject_GetItem(obj, str); |
| 229 | Py_DECREF(str); |
| 230 | return newobj; |
| 231 | } |
| 232 | |
| 233 | typedef struct { |
| 234 | /* the entire string we're parsing. we assume that someone else |
| 235 | is managing its lifetime, and that it will exist for the |
| 236 | lifetime of the iterator. can be empty */ |
| 237 | SubString str; |
| 238 | |
| 239 | /* pointer to where we are inside field_name */ |
| 240 | STRINGLIB_CHAR *ptr; |
| 241 | } FieldNameIterator; |
| 242 | |
| 243 | |
| 244 | static int |
| 245 | FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr, |
| 246 | Py_ssize_t len) |
| 247 | { |
| 248 | SubString_init(&self->str, ptr, len); |
| 249 | self->ptr = self->str.ptr; |
| 250 | return 1; |
| 251 | } |
| 252 | |
| 253 | static int |
| 254 | _FieldNameIterator_attr(FieldNameIterator *self, SubString *name) |
| 255 | { |
| 256 | STRINGLIB_CHAR c; |
| 257 | |
| 258 | name->ptr = self->ptr; |
| 259 | |
| 260 | /* return everything until '.' or '[' */ |
| 261 | while (self->ptr < self->str.end) { |
| 262 | switch (c = *self->ptr++) { |
| 263 | case '[': |
| 264 | case '.': |
| 265 | /* backup so that we this character will be seen next time */ |
| 266 | self->ptr--; |
| 267 | break; |
| 268 | default: |
| 269 | continue; |
| 270 | } |
| 271 | break; |
| 272 | } |
| 273 | /* end of string is okay */ |
| 274 | name->end = self->ptr; |
| 275 | return 1; |
| 276 | } |
| 277 | |
| 278 | static int |
| 279 | _FieldNameIterator_item(FieldNameIterator *self, SubString *name) |
| 280 | { |
Eric Smith | 4cb4e4e | 2007-09-03 08:40:29 +0000 | [diff] [blame] | 281 | int bracket_seen = 0; |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 282 | STRINGLIB_CHAR c; |
| 283 | |
| 284 | name->ptr = self->ptr; |
| 285 | |
| 286 | /* return everything until ']' */ |
| 287 | while (self->ptr < self->str.end) { |
| 288 | switch (c = *self->ptr++) { |
| 289 | case ']': |
Eric Smith | 4cb4e4e | 2007-09-03 08:40:29 +0000 | [diff] [blame] | 290 | bracket_seen = 1; |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 291 | break; |
| 292 | default: |
| 293 | continue; |
| 294 | } |
| 295 | break; |
| 296 | } |
Eric Smith | 4cb4e4e | 2007-09-03 08:40:29 +0000 | [diff] [blame] | 297 | /* make sure we ended with a ']' */ |
| 298 | if (!bracket_seen) { |
| 299 | PyErr_SetString(PyExc_ValueError, "Missing ']' in format string"); |
| 300 | return 0; |
| 301 | } |
| 302 | |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 303 | /* end of string is okay */ |
| 304 | /* don't include the ']' */ |
| 305 | name->end = self->ptr-1; |
| 306 | return 1; |
| 307 | } |
| 308 | |
| 309 | /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */ |
| 310 | static int |
| 311 | FieldNameIterator_next(FieldNameIterator *self, int *is_attribute, |
| 312 | Py_ssize_t *name_idx, SubString *name) |
| 313 | { |
| 314 | /* check at end of input */ |
| 315 | if (self->ptr >= self->str.end) |
| 316 | return 1; |
| 317 | |
| 318 | switch (*self->ptr++) { |
| 319 | case '.': |
| 320 | *is_attribute = 1; |
Eric Smith | 4cb4e4e | 2007-09-03 08:40:29 +0000 | [diff] [blame] | 321 | if (_FieldNameIterator_attr(self, name) == 0) |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 322 | return 0; |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 323 | *name_idx = -1; |
| 324 | break; |
| 325 | case '[': |
| 326 | *is_attribute = 0; |
Eric Smith | 4cb4e4e | 2007-09-03 08:40:29 +0000 | [diff] [blame] | 327 | if (_FieldNameIterator_item(self, name) == 0) |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 328 | return 0; |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 329 | *name_idx = get_integer(name); |
| 330 | break; |
| 331 | default: |
| 332 | /* interal error, can't get here */ |
| 333 | assert(0); |
| 334 | return 0; |
| 335 | } |
| 336 | |
| 337 | /* empty string is an error */ |
| 338 | if (name->ptr == name->end) { |
| 339 | PyErr_SetString(PyExc_ValueError, "Empty attribute in format string"); |
| 340 | return 0; |
| 341 | } |
| 342 | |
| 343 | return 2; |
| 344 | } |
| 345 | |
| 346 | |
| 347 | /* input: field_name |
| 348 | output: 'first' points to the part before the first '[' or '.' |
| 349 | 'first_idx' is -1 if 'first' is not an integer, otherwise |
| 350 | it's the value of first converted to an integer |
| 351 | 'rest' is an iterator to return the rest |
| 352 | */ |
| 353 | static int |
| 354 | field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first, |
| 355 | Py_ssize_t *first_idx, FieldNameIterator *rest) |
| 356 | { |
| 357 | STRINGLIB_CHAR c; |
| 358 | STRINGLIB_CHAR *p = ptr; |
| 359 | STRINGLIB_CHAR *end = ptr + len; |
| 360 | |
| 361 | /* find the part up until the first '.' or '[' */ |
| 362 | while (p < end) { |
| 363 | switch (c = *p++) { |
| 364 | case '[': |
| 365 | case '.': |
| 366 | /* backup so that we this character is available to the |
| 367 | "rest" iterator */ |
| 368 | p--; |
| 369 | break; |
| 370 | default: |
| 371 | continue; |
| 372 | } |
| 373 | break; |
| 374 | } |
| 375 | |
| 376 | /* set up the return values */ |
| 377 | SubString_init(first, ptr, p - ptr); |
| 378 | FieldNameIterator_init(rest, p, end - p); |
| 379 | |
| 380 | /* see if "first" is an integer, in which case it's used as an index */ |
| 381 | *first_idx = get_integer(first); |
| 382 | |
| 383 | /* zero length string is an error */ |
| 384 | if (first->ptr >= first->end) { |
| 385 | PyErr_SetString(PyExc_ValueError, "empty field name"); |
| 386 | goto error; |
| 387 | } |
| 388 | |
| 389 | return 1; |
| 390 | error: |
| 391 | return 0; |
| 392 | } |
| 393 | |
| 394 | |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 395 | /* |
| 396 | get_field_object returns the object inside {}, before the |
| 397 | format_spec. It handles getindex and getattr lookups and consumes |
| 398 | the entire input string. |
| 399 | */ |
| 400 | static PyObject * |
| 401 | get_field_object(SubString *input, PyObject *args, PyObject *kwargs) |
| 402 | { |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 403 | PyObject *obj = NULL; |
| 404 | int ok; |
| 405 | int is_attribute; |
| 406 | SubString name; |
| 407 | SubString first; |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 408 | Py_ssize_t index; |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 409 | FieldNameIterator rest; |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 410 | |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 411 | if (!field_name_split(input->ptr, input->end - input->ptr, &first, |
| 412 | &index, &rest)) { |
| 413 | goto error; |
| 414 | } |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 415 | |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 416 | if (index == -1) { |
| 417 | /* look up in kwargs */ |
Eric Smith | 7a6dd29 | 2007-08-27 23:30:47 +0000 | [diff] [blame] | 418 | PyObject *key = SubString_new_object(&first); |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 419 | if (key == NULL) |
| 420 | goto error; |
| 421 | if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) { |
Eric Smith | 1152919 | 2007-09-04 23:04:22 +0000 | [diff] [blame] | 422 | PyErr_SetObject(PyExc_KeyError, key); |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 423 | Py_DECREF(key); |
| 424 | goto error; |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 425 | } |
Neal Norwitz | 8a4eb29 | 2007-08-27 07:24:17 +0000 | [diff] [blame] | 426 | Py_DECREF(key); |
Neal Norwitz | 247b515 | 2007-08-27 03:22:50 +0000 | [diff] [blame] | 427 | Py_INCREF(obj); |
Eric Smith | 0cb431c | 2007-08-28 01:07:27 +0000 | [diff] [blame] | 428 | } |
| 429 | else { |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 430 | /* look up in args */ |
| 431 | obj = PySequence_GetItem(args, index); |
Eric Smith | 1152919 | 2007-09-04 23:04:22 +0000 | [diff] [blame] | 432 | if (obj == NULL) |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 433 | goto error; |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 434 | } |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 435 | |
| 436 | /* iterate over the rest of the field_name */ |
| 437 | while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index, |
| 438 | &name)) == 2) { |
| 439 | PyObject *tmp; |
| 440 | |
| 441 | if (is_attribute) |
| 442 | /* getattr lookup "." */ |
| 443 | tmp = getattr(obj, &name); |
| 444 | else |
| 445 | /* getitem lookup "[]" */ |
| 446 | if (index == -1) |
| 447 | tmp = getitem_str(obj, &name); |
| 448 | else |
| 449 | if (PySequence_Check(obj)) |
| 450 | tmp = getitem_sequence(obj, index); |
| 451 | else |
| 452 | /* not a sequence */ |
| 453 | tmp = getitem_idx(obj, index); |
| 454 | if (tmp == NULL) |
| 455 | goto error; |
| 456 | |
| 457 | /* assign to obj */ |
| 458 | Py_DECREF(obj); |
| 459 | obj = tmp; |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 460 | } |
Eric Smith | 7ade648 | 2007-08-26 22:27:13 +0000 | [diff] [blame] | 461 | /* end of iterator, this is the non-error case */ |
| 462 | if (ok == 1) |
| 463 | return obj; |
| 464 | error: |
| 465 | Py_XDECREF(obj); |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 466 | return NULL; |
| 467 | } |
| 468 | |
| 469 | /************************************************************************/ |
| 470 | /***************** Field rendering functions **************************/ |
| 471 | /************************************************************************/ |
| 472 | |
| 473 | /* |
| 474 | render_field() is the main function in this section. It takes the |
| 475 | field object and field specification string generated by |
| 476 | get_field_and_spec, and renders the field into the output string. |
| 477 | |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 478 | render_field calls fieldobj.__format__(format_spec) method, and |
| 479 | appends to the output. |
| 480 | */ |
| 481 | static int |
| 482 | render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output) |
| 483 | { |
| 484 | int ok = 0; |
Eric Smith | 8fd3eba | 2008-02-17 19:48:00 +0000 | [diff] [blame] | 485 | PyObject *result = NULL; |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 486 | |
Eric Smith | 8fd3eba | 2008-02-17 19:48:00 +0000 | [diff] [blame] | 487 | /* we need to create an object out of the pointers we have */ |
| 488 | PyObject *format_spec_object = SubString_new_object_or_empty(format_spec); |
| 489 | if (format_spec_object == NULL) |
| 490 | goto done; |
| 491 | |
| 492 | result = PyObject_Format(fieldobj, format_spec_object); |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 493 | if (result == NULL) |
| 494 | goto done; |
| 495 | |
Eric Smith | 8a0217c | 2008-02-18 18:07:47 +0000 | [diff] [blame] | 496 | #if PY_VERSION_HEX >= 0x03000000 |
Eric Smith | ecbac8f | 2008-02-24 21:44:34 +0000 | [diff] [blame] | 497 | assert(PyUnicode_Check(result)); |
Eric Smith | 8a0217c | 2008-02-18 18:07:47 +0000 | [diff] [blame] | 498 | #else |
Christian Heimes | 72b710a | 2008-05-26 13:28:38 +0000 | [diff] [blame^] | 499 | assert(PyBytes_Check(result) || PyUnicode_Check(result)); |
Eric Smith | 8a0217c | 2008-02-18 18:07:47 +0000 | [diff] [blame] | 500 | |
| 501 | /* Convert result to our type. We could be str, and result could |
| 502 | be unicode */ |
| 503 | { |
| 504 | PyObject *tmp = STRINGLIB_TOSTR(result); |
| 505 | if (tmp == NULL) |
| 506 | goto done; |
| 507 | Py_DECREF(result); |
| 508 | result = tmp; |
| 509 | } |
| 510 | #endif |
| 511 | |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 512 | ok = output_data(output, |
| 513 | STRINGLIB_STR(result), STRINGLIB_LEN(result)); |
| 514 | done: |
Eric Smith | 8fd3eba | 2008-02-17 19:48:00 +0000 | [diff] [blame] | 515 | Py_DECREF(format_spec_object); |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 516 | Py_XDECREF(result); |
| 517 | return ok; |
| 518 | } |
| 519 | |
| 520 | static int |
| 521 | parse_field(SubString *str, SubString *field_name, SubString *format_spec, |
| 522 | STRINGLIB_CHAR *conversion) |
| 523 | { |
| 524 | STRINGLIB_CHAR c = 0; |
| 525 | |
| 526 | /* initialize these, as they may be empty */ |
| 527 | *conversion = '\0'; |
| 528 | SubString_init(format_spec, NULL, 0); |
| 529 | |
| 530 | /* search for the field name. it's terminated by the end of the |
| 531 | string, or a ':' or '!' */ |
| 532 | field_name->ptr = str->ptr; |
| 533 | while (str->ptr < str->end) { |
| 534 | switch (c = *(str->ptr++)) { |
| 535 | case ':': |
| 536 | case '!': |
| 537 | break; |
| 538 | default: |
| 539 | continue; |
| 540 | } |
| 541 | break; |
| 542 | } |
| 543 | |
| 544 | if (c == '!' || c == ':') { |
| 545 | /* we have a format specifier and/or a conversion */ |
| 546 | /* don't include the last character */ |
| 547 | field_name->end = str->ptr-1; |
| 548 | |
| 549 | /* the format specifier is the rest of the string */ |
| 550 | format_spec->ptr = str->ptr; |
| 551 | format_spec->end = str->end; |
| 552 | |
| 553 | /* see if there's a conversion specifier */ |
| 554 | if (c == '!') { |
| 555 | /* there must be another character present */ |
| 556 | if (format_spec->ptr >= format_spec->end) { |
| 557 | PyErr_SetString(PyExc_ValueError, |
| 558 | "end of format while looking for conversion " |
| 559 | "specifier"); |
| 560 | return 0; |
| 561 | } |
| 562 | *conversion = *(format_spec->ptr++); |
| 563 | |
| 564 | /* if there is another character, it must be a colon */ |
| 565 | if (format_spec->ptr < format_spec->end) { |
| 566 | c = *(format_spec->ptr++); |
| 567 | if (c != ':') { |
| 568 | PyErr_SetString(PyExc_ValueError, |
| 569 | "expected ':' after format specifier"); |
| 570 | return 0; |
| 571 | } |
| 572 | } |
| 573 | } |
| 574 | |
| 575 | return 1; |
| 576 | |
Eric Smith | 0cb431c | 2007-08-28 01:07:27 +0000 | [diff] [blame] | 577 | } |
| 578 | else { |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 579 | /* end of string, there's no format_spec or conversion */ |
| 580 | field_name->end = str->ptr; |
| 581 | return 1; |
| 582 | } |
| 583 | } |
| 584 | |
| 585 | /************************************************************************/ |
| 586 | /******* Output string allocation and escape-to-markup processing ******/ |
| 587 | /************************************************************************/ |
| 588 | |
| 589 | /* MarkupIterator breaks the string into pieces of either literal |
| 590 | text, or things inside {} that need to be marked up. it is |
| 591 | designed to make it easy to wrap a Python iterator around it, for |
| 592 | use with the Formatter class */ |
| 593 | |
| 594 | typedef struct { |
| 595 | SubString str; |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 596 | } MarkupIterator; |
| 597 | |
| 598 | static int |
| 599 | MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len) |
| 600 | { |
| 601 | SubString_init(&self->str, ptr, len); |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 602 | return 1; |
| 603 | } |
| 604 | |
| 605 | /* returns 0 on error, 1 on non-error termination, and 2 if it got a |
| 606 | string (or something to be expanded) */ |
| 607 | static int |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 608 | MarkupIterator_next(MarkupIterator *self, SubString *literal, |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 609 | SubString *field_name, SubString *format_spec, |
| 610 | STRINGLIB_CHAR *conversion, |
| 611 | int *format_spec_needs_expanding) |
| 612 | { |
| 613 | int at_end; |
| 614 | STRINGLIB_CHAR c = 0; |
| 615 | STRINGLIB_CHAR *start; |
| 616 | int count; |
| 617 | Py_ssize_t len; |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 618 | int markup_follows = 0; |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 619 | |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 620 | /* initialize all of the output variables */ |
| 621 | SubString_init(literal, NULL, 0); |
| 622 | SubString_init(field_name, NULL, 0); |
| 623 | SubString_init(format_spec, NULL, 0); |
| 624 | *conversion = '\0'; |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 625 | *format_spec_needs_expanding = 0; |
| 626 | |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 627 | /* No more input, end of iterator. This is the normal exit |
| 628 | path. */ |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 629 | if (self->str.ptr >= self->str.end) |
| 630 | return 1; |
| 631 | |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 632 | start = self->str.ptr; |
| 633 | |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 634 | /* First read any literal text. Read until the end of string, an |
| 635 | escaped '{' or '}', or an unescaped '{'. In order to never |
| 636 | allocate memory and so I can just pass pointers around, if |
| 637 | there's an escaped '{' or '}' then we'll return the literal |
| 638 | including the brace, but no format object. The next time |
| 639 | through, we'll return the rest of the literal, skipping past |
| 640 | the second consecutive brace. */ |
| 641 | while (self->str.ptr < self->str.end) { |
| 642 | switch (c = *(self->str.ptr++)) { |
| 643 | case '{': |
| 644 | case '}': |
| 645 | markup_follows = 1; |
| 646 | break; |
| 647 | default: |
| 648 | continue; |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 649 | } |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 650 | break; |
Eric Smith | 0cb431c | 2007-08-28 01:07:27 +0000 | [diff] [blame] | 651 | } |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 652 | |
| 653 | at_end = self->str.ptr >= self->str.end; |
| 654 | len = self->str.ptr - start; |
| 655 | |
| 656 | if ((c == '}') && (at_end || (c != *self->str.ptr))) { |
| 657 | PyErr_SetString(PyExc_ValueError, "Single '}' encountered " |
| 658 | "in format string"); |
| 659 | return 0; |
| 660 | } |
| 661 | if (at_end && c == '{') { |
| 662 | PyErr_SetString(PyExc_ValueError, "Single '{' encountered " |
| 663 | "in format string"); |
| 664 | return 0; |
| 665 | } |
| 666 | if (!at_end) { |
| 667 | if (c == *self->str.ptr) { |
| 668 | /* escaped } or {, skip it in the input. there is no |
| 669 | markup object following us, just this literal text */ |
| 670 | self->str.ptr++; |
| 671 | markup_follows = 0; |
| 672 | } |
| 673 | else |
| 674 | len--; |
| 675 | } |
| 676 | |
| 677 | /* record the literal text */ |
| 678 | literal->ptr = start; |
| 679 | literal->end = start + len; |
| 680 | |
| 681 | if (!markup_follows) |
| 682 | return 2; |
| 683 | |
| 684 | /* this is markup, find the end of the string by counting nested |
| 685 | braces. note that this prohibits escaped braces, so that |
| 686 | format_specs cannot have braces in them. */ |
| 687 | count = 1; |
| 688 | |
| 689 | start = self->str.ptr; |
| 690 | |
| 691 | /* we know we can't have a zero length string, so don't worry |
| 692 | about that case */ |
| 693 | while (self->str.ptr < self->str.end) { |
| 694 | switch (c = *(self->str.ptr++)) { |
| 695 | case '{': |
| 696 | /* the format spec needs to be recursively expanded. |
| 697 | this is an optimization, and not strictly needed */ |
| 698 | *format_spec_needs_expanding = 1; |
| 699 | count++; |
| 700 | break; |
| 701 | case '}': |
| 702 | count--; |
| 703 | if (count <= 0) { |
| 704 | /* we're done. parse and get out */ |
| 705 | SubString s; |
| 706 | |
| 707 | SubString_init(&s, start, self->str.ptr - 1 - start); |
| 708 | if (parse_field(&s, field_name, format_spec, conversion) == 0) |
| 709 | return 0; |
| 710 | |
| 711 | /* a zero length field_name is an error */ |
| 712 | if (field_name->ptr == field_name->end) { |
| 713 | PyErr_SetString(PyExc_ValueError, "zero length field name " |
| 714 | "in format"); |
| 715 | return 0; |
| 716 | } |
| 717 | |
| 718 | /* success */ |
| 719 | return 2; |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 720 | } |
| 721 | break; |
| 722 | } |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 723 | } |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 724 | |
| 725 | /* end of string while searching for matching '}' */ |
| 726 | PyErr_SetString(PyExc_ValueError, "unmatched '{' in format"); |
| 727 | return 0; |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 728 | } |
| 729 | |
| 730 | |
| 731 | /* do the !r or !s conversion on obj */ |
| 732 | static PyObject * |
| 733 | do_conversion(PyObject *obj, STRINGLIB_CHAR conversion) |
| 734 | { |
| 735 | /* XXX in pre-3.0, do we need to convert this to unicode, since it |
| 736 | might have returned a string? */ |
| 737 | switch (conversion) { |
| 738 | case 'r': |
| 739 | return PyObject_Repr(obj); |
| 740 | case 's': |
Eric Smith | 8fd3eba | 2008-02-17 19:48:00 +0000 | [diff] [blame] | 741 | return STRINGLIB_TOSTR(obj); |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 742 | default: |
Martin v. Löwis | 5a6f458 | 2008-04-07 03:22:07 +0000 | [diff] [blame] | 743 | if (conversion > 32 && conversion < 127) { |
| 744 | /* It's the ASCII subrange; casting to char is safe |
| 745 | (assuming the execution character set is an ASCII |
| 746 | superset). */ |
| 747 | PyErr_Format(PyExc_ValueError, |
| 748 | "Unknown conversion specifier %c", |
| 749 | (char)conversion); |
| 750 | } else |
| 751 | PyErr_Format(PyExc_ValueError, |
| 752 | "Unknown conversion specifier \\x%x", |
| 753 | (unsigned int)conversion); |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 754 | return NULL; |
| 755 | } |
| 756 | } |
| 757 | |
| 758 | /* given: |
| 759 | |
| 760 | {field_name!conversion:format_spec} |
| 761 | |
| 762 | compute the result and write it to output. |
| 763 | format_spec_needs_expanding is an optimization. if it's false, |
| 764 | just output the string directly, otherwise recursively expand the |
| 765 | format_spec string. */ |
| 766 | |
| 767 | static int |
| 768 | output_markup(SubString *field_name, SubString *format_spec, |
| 769 | int format_spec_needs_expanding, STRINGLIB_CHAR conversion, |
| 770 | OutputString *output, PyObject *args, PyObject *kwargs, |
Eric Smith | 45c0787 | 2007-09-05 02:02:43 +0000 | [diff] [blame] | 771 | int recursion_depth) |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 772 | { |
| 773 | PyObject *tmp = NULL; |
| 774 | PyObject *fieldobj = NULL; |
| 775 | SubString expanded_format_spec; |
| 776 | SubString *actual_format_spec; |
| 777 | int result = 0; |
| 778 | |
| 779 | /* convert field_name to an object */ |
| 780 | fieldobj = get_field_object(field_name, args, kwargs); |
| 781 | if (fieldobj == NULL) |
| 782 | goto done; |
| 783 | |
| 784 | if (conversion != '\0') { |
| 785 | tmp = do_conversion(fieldobj, conversion); |
| 786 | if (tmp == NULL) |
| 787 | goto done; |
| 788 | |
| 789 | /* do the assignment, transferring ownership: fieldobj = tmp */ |
| 790 | Py_DECREF(fieldobj); |
| 791 | fieldobj = tmp; |
| 792 | tmp = NULL; |
| 793 | } |
| 794 | |
| 795 | /* if needed, recurively compute the format_spec */ |
| 796 | if (format_spec_needs_expanding) { |
Eric Smith | 45c0787 | 2007-09-05 02:02:43 +0000 | [diff] [blame] | 797 | tmp = build_string(format_spec, args, kwargs, recursion_depth-1); |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 798 | if (tmp == NULL) |
| 799 | goto done; |
| 800 | |
| 801 | /* note that in the case we're expanding the format string, |
| 802 | tmp must be kept around until after the call to |
| 803 | render_field. */ |
| 804 | SubString_init(&expanded_format_spec, |
| 805 | STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp)); |
| 806 | actual_format_spec = &expanded_format_spec; |
Eric Smith | 0cb431c | 2007-08-28 01:07:27 +0000 | [diff] [blame] | 807 | } |
| 808 | else |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 809 | actual_format_spec = format_spec; |
| 810 | |
| 811 | if (render_field(fieldobj, actual_format_spec, output) == 0) |
| 812 | goto done; |
| 813 | |
| 814 | result = 1; |
| 815 | |
| 816 | done: |
| 817 | Py_XDECREF(fieldobj); |
| 818 | Py_XDECREF(tmp); |
| 819 | |
| 820 | return result; |
| 821 | } |
| 822 | |
| 823 | /* |
Eric Smith | 8fd3eba | 2008-02-17 19:48:00 +0000 | [diff] [blame] | 824 | do_markup is the top-level loop for the format() method. It |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 825 | searches through the format string for escapes to markup codes, and |
| 826 | calls other functions to move non-markup text to the output, |
| 827 | and to perform the markup to the output. |
| 828 | */ |
| 829 | static int |
| 830 | do_markup(SubString *input, PyObject *args, PyObject *kwargs, |
Eric Smith | 45c0787 | 2007-09-05 02:02:43 +0000 | [diff] [blame] | 831 | OutputString *output, int recursion_depth) |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 832 | { |
| 833 | MarkupIterator iter; |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 834 | int format_spec_needs_expanding; |
| 835 | int result; |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 836 | SubString literal; |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 837 | SubString field_name; |
| 838 | SubString format_spec; |
| 839 | STRINGLIB_CHAR conversion; |
| 840 | |
| 841 | MarkupIterator_init(&iter, input->ptr, input->end - input->ptr); |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 842 | while ((result = MarkupIterator_next(&iter, &literal, &field_name, |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 843 | &format_spec, &conversion, |
| 844 | &format_spec_needs_expanding)) == 2) { |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 845 | if (!output_data(output, literal.ptr, literal.end - literal.ptr)) |
| 846 | return 0; |
| 847 | if (field_name.ptr != field_name.end) |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 848 | if (!output_markup(&field_name, &format_spec, |
| 849 | format_spec_needs_expanding, conversion, output, |
Eric Smith | 45c0787 | 2007-09-05 02:02:43 +0000 | [diff] [blame] | 850 | args, kwargs, recursion_depth)) |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 851 | return 0; |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 852 | } |
| 853 | return result; |
| 854 | } |
| 855 | |
| 856 | |
| 857 | /* |
| 858 | build_string allocates the output string and then |
| 859 | calls do_markup to do the heavy lifting. |
| 860 | */ |
| 861 | static PyObject * |
| 862 | build_string(SubString *input, PyObject *args, PyObject *kwargs, |
Eric Smith | 45c0787 | 2007-09-05 02:02:43 +0000 | [diff] [blame] | 863 | int recursion_depth) |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 864 | { |
| 865 | OutputString output; |
| 866 | PyObject *result = NULL; |
| 867 | Py_ssize_t count; |
| 868 | |
| 869 | output.obj = NULL; /* needed so cleanup code always works */ |
| 870 | |
| 871 | /* check the recursion level */ |
Eric Smith | 45c0787 | 2007-09-05 02:02:43 +0000 | [diff] [blame] | 872 | if (recursion_depth <= 0) { |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 873 | PyErr_SetString(PyExc_ValueError, |
| 874 | "Max string recursion exceeded"); |
| 875 | goto done; |
| 876 | } |
| 877 | |
| 878 | /* initial size is the length of the format string, plus the size |
| 879 | increment. seems like a reasonable default */ |
| 880 | if (!output_initialize(&output, |
| 881 | input->end - input->ptr + |
| 882 | INITIAL_SIZE_INCREMENT)) |
| 883 | goto done; |
| 884 | |
Eric Smith | 45c0787 | 2007-09-05 02:02:43 +0000 | [diff] [blame] | 885 | if (!do_markup(input, args, kwargs, &output, recursion_depth)) { |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 886 | goto done; |
| 887 | } |
| 888 | |
| 889 | count = output.ptr - STRINGLIB_STR(output.obj); |
| 890 | if (STRINGLIB_RESIZE(&output.obj, count) < 0) { |
| 891 | goto done; |
| 892 | } |
| 893 | |
| 894 | /* transfer ownership to result */ |
| 895 | result = output.obj; |
| 896 | output.obj = NULL; |
| 897 | |
| 898 | done: |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 899 | Py_XDECREF(output.obj); |
| 900 | return result; |
| 901 | } |
| 902 | |
| 903 | /************************************************************************/ |
| 904 | /*********** main routine ***********************************************/ |
| 905 | /************************************************************************/ |
| 906 | |
| 907 | /* this is the main entry point */ |
| 908 | static PyObject * |
| 909 | do_string_format(PyObject *self, PyObject *args, PyObject *kwargs) |
| 910 | { |
| 911 | SubString input; |
| 912 | |
| 913 | /* PEP 3101 says only 2 levels, so that |
| 914 | "{0:{1}}".format('abc', 's') # works |
| 915 | "{0:{1:{2}}}".format('abc', 's', '') # fails |
| 916 | */ |
Eric Smith | 45c0787 | 2007-09-05 02:02:43 +0000 | [diff] [blame] | 917 | int recursion_depth = 2; |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 918 | |
| 919 | SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self)); |
Eric Smith | 45c0787 | 2007-09-05 02:02:43 +0000 | [diff] [blame] | 920 | return build_string(&input, args, kwargs, recursion_depth); |
Eric Smith | 8c66326 | 2007-08-25 02:26:07 +0000 | [diff] [blame] | 921 | } |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 922 | |
| 923 | |
| 924 | |
| 925 | /************************************************************************/ |
| 926 | /*********** formatteriterator ******************************************/ |
| 927 | /************************************************************************/ |
| 928 | |
| 929 | /* This is used to implement string.Formatter.vparse(). It exists so |
| 930 | Formatter can share code with the built in unicode.format() method. |
| 931 | It's really just a wrapper around MarkupIterator that is callable |
| 932 | from Python. */ |
| 933 | |
| 934 | typedef struct { |
| 935 | PyObject_HEAD |
| 936 | |
Eric Smith | 8fd3eba | 2008-02-17 19:48:00 +0000 | [diff] [blame] | 937 | STRINGLIB_OBJECT *str; |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 938 | |
| 939 | MarkupIterator it_markup; |
| 940 | } formatteriterobject; |
| 941 | |
| 942 | static void |
| 943 | formatteriter_dealloc(formatteriterobject *it) |
| 944 | { |
| 945 | Py_XDECREF(it->str); |
| 946 | PyObject_FREE(it); |
| 947 | } |
| 948 | |
| 949 | /* returns a tuple: |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 950 | (literal, field_name, format_spec, conversion) |
| 951 | |
| 952 | literal is any literal text to output. might be zero length |
| 953 | field_name is the string before the ':'. might be None |
| 954 | format_spec is the string after the ':'. mibht be None |
| 955 | conversion is either None, or the string after the '!' |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 956 | */ |
| 957 | static PyObject * |
| 958 | formatteriter_next(formatteriterobject *it) |
| 959 | { |
| 960 | SubString literal; |
| 961 | SubString field_name; |
| 962 | SubString format_spec; |
Eric Smith | 8fd3eba | 2008-02-17 19:48:00 +0000 | [diff] [blame] | 963 | STRINGLIB_CHAR conversion; |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 964 | int format_spec_needs_expanding; |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 965 | int result = MarkupIterator_next(&it->it_markup, &literal, &field_name, |
| 966 | &format_spec, &conversion, |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 967 | &format_spec_needs_expanding); |
| 968 | |
| 969 | /* all of the SubString objects point into it->str, so no |
| 970 | memory management needs to be done on them */ |
| 971 | assert(0 <= result && result <= 2); |
Eric Smith | 0cb431c | 2007-08-28 01:07:27 +0000 | [diff] [blame] | 972 | if (result == 0 || result == 1) |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 973 | /* if 0, error has already been set, if 1, iterator is empty */ |
| 974 | return NULL; |
Eric Smith | 0cb431c | 2007-08-28 01:07:27 +0000 | [diff] [blame] | 975 | else { |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 976 | PyObject *literal_str = NULL; |
| 977 | PyObject *field_name_str = NULL; |
| 978 | PyObject *format_spec_str = NULL; |
| 979 | PyObject *conversion_str = NULL; |
| 980 | PyObject *tuple = NULL; |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 981 | int has_field = field_name.ptr != field_name.end; |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 982 | |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 983 | literal_str = SubString_new_object(&literal); |
| 984 | if (literal_str == NULL) |
| 985 | goto done; |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 986 | |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 987 | field_name_str = SubString_new_object(&field_name); |
| 988 | if (field_name_str == NULL) |
| 989 | goto done; |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 990 | |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 991 | /* if field_name is non-zero length, return a string for |
| 992 | format_spec (even if zero length), else return None */ |
| 993 | format_spec_str = (has_field ? |
| 994 | SubString_new_object_or_empty : |
| 995 | SubString_new_object)(&format_spec); |
| 996 | if (format_spec_str == NULL) |
| 997 | goto done; |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 998 | |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 999 | /* if the conversion is not specified, return a None, |
| 1000 | otherwise create a one length string with the conversion |
| 1001 | character */ |
| 1002 | if (conversion == '\0') { |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 1003 | conversion_str = Py_None; |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 1004 | Py_INCREF(conversion_str); |
| 1005 | } |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 1006 | else |
Eric Smith | 8fd3eba | 2008-02-17 19:48:00 +0000 | [diff] [blame] | 1007 | conversion_str = STRINGLIB_NEW(&conversion, 1); |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 1008 | if (conversion_str == NULL) |
| 1009 | goto done; |
| 1010 | |
Eric Smith | 9e7c8da | 2007-08-28 11:15:20 +0000 | [diff] [blame] | 1011 | tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str, |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 1012 | conversion_str); |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 1013 | done: |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 1014 | Py_XDECREF(literal_str); |
| 1015 | Py_XDECREF(field_name_str); |
| 1016 | Py_XDECREF(format_spec_str); |
| 1017 | Py_XDECREF(conversion_str); |
| 1018 | return tuple; |
| 1019 | } |
| 1020 | } |
| 1021 | |
| 1022 | static PyMethodDef formatteriter_methods[] = { |
| 1023 | {NULL, NULL} /* sentinel */ |
| 1024 | }; |
| 1025 | |
Eric Smith | 8fd3eba | 2008-02-17 19:48:00 +0000 | [diff] [blame] | 1026 | static PyTypeObject PyFormatterIter_Type = { |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 1027 | PyVarObject_HEAD_INIT(&PyType_Type, 0) |
| 1028 | "formatteriterator", /* tp_name */ |
| 1029 | sizeof(formatteriterobject), /* tp_basicsize */ |
| 1030 | 0, /* tp_itemsize */ |
| 1031 | /* methods */ |
| 1032 | (destructor)formatteriter_dealloc, /* tp_dealloc */ |
| 1033 | 0, /* tp_print */ |
| 1034 | 0, /* tp_getattr */ |
| 1035 | 0, /* tp_setattr */ |
| 1036 | 0, /* tp_compare */ |
| 1037 | 0, /* tp_repr */ |
| 1038 | 0, /* tp_as_number */ |
| 1039 | 0, /* tp_as_sequence */ |
| 1040 | 0, /* tp_as_mapping */ |
| 1041 | 0, /* tp_hash */ |
| 1042 | 0, /* tp_call */ |
| 1043 | 0, /* tp_str */ |
| 1044 | PyObject_GenericGetAttr, /* tp_getattro */ |
| 1045 | 0, /* tp_setattro */ |
| 1046 | 0, /* tp_as_buffer */ |
| 1047 | Py_TPFLAGS_DEFAULT, /* tp_flags */ |
| 1048 | 0, /* tp_doc */ |
| 1049 | 0, /* tp_traverse */ |
| 1050 | 0, /* tp_clear */ |
| 1051 | 0, /* tp_richcompare */ |
| 1052 | 0, /* tp_weaklistoffset */ |
| 1053 | PyObject_SelfIter, /* tp_iter */ |
| 1054 | (iternextfunc)formatteriter_next, /* tp_iternext */ |
| 1055 | formatteriter_methods, /* tp_methods */ |
| 1056 | 0, |
| 1057 | }; |
| 1058 | |
| 1059 | /* unicode_formatter_parser is used to implement |
| 1060 | string.Formatter.vformat. it parses a string and returns tuples |
| 1061 | describing the parsed elements. It's a wrapper around |
| 1062 | stringlib/string_format.h's MarkupIterator */ |
| 1063 | static PyObject * |
Eric Smith | 8fd3eba | 2008-02-17 19:48:00 +0000 | [diff] [blame] | 1064 | formatter_parser(STRINGLIB_OBJECT *self) |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 1065 | { |
| 1066 | formatteriterobject *it; |
| 1067 | |
| 1068 | it = PyObject_New(formatteriterobject, &PyFormatterIter_Type); |
| 1069 | if (it == NULL) |
| 1070 | return NULL; |
| 1071 | |
| 1072 | /* take ownership, give the object to the iterator */ |
| 1073 | Py_INCREF(self); |
| 1074 | it->str = self; |
| 1075 | |
| 1076 | /* initialize the contained MarkupIterator */ |
| 1077 | MarkupIterator_init(&it->it_markup, |
Eric Smith | 8fd3eba | 2008-02-17 19:48:00 +0000 | [diff] [blame] | 1078 | STRINGLIB_STR(self), |
| 1079 | STRINGLIB_LEN(self)); |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 1080 | |
| 1081 | return (PyObject *)it; |
| 1082 | } |
| 1083 | |
| 1084 | |
| 1085 | /************************************************************************/ |
| 1086 | /*********** fieldnameiterator ******************************************/ |
| 1087 | /************************************************************************/ |
| 1088 | |
| 1089 | |
| 1090 | /* This is used to implement string.Formatter.vparse(). It parses the |
| 1091 | field name into attribute and item values. It's a Python-callable |
| 1092 | wrapper around FieldNameIterator */ |
| 1093 | |
| 1094 | typedef struct { |
| 1095 | PyObject_HEAD |
| 1096 | |
Eric Smith | 8fd3eba | 2008-02-17 19:48:00 +0000 | [diff] [blame] | 1097 | STRINGLIB_OBJECT *str; |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 1098 | |
| 1099 | FieldNameIterator it_field; |
| 1100 | } fieldnameiterobject; |
| 1101 | |
| 1102 | static void |
| 1103 | fieldnameiter_dealloc(fieldnameiterobject *it) |
| 1104 | { |
| 1105 | Py_XDECREF(it->str); |
| 1106 | PyObject_FREE(it); |
| 1107 | } |
| 1108 | |
| 1109 | /* returns a tuple: |
| 1110 | (is_attr, value) |
| 1111 | is_attr is true if we used attribute syntax (e.g., '.foo') |
| 1112 | false if we used index syntax (e.g., '[foo]') |
| 1113 | value is an integer or string |
| 1114 | */ |
| 1115 | static PyObject * |
| 1116 | fieldnameiter_next(fieldnameiterobject *it) |
| 1117 | { |
| 1118 | int result; |
| 1119 | int is_attr; |
| 1120 | Py_ssize_t idx; |
| 1121 | SubString name; |
| 1122 | |
| 1123 | result = FieldNameIterator_next(&it->it_field, &is_attr, |
| 1124 | &idx, &name); |
Eric Smith | 0cb431c | 2007-08-28 01:07:27 +0000 | [diff] [blame] | 1125 | if (result == 0 || result == 1) |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 1126 | /* if 0, error has already been set, if 1, iterator is empty */ |
| 1127 | return NULL; |
Eric Smith | 0cb431c | 2007-08-28 01:07:27 +0000 | [diff] [blame] | 1128 | else { |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 1129 | PyObject* result = NULL; |
| 1130 | PyObject* is_attr_obj = NULL; |
| 1131 | PyObject* obj = NULL; |
| 1132 | |
| 1133 | is_attr_obj = PyBool_FromLong(is_attr); |
| 1134 | if (is_attr_obj == NULL) |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 1135 | goto done; |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 1136 | |
| 1137 | /* either an integer or a string */ |
| 1138 | if (idx != -1) |
Christian Heimes | 217cfd1 | 2007-12-02 14:31:20 +0000 | [diff] [blame] | 1139 | obj = PyLong_FromSsize_t(idx); |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 1140 | else |
| 1141 | obj = SubString_new_object(&name); |
| 1142 | if (obj == NULL) |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 1143 | goto done; |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 1144 | |
| 1145 | /* return a tuple of values */ |
| 1146 | result = PyTuple_Pack(2, is_attr_obj, obj); |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 1147 | |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 1148 | done: |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 1149 | Py_XDECREF(is_attr_obj); |
| 1150 | Py_XDECREF(obj); |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 1151 | return result; |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 1152 | } |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 1153 | } |
| 1154 | |
| 1155 | static PyMethodDef fieldnameiter_methods[] = { |
| 1156 | {NULL, NULL} /* sentinel */ |
| 1157 | }; |
| 1158 | |
| 1159 | static PyTypeObject PyFieldNameIter_Type = { |
| 1160 | PyVarObject_HEAD_INIT(&PyType_Type, 0) |
| 1161 | "fieldnameiterator", /* tp_name */ |
| 1162 | sizeof(fieldnameiterobject), /* tp_basicsize */ |
| 1163 | 0, /* tp_itemsize */ |
| 1164 | /* methods */ |
| 1165 | (destructor)fieldnameiter_dealloc, /* tp_dealloc */ |
| 1166 | 0, /* tp_print */ |
| 1167 | 0, /* tp_getattr */ |
| 1168 | 0, /* tp_setattr */ |
| 1169 | 0, /* tp_compare */ |
| 1170 | 0, /* tp_repr */ |
| 1171 | 0, /* tp_as_number */ |
| 1172 | 0, /* tp_as_sequence */ |
| 1173 | 0, /* tp_as_mapping */ |
| 1174 | 0, /* tp_hash */ |
| 1175 | 0, /* tp_call */ |
| 1176 | 0, /* tp_str */ |
| 1177 | PyObject_GenericGetAttr, /* tp_getattro */ |
| 1178 | 0, /* tp_setattro */ |
| 1179 | 0, /* tp_as_buffer */ |
| 1180 | Py_TPFLAGS_DEFAULT, /* tp_flags */ |
| 1181 | 0, /* tp_doc */ |
| 1182 | 0, /* tp_traverse */ |
| 1183 | 0, /* tp_clear */ |
| 1184 | 0, /* tp_richcompare */ |
| 1185 | 0, /* tp_weaklistoffset */ |
| 1186 | PyObject_SelfIter, /* tp_iter */ |
| 1187 | (iternextfunc)fieldnameiter_next, /* tp_iternext */ |
| 1188 | fieldnameiter_methods, /* tp_methods */ |
| 1189 | 0}; |
| 1190 | |
| 1191 | /* unicode_formatter_field_name_split is used to implement |
| 1192 | string.Formatter.vformat. it takes an PEP 3101 "field name", and |
| 1193 | returns a tuple of (first, rest): "first", the part before the |
| 1194 | first '.' or '['; and "rest", an iterator for the rest of the field |
| 1195 | name. it's a wrapper around stringlib/string_format.h's |
| 1196 | field_name_split. The iterator it returns is a |
| 1197 | FieldNameIterator */ |
| 1198 | static PyObject * |
Eric Smith | 8fd3eba | 2008-02-17 19:48:00 +0000 | [diff] [blame] | 1199 | formatter_field_name_split(STRINGLIB_OBJECT *self) |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 1200 | { |
| 1201 | SubString first; |
| 1202 | Py_ssize_t first_idx; |
| 1203 | fieldnameiterobject *it; |
| 1204 | |
| 1205 | PyObject *first_obj = NULL; |
| 1206 | PyObject *result = NULL; |
| 1207 | |
| 1208 | it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type); |
| 1209 | if (it == NULL) |
| 1210 | return NULL; |
| 1211 | |
| 1212 | /* take ownership, give the object to the iterator. this is |
| 1213 | just to keep the field_name alive */ |
| 1214 | Py_INCREF(self); |
| 1215 | it->str = self; |
| 1216 | |
| 1217 | if (!field_name_split(STRINGLIB_STR(self), |
| 1218 | STRINGLIB_LEN(self), |
| 1219 | &first, &first_idx, &it->it_field)) |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 1220 | goto done; |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 1221 | |
Eric Smith | 0cb431c | 2007-08-28 01:07:27 +0000 | [diff] [blame] | 1222 | /* first becomes an integer, if possible; else a string */ |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 1223 | if (first_idx != -1) |
Christian Heimes | 217cfd1 | 2007-12-02 14:31:20 +0000 | [diff] [blame] | 1224 | first_obj = PyLong_FromSsize_t(first_idx); |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 1225 | else |
| 1226 | /* convert "first" into a string object */ |
| 1227 | first_obj = SubString_new_object(&first); |
| 1228 | if (first_obj == NULL) |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 1229 | goto done; |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 1230 | |
| 1231 | /* return a tuple of values */ |
| 1232 | result = PyTuple_Pack(2, first_obj, it); |
| 1233 | |
Eric Smith | 625cbf2 | 2007-08-29 03:22:59 +0000 | [diff] [blame] | 1234 | done: |
Eric Smith | f6db409 | 2007-08-27 23:52:26 +0000 | [diff] [blame] | 1235 | Py_XDECREF(it); |
| 1236 | Py_XDECREF(first_obj); |
| 1237 | return result; |
| 1238 | } |