blob: bc70e97be4ba47144b8842a99e977c2fb4340710 [file] [log] [blame]
Eric Smith8c663262007-08-25 02:26:07 +00001/*
2 string_format.h -- implementation of string.format().
3
4 It uses the Objects/stringlib conventions, so that it can be
5 compiled for both unicode and string objects.
6*/
7
8
/* Python 2.6 compatibility: when building for a pre-3.0 interpreter, map
   PyLong_FromSsize_t onto the private _PyLong_FromSsize_t. */
#if PY_VERSION_HEX < 0x03000000
#define PyLong_FromSsize_t _PyLong_FromSsize_t
#endif
13
/* Tuning constants for growing the output string buffer efficiently. */

/* starting value of OutputString.size_increment */
#define INITIAL_SIZE_INCREMENT 100
/* factor applied to size_increment after each successful extension */
#define SIZE_MULTIPLIER 2
/* size_increment stops being multiplied once it reaches this value */
#define MAX_SIZE_INCREMENT 3200
18
19
20/************************************************************************/
21/*********** Global data structures and forward declarations *********/
22/************************************************************************/
23
24/*
25 A SubString consists of the characters between two string or
26 unicode pointers.
27*/
28typedef struct {
29 STRINGLIB_CHAR *ptr;
30 STRINGLIB_CHAR *end;
31} SubString;
32
33
/* Records whether the format string is using automatic field numbering. */
typedef enum {
    ANS_INIT,       /* initial state: neither style has been chosen yet */
    ANS_AUTO,       /* fields are being numbered automatically */
    ANS_MANUAL      /* fields are being specified manually */
} AutoNumberState;
39
40/* Keeps track of our auto-numbering state, and which number field we're on */
41typedef struct {
42 AutoNumberState an_state;
43 int an_field_number;
44} AutoNumber;
45
46
Eric Smith8c663262007-08-25 02:26:07 +000047/* forward declaration for recursion */
48static PyObject *
49build_string(SubString *input, PyObject *args, PyObject *kwargs,
Eric Smith8ec90442009-03-14 12:29:34 +000050 int recursion_depth, AutoNumber *auto_number);
Eric Smith8c663262007-08-25 02:26:07 +000051
52
53
54/************************************************************************/
55/************************** Utility functions ************************/
56/************************************************************************/
57
Eric Smith8ec90442009-03-14 12:29:34 +000058static void
59AutoNumber_Init(AutoNumber *auto_number)
60{
61 auto_number->an_state = ANS_INIT;
62 auto_number->an_field_number = 0;
63}
64
Eric Smith8c663262007-08-25 02:26:07 +000065/* fill in a SubString from a pointer and length */
66Py_LOCAL_INLINE(void)
67SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
68{
69 str->ptr = p;
70 if (p == NULL)
71 str->end = NULL;
72 else
73 str->end = str->ptr + len;
74}
75
Eric Smith625cbf22007-08-29 03:22:59 +000076/* return a new string. if str->ptr is NULL, return None */
Eric Smith8c663262007-08-25 02:26:07 +000077Py_LOCAL_INLINE(PyObject *)
78SubString_new_object(SubString *str)
79{
Eric Smith625cbf22007-08-29 03:22:59 +000080 if (str->ptr == NULL) {
81 Py_INCREF(Py_None);
82 return Py_None;
83 }
84 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
85}
86
87/* return a new string. if str->ptr is NULL, return None */
88Py_LOCAL_INLINE(PyObject *)
89SubString_new_object_or_empty(SubString *str)
90{
91 if (str->ptr == NULL) {
92 return STRINGLIB_NEW(NULL, 0);
93 }
Eric Smith8c663262007-08-25 02:26:07 +000094 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
95}
96
Eric Smith8ec90442009-03-14 12:29:34 +000097/* Return 1 if an error has been detected switching between automatic
98 field numbering and manual field specification, else return 0. Set
99 ValueError on error. */
100static int
101autonumber_state_error(AutoNumberState state, int field_name_is_empty)
102{
103 if (state == ANS_MANUAL) {
104 if (field_name_is_empty) {
105 PyErr_SetString(PyExc_ValueError, "cannot switch from "
106 "manual field specification to "
107 "automatic field numbering");
108 return 1;
109 }
110 }
111 else {
112 if (!field_name_is_empty) {
113 PyErr_SetString(PyExc_ValueError, "cannot switch from "
114 "automatic field numbering to "
115 "manual field specification");
116 return 1;
117 }
118 }
119 return 0;
120}
121
122
Eric Smith8c663262007-08-25 02:26:07 +0000123/************************************************************************/
Eric Smith8c663262007-08-25 02:26:07 +0000124/*********** Output string management functions ****************/
125/************************************************************************/
126
127typedef struct {
128 STRINGLIB_CHAR *ptr;
129 STRINGLIB_CHAR *end;
130 PyObject *obj;
131 Py_ssize_t size_increment;
132} OutputString;
133
134/* initialize an OutputString object, reserving size characters */
135static int
136output_initialize(OutputString *output, Py_ssize_t size)
137{
138 output->obj = STRINGLIB_NEW(NULL, size);
139 if (output->obj == NULL)
140 return 0;
141
142 output->ptr = STRINGLIB_STR(output->obj);
143 output->end = STRINGLIB_LEN(output->obj) + output->ptr;
144 output->size_increment = INITIAL_SIZE_INCREMENT;
145
146 return 1;
147}
148
149/*
150 output_extend reallocates the output string buffer.
151 It returns a status: 0 for a failed reallocation,
152 1 for success.
153*/
154
155static int
156output_extend(OutputString *output, Py_ssize_t count)
157{
158 STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
159 Py_ssize_t curlen = output->ptr - startptr;
160 Py_ssize_t maxlen = curlen + count + output->size_increment;
161
162 if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
163 return 0;
164 startptr = STRINGLIB_STR(output->obj);
165 output->ptr = startptr + curlen;
166 output->end = startptr + maxlen;
167 if (output->size_increment < MAX_SIZE_INCREMENT)
168 output->size_increment *= SIZE_MULTIPLIER;
169 return 1;
170}
171
172/*
173 output_data dumps characters into our output string
174 buffer.
175
176 In some cases, it has to reallocate the string.
177
178 It returns a status: 0 for a failed reallocation,
179 1 for success.
180*/
181static int
182output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
183{
184 if ((count > output->end - output->ptr) && !output_extend(output, count))
185 return 0;
186 memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
187 output->ptr += count;
188 return 1;
189}
190
191/************************************************************************/
192/*********** Format string parsing -- integers and identifiers *********/
193/************************************************************************/
194
Eric Smith7ade6482007-08-26 22:27:13 +0000195static Py_ssize_t
196get_integer(const SubString *str)
Eric Smith8c663262007-08-25 02:26:07 +0000197{
Eric Smith7ade6482007-08-26 22:27:13 +0000198 Py_ssize_t accumulator = 0;
199 Py_ssize_t digitval;
200 Py_ssize_t oldaccumulator;
201 STRINGLIB_CHAR *p;
Eric Smith8c663262007-08-25 02:26:07 +0000202
Eric Smith7ade6482007-08-26 22:27:13 +0000203 /* empty string is an error */
204 if (str->ptr >= str->end)
205 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000206
Eric Smith7ade6482007-08-26 22:27:13 +0000207 for (p = str->ptr; p < str->end; p++) {
208 digitval = STRINGLIB_TODECIMAL(*p);
Eric Smith8c663262007-08-25 02:26:07 +0000209 if (digitval < 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000210 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000211 /*
212 This trick was copied from old Unicode format code. It's cute,
213 but would really suck on an old machine with a slow divide
214 implementation. Fortunately, in the normal case we do not
215 expect too many digits.
216 */
217 oldaccumulator = accumulator;
218 accumulator *= 10;
219 if ((accumulator+10)/10 != oldaccumulator+1) {
220 PyErr_Format(PyExc_ValueError,
221 "Too many decimal digits in format string");
222 return -1;
223 }
224 accumulator += digitval;
225 }
Eric Smith7ade6482007-08-26 22:27:13 +0000226 return accumulator;
Eric Smith8c663262007-08-25 02:26:07 +0000227}
228
229/************************************************************************/
230/******** Functions to get field objects and specification strings ******/
231/************************************************************************/
232
Eric Smith7ade6482007-08-26 22:27:13 +0000233/* do the equivalent of obj.name */
Eric Smith8c663262007-08-25 02:26:07 +0000234static PyObject *
Eric Smith7ade6482007-08-26 22:27:13 +0000235getattr(PyObject *obj, SubString *name)
Eric Smith8c663262007-08-25 02:26:07 +0000236{
Eric Smith7ade6482007-08-26 22:27:13 +0000237 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000238 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000239 if (str == NULL)
240 return NULL;
241 newobj = PyObject_GetAttr(obj, str);
242 Py_DECREF(str);
243 return newobj;
Eric Smith8c663262007-08-25 02:26:07 +0000244}
245
Eric Smith7ade6482007-08-26 22:27:13 +0000246/* do the equivalent of obj[idx], where obj is a sequence */
247static PyObject *
248getitem_sequence(PyObject *obj, Py_ssize_t idx)
249{
250 return PySequence_GetItem(obj, idx);
251}
252
253/* do the equivalent of obj[idx], where obj is not a sequence */
254static PyObject *
255getitem_idx(PyObject *obj, Py_ssize_t idx)
256{
257 PyObject *newobj;
Christian Heimes217cfd12007-12-02 14:31:20 +0000258 PyObject *idx_obj = PyLong_FromSsize_t(idx);
Eric Smith7ade6482007-08-26 22:27:13 +0000259 if (idx_obj == NULL)
260 return NULL;
261 newobj = PyObject_GetItem(obj, idx_obj);
262 Py_DECREF(idx_obj);
263 return newobj;
264}
265
266/* do the equivalent of obj[name] */
267static PyObject *
268getitem_str(PyObject *obj, SubString *name)
269{
270 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000271 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000272 if (str == NULL)
273 return NULL;
274 newobj = PyObject_GetItem(obj, str);
275 Py_DECREF(str);
276 return newobj;
277}
278
279typedef struct {
280 /* the entire string we're parsing. we assume that someone else
281 is managing its lifetime, and that it will exist for the
282 lifetime of the iterator. can be empty */
283 SubString str;
284
285 /* pointer to where we are inside field_name */
286 STRINGLIB_CHAR *ptr;
287} FieldNameIterator;
288
289
290static int
291FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
292 Py_ssize_t len)
293{
294 SubString_init(&self->str, ptr, len);
295 self->ptr = self->str.ptr;
296 return 1;
297}
298
299static int
300_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
301{
302 STRINGLIB_CHAR c;
303
304 name->ptr = self->ptr;
305
306 /* return everything until '.' or '[' */
307 while (self->ptr < self->str.end) {
308 switch (c = *self->ptr++) {
309 case '[':
310 case '.':
311 /* backup so that we this character will be seen next time */
312 self->ptr--;
313 break;
314 default:
315 continue;
316 }
317 break;
318 }
319 /* end of string is okay */
320 name->end = self->ptr;
321 return 1;
322}
323
324static int
325_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
326{
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000327 int bracket_seen = 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000328 STRINGLIB_CHAR c;
329
330 name->ptr = self->ptr;
331
332 /* return everything until ']' */
333 while (self->ptr < self->str.end) {
334 switch (c = *self->ptr++) {
335 case ']':
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000336 bracket_seen = 1;
Eric Smith7ade6482007-08-26 22:27:13 +0000337 break;
338 default:
339 continue;
340 }
341 break;
342 }
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000343 /* make sure we ended with a ']' */
344 if (!bracket_seen) {
345 PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
346 return 0;
347 }
348
Eric Smith7ade6482007-08-26 22:27:13 +0000349 /* end of string is okay */
350 /* don't include the ']' */
351 name->end = self->ptr-1;
352 return 1;
353}
354
355/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
356static int
357FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
358 Py_ssize_t *name_idx, SubString *name)
359{
360 /* check at end of input */
361 if (self->ptr >= self->str.end)
362 return 1;
363
364 switch (*self->ptr++) {
365 case '.':
366 *is_attribute = 1;
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000367 if (_FieldNameIterator_attr(self, name) == 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000368 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000369 *name_idx = -1;
370 break;
371 case '[':
372 *is_attribute = 0;
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000373 if (_FieldNameIterator_item(self, name) == 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000374 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000375 *name_idx = get_integer(name);
Benjamin Peterson59a1b2f2010-06-07 22:31:26 +0000376 if (*name_idx == -1 && PyErr_Occurred())
377 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000378 break;
379 default:
Eric Smith41669ca2009-05-23 14:23:22 +0000380 /* Invalid character follows ']' */
381 PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
382 "follow ']' in format field specifier");
Eric Smith7ade6482007-08-26 22:27:13 +0000383 return 0;
384 }
385
386 /* empty string is an error */
387 if (name->ptr == name->end) {
388 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
389 return 0;
390 }
391
392 return 2;
393}
394
395
396/* input: field_name
397 output: 'first' points to the part before the first '[' or '.'
398 'first_idx' is -1 if 'first' is not an integer, otherwise
399 it's the value of first converted to an integer
400 'rest' is an iterator to return the rest
401*/
402static int
403field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
Eric Smith8ec90442009-03-14 12:29:34 +0000404 Py_ssize_t *first_idx, FieldNameIterator *rest,
405 AutoNumber *auto_number)
Eric Smith7ade6482007-08-26 22:27:13 +0000406{
407 STRINGLIB_CHAR c;
408 STRINGLIB_CHAR *p = ptr;
409 STRINGLIB_CHAR *end = ptr + len;
Eric Smith8ec90442009-03-14 12:29:34 +0000410 int field_name_is_empty;
411 int using_numeric_index;
Eric Smith7ade6482007-08-26 22:27:13 +0000412
413 /* find the part up until the first '.' or '[' */
414 while (p < end) {
415 switch (c = *p++) {
416 case '[':
417 case '.':
418 /* backup so that we this character is available to the
419 "rest" iterator */
420 p--;
421 break;
422 default:
423 continue;
424 }
425 break;
426 }
427
428 /* set up the return values */
429 SubString_init(first, ptr, p - ptr);
430 FieldNameIterator_init(rest, p, end - p);
431
432 /* see if "first" is an integer, in which case it's used as an index */
433 *first_idx = get_integer(first);
Benjamin Peterson59a1b2f2010-06-07 22:31:26 +0000434 if (*first_idx == -1 && PyErr_Occurred())
435 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000436
Eric Smith8ec90442009-03-14 12:29:34 +0000437 field_name_is_empty = first->ptr >= first->end;
438
439 /* If the field name is omitted or if we have a numeric index
440 specified, then we're doing numeric indexing into args. */
441 using_numeric_index = field_name_is_empty || *first_idx != -1;
442
443 /* We always get here exactly one time for each field we're
444 processing. And we get here in field order (counting by left
445 braces). So this is the perfect place to handle automatic field
446 numbering if the field name is omitted. */
447
448 /* Check if we need to do the auto-numbering. It's not needed if
449 we're called from string.Format routines, because it's handled
450 in that class by itself. */
451 if (auto_number) {
452 /* Initialize our auto numbering state if this is the first
453 time we're either auto-numbering or manually numbering. */
454 if (auto_number->an_state == ANS_INIT && using_numeric_index)
455 auto_number->an_state = field_name_is_empty ?
456 ANS_AUTO : ANS_MANUAL;
457
458 /* Make sure our state is consistent with what we're doing
459 this time through. Only check if we're using a numeric
460 index. */
461 if (using_numeric_index)
462 if (autonumber_state_error(auto_number->an_state,
463 field_name_is_empty))
464 return 0;
465 /* Zero length field means we want to do auto-numbering of the
466 fields. */
467 if (field_name_is_empty)
468 *first_idx = (auto_number->an_field_number)++;
Eric Smith7ade6482007-08-26 22:27:13 +0000469 }
470
471 return 1;
Eric Smith7ade6482007-08-26 22:27:13 +0000472}
473
474
Eric Smith8c663262007-08-25 02:26:07 +0000475/*
476 get_field_object returns the object inside {}, before the
477 format_spec. It handles getindex and getattr lookups and consumes
478 the entire input string.
479*/
480static PyObject *
Eric Smith8ec90442009-03-14 12:29:34 +0000481get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
482 AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000483{
Eric Smith7ade6482007-08-26 22:27:13 +0000484 PyObject *obj = NULL;
485 int ok;
486 int is_attribute;
487 SubString name;
488 SubString first;
Eric Smith8c663262007-08-25 02:26:07 +0000489 Py_ssize_t index;
Eric Smith7ade6482007-08-26 22:27:13 +0000490 FieldNameIterator rest;
Eric Smith8c663262007-08-25 02:26:07 +0000491
Eric Smith7ade6482007-08-26 22:27:13 +0000492 if (!field_name_split(input->ptr, input->end - input->ptr, &first,
Eric Smith8ec90442009-03-14 12:29:34 +0000493 &index, &rest, auto_number)) {
Eric Smith7ade6482007-08-26 22:27:13 +0000494 goto error;
495 }
Eric Smith8c663262007-08-25 02:26:07 +0000496
Eric Smith7ade6482007-08-26 22:27:13 +0000497 if (index == -1) {
498 /* look up in kwargs */
Eric Smith7a6dd292007-08-27 23:30:47 +0000499 PyObject *key = SubString_new_object(&first);
Eric Smith7ade6482007-08-26 22:27:13 +0000500 if (key == NULL)
501 goto error;
502 if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
Eric Smith11529192007-09-04 23:04:22 +0000503 PyErr_SetObject(PyExc_KeyError, key);
Eric Smith7ade6482007-08-26 22:27:13 +0000504 Py_DECREF(key);
505 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000506 }
Neal Norwitz8a4eb292007-08-27 07:24:17 +0000507 Py_DECREF(key);
Neal Norwitz247b5152007-08-27 03:22:50 +0000508 Py_INCREF(obj);
Eric Smith0cb431c2007-08-28 01:07:27 +0000509 }
510 else {
Eric Smith7ade6482007-08-26 22:27:13 +0000511 /* look up in args */
512 obj = PySequence_GetItem(args, index);
Eric Smith11529192007-09-04 23:04:22 +0000513 if (obj == NULL)
Eric Smith7ade6482007-08-26 22:27:13 +0000514 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000515 }
Eric Smith7ade6482007-08-26 22:27:13 +0000516
517 /* iterate over the rest of the field_name */
518 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
519 &name)) == 2) {
520 PyObject *tmp;
521
522 if (is_attribute)
523 /* getattr lookup "." */
524 tmp = getattr(obj, &name);
525 else
526 /* getitem lookup "[]" */
527 if (index == -1)
528 tmp = getitem_str(obj, &name);
529 else
530 if (PySequence_Check(obj))
531 tmp = getitem_sequence(obj, index);
532 else
533 /* not a sequence */
534 tmp = getitem_idx(obj, index);
535 if (tmp == NULL)
536 goto error;
537
538 /* assign to obj */
539 Py_DECREF(obj);
540 obj = tmp;
Eric Smith8c663262007-08-25 02:26:07 +0000541 }
Eric Smith7ade6482007-08-26 22:27:13 +0000542 /* end of iterator, this is the non-error case */
543 if (ok == 1)
544 return obj;
545error:
546 Py_XDECREF(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000547 return NULL;
548}
549
550/************************************************************************/
551/***************** Field rendering functions **************************/
552/************************************************************************/
553
554/*
555 render_field() is the main function in this section. It takes the
556 field object and field specification string generated by
557 get_field_and_spec, and renders the field into the output string.
558
Eric Smith8c663262007-08-25 02:26:07 +0000559 render_field calls fieldobj.__format__(format_spec) method, and
560 appends to the output.
561*/
562static int
563render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
564{
565 int ok = 0;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000566 PyObject *result = NULL;
Eric Smith1d138f12008-05-31 01:40:08 +0000567 PyObject *format_spec_object = NULL;
Eric Smithba8c0282008-06-02 14:57:32 +0000568 PyObject *(*formatter)(PyObject *, STRINGLIB_CHAR *, Py_ssize_t) = NULL;
Eric Smith1d138f12008-05-31 01:40:08 +0000569 STRINGLIB_CHAR* format_spec_start = format_spec->ptr ?
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000570 format_spec->ptr : NULL;
Eric Smith1d138f12008-05-31 01:40:08 +0000571 Py_ssize_t format_spec_len = format_spec->ptr ?
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000572 format_spec->end - format_spec->ptr : 0;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000573
Eric Smith1d138f12008-05-31 01:40:08 +0000574 /* If we know the type exactly, skip the lookup of __format__ and just
575 call the formatter directly. */
576 if (PyUnicode_CheckExact(fieldobj))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000577 formatter = _PyUnicode_FormatAdvanced;
Eric Smith1d138f12008-05-31 01:40:08 +0000578 else if (PyLong_CheckExact(fieldobj))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000579 formatter =_PyLong_FormatAdvanced;
Eric Smith1d138f12008-05-31 01:40:08 +0000580 else if (PyFloat_CheckExact(fieldobj))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000581 formatter = _PyFloat_FormatAdvanced;
Eric Smithba8c0282008-06-02 14:57:32 +0000582
583 /* XXX: for 2.6, convert format_spec to the appropriate type
584 (unicode, str) */
585
586 if (formatter) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000587 /* we know exactly which formatter will be called when __format__ is
588 looked up, so call it directly, instead. */
589 result = formatter(fieldobj, format_spec_start, format_spec_len);
Eric Smithba8c0282008-06-02 14:57:32 +0000590 }
Eric Smith1d138f12008-05-31 01:40:08 +0000591 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000592 /* We need to create an object out of the pointers we have, because
593 __format__ takes a string/unicode object for format_spec. */
594 format_spec_object = STRINGLIB_NEW(format_spec_start,
595 format_spec_len);
596 if (format_spec_object == NULL)
597 goto done;
Eric Smith1d138f12008-05-31 01:40:08 +0000598
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000599 result = PyObject_Format(fieldobj, format_spec_object);
Eric Smith1d138f12008-05-31 01:40:08 +0000600 }
Eric Smith8c663262007-08-25 02:26:07 +0000601 if (result == NULL)
602 goto done;
603
Eric Smith8a0217c2008-02-18 18:07:47 +0000604#if PY_VERSION_HEX >= 0x03000000
Eric Smithecbac8f2008-02-24 21:44:34 +0000605 assert(PyUnicode_Check(result));
Eric Smith8a0217c2008-02-18 18:07:47 +0000606#else
Christian Heimes72b710a2008-05-26 13:28:38 +0000607 assert(PyBytes_Check(result) || PyUnicode_Check(result));
Eric Smith8a0217c2008-02-18 18:07:47 +0000608
609 /* Convert result to our type. We could be str, and result could
610 be unicode */
611 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000612 PyObject *tmp = STRINGLIB_TOSTR(result);
613 if (tmp == NULL)
614 goto done;
615 Py_DECREF(result);
616 result = tmp;
Eric Smith8a0217c2008-02-18 18:07:47 +0000617 }
618#endif
619
Eric Smith8c663262007-08-25 02:26:07 +0000620 ok = output_data(output,
621 STRINGLIB_STR(result), STRINGLIB_LEN(result));
622done:
Eric Smith1d138f12008-05-31 01:40:08 +0000623 Py_XDECREF(format_spec_object);
Eric Smith8c663262007-08-25 02:26:07 +0000624 Py_XDECREF(result);
625 return ok;
626}
627
628static int
629parse_field(SubString *str, SubString *field_name, SubString *format_spec,
630 STRINGLIB_CHAR *conversion)
631{
Eric Smith8ec90442009-03-14 12:29:34 +0000632 /* Note this function works if the field name is zero length,
633 which is good. Zero length field names are handled later, in
634 field_name_split. */
635
Eric Smith8c663262007-08-25 02:26:07 +0000636 STRINGLIB_CHAR c = 0;
637
638 /* initialize these, as they may be empty */
639 *conversion = '\0';
640 SubString_init(format_spec, NULL, 0);
641
Eric Smith8ec90442009-03-14 12:29:34 +0000642 /* Search for the field name. it's terminated by the end of
643 the string, or a ':' or '!' */
Eric Smith8c663262007-08-25 02:26:07 +0000644 field_name->ptr = str->ptr;
645 while (str->ptr < str->end) {
646 switch (c = *(str->ptr++)) {
647 case ':':
648 case '!':
649 break;
650 default:
651 continue;
652 }
653 break;
654 }
655
656 if (c == '!' || c == ':') {
657 /* we have a format specifier and/or a conversion */
658 /* don't include the last character */
659 field_name->end = str->ptr-1;
660
661 /* the format specifier is the rest of the string */
662 format_spec->ptr = str->ptr;
663 format_spec->end = str->end;
664
665 /* see if there's a conversion specifier */
666 if (c == '!') {
667 /* there must be another character present */
668 if (format_spec->ptr >= format_spec->end) {
669 PyErr_SetString(PyExc_ValueError,
670 "end of format while looking for conversion "
671 "specifier");
672 return 0;
673 }
674 *conversion = *(format_spec->ptr++);
675
676 /* if there is another character, it must be a colon */
677 if (format_spec->ptr < format_spec->end) {
678 c = *(format_spec->ptr++);
679 if (c != ':') {
680 PyErr_SetString(PyExc_ValueError,
681 "expected ':' after format specifier");
682 return 0;
683 }
684 }
685 }
Eric Smith0cb431c2007-08-28 01:07:27 +0000686 }
Eric Smith8ec90442009-03-14 12:29:34 +0000687 else
Eric Smith8c663262007-08-25 02:26:07 +0000688 /* end of string, there's no format_spec or conversion */
689 field_name->end = str->ptr;
Eric Smith8ec90442009-03-14 12:29:34 +0000690
691 return 1;
Eric Smith8c663262007-08-25 02:26:07 +0000692}
693
694/************************************************************************/
695/******* Output string allocation and escape-to-markup processing ******/
696/************************************************************************/
697
698/* MarkupIterator breaks the string into pieces of either literal
699 text, or things inside {} that need to be marked up. it is
700 designed to make it easy to wrap a Python iterator around it, for
701 use with the Formatter class */
702
703typedef struct {
704 SubString str;
Eric Smith8c663262007-08-25 02:26:07 +0000705} MarkupIterator;
706
707static int
708MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
709{
710 SubString_init(&self->str, ptr, len);
Eric Smith8c663262007-08-25 02:26:07 +0000711 return 1;
712}
713
714/* returns 0 on error, 1 on non-error termination, and 2 if it got a
715 string (or something to be expanded) */
716static int
Eric Smith625cbf22007-08-29 03:22:59 +0000717MarkupIterator_next(MarkupIterator *self, SubString *literal,
Eric Smith8ec90442009-03-14 12:29:34 +0000718 int *field_present, SubString *field_name,
719 SubString *format_spec, STRINGLIB_CHAR *conversion,
Eric Smith8c663262007-08-25 02:26:07 +0000720 int *format_spec_needs_expanding)
721{
722 int at_end;
723 STRINGLIB_CHAR c = 0;
724 STRINGLIB_CHAR *start;
725 int count;
726 Py_ssize_t len;
Eric Smith625cbf22007-08-29 03:22:59 +0000727 int markup_follows = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000728
Eric Smith625cbf22007-08-29 03:22:59 +0000729 /* initialize all of the output variables */
730 SubString_init(literal, NULL, 0);
731 SubString_init(field_name, NULL, 0);
732 SubString_init(format_spec, NULL, 0);
733 *conversion = '\0';
Eric Smith8c663262007-08-25 02:26:07 +0000734 *format_spec_needs_expanding = 0;
Eric Smith8ec90442009-03-14 12:29:34 +0000735 *field_present = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000736
Eric Smith625cbf22007-08-29 03:22:59 +0000737 /* No more input, end of iterator. This is the normal exit
738 path. */
Eric Smith8c663262007-08-25 02:26:07 +0000739 if (self->str.ptr >= self->str.end)
740 return 1;
741
Eric Smith8c663262007-08-25 02:26:07 +0000742 start = self->str.ptr;
743
Eric Smith625cbf22007-08-29 03:22:59 +0000744 /* First read any literal text. Read until the end of string, an
745 escaped '{' or '}', or an unescaped '{'. In order to never
746 allocate memory and so I can just pass pointers around, if
747 there's an escaped '{' or '}' then we'll return the literal
748 including the brace, but no format object. The next time
749 through, we'll return the rest of the literal, skipping past
750 the second consecutive brace. */
751 while (self->str.ptr < self->str.end) {
752 switch (c = *(self->str.ptr++)) {
753 case '{':
754 case '}':
755 markup_follows = 1;
756 break;
757 default:
758 continue;
Eric Smith8c663262007-08-25 02:26:07 +0000759 }
Eric Smith625cbf22007-08-29 03:22:59 +0000760 break;
Eric Smith0cb431c2007-08-28 01:07:27 +0000761 }
Eric Smith625cbf22007-08-29 03:22:59 +0000762
763 at_end = self->str.ptr >= self->str.end;
764 len = self->str.ptr - start;
765
766 if ((c == '}') && (at_end || (c != *self->str.ptr))) {
767 PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
768 "in format string");
769 return 0;
770 }
771 if (at_end && c == '{') {
772 PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
773 "in format string");
774 return 0;
775 }
776 if (!at_end) {
777 if (c == *self->str.ptr) {
778 /* escaped } or {, skip it in the input. there is no
779 markup object following us, just this literal text */
780 self->str.ptr++;
781 markup_follows = 0;
782 }
783 else
784 len--;
785 }
786
787 /* record the literal text */
788 literal->ptr = start;
789 literal->end = start + len;
790
791 if (!markup_follows)
792 return 2;
793
794 /* this is markup, find the end of the string by counting nested
795 braces. note that this prohibits escaped braces, so that
796 format_specs cannot have braces in them. */
Eric Smith8ec90442009-03-14 12:29:34 +0000797 *field_present = 1;
Eric Smith625cbf22007-08-29 03:22:59 +0000798 count = 1;
799
800 start = self->str.ptr;
801
802 /* we know we can't have a zero length string, so don't worry
803 about that case */
804 while (self->str.ptr < self->str.end) {
805 switch (c = *(self->str.ptr++)) {
806 case '{':
807 /* the format spec needs to be recursively expanded.
808 this is an optimization, and not strictly needed */
809 *format_spec_needs_expanding = 1;
810 count++;
811 break;
812 case '}':
813 count--;
814 if (count <= 0) {
815 /* we're done. parse and get out */
816 SubString s;
817
818 SubString_init(&s, start, self->str.ptr - 1 - start);
819 if (parse_field(&s, field_name, format_spec, conversion) == 0)
820 return 0;
821
Eric Smith625cbf22007-08-29 03:22:59 +0000822 /* success */
823 return 2;
Eric Smith8c663262007-08-25 02:26:07 +0000824 }
825 break;
826 }
Eric Smith8c663262007-08-25 02:26:07 +0000827 }
Eric Smith625cbf22007-08-29 03:22:59 +0000828
829 /* end of string while searching for matching '}' */
830 PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
831 return 0;
Eric Smith8c663262007-08-25 02:26:07 +0000832}
833
834
835/* do the !r or !s conversion on obj */
836static PyObject *
837do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
838{
839 /* XXX in pre-3.0, do we need to convert this to unicode, since it
840 might have returned a string? */
841 switch (conversion) {
842 case 'r':
843 return PyObject_Repr(obj);
844 case 's':
Eric Smith8fd3eba2008-02-17 19:48:00 +0000845 return STRINGLIB_TOSTR(obj);
Georg Brandl559e5d72008-06-11 18:37:52 +0000846#if PY_VERSION_HEX >= 0x03000000
847 case 'a':
848 return STRINGLIB_TOASCII(obj);
849#endif
Eric Smith8c663262007-08-25 02:26:07 +0000850 default:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000851 if (conversion > 32 && conversion < 127) {
852 /* It's the ASCII subrange; casting to char is safe
853 (assuming the execution character set is an ASCII
854 superset). */
855 PyErr_Format(PyExc_ValueError,
Martin v. Löwis5a6f4582008-04-07 03:22:07 +0000856 "Unknown conversion specifier %c",
857 (char)conversion);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000858 } else
859 PyErr_Format(PyExc_ValueError,
860 "Unknown conversion specifier \\x%x",
861 (unsigned int)conversion);
Eric Smith8c663262007-08-25 02:26:07 +0000862 return NULL;
863 }
864}
865
866/* given:
867
868 {field_name!conversion:format_spec}
869
870 compute the result and write it to output.
871 format_spec_needs_expanding is an optimization. if it's false,
872 just output the string directly, otherwise recursively expand the
Eric Smith8ec90442009-03-14 12:29:34 +0000873 format_spec string.
874
875 field_name is allowed to be zero length, in which case we
876 are doing auto field numbering.
877*/
Eric Smith8c663262007-08-25 02:26:07 +0000878
879static int
880output_markup(SubString *field_name, SubString *format_spec,
881 int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
882 OutputString *output, PyObject *args, PyObject *kwargs,
Eric Smith8ec90442009-03-14 12:29:34 +0000883 int recursion_depth, AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000884{
885 PyObject *tmp = NULL;
886 PyObject *fieldobj = NULL;
887 SubString expanded_format_spec;
888 SubString *actual_format_spec;
889 int result = 0;
890
891 /* convert field_name to an object */
Eric Smith8ec90442009-03-14 12:29:34 +0000892 fieldobj = get_field_object(field_name, args, kwargs, auto_number);
Eric Smith8c663262007-08-25 02:26:07 +0000893 if (fieldobj == NULL)
894 goto done;
895
896 if (conversion != '\0') {
897 tmp = do_conversion(fieldobj, conversion);
898 if (tmp == NULL)
899 goto done;
900
901 /* do the assignment, transferring ownership: fieldobj = tmp */
902 Py_DECREF(fieldobj);
903 fieldobj = tmp;
904 tmp = NULL;
905 }
906
907 /* if needed, recurively compute the format_spec */
908 if (format_spec_needs_expanding) {
Eric Smith8ec90442009-03-14 12:29:34 +0000909 tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
910 auto_number);
Eric Smith8c663262007-08-25 02:26:07 +0000911 if (tmp == NULL)
912 goto done;
913
914 /* note that in the case we're expanding the format string,
915 tmp must be kept around until after the call to
916 render_field. */
917 SubString_init(&expanded_format_spec,
918 STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
919 actual_format_spec = &expanded_format_spec;
Eric Smith0cb431c2007-08-28 01:07:27 +0000920 }
921 else
Eric Smith8c663262007-08-25 02:26:07 +0000922 actual_format_spec = format_spec;
923
924 if (render_field(fieldobj, actual_format_spec, output) == 0)
925 goto done;
926
927 result = 1;
928
929done:
930 Py_XDECREF(fieldobj);
931 Py_XDECREF(tmp);
932
933 return result;
934}
935
936/*
Eric Smith8fd3eba2008-02-17 19:48:00 +0000937 do_markup is the top-level loop for the format() method. It
Eric Smith8c663262007-08-25 02:26:07 +0000938 searches through the format string for escapes to markup codes, and
939 calls other functions to move non-markup text to the output,
940 and to perform the markup to the output.
941*/
942static int
943do_markup(SubString *input, PyObject *args, PyObject *kwargs,
Eric Smith8ec90442009-03-14 12:29:34 +0000944 OutputString *output, int recursion_depth, AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000945{
946 MarkupIterator iter;
Eric Smith8c663262007-08-25 02:26:07 +0000947 int format_spec_needs_expanding;
948 int result;
Eric Smith8ec90442009-03-14 12:29:34 +0000949 int field_present;
Eric Smith625cbf22007-08-29 03:22:59 +0000950 SubString literal;
Eric Smith8c663262007-08-25 02:26:07 +0000951 SubString field_name;
952 SubString format_spec;
953 STRINGLIB_CHAR conversion;
954
955 MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
Eric Smith8ec90442009-03-14 12:29:34 +0000956 while ((result = MarkupIterator_next(&iter, &literal, &field_present,
957 &field_name, &format_spec,
958 &conversion,
Eric Smith8c663262007-08-25 02:26:07 +0000959 &format_spec_needs_expanding)) == 2) {
Eric Smith625cbf22007-08-29 03:22:59 +0000960 if (!output_data(output, literal.ptr, literal.end - literal.ptr))
961 return 0;
Eric Smith8ec90442009-03-14 12:29:34 +0000962 if (field_present)
Eric Smith8c663262007-08-25 02:26:07 +0000963 if (!output_markup(&field_name, &format_spec,
964 format_spec_needs_expanding, conversion, output,
Eric Smith8ec90442009-03-14 12:29:34 +0000965 args, kwargs, recursion_depth, auto_number))
Eric Smith8c663262007-08-25 02:26:07 +0000966 return 0;
Eric Smith8c663262007-08-25 02:26:07 +0000967 }
968 return result;
969}
970
971
972/*
973 build_string allocates the output string and then
974 calls do_markup to do the heavy lifting.
975*/
976static PyObject *
977build_string(SubString *input, PyObject *args, PyObject *kwargs,
Eric Smith8ec90442009-03-14 12:29:34 +0000978 int recursion_depth, AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000979{
980 OutputString output;
981 PyObject *result = NULL;
982 Py_ssize_t count;
983
984 output.obj = NULL; /* needed so cleanup code always works */
985
986 /* check the recursion level */
Eric Smith45c07872007-09-05 02:02:43 +0000987 if (recursion_depth <= 0) {
Eric Smith8c663262007-08-25 02:26:07 +0000988 PyErr_SetString(PyExc_ValueError,
989 "Max string recursion exceeded");
990 goto done;
991 }
992
993 /* initial size is the length of the format string, plus the size
994 increment. seems like a reasonable default */
995 if (!output_initialize(&output,
996 input->end - input->ptr +
997 INITIAL_SIZE_INCREMENT))
998 goto done;
999
Eric Smith8ec90442009-03-14 12:29:34 +00001000 if (!do_markup(input, args, kwargs, &output, recursion_depth,
1001 auto_number)) {
Eric Smith8c663262007-08-25 02:26:07 +00001002 goto done;
1003 }
1004
1005 count = output.ptr - STRINGLIB_STR(output.obj);
1006 if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
1007 goto done;
1008 }
1009
1010 /* transfer ownership to result */
1011 result = output.obj;
1012 output.obj = NULL;
1013
1014done:
Eric Smith8c663262007-08-25 02:26:07 +00001015 Py_XDECREF(output.obj);
1016 return result;
1017}
1018
1019/************************************************************************/
1020/*********** main routine ***********************************************/
1021/************************************************************************/
1022
1023/* this is the main entry point */
1024static PyObject *
1025do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
1026{
1027 SubString input;
1028
1029 /* PEP 3101 says only 2 levels, so that
1030 "{0:{1}}".format('abc', 's') # works
1031 "{0:{1:{2}}}".format('abc', 's', '') # fails
1032 */
Eric Smith45c07872007-09-05 02:02:43 +00001033 int recursion_depth = 2;
Eric Smith8c663262007-08-25 02:26:07 +00001034
Eric Smith8ec90442009-03-14 12:29:34 +00001035 AutoNumber auto_number;
1036
1037 AutoNumber_Init(&auto_number);
Eric Smith8c663262007-08-25 02:26:07 +00001038 SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
Eric Smith8ec90442009-03-14 12:29:34 +00001039 return build_string(&input, args, kwargs, recursion_depth, &auto_number);
Eric Smith8c663262007-08-25 02:26:07 +00001040}
Eric Smithf6db4092007-08-27 23:52:26 +00001041
1042
1043
1044/************************************************************************/
1045/*********** formatteriterator ******************************************/
1046/************************************************************************/
1047
/* This is used to implement string.Formatter.parse().  It exists so
   Formatter can share code with the built-in unicode.format() method.
   It's really just a wrapper around MarkupIterator that is callable
   from Python. */
1052
/* Python-level iterator object that wraps a MarkupIterator. */
typedef struct {
    PyObject_HEAD

    /* the string being parsed; formatter_parser() takes a reference
       and formatteriter_dealloc() releases it */
    STRINGLIB_OBJECT *str;

    /* iteration state, initialized by formatter_parser() over the
       contents of str */
    MarkupIterator it_markup;
} formatteriterobject;
1060
1061static void
1062formatteriter_dealloc(formatteriterobject *it)
1063{
1064 Py_XDECREF(it->str);
1065 PyObject_FREE(it);
1066}
1067
1068/* returns a tuple:
Eric Smith625cbf22007-08-29 03:22:59 +00001069 (literal, field_name, format_spec, conversion)
1070
1071 literal is any literal text to output. might be zero length
1072 field_name is the string before the ':'. might be None
1073 format_spec is the string after the ':'. mibht be None
1074 conversion is either None, or the string after the '!'
Eric Smithf6db4092007-08-27 23:52:26 +00001075*/
1076static PyObject *
1077formatteriter_next(formatteriterobject *it)
1078{
1079 SubString literal;
1080 SubString field_name;
1081 SubString format_spec;
Eric Smith8fd3eba2008-02-17 19:48:00 +00001082 STRINGLIB_CHAR conversion;
Eric Smithf6db4092007-08-27 23:52:26 +00001083 int format_spec_needs_expanding;
Eric Smith8ec90442009-03-14 12:29:34 +00001084 int field_present;
1085 int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1086 &field_name, &format_spec, &conversion,
Eric Smithf6db4092007-08-27 23:52:26 +00001087 &format_spec_needs_expanding);
1088
1089 /* all of the SubString objects point into it->str, so no
1090 memory management needs to be done on them */
1091 assert(0 <= result && result <= 2);
Eric Smith0cb431c2007-08-28 01:07:27 +00001092 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +00001093 /* if 0, error has already been set, if 1, iterator is empty */
1094 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +00001095 else {
Eric Smithf6db4092007-08-27 23:52:26 +00001096 PyObject *literal_str = NULL;
1097 PyObject *field_name_str = NULL;
1098 PyObject *format_spec_str = NULL;
1099 PyObject *conversion_str = NULL;
1100 PyObject *tuple = NULL;
1101
Eric Smith625cbf22007-08-29 03:22:59 +00001102 literal_str = SubString_new_object(&literal);
1103 if (literal_str == NULL)
1104 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001105
Eric Smith625cbf22007-08-29 03:22:59 +00001106 field_name_str = SubString_new_object(&field_name);
1107 if (field_name_str == NULL)
1108 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001109
Eric Smith625cbf22007-08-29 03:22:59 +00001110 /* if field_name is non-zero length, return a string for
1111 format_spec (even if zero length), else return None */
Eric Smith8ec90442009-03-14 12:29:34 +00001112 format_spec_str = (field_present ?
Eric Smith625cbf22007-08-29 03:22:59 +00001113 SubString_new_object_or_empty :
1114 SubString_new_object)(&format_spec);
1115 if (format_spec_str == NULL)
1116 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001117
Eric Smith625cbf22007-08-29 03:22:59 +00001118 /* if the conversion is not specified, return a None,
1119 otherwise create a one length string with the conversion
1120 character */
1121 if (conversion == '\0') {
Eric Smithf6db4092007-08-27 23:52:26 +00001122 conversion_str = Py_None;
Eric Smithf6db4092007-08-27 23:52:26 +00001123 Py_INCREF(conversion_str);
1124 }
Eric Smith625cbf22007-08-29 03:22:59 +00001125 else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001126 conversion_str = STRINGLIB_NEW(&conversion, 1);
Eric Smith625cbf22007-08-29 03:22:59 +00001127 if (conversion_str == NULL)
1128 goto done;
1129
Eric Smith9e7c8da2007-08-28 11:15:20 +00001130 tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
Eric Smithf6db4092007-08-27 23:52:26 +00001131 conversion_str);
Eric Smith625cbf22007-08-29 03:22:59 +00001132 done:
Eric Smithf6db4092007-08-27 23:52:26 +00001133 Py_XDECREF(literal_str);
1134 Py_XDECREF(field_name_str);
1135 Py_XDECREF(format_spec_str);
1136 Py_XDECREF(conversion_str);
1137 return tuple;
1138 }
1139}
1140
/* Method table for the formatter iterator type.  It contains only the
   terminating sentinel entry. */
static PyMethodDef formatteriter_methods[] = {
    {NULL,              NULL}           /* sentinel */
};
1144
/* Type object for the iterator created by formatter_parser().  The
   slots filled in are tp_dealloc, tp_getattro, tp_flags, tp_iter,
   tp_iternext and tp_methods; every other listed slot is 0. */
static PyTypeObject PyFormatterIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "formatteriterator",                /* tp_name */
    sizeof(formatteriterobject),        /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /* methods */
    (destructor)formatteriter_dealloc,  /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_reserved */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    0,                                  /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    PyObject_SelfIter,                  /* tp_iter */
    (iternextfunc)formatteriter_next,   /* tp_iternext */
    formatteriter_methods,              /* tp_methods */
    0,                                  /* tp_members */
};
1177
1178/* unicode_formatter_parser is used to implement
1179 string.Formatter.vformat. it parses a string and returns tuples
1180 describing the parsed elements. It's a wrapper around
1181 stringlib/string_format.h's MarkupIterator */
1182static PyObject *
Eric Smith8fd3eba2008-02-17 19:48:00 +00001183formatter_parser(STRINGLIB_OBJECT *self)
Eric Smithf6db4092007-08-27 23:52:26 +00001184{
1185 formatteriterobject *it;
1186
1187 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1188 if (it == NULL)
1189 return NULL;
1190
1191 /* take ownership, give the object to the iterator */
1192 Py_INCREF(self);
1193 it->str = self;
1194
1195 /* initialize the contained MarkupIterator */
1196 MarkupIterator_init(&it->it_markup,
Eric Smith8fd3eba2008-02-17 19:48:00 +00001197 STRINGLIB_STR(self),
1198 STRINGLIB_LEN(self));
Eric Smithf6db4092007-08-27 23:52:26 +00001199
1200 return (PyObject *)it;
1201}
1202
1203
1204/************************************************************************/
1205/*********** fieldnameiterator ******************************************/
1206/************************************************************************/
1207
1208
/* This is used to implement string.Formatter.get_field().  It parses
   the field name into attribute and item values.  It's a
   Python-callable wrapper around FieldNameIterator. */
1212
/* Python-level iterator object that wraps a FieldNameIterator. */
typedef struct {
    PyObject_HEAD

    /* the field-name string being parsed; formatter_field_name_split()
       takes a reference to keep it alive and fieldnameiter_dealloc()
       releases it */
    STRINGLIB_OBJECT *str;

    /* iteration state, filled in by field_name_split() */
    FieldNameIterator it_field;
} fieldnameiterobject;
1220
1221static void
1222fieldnameiter_dealloc(fieldnameiterobject *it)
1223{
1224 Py_XDECREF(it->str);
1225 PyObject_FREE(it);
1226}
1227
1228/* returns a tuple:
1229 (is_attr, value)
1230 is_attr is true if we used attribute syntax (e.g., '.foo')
1231 false if we used index syntax (e.g., '[foo]')
1232 value is an integer or string
1233*/
1234static PyObject *
1235fieldnameiter_next(fieldnameiterobject *it)
1236{
1237 int result;
1238 int is_attr;
1239 Py_ssize_t idx;
1240 SubString name;
1241
1242 result = FieldNameIterator_next(&it->it_field, &is_attr,
1243 &idx, &name);
Eric Smith0cb431c2007-08-28 01:07:27 +00001244 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +00001245 /* if 0, error has already been set, if 1, iterator is empty */
1246 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +00001247 else {
Eric Smithf6db4092007-08-27 23:52:26 +00001248 PyObject* result = NULL;
1249 PyObject* is_attr_obj = NULL;
1250 PyObject* obj = NULL;
1251
1252 is_attr_obj = PyBool_FromLong(is_attr);
1253 if (is_attr_obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001254 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001255
1256 /* either an integer or a string */
1257 if (idx != -1)
Christian Heimes217cfd12007-12-02 14:31:20 +00001258 obj = PyLong_FromSsize_t(idx);
Eric Smithf6db4092007-08-27 23:52:26 +00001259 else
1260 obj = SubString_new_object(&name);
1261 if (obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001262 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001263
1264 /* return a tuple of values */
1265 result = PyTuple_Pack(2, is_attr_obj, obj);
Eric Smithf6db4092007-08-27 23:52:26 +00001266
Eric Smith625cbf22007-08-29 03:22:59 +00001267 done:
Eric Smithf6db4092007-08-27 23:52:26 +00001268 Py_XDECREF(is_attr_obj);
1269 Py_XDECREF(obj);
Eric Smith625cbf22007-08-29 03:22:59 +00001270 return result;
Eric Smithf6db4092007-08-27 23:52:26 +00001271 }
Eric Smithf6db4092007-08-27 23:52:26 +00001272}
1273
/* Method table for the field-name iterator type.  It contains only the
   terminating sentinel entry. */
static PyMethodDef fieldnameiter_methods[] = {
    {NULL,              NULL}           /* sentinel */
};
1277
/* Type object for the iterator created by
   formatter_field_name_split().  The slots filled in are tp_dealloc,
   tp_getattro, tp_flags, tp_iter, tp_iternext and tp_methods; every
   other listed slot is 0. */
static PyTypeObject PyFieldNameIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "fieldnameiterator",                /* tp_name */
    sizeof(fieldnameiterobject),        /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /* methods */
    (destructor)fieldnameiter_dealloc,  /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_reserved */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    0,                                  /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    PyObject_SelfIter,                  /* tp_iter */
    (iternextfunc)fieldnameiter_next,   /* tp_iternext */
    fieldnameiter_methods,              /* tp_methods */
    /* tp_members */
    0};
1309
1310/* unicode_formatter_field_name_split is used to implement
1311 string.Formatter.vformat. it takes an PEP 3101 "field name", and
1312 returns a tuple of (first, rest): "first", the part before the
1313 first '.' or '['; and "rest", an iterator for the rest of the field
1314 name. it's a wrapper around stringlib/string_format.h's
1315 field_name_split. The iterator it returns is a
1316 FieldNameIterator */
1317static PyObject *
Eric Smith8fd3eba2008-02-17 19:48:00 +00001318formatter_field_name_split(STRINGLIB_OBJECT *self)
Eric Smithf6db4092007-08-27 23:52:26 +00001319{
1320 SubString first;
1321 Py_ssize_t first_idx;
1322 fieldnameiterobject *it;
1323
1324 PyObject *first_obj = NULL;
1325 PyObject *result = NULL;
1326
1327 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1328 if (it == NULL)
1329 return NULL;
1330
1331 /* take ownership, give the object to the iterator. this is
1332 just to keep the field_name alive */
1333 Py_INCREF(self);
1334 it->str = self;
1335
Eric Smith8ec90442009-03-14 12:29:34 +00001336 /* Pass in auto_number = NULL. We'll return an empty string for
1337 first_obj in that case. */
Eric Smithf6db4092007-08-27 23:52:26 +00001338 if (!field_name_split(STRINGLIB_STR(self),
1339 STRINGLIB_LEN(self),
Eric Smith8ec90442009-03-14 12:29:34 +00001340 &first, &first_idx, &it->it_field, NULL))
Eric Smith625cbf22007-08-29 03:22:59 +00001341 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001342
Eric Smith0cb431c2007-08-28 01:07:27 +00001343 /* first becomes an integer, if possible; else a string */
Eric Smithf6db4092007-08-27 23:52:26 +00001344 if (first_idx != -1)
Christian Heimes217cfd12007-12-02 14:31:20 +00001345 first_obj = PyLong_FromSsize_t(first_idx);
Eric Smithf6db4092007-08-27 23:52:26 +00001346 else
1347 /* convert "first" into a string object */
1348 first_obj = SubString_new_object(&first);
1349 if (first_obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001350 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001351
1352 /* return a tuple of values */
1353 result = PyTuple_Pack(2, first_obj, it);
1354
Eric Smith625cbf22007-08-29 03:22:59 +00001355done:
Eric Smithf6db4092007-08-27 23:52:26 +00001356 Py_XDECREF(it);
1357 Py_XDECREF(first_obj);
1358 return result;
1359}