blob: ecb00a9a1e7b6c1964b948676479540b7f6b2622 [file] [log] [blame]
Eric Smith8c663262007-08-25 02:26:07 +00001/*
2 string_format.h -- implementation of string.format().
3
4 It uses the Objects/stringlib conventions, so that it can be
5 compiled for both unicode and string objects.
6*/
7
8
/* Defines for Python 2.6 compatibility */
10#if PY_VERSION_HEX < 0x03000000
11#define PyLong_FromSsize_t _PyLong_FromSsize_t
12#endif
13
Eric Smith8c663262007-08-25 02:26:07 +000014/* Defines for more efficiently reallocating the string buffer */
15#define INITIAL_SIZE_INCREMENT 100
16#define SIZE_MULTIPLIER 2
17#define MAX_SIZE_INCREMENT 3200
18
19
20/************************************************************************/
21/*********** Global data structures and forward declarations *********/
22/************************************************************************/
23
/*
    A SubString consists of the characters between two string or
    unicode pointers.  'ptr' addresses the first character and 'end'
    is one past the last one, so the length is end - ptr.  Both
    members are NULL when the SubString was initialized from a NULL
    pointer (see SubString_init).
*/
typedef struct {
    STRINGLIB_CHAR *ptr;
    STRINGLIB_CHAR *end;
} SubString;
32
33
/* ANS_INIT:   no numeric or omitted field name has been seen yet.
   ANS_AUTO:   fields with omitted names are being numbered automatically.
   ANS_MANUAL: fields are being given explicit numeric indexes. */
typedef enum {
    ANS_INIT,
    ANS_AUTO,
    ANS_MANUAL
} AutoNumberState;   /* Keep track if we're auto-numbering fields */
39
/* Keeps track of our auto-numbering state, and which number field we're on.
   an_field_number is the index that field_name_split hands to the next
   field whose name is omitted; it is incremented each time it is used. */
typedef struct {
    AutoNumberState an_state;
    int an_field_number;
} AutoNumber;
45
46
/* forward declaration for recursion: output_markup() calls build_string()
   to expand a format_spec that itself contains replacement fields */
static PyObject *
build_string(SubString *input, PyObject *args, PyObject *kwargs,
             int recursion_depth, AutoNumber *auto_number);
Eric Smith8c663262007-08-25 02:26:07 +000051
52
53
54/************************************************************************/
55/************************** Utility functions ************************/
56/************************************************************************/
57
Eric Smith8ec90442009-03-14 12:29:34 +000058static void
59AutoNumber_Init(AutoNumber *auto_number)
60{
61 auto_number->an_state = ANS_INIT;
62 auto_number->an_field_number = 0;
63}
64
Eric Smith8c663262007-08-25 02:26:07 +000065/* fill in a SubString from a pointer and length */
66Py_LOCAL_INLINE(void)
67SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
68{
69 str->ptr = p;
70 if (p == NULL)
71 str->end = NULL;
72 else
73 str->end = str->ptr + len;
74}
75
Eric Smith625cbf22007-08-29 03:22:59 +000076/* return a new string. if str->ptr is NULL, return None */
Eric Smith8c663262007-08-25 02:26:07 +000077Py_LOCAL_INLINE(PyObject *)
78SubString_new_object(SubString *str)
79{
Eric Smith625cbf22007-08-29 03:22:59 +000080 if (str->ptr == NULL) {
81 Py_INCREF(Py_None);
82 return Py_None;
83 }
84 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
85}
86
87/* return a new string. if str->ptr is NULL, return None */
88Py_LOCAL_INLINE(PyObject *)
89SubString_new_object_or_empty(SubString *str)
90{
91 if (str->ptr == NULL) {
92 return STRINGLIB_NEW(NULL, 0);
93 }
Eric Smith8c663262007-08-25 02:26:07 +000094 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
95}
96
Eric Smith8ec90442009-03-14 12:29:34 +000097/* Return 1 if an error has been detected switching between automatic
98 field numbering and manual field specification, else return 0. Set
99 ValueError on error. */
100static int
101autonumber_state_error(AutoNumberState state, int field_name_is_empty)
102{
103 if (state == ANS_MANUAL) {
104 if (field_name_is_empty) {
105 PyErr_SetString(PyExc_ValueError, "cannot switch from "
106 "manual field specification to "
107 "automatic field numbering");
108 return 1;
109 }
110 }
111 else {
112 if (!field_name_is_empty) {
113 PyErr_SetString(PyExc_ValueError, "cannot switch from "
114 "automatic field numbering to "
115 "manual field specification");
116 return 1;
117 }
118 }
119 return 0;
120}
121
122
Eric Smith8c663262007-08-25 02:26:07 +0000123/************************************************************************/
Eric Smith8c663262007-08-25 02:26:07 +0000124/*********** Output string management functions ****************/
125/************************************************************************/
126
/* Growable output buffer used while building the result.
     ptr            - next position to write in obj's character buffer
     end            - one past the last character currently allocated
     obj            - the string object that owns the buffer
     size_increment - extra slack requested by the next output_extend() */
typedef struct {
    STRINGLIB_CHAR *ptr;
    STRINGLIB_CHAR *end;
    PyObject *obj;
    Py_ssize_t size_increment;
} OutputString;
133
134/* initialize an OutputString object, reserving size characters */
135static int
136output_initialize(OutputString *output, Py_ssize_t size)
137{
138 output->obj = STRINGLIB_NEW(NULL, size);
139 if (output->obj == NULL)
140 return 0;
141
142 output->ptr = STRINGLIB_STR(output->obj);
143 output->end = STRINGLIB_LEN(output->obj) + output->ptr;
144 output->size_increment = INITIAL_SIZE_INCREMENT;
145
146 return 1;
147}
148
149/*
150 output_extend reallocates the output string buffer.
151 It returns a status: 0 for a failed reallocation,
152 1 for success.
153*/
154
155static int
156output_extend(OutputString *output, Py_ssize_t count)
157{
158 STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
159 Py_ssize_t curlen = output->ptr - startptr;
160 Py_ssize_t maxlen = curlen + count + output->size_increment;
161
162 if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
163 return 0;
164 startptr = STRINGLIB_STR(output->obj);
165 output->ptr = startptr + curlen;
166 output->end = startptr + maxlen;
167 if (output->size_increment < MAX_SIZE_INCREMENT)
168 output->size_increment *= SIZE_MULTIPLIER;
169 return 1;
170}
171
172/*
173 output_data dumps characters into our output string
174 buffer.
175
176 In some cases, it has to reallocate the string.
177
178 It returns a status: 0 for a failed reallocation,
179 1 for success.
180*/
181static int
182output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
183{
184 if ((count > output->end - output->ptr) && !output_extend(output, count))
185 return 0;
186 memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
187 output->ptr += count;
188 return 1;
189}
190
191/************************************************************************/
192/*********** Format string parsing -- integers and identifiers *********/
193/************************************************************************/
194
Eric Smith7ade6482007-08-26 22:27:13 +0000195static Py_ssize_t
196get_integer(const SubString *str)
Eric Smith8c663262007-08-25 02:26:07 +0000197{
Eric Smith7ade6482007-08-26 22:27:13 +0000198 Py_ssize_t accumulator = 0;
199 Py_ssize_t digitval;
200 Py_ssize_t oldaccumulator;
201 STRINGLIB_CHAR *p;
Eric Smith8c663262007-08-25 02:26:07 +0000202
Eric Smith7ade6482007-08-26 22:27:13 +0000203 /* empty string is an error */
204 if (str->ptr >= str->end)
205 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000206
Eric Smith7ade6482007-08-26 22:27:13 +0000207 for (p = str->ptr; p < str->end; p++) {
208 digitval = STRINGLIB_TODECIMAL(*p);
Eric Smith8c663262007-08-25 02:26:07 +0000209 if (digitval < 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000210 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000211 /*
212 This trick was copied from old Unicode format code. It's cute,
213 but would really suck on an old machine with a slow divide
214 implementation. Fortunately, in the normal case we do not
215 expect too many digits.
216 */
217 oldaccumulator = accumulator;
218 accumulator *= 10;
219 if ((accumulator+10)/10 != oldaccumulator+1) {
220 PyErr_Format(PyExc_ValueError,
221 "Too many decimal digits in format string");
222 return -1;
223 }
224 accumulator += digitval;
225 }
Eric Smith7ade6482007-08-26 22:27:13 +0000226 return accumulator;
Eric Smith8c663262007-08-25 02:26:07 +0000227}
228
229/************************************************************************/
230/******** Functions to get field objects and specification strings ******/
231/************************************************************************/
232
Eric Smith7ade6482007-08-26 22:27:13 +0000233/* do the equivalent of obj.name */
Eric Smith8c663262007-08-25 02:26:07 +0000234static PyObject *
Eric Smith7ade6482007-08-26 22:27:13 +0000235getattr(PyObject *obj, SubString *name)
Eric Smith8c663262007-08-25 02:26:07 +0000236{
Eric Smith7ade6482007-08-26 22:27:13 +0000237 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000238 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000239 if (str == NULL)
240 return NULL;
241 newobj = PyObject_GetAttr(obj, str);
242 Py_DECREF(str);
243 return newobj;
Eric Smith8c663262007-08-25 02:26:07 +0000244}
245
Eric Smith7ade6482007-08-26 22:27:13 +0000246/* do the equivalent of obj[idx], where obj is a sequence */
247static PyObject *
248getitem_sequence(PyObject *obj, Py_ssize_t idx)
249{
250 return PySequence_GetItem(obj, idx);
251}
252
253/* do the equivalent of obj[idx], where obj is not a sequence */
254static PyObject *
255getitem_idx(PyObject *obj, Py_ssize_t idx)
256{
257 PyObject *newobj;
Christian Heimes217cfd12007-12-02 14:31:20 +0000258 PyObject *idx_obj = PyLong_FromSsize_t(idx);
Eric Smith7ade6482007-08-26 22:27:13 +0000259 if (idx_obj == NULL)
260 return NULL;
261 newobj = PyObject_GetItem(obj, idx_obj);
262 Py_DECREF(idx_obj);
263 return newobj;
264}
265
266/* do the equivalent of obj[name] */
267static PyObject *
268getitem_str(PyObject *obj, SubString *name)
269{
270 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000271 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000272 if (str == NULL)
273 return NULL;
274 newobj = PyObject_GetItem(obj, str);
275 Py_DECREF(str);
276 return newobj;
277}
278
/* Iterator over the ".attr" and "[item]" components of a field name. */
typedef struct {
    /* the entire string we're parsing.  we assume that someone else
       is managing its lifetime, and that it will exist for the
       lifetime of the iterator.  can be empty */
    SubString str;

    /* pointer to where we are inside field_name; advanced by
       FieldNameIterator_next as components are consumed */
    STRINGLIB_CHAR *ptr;
} FieldNameIterator;
288
289
290static int
291FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
292 Py_ssize_t len)
293{
294 SubString_init(&self->str, ptr, len);
295 self->ptr = self->str.ptr;
296 return 1;
297}
298
299static int
300_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
301{
302 STRINGLIB_CHAR c;
303
304 name->ptr = self->ptr;
305
306 /* return everything until '.' or '[' */
307 while (self->ptr < self->str.end) {
308 switch (c = *self->ptr++) {
309 case '[':
310 case '.':
311 /* backup so that we this character will be seen next time */
312 self->ptr--;
313 break;
314 default:
315 continue;
316 }
317 break;
318 }
319 /* end of string is okay */
320 name->end = self->ptr;
321 return 1;
322}
323
324static int
325_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
326{
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000327 int bracket_seen = 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000328 STRINGLIB_CHAR c;
329
330 name->ptr = self->ptr;
331
332 /* return everything until ']' */
333 while (self->ptr < self->str.end) {
334 switch (c = *self->ptr++) {
335 case ']':
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000336 bracket_seen = 1;
Eric Smith7ade6482007-08-26 22:27:13 +0000337 break;
338 default:
339 continue;
340 }
341 break;
342 }
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000343 /* make sure we ended with a ']' */
344 if (!bracket_seen) {
345 PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
346 return 0;
347 }
348
Eric Smith7ade6482007-08-26 22:27:13 +0000349 /* end of string is okay */
350 /* don't include the ']' */
351 name->end = self->ptr-1;
352 return 1;
353}
354
355/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
356static int
357FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
358 Py_ssize_t *name_idx, SubString *name)
359{
360 /* check at end of input */
361 if (self->ptr >= self->str.end)
362 return 1;
363
364 switch (*self->ptr++) {
365 case '.':
366 *is_attribute = 1;
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000367 if (_FieldNameIterator_attr(self, name) == 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000368 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000369 *name_idx = -1;
370 break;
371 case '[':
372 *is_attribute = 0;
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000373 if (_FieldNameIterator_item(self, name) == 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000374 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000375 *name_idx = get_integer(name);
376 break;
377 default:
Eric Smith41669ca2009-05-23 14:23:22 +0000378 /* Invalid character follows ']' */
379 PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
380 "follow ']' in format field specifier");
Eric Smith7ade6482007-08-26 22:27:13 +0000381 return 0;
382 }
383
384 /* empty string is an error */
385 if (name->ptr == name->end) {
386 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
387 return 0;
388 }
389
390 return 2;
391}
392
393
394/* input: field_name
395 output: 'first' points to the part before the first '[' or '.'
396 'first_idx' is -1 if 'first' is not an integer, otherwise
397 it's the value of first converted to an integer
398 'rest' is an iterator to return the rest
399*/
400static int
401field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
Eric Smith8ec90442009-03-14 12:29:34 +0000402 Py_ssize_t *first_idx, FieldNameIterator *rest,
403 AutoNumber *auto_number)
Eric Smith7ade6482007-08-26 22:27:13 +0000404{
405 STRINGLIB_CHAR c;
406 STRINGLIB_CHAR *p = ptr;
407 STRINGLIB_CHAR *end = ptr + len;
Eric Smith8ec90442009-03-14 12:29:34 +0000408 int field_name_is_empty;
409 int using_numeric_index;
Eric Smith7ade6482007-08-26 22:27:13 +0000410
411 /* find the part up until the first '.' or '[' */
412 while (p < end) {
413 switch (c = *p++) {
414 case '[':
415 case '.':
416 /* backup so that we this character is available to the
417 "rest" iterator */
418 p--;
419 break;
420 default:
421 continue;
422 }
423 break;
424 }
425
426 /* set up the return values */
427 SubString_init(first, ptr, p - ptr);
428 FieldNameIterator_init(rest, p, end - p);
429
430 /* see if "first" is an integer, in which case it's used as an index */
431 *first_idx = get_integer(first);
432
Eric Smith8ec90442009-03-14 12:29:34 +0000433 field_name_is_empty = first->ptr >= first->end;
434
435 /* If the field name is omitted or if we have a numeric index
436 specified, then we're doing numeric indexing into args. */
437 using_numeric_index = field_name_is_empty || *first_idx != -1;
438
439 /* We always get here exactly one time for each field we're
440 processing. And we get here in field order (counting by left
441 braces). So this is the perfect place to handle automatic field
442 numbering if the field name is omitted. */
443
444 /* Check if we need to do the auto-numbering. It's not needed if
445 we're called from string.Format routines, because it's handled
446 in that class by itself. */
447 if (auto_number) {
448 /* Initialize our auto numbering state if this is the first
449 time we're either auto-numbering or manually numbering. */
450 if (auto_number->an_state == ANS_INIT && using_numeric_index)
451 auto_number->an_state = field_name_is_empty ?
452 ANS_AUTO : ANS_MANUAL;
453
454 /* Make sure our state is consistent with what we're doing
455 this time through. Only check if we're using a numeric
456 index. */
457 if (using_numeric_index)
458 if (autonumber_state_error(auto_number->an_state,
459 field_name_is_empty))
460 return 0;
461 /* Zero length field means we want to do auto-numbering of the
462 fields. */
463 if (field_name_is_empty)
464 *first_idx = (auto_number->an_field_number)++;
Eric Smith7ade6482007-08-26 22:27:13 +0000465 }
466
467 return 1;
Eric Smith7ade6482007-08-26 22:27:13 +0000468}
469
470
Eric Smith8c663262007-08-25 02:26:07 +0000471/*
472 get_field_object returns the object inside {}, before the
473 format_spec. It handles getindex and getattr lookups and consumes
474 the entire input string.
475*/
476static PyObject *
Eric Smith8ec90442009-03-14 12:29:34 +0000477get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
478 AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000479{
Eric Smith7ade6482007-08-26 22:27:13 +0000480 PyObject *obj = NULL;
481 int ok;
482 int is_attribute;
483 SubString name;
484 SubString first;
Eric Smith8c663262007-08-25 02:26:07 +0000485 Py_ssize_t index;
Eric Smith7ade6482007-08-26 22:27:13 +0000486 FieldNameIterator rest;
Eric Smith8c663262007-08-25 02:26:07 +0000487
Eric Smith7ade6482007-08-26 22:27:13 +0000488 if (!field_name_split(input->ptr, input->end - input->ptr, &first,
Eric Smith8ec90442009-03-14 12:29:34 +0000489 &index, &rest, auto_number)) {
Eric Smith7ade6482007-08-26 22:27:13 +0000490 goto error;
491 }
Eric Smith8c663262007-08-25 02:26:07 +0000492
Eric Smith7ade6482007-08-26 22:27:13 +0000493 if (index == -1) {
494 /* look up in kwargs */
Eric Smith7a6dd292007-08-27 23:30:47 +0000495 PyObject *key = SubString_new_object(&first);
Eric Smith7ade6482007-08-26 22:27:13 +0000496 if (key == NULL)
497 goto error;
498 if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
Eric Smith11529192007-09-04 23:04:22 +0000499 PyErr_SetObject(PyExc_KeyError, key);
Eric Smith7ade6482007-08-26 22:27:13 +0000500 Py_DECREF(key);
501 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000502 }
Neal Norwitz8a4eb292007-08-27 07:24:17 +0000503 Py_DECREF(key);
Neal Norwitz247b5152007-08-27 03:22:50 +0000504 Py_INCREF(obj);
Eric Smith0cb431c2007-08-28 01:07:27 +0000505 }
506 else {
Eric Smith7ade6482007-08-26 22:27:13 +0000507 /* look up in args */
508 obj = PySequence_GetItem(args, index);
Eric Smith11529192007-09-04 23:04:22 +0000509 if (obj == NULL)
Eric Smith7ade6482007-08-26 22:27:13 +0000510 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000511 }
Eric Smith7ade6482007-08-26 22:27:13 +0000512
513 /* iterate over the rest of the field_name */
514 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
515 &name)) == 2) {
516 PyObject *tmp;
517
518 if (is_attribute)
519 /* getattr lookup "." */
520 tmp = getattr(obj, &name);
521 else
522 /* getitem lookup "[]" */
523 if (index == -1)
524 tmp = getitem_str(obj, &name);
525 else
526 if (PySequence_Check(obj))
527 tmp = getitem_sequence(obj, index);
528 else
529 /* not a sequence */
530 tmp = getitem_idx(obj, index);
531 if (tmp == NULL)
532 goto error;
533
534 /* assign to obj */
535 Py_DECREF(obj);
536 obj = tmp;
Eric Smith8c663262007-08-25 02:26:07 +0000537 }
Eric Smith7ade6482007-08-26 22:27:13 +0000538 /* end of iterator, this is the non-error case */
539 if (ok == 1)
540 return obj;
541error:
542 Py_XDECREF(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000543 return NULL;
544}
545
546/************************************************************************/
547/***************** Field rendering functions **************************/
548/************************************************************************/
549
550/*
551 render_field() is the main function in this section. It takes the
552 field object and field specification string generated by
553 get_field_and_spec, and renders the field into the output string.
554
Eric Smith8c663262007-08-25 02:26:07 +0000555 render_field calls fieldobj.__format__(format_spec) method, and
556 appends to the output.
557*/
558static int
559render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
560{
561 int ok = 0;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000562 PyObject *result = NULL;
Eric Smith1d138f12008-05-31 01:40:08 +0000563 PyObject *format_spec_object = NULL;
Eric Smithba8c0282008-06-02 14:57:32 +0000564 PyObject *(*formatter)(PyObject *, STRINGLIB_CHAR *, Py_ssize_t) = NULL;
Eric Smith1d138f12008-05-31 01:40:08 +0000565 STRINGLIB_CHAR* format_spec_start = format_spec->ptr ?
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000566 format_spec->ptr : NULL;
Eric Smith1d138f12008-05-31 01:40:08 +0000567 Py_ssize_t format_spec_len = format_spec->ptr ?
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000568 format_spec->end - format_spec->ptr : 0;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000569
Eric Smith1d138f12008-05-31 01:40:08 +0000570 /* If we know the type exactly, skip the lookup of __format__ and just
571 call the formatter directly. */
572 if (PyUnicode_CheckExact(fieldobj))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000573 formatter = _PyUnicode_FormatAdvanced;
Eric Smith1d138f12008-05-31 01:40:08 +0000574 else if (PyLong_CheckExact(fieldobj))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000575 formatter =_PyLong_FormatAdvanced;
Eric Smith1d138f12008-05-31 01:40:08 +0000576 else if (PyFloat_CheckExact(fieldobj))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000577 formatter = _PyFloat_FormatAdvanced;
Eric Smithba8c0282008-06-02 14:57:32 +0000578
579 /* XXX: for 2.6, convert format_spec to the appropriate type
580 (unicode, str) */
581
582 if (formatter) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000583 /* we know exactly which formatter will be called when __format__ is
584 looked up, so call it directly, instead. */
585 result = formatter(fieldobj, format_spec_start, format_spec_len);
Eric Smithba8c0282008-06-02 14:57:32 +0000586 }
Eric Smith1d138f12008-05-31 01:40:08 +0000587 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000588 /* We need to create an object out of the pointers we have, because
589 __format__ takes a string/unicode object for format_spec. */
590 format_spec_object = STRINGLIB_NEW(format_spec_start,
591 format_spec_len);
592 if (format_spec_object == NULL)
593 goto done;
Eric Smith1d138f12008-05-31 01:40:08 +0000594
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000595 result = PyObject_Format(fieldobj, format_spec_object);
Eric Smith1d138f12008-05-31 01:40:08 +0000596 }
Eric Smith8c663262007-08-25 02:26:07 +0000597 if (result == NULL)
598 goto done;
599
Eric Smith8a0217c2008-02-18 18:07:47 +0000600#if PY_VERSION_HEX >= 0x03000000
Eric Smithecbac8f2008-02-24 21:44:34 +0000601 assert(PyUnicode_Check(result));
Eric Smith8a0217c2008-02-18 18:07:47 +0000602#else
Christian Heimes72b710a2008-05-26 13:28:38 +0000603 assert(PyBytes_Check(result) || PyUnicode_Check(result));
Eric Smith8a0217c2008-02-18 18:07:47 +0000604
605 /* Convert result to our type. We could be str, and result could
606 be unicode */
607 {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000608 PyObject *tmp = STRINGLIB_TOSTR(result);
609 if (tmp == NULL)
610 goto done;
611 Py_DECREF(result);
612 result = tmp;
Eric Smith8a0217c2008-02-18 18:07:47 +0000613 }
614#endif
615
Eric Smith8c663262007-08-25 02:26:07 +0000616 ok = output_data(output,
617 STRINGLIB_STR(result), STRINGLIB_LEN(result));
618done:
Eric Smith1d138f12008-05-31 01:40:08 +0000619 Py_XDECREF(format_spec_object);
Eric Smith8c663262007-08-25 02:26:07 +0000620 Py_XDECREF(result);
621 return ok;
622}
623
624static int
625parse_field(SubString *str, SubString *field_name, SubString *format_spec,
626 STRINGLIB_CHAR *conversion)
627{
Eric Smith8ec90442009-03-14 12:29:34 +0000628 /* Note this function works if the field name is zero length,
629 which is good. Zero length field names are handled later, in
630 field_name_split. */
631
Eric Smith8c663262007-08-25 02:26:07 +0000632 STRINGLIB_CHAR c = 0;
633
634 /* initialize these, as they may be empty */
635 *conversion = '\0';
636 SubString_init(format_spec, NULL, 0);
637
Eric Smith8ec90442009-03-14 12:29:34 +0000638 /* Search for the field name. it's terminated by the end of
639 the string, or a ':' or '!' */
Eric Smith8c663262007-08-25 02:26:07 +0000640 field_name->ptr = str->ptr;
641 while (str->ptr < str->end) {
642 switch (c = *(str->ptr++)) {
643 case ':':
644 case '!':
645 break;
646 default:
647 continue;
648 }
649 break;
650 }
651
652 if (c == '!' || c == ':') {
653 /* we have a format specifier and/or a conversion */
654 /* don't include the last character */
655 field_name->end = str->ptr-1;
656
657 /* the format specifier is the rest of the string */
658 format_spec->ptr = str->ptr;
659 format_spec->end = str->end;
660
661 /* see if there's a conversion specifier */
662 if (c == '!') {
663 /* there must be another character present */
664 if (format_spec->ptr >= format_spec->end) {
665 PyErr_SetString(PyExc_ValueError,
666 "end of format while looking for conversion "
667 "specifier");
668 return 0;
669 }
670 *conversion = *(format_spec->ptr++);
671
672 /* if there is another character, it must be a colon */
673 if (format_spec->ptr < format_spec->end) {
674 c = *(format_spec->ptr++);
675 if (c != ':') {
676 PyErr_SetString(PyExc_ValueError,
677 "expected ':' after format specifier");
678 return 0;
679 }
680 }
681 }
Eric Smith0cb431c2007-08-28 01:07:27 +0000682 }
Eric Smith8ec90442009-03-14 12:29:34 +0000683 else
Eric Smith8c663262007-08-25 02:26:07 +0000684 /* end of string, there's no format_spec or conversion */
685 field_name->end = str->ptr;
Eric Smith8ec90442009-03-14 12:29:34 +0000686
687 return 1;
Eric Smith8c663262007-08-25 02:26:07 +0000688}
689
690/************************************************************************/
691/******* Output string allocation and escape-to-markup processing ******/
692/************************************************************************/
693
/* MarkupIterator breaks the string into pieces of either literal
   text, or things inside {} that need to be marked up.  it is
   designed to make it easy to wrap a Python iterator around it, for
   use with the Formatter class */

typedef struct {
    /* the not-yet-consumed part of the format string; str.ptr is
       advanced by MarkupIterator_next */
    SubString str;
} MarkupIterator;
702
703static int
704MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
705{
706 SubString_init(&self->str, ptr, len);
Eric Smith8c663262007-08-25 02:26:07 +0000707 return 1;
708}
709
710/* returns 0 on error, 1 on non-error termination, and 2 if it got a
711 string (or something to be expanded) */
712static int
Eric Smith625cbf22007-08-29 03:22:59 +0000713MarkupIterator_next(MarkupIterator *self, SubString *literal,
Eric Smith8ec90442009-03-14 12:29:34 +0000714 int *field_present, SubString *field_name,
715 SubString *format_spec, STRINGLIB_CHAR *conversion,
Eric Smith8c663262007-08-25 02:26:07 +0000716 int *format_spec_needs_expanding)
717{
718 int at_end;
719 STRINGLIB_CHAR c = 0;
720 STRINGLIB_CHAR *start;
721 int count;
722 Py_ssize_t len;
Eric Smith625cbf22007-08-29 03:22:59 +0000723 int markup_follows = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000724
Eric Smith625cbf22007-08-29 03:22:59 +0000725 /* initialize all of the output variables */
726 SubString_init(literal, NULL, 0);
727 SubString_init(field_name, NULL, 0);
728 SubString_init(format_spec, NULL, 0);
729 *conversion = '\0';
Eric Smith8c663262007-08-25 02:26:07 +0000730 *format_spec_needs_expanding = 0;
Eric Smith8ec90442009-03-14 12:29:34 +0000731 *field_present = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000732
Eric Smith625cbf22007-08-29 03:22:59 +0000733 /* No more input, end of iterator. This is the normal exit
734 path. */
Eric Smith8c663262007-08-25 02:26:07 +0000735 if (self->str.ptr >= self->str.end)
736 return 1;
737
Eric Smith8c663262007-08-25 02:26:07 +0000738 start = self->str.ptr;
739
Eric Smith625cbf22007-08-29 03:22:59 +0000740 /* First read any literal text. Read until the end of string, an
741 escaped '{' or '}', or an unescaped '{'. In order to never
742 allocate memory and so I can just pass pointers around, if
743 there's an escaped '{' or '}' then we'll return the literal
744 including the brace, but no format object. The next time
745 through, we'll return the rest of the literal, skipping past
746 the second consecutive brace. */
747 while (self->str.ptr < self->str.end) {
748 switch (c = *(self->str.ptr++)) {
749 case '{':
750 case '}':
751 markup_follows = 1;
752 break;
753 default:
754 continue;
Eric Smith8c663262007-08-25 02:26:07 +0000755 }
Eric Smith625cbf22007-08-29 03:22:59 +0000756 break;
Eric Smith0cb431c2007-08-28 01:07:27 +0000757 }
Eric Smith625cbf22007-08-29 03:22:59 +0000758
759 at_end = self->str.ptr >= self->str.end;
760 len = self->str.ptr - start;
761
762 if ((c == '}') && (at_end || (c != *self->str.ptr))) {
763 PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
764 "in format string");
765 return 0;
766 }
767 if (at_end && c == '{') {
768 PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
769 "in format string");
770 return 0;
771 }
772 if (!at_end) {
773 if (c == *self->str.ptr) {
774 /* escaped } or {, skip it in the input. there is no
775 markup object following us, just this literal text */
776 self->str.ptr++;
777 markup_follows = 0;
778 }
779 else
780 len--;
781 }
782
783 /* record the literal text */
784 literal->ptr = start;
785 literal->end = start + len;
786
787 if (!markup_follows)
788 return 2;
789
790 /* this is markup, find the end of the string by counting nested
791 braces. note that this prohibits escaped braces, so that
792 format_specs cannot have braces in them. */
Eric Smith8ec90442009-03-14 12:29:34 +0000793 *field_present = 1;
Eric Smith625cbf22007-08-29 03:22:59 +0000794 count = 1;
795
796 start = self->str.ptr;
797
798 /* we know we can't have a zero length string, so don't worry
799 about that case */
800 while (self->str.ptr < self->str.end) {
801 switch (c = *(self->str.ptr++)) {
802 case '{':
803 /* the format spec needs to be recursively expanded.
804 this is an optimization, and not strictly needed */
805 *format_spec_needs_expanding = 1;
806 count++;
807 break;
808 case '}':
809 count--;
810 if (count <= 0) {
811 /* we're done. parse and get out */
812 SubString s;
813
814 SubString_init(&s, start, self->str.ptr - 1 - start);
815 if (parse_field(&s, field_name, format_spec, conversion) == 0)
816 return 0;
817
Eric Smith625cbf22007-08-29 03:22:59 +0000818 /* success */
819 return 2;
Eric Smith8c663262007-08-25 02:26:07 +0000820 }
821 break;
822 }
Eric Smith8c663262007-08-25 02:26:07 +0000823 }
Eric Smith625cbf22007-08-29 03:22:59 +0000824
825 /* end of string while searching for matching '}' */
826 PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
827 return 0;
Eric Smith8c663262007-08-25 02:26:07 +0000828}
829
830
831/* do the !r or !s conversion on obj */
832static PyObject *
833do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
834{
835 /* XXX in pre-3.0, do we need to convert this to unicode, since it
836 might have returned a string? */
837 switch (conversion) {
838 case 'r':
839 return PyObject_Repr(obj);
840 case 's':
Eric Smith8fd3eba2008-02-17 19:48:00 +0000841 return STRINGLIB_TOSTR(obj);
Georg Brandl559e5d72008-06-11 18:37:52 +0000842#if PY_VERSION_HEX >= 0x03000000
843 case 'a':
844 return STRINGLIB_TOASCII(obj);
845#endif
Eric Smith8c663262007-08-25 02:26:07 +0000846 default:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000847 if (conversion > 32 && conversion < 127) {
848 /* It's the ASCII subrange; casting to char is safe
849 (assuming the execution character set is an ASCII
850 superset). */
851 PyErr_Format(PyExc_ValueError,
Martin v. Löwis5a6f4582008-04-07 03:22:07 +0000852 "Unknown conversion specifier %c",
853 (char)conversion);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000854 } else
855 PyErr_Format(PyExc_ValueError,
856 "Unknown conversion specifier \\x%x",
857 (unsigned int)conversion);
Eric Smith8c663262007-08-25 02:26:07 +0000858 return NULL;
859 }
860}
861
862/* given:
863
864 {field_name!conversion:format_spec}
865
866 compute the result and write it to output.
867 format_spec_needs_expanding is an optimization. if it's false,
868 just output the string directly, otherwise recursively expand the
Eric Smith8ec90442009-03-14 12:29:34 +0000869 format_spec string.
870
871 field_name is allowed to be zero length, in which case we
872 are doing auto field numbering.
873*/
Eric Smith8c663262007-08-25 02:26:07 +0000874
875static int
876output_markup(SubString *field_name, SubString *format_spec,
877 int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
878 OutputString *output, PyObject *args, PyObject *kwargs,
Eric Smith8ec90442009-03-14 12:29:34 +0000879 int recursion_depth, AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000880{
881 PyObject *tmp = NULL;
882 PyObject *fieldobj = NULL;
883 SubString expanded_format_spec;
884 SubString *actual_format_spec;
885 int result = 0;
886
887 /* convert field_name to an object */
Eric Smith8ec90442009-03-14 12:29:34 +0000888 fieldobj = get_field_object(field_name, args, kwargs, auto_number);
Eric Smith8c663262007-08-25 02:26:07 +0000889 if (fieldobj == NULL)
890 goto done;
891
892 if (conversion != '\0') {
893 tmp = do_conversion(fieldobj, conversion);
894 if (tmp == NULL)
895 goto done;
896
897 /* do the assignment, transferring ownership: fieldobj = tmp */
898 Py_DECREF(fieldobj);
899 fieldobj = tmp;
900 tmp = NULL;
901 }
902
903 /* if needed, recurively compute the format_spec */
904 if (format_spec_needs_expanding) {
Eric Smith8ec90442009-03-14 12:29:34 +0000905 tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
906 auto_number);
Eric Smith8c663262007-08-25 02:26:07 +0000907 if (tmp == NULL)
908 goto done;
909
910 /* note that in the case we're expanding the format string,
911 tmp must be kept around until after the call to
912 render_field. */
913 SubString_init(&expanded_format_spec,
914 STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
915 actual_format_spec = &expanded_format_spec;
Eric Smith0cb431c2007-08-28 01:07:27 +0000916 }
917 else
Eric Smith8c663262007-08-25 02:26:07 +0000918 actual_format_spec = format_spec;
919
920 if (render_field(fieldobj, actual_format_spec, output) == 0)
921 goto done;
922
923 result = 1;
924
925done:
926 Py_XDECREF(fieldobj);
927 Py_XDECREF(tmp);
928
929 return result;
930}
931
932/*
Eric Smith8fd3eba2008-02-17 19:48:00 +0000933 do_markup is the top-level loop for the format() method. It
Eric Smith8c663262007-08-25 02:26:07 +0000934 searches through the format string for escapes to markup codes, and
935 calls other functions to move non-markup text to the output,
936 and to perform the markup to the output.
937*/
938static int
939do_markup(SubString *input, PyObject *args, PyObject *kwargs,
Eric Smith8ec90442009-03-14 12:29:34 +0000940 OutputString *output, int recursion_depth, AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000941{
942 MarkupIterator iter;
Eric Smith8c663262007-08-25 02:26:07 +0000943 int format_spec_needs_expanding;
944 int result;
Eric Smith8ec90442009-03-14 12:29:34 +0000945 int field_present;
Eric Smith625cbf22007-08-29 03:22:59 +0000946 SubString literal;
Eric Smith8c663262007-08-25 02:26:07 +0000947 SubString field_name;
948 SubString format_spec;
949 STRINGLIB_CHAR conversion;
950
951 MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
Eric Smith8ec90442009-03-14 12:29:34 +0000952 while ((result = MarkupIterator_next(&iter, &literal, &field_present,
953 &field_name, &format_spec,
954 &conversion,
Eric Smith8c663262007-08-25 02:26:07 +0000955 &format_spec_needs_expanding)) == 2) {
Eric Smith625cbf22007-08-29 03:22:59 +0000956 if (!output_data(output, literal.ptr, literal.end - literal.ptr))
957 return 0;
Eric Smith8ec90442009-03-14 12:29:34 +0000958 if (field_present)
Eric Smith8c663262007-08-25 02:26:07 +0000959 if (!output_markup(&field_name, &format_spec,
960 format_spec_needs_expanding, conversion, output,
Eric Smith8ec90442009-03-14 12:29:34 +0000961 args, kwargs, recursion_depth, auto_number))
Eric Smith8c663262007-08-25 02:26:07 +0000962 return 0;
Eric Smith8c663262007-08-25 02:26:07 +0000963 }
964 return result;
965}
966
967
968/*
969 build_string allocates the output string and then
970 calls do_markup to do the heavy lifting.
971*/
972static PyObject *
973build_string(SubString *input, PyObject *args, PyObject *kwargs,
Eric Smith8ec90442009-03-14 12:29:34 +0000974 int recursion_depth, AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000975{
976 OutputString output;
977 PyObject *result = NULL;
978 Py_ssize_t count;
979
980 output.obj = NULL; /* needed so cleanup code always works */
981
982 /* check the recursion level */
Eric Smith45c07872007-09-05 02:02:43 +0000983 if (recursion_depth <= 0) {
Eric Smith8c663262007-08-25 02:26:07 +0000984 PyErr_SetString(PyExc_ValueError,
985 "Max string recursion exceeded");
986 goto done;
987 }
988
989 /* initial size is the length of the format string, plus the size
990 increment. seems like a reasonable default */
991 if (!output_initialize(&output,
992 input->end - input->ptr +
993 INITIAL_SIZE_INCREMENT))
994 goto done;
995
Eric Smith8ec90442009-03-14 12:29:34 +0000996 if (!do_markup(input, args, kwargs, &output, recursion_depth,
997 auto_number)) {
Eric Smith8c663262007-08-25 02:26:07 +0000998 goto done;
999 }
1000
1001 count = output.ptr - STRINGLIB_STR(output.obj);
1002 if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
1003 goto done;
1004 }
1005
1006 /* transfer ownership to result */
1007 result = output.obj;
1008 output.obj = NULL;
1009
1010done:
Eric Smith8c663262007-08-25 02:26:07 +00001011 Py_XDECREF(output.obj);
1012 return result;
1013}
1014
1015/************************************************************************/
1016/*********** main routine ***********************************************/
1017/************************************************************************/
1018
1019/* this is the main entry point */
1020static PyObject *
1021do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
1022{
1023 SubString input;
1024
1025 /* PEP 3101 says only 2 levels, so that
1026 "{0:{1}}".format('abc', 's') # works
1027 "{0:{1:{2}}}".format('abc', 's', '') # fails
1028 */
Eric Smith45c07872007-09-05 02:02:43 +00001029 int recursion_depth = 2;
Eric Smith8c663262007-08-25 02:26:07 +00001030
Eric Smith8ec90442009-03-14 12:29:34 +00001031 AutoNumber auto_number;
1032
1033 AutoNumber_Init(&auto_number);
Eric Smith8c663262007-08-25 02:26:07 +00001034 SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
Eric Smith8ec90442009-03-14 12:29:34 +00001035 return build_string(&input, args, kwargs, recursion_depth, &auto_number);
Eric Smith8c663262007-08-25 02:26:07 +00001036}
Eric Smithf6db4092007-08-27 23:52:26 +00001037
1038
1039
1040/************************************************************************/
1041/*********** formatteriterator ******************************************/
1042/************************************************************************/
1043
/* Python-level iterator object used by string.Formatter.  It exists
   so Formatter can share code with the built-in format() method.
   It's really just a wrapper around MarkupIterator that is callable
   from Python. */

typedef struct {
    PyObject_HEAD

    /* the string being parsed; a reference is held for the lifetime
       of the iterator because it_markup points into its buffer */
    STRINGLIB_OBJECT *str;

    /* parsing state over the contents of str */
    MarkupIterator it_markup;
} formatteriterobject;
1056
1057static void
1058formatteriter_dealloc(formatteriterobject *it)
1059{
1060 Py_XDECREF(it->str);
1061 PyObject_FREE(it);
1062}
1063
1064/* returns a tuple:
Eric Smith625cbf22007-08-29 03:22:59 +00001065 (literal, field_name, format_spec, conversion)
1066
1067 literal is any literal text to output. might be zero length
1068 field_name is the string before the ':'. might be None
1069 format_spec is the string after the ':'. mibht be None
1070 conversion is either None, or the string after the '!'
Eric Smithf6db4092007-08-27 23:52:26 +00001071*/
1072static PyObject *
1073formatteriter_next(formatteriterobject *it)
1074{
1075 SubString literal;
1076 SubString field_name;
1077 SubString format_spec;
Eric Smith8fd3eba2008-02-17 19:48:00 +00001078 STRINGLIB_CHAR conversion;
Eric Smithf6db4092007-08-27 23:52:26 +00001079 int format_spec_needs_expanding;
Eric Smith8ec90442009-03-14 12:29:34 +00001080 int field_present;
1081 int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1082 &field_name, &format_spec, &conversion,
Eric Smithf6db4092007-08-27 23:52:26 +00001083 &format_spec_needs_expanding);
1084
1085 /* all of the SubString objects point into it->str, so no
1086 memory management needs to be done on them */
1087 assert(0 <= result && result <= 2);
Eric Smith0cb431c2007-08-28 01:07:27 +00001088 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +00001089 /* if 0, error has already been set, if 1, iterator is empty */
1090 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +00001091 else {
Eric Smithf6db4092007-08-27 23:52:26 +00001092 PyObject *literal_str = NULL;
1093 PyObject *field_name_str = NULL;
1094 PyObject *format_spec_str = NULL;
1095 PyObject *conversion_str = NULL;
1096 PyObject *tuple = NULL;
1097
Eric Smith625cbf22007-08-29 03:22:59 +00001098 literal_str = SubString_new_object(&literal);
1099 if (literal_str == NULL)
1100 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001101
Eric Smith625cbf22007-08-29 03:22:59 +00001102 field_name_str = SubString_new_object(&field_name);
1103 if (field_name_str == NULL)
1104 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001105
Eric Smith625cbf22007-08-29 03:22:59 +00001106 /* if field_name is non-zero length, return a string for
1107 format_spec (even if zero length), else return None */
Eric Smith8ec90442009-03-14 12:29:34 +00001108 format_spec_str = (field_present ?
Eric Smith625cbf22007-08-29 03:22:59 +00001109 SubString_new_object_or_empty :
1110 SubString_new_object)(&format_spec);
1111 if (format_spec_str == NULL)
1112 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001113
Eric Smith625cbf22007-08-29 03:22:59 +00001114 /* if the conversion is not specified, return a None,
1115 otherwise create a one length string with the conversion
1116 character */
1117 if (conversion == '\0') {
Eric Smithf6db4092007-08-27 23:52:26 +00001118 conversion_str = Py_None;
Eric Smithf6db4092007-08-27 23:52:26 +00001119 Py_INCREF(conversion_str);
1120 }
Eric Smith625cbf22007-08-29 03:22:59 +00001121 else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001122 conversion_str = STRINGLIB_NEW(&conversion, 1);
Eric Smith625cbf22007-08-29 03:22:59 +00001123 if (conversion_str == NULL)
1124 goto done;
1125
Eric Smith9e7c8da2007-08-28 11:15:20 +00001126 tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
Eric Smithf6db4092007-08-27 23:52:26 +00001127 conversion_str);
Eric Smith625cbf22007-08-29 03:22:59 +00001128 done:
Eric Smithf6db4092007-08-27 23:52:26 +00001129 Py_XDECREF(literal_str);
1130 Py_XDECREF(field_name_str);
1131 Py_XDECREF(format_spec_str);
1132 Py_XDECREF(conversion_str);
1133 return tuple;
1134 }
1135}
1136
/* Method table for the formatter iterator type.  It contains no
   methods, only the terminating sentinel entry. */
static PyMethodDef formatteriter_methods[] = {
    {NULL, NULL} /* sentinel */
};
1140
/* Type object for formatteriterobject.  The initializer is positional,
   so the order of the entries must not be changed.  Only deallocation,
   generic attribute lookup, the iterator slots and the (empty) method
   table are filled in. */
static PyTypeObject PyFormatterIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "formatteriterator",                /* tp_name */
    sizeof(formatteriterobject),        /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /* methods */
    (destructor)formatteriter_dealloc,  /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_reserved */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    0,                                  /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    PyObject_SelfIter,                  /* tp_iter: the object is its own iterator */
    (iternextfunc)formatteriter_next,   /* tp_iternext */
    formatteriter_methods,              /* tp_methods */
    0,                                  /* tp_members (slot after tp_methods) */
};
1173
1174/* unicode_formatter_parser is used to implement
1175 string.Formatter.vformat. it parses a string and returns tuples
1176 describing the parsed elements. It's a wrapper around
1177 stringlib/string_format.h's MarkupIterator */
1178static PyObject *
Eric Smith8fd3eba2008-02-17 19:48:00 +00001179formatter_parser(STRINGLIB_OBJECT *self)
Eric Smithf6db4092007-08-27 23:52:26 +00001180{
1181 formatteriterobject *it;
1182
1183 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1184 if (it == NULL)
1185 return NULL;
1186
1187 /* take ownership, give the object to the iterator */
1188 Py_INCREF(self);
1189 it->str = self;
1190
1191 /* initialize the contained MarkupIterator */
1192 MarkupIterator_init(&it->it_markup,
Eric Smith8fd3eba2008-02-17 19:48:00 +00001193 STRINGLIB_STR(self),
1194 STRINGLIB_LEN(self));
Eric Smithf6db4092007-08-27 23:52:26 +00001195
1196 return (PyObject *)it;
1197}
1198
1199
1200/************************************************************************/
1201/*********** fieldnameiterator ******************************************/
1202/************************************************************************/
1203
1204
/* Python-level iterator object used by string.Formatter.  It parses
   the field name into attribute and item values.  It's a
   Python-callable wrapper around FieldNameIterator */

typedef struct {
    PyObject_HEAD

    /* the field-name string; a reference is held just to keep it
       alive while it_field is in use */
    STRINGLIB_OBJECT *str;

    /* parsing state for the part of the field name after "first" */
    FieldNameIterator it_field;
} fieldnameiterobject;
1216
1217static void
1218fieldnameiter_dealloc(fieldnameiterobject *it)
1219{
1220 Py_XDECREF(it->str);
1221 PyObject_FREE(it);
1222}
1223
1224/* returns a tuple:
1225 (is_attr, value)
1226 is_attr is true if we used attribute syntax (e.g., '.foo')
1227 false if we used index syntax (e.g., '[foo]')
1228 value is an integer or string
1229*/
1230static PyObject *
1231fieldnameiter_next(fieldnameiterobject *it)
1232{
1233 int result;
1234 int is_attr;
1235 Py_ssize_t idx;
1236 SubString name;
1237
1238 result = FieldNameIterator_next(&it->it_field, &is_attr,
1239 &idx, &name);
Eric Smith0cb431c2007-08-28 01:07:27 +00001240 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +00001241 /* if 0, error has already been set, if 1, iterator is empty */
1242 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +00001243 else {
Eric Smithf6db4092007-08-27 23:52:26 +00001244 PyObject* result = NULL;
1245 PyObject* is_attr_obj = NULL;
1246 PyObject* obj = NULL;
1247
1248 is_attr_obj = PyBool_FromLong(is_attr);
1249 if (is_attr_obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001250 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001251
1252 /* either an integer or a string */
1253 if (idx != -1)
Christian Heimes217cfd12007-12-02 14:31:20 +00001254 obj = PyLong_FromSsize_t(idx);
Eric Smithf6db4092007-08-27 23:52:26 +00001255 else
1256 obj = SubString_new_object(&name);
1257 if (obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001258 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001259
1260 /* return a tuple of values */
1261 result = PyTuple_Pack(2, is_attr_obj, obj);
Eric Smithf6db4092007-08-27 23:52:26 +00001262
Eric Smith625cbf22007-08-29 03:22:59 +00001263 done:
Eric Smithf6db4092007-08-27 23:52:26 +00001264 Py_XDECREF(is_attr_obj);
1265 Py_XDECREF(obj);
Eric Smith625cbf22007-08-29 03:22:59 +00001266 return result;
Eric Smithf6db4092007-08-27 23:52:26 +00001267 }
Eric Smithf6db4092007-08-27 23:52:26 +00001268}
1269
/* Method table for the field-name iterator type.  It contains no
   methods, only the terminating sentinel entry. */
static PyMethodDef fieldnameiter_methods[] = {
    {NULL, NULL} /* sentinel */
};
1273
/* Type object for fieldnameiterobject.  The initializer is positional,
   so the order of the entries must not be changed.  Only deallocation,
   generic attribute lookup, the iterator slots and the (empty) method
   table are filled in. */
static PyTypeObject PyFieldNameIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "fieldnameiterator",                /* tp_name */
    sizeof(fieldnameiterobject),        /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /* methods */
    (destructor)fieldnameiter_dealloc,  /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_reserved */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    0,                                  /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    PyObject_SelfIter,                  /* tp_iter: the object is its own iterator */
    (iternextfunc)fieldnameiter_next,   /* tp_iternext */
    fieldnameiter_methods,              /* tp_methods */
    0};                                 /* tp_members (slot after tp_methods) */
1305
1306/* unicode_formatter_field_name_split is used to implement
1307 string.Formatter.vformat. it takes an PEP 3101 "field name", and
1308 returns a tuple of (first, rest): "first", the part before the
1309 first '.' or '['; and "rest", an iterator for the rest of the field
1310 name. it's a wrapper around stringlib/string_format.h's
1311 field_name_split. The iterator it returns is a
1312 FieldNameIterator */
1313static PyObject *
Eric Smith8fd3eba2008-02-17 19:48:00 +00001314formatter_field_name_split(STRINGLIB_OBJECT *self)
Eric Smithf6db4092007-08-27 23:52:26 +00001315{
1316 SubString first;
1317 Py_ssize_t first_idx;
1318 fieldnameiterobject *it;
1319
1320 PyObject *first_obj = NULL;
1321 PyObject *result = NULL;
1322
1323 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1324 if (it == NULL)
1325 return NULL;
1326
1327 /* take ownership, give the object to the iterator. this is
1328 just to keep the field_name alive */
1329 Py_INCREF(self);
1330 it->str = self;
1331
Eric Smith8ec90442009-03-14 12:29:34 +00001332 /* Pass in auto_number = NULL. We'll return an empty string for
1333 first_obj in that case. */
Eric Smithf6db4092007-08-27 23:52:26 +00001334 if (!field_name_split(STRINGLIB_STR(self),
1335 STRINGLIB_LEN(self),
Eric Smith8ec90442009-03-14 12:29:34 +00001336 &first, &first_idx, &it->it_field, NULL))
Eric Smith625cbf22007-08-29 03:22:59 +00001337 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001338
Eric Smith0cb431c2007-08-28 01:07:27 +00001339 /* first becomes an integer, if possible; else a string */
Eric Smithf6db4092007-08-27 23:52:26 +00001340 if (first_idx != -1)
Christian Heimes217cfd12007-12-02 14:31:20 +00001341 first_obj = PyLong_FromSsize_t(first_idx);
Eric Smithf6db4092007-08-27 23:52:26 +00001342 else
1343 /* convert "first" into a string object */
1344 first_obj = SubString_new_object(&first);
1345 if (first_obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001346 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001347
1348 /* return a tuple of values */
1349 result = PyTuple_Pack(2, first_obj, it);
1350
Eric Smith625cbf22007-08-29 03:22:59 +00001351done:
Eric Smithf6db4092007-08-27 23:52:26 +00001352 Py_XDECREF(it);
1353 Py_XDECREF(first_obj);
1354 return result;
1355}