blob: fabc7a1c4fbf30a3640d70b8a1a9414a167676bd [file] [log] [blame]
Eric Smith8c663262007-08-25 02:26:07 +00001/*
2 string_format.h -- implementation of string.format().
3
4 It uses the Objects/stringlib conventions, so that it can be
5 compiled for both unicode and string objects.
6*/
7
8
/* Python 2.6 compatibility: when building for a version older than 3.0,
   map the name PyLong_FromSsize_t onto _PyLong_FromSsize_t. */
#if PY_VERSION_HEX < 0x03000000
#  define PyLong_FromSsize_t _PyLong_FromSsize_t
#endif
13
/* Tuning constants for growing the output buffer.  Each reallocation
   reserves some extra headroom beyond what was asked for; the headroom
   starts at INITIAL_SIZE_INCREMENT and is multiplied by SIZE_MULTIPLIER
   after every reallocation until it is no longer below
   MAX_SIZE_INCREMENT. */
#define INITIAL_SIZE_INCREMENT  100
#define SIZE_MULTIPLIER         2
#define MAX_SIZE_INCREMENT      3200
18
19
20/************************************************************************/
21/*********** Global data structures and forward declarations *********/
22/************************************************************************/
23
24/*
25 A SubString consists of the characters between two string or
26 unicode pointers.
27*/
28typedef struct {
29 STRINGLIB_CHAR *ptr;
30 STRINGLIB_CHAR *end;
31} SubString;
32
33
/* Keeps track of whether we are auto-numbering fields.  Note that there
   is deliberately no comma after the last enumerator: a trailing comma
   in an enumerator list is not valid C89. */
typedef enum {
    ANS_INIT,       /* no numerically indexed field has been seen yet */
    ANS_AUTO,       /* fields are numbered automatically (empty names) */
    ANS_MANUAL      /* fields carry explicit numeric indexes */
} AutoNumberState;
39
40/* Keeps track of our auto-numbering state, and which number field we're on */
41typedef struct {
42 AutoNumberState an_state;
43 int an_field_number;
44} AutoNumber;
45
46
Eric Smith8c663262007-08-25 02:26:07 +000047/* forward declaration for recursion */
48static PyObject *
49build_string(SubString *input, PyObject *args, PyObject *kwargs,
Eric Smith8ec90442009-03-14 12:29:34 +000050 int recursion_depth, AutoNumber *auto_number);
Eric Smith8c663262007-08-25 02:26:07 +000051
52
53
54/************************************************************************/
55/************************** Utility functions ************************/
56/************************************************************************/
57
Eric Smith8ec90442009-03-14 12:29:34 +000058static void
59AutoNumber_Init(AutoNumber *auto_number)
60{
61 auto_number->an_state = ANS_INIT;
62 auto_number->an_field_number = 0;
63}
64
Eric Smith8c663262007-08-25 02:26:07 +000065/* fill in a SubString from a pointer and length */
66Py_LOCAL_INLINE(void)
67SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
68{
69 str->ptr = p;
70 if (p == NULL)
71 str->end = NULL;
72 else
73 str->end = str->ptr + len;
74}
75
Eric Smith625cbf22007-08-29 03:22:59 +000076/* return a new string. if str->ptr is NULL, return None */
Eric Smith8c663262007-08-25 02:26:07 +000077Py_LOCAL_INLINE(PyObject *)
78SubString_new_object(SubString *str)
79{
Eric Smith625cbf22007-08-29 03:22:59 +000080 if (str->ptr == NULL) {
81 Py_INCREF(Py_None);
82 return Py_None;
83 }
84 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
85}
86
87/* return a new string. if str->ptr is NULL, return None */
88Py_LOCAL_INLINE(PyObject *)
89SubString_new_object_or_empty(SubString *str)
90{
91 if (str->ptr == NULL) {
92 return STRINGLIB_NEW(NULL, 0);
93 }
Eric Smith8c663262007-08-25 02:26:07 +000094 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
95}
96
Eric Smith8ec90442009-03-14 12:29:34 +000097/* Return 1 if an error has been detected switching between automatic
98 field numbering and manual field specification, else return 0. Set
99 ValueError on error. */
100static int
101autonumber_state_error(AutoNumberState state, int field_name_is_empty)
102{
103 if (state == ANS_MANUAL) {
104 if (field_name_is_empty) {
105 PyErr_SetString(PyExc_ValueError, "cannot switch from "
106 "manual field specification to "
107 "automatic field numbering");
108 return 1;
109 }
110 }
111 else {
112 if (!field_name_is_empty) {
113 PyErr_SetString(PyExc_ValueError, "cannot switch from "
114 "automatic field numbering to "
115 "manual field specification");
116 return 1;
117 }
118 }
119 return 0;
120}
121
122
Eric Smith8c663262007-08-25 02:26:07 +0000123/************************************************************************/
Eric Smith8c663262007-08-25 02:26:07 +0000124/*********** Output string management functions ****************/
125/************************************************************************/
126
127typedef struct {
128 STRINGLIB_CHAR *ptr;
129 STRINGLIB_CHAR *end;
130 PyObject *obj;
131 Py_ssize_t size_increment;
132} OutputString;
133
134/* initialize an OutputString object, reserving size characters */
135static int
136output_initialize(OutputString *output, Py_ssize_t size)
137{
138 output->obj = STRINGLIB_NEW(NULL, size);
139 if (output->obj == NULL)
140 return 0;
141
142 output->ptr = STRINGLIB_STR(output->obj);
143 output->end = STRINGLIB_LEN(output->obj) + output->ptr;
144 output->size_increment = INITIAL_SIZE_INCREMENT;
145
146 return 1;
147}
148
149/*
150 output_extend reallocates the output string buffer.
151 It returns a status: 0 for a failed reallocation,
152 1 for success.
153*/
154
155static int
156output_extend(OutputString *output, Py_ssize_t count)
157{
158 STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
159 Py_ssize_t curlen = output->ptr - startptr;
160 Py_ssize_t maxlen = curlen + count + output->size_increment;
161
162 if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
163 return 0;
164 startptr = STRINGLIB_STR(output->obj);
165 output->ptr = startptr + curlen;
166 output->end = startptr + maxlen;
167 if (output->size_increment < MAX_SIZE_INCREMENT)
168 output->size_increment *= SIZE_MULTIPLIER;
169 return 1;
170}
171
172/*
173 output_data dumps characters into our output string
174 buffer.
175
176 In some cases, it has to reallocate the string.
177
178 It returns a status: 0 for a failed reallocation,
179 1 for success.
180*/
181static int
182output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
183{
184 if ((count > output->end - output->ptr) && !output_extend(output, count))
185 return 0;
186 memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
187 output->ptr += count;
188 return 1;
189}
190
191/************************************************************************/
192/*********** Format string parsing -- integers and identifiers *********/
193/************************************************************************/
194
Eric Smith7ade6482007-08-26 22:27:13 +0000195static Py_ssize_t
196get_integer(const SubString *str)
Eric Smith8c663262007-08-25 02:26:07 +0000197{
Eric Smith7ade6482007-08-26 22:27:13 +0000198 Py_ssize_t accumulator = 0;
199 Py_ssize_t digitval;
200 Py_ssize_t oldaccumulator;
201 STRINGLIB_CHAR *p;
Eric Smith8c663262007-08-25 02:26:07 +0000202
Eric Smith7ade6482007-08-26 22:27:13 +0000203 /* empty string is an error */
204 if (str->ptr >= str->end)
205 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000206
Eric Smith7ade6482007-08-26 22:27:13 +0000207 for (p = str->ptr; p < str->end; p++) {
208 digitval = STRINGLIB_TODECIMAL(*p);
Eric Smith8c663262007-08-25 02:26:07 +0000209 if (digitval < 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000210 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000211 /*
212 This trick was copied from old Unicode format code. It's cute,
213 but would really suck on an old machine with a slow divide
214 implementation. Fortunately, in the normal case we do not
215 expect too many digits.
216 */
217 oldaccumulator = accumulator;
218 accumulator *= 10;
219 if ((accumulator+10)/10 != oldaccumulator+1) {
220 PyErr_Format(PyExc_ValueError,
221 "Too many decimal digits in format string");
222 return -1;
223 }
224 accumulator += digitval;
225 }
Eric Smith7ade6482007-08-26 22:27:13 +0000226 return accumulator;
Eric Smith8c663262007-08-25 02:26:07 +0000227}
228
229/************************************************************************/
230/******** Functions to get field objects and specification strings ******/
231/************************************************************************/
232
Eric Smith7ade6482007-08-26 22:27:13 +0000233/* do the equivalent of obj.name */
Eric Smith8c663262007-08-25 02:26:07 +0000234static PyObject *
Eric Smith7ade6482007-08-26 22:27:13 +0000235getattr(PyObject *obj, SubString *name)
Eric Smith8c663262007-08-25 02:26:07 +0000236{
Eric Smith7ade6482007-08-26 22:27:13 +0000237 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000238 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000239 if (str == NULL)
240 return NULL;
241 newobj = PyObject_GetAttr(obj, str);
242 Py_DECREF(str);
243 return newobj;
Eric Smith8c663262007-08-25 02:26:07 +0000244}
245
Eric Smith7ade6482007-08-26 22:27:13 +0000246/* do the equivalent of obj[idx], where obj is a sequence */
247static PyObject *
248getitem_sequence(PyObject *obj, Py_ssize_t idx)
249{
250 return PySequence_GetItem(obj, idx);
251}
252
253/* do the equivalent of obj[idx], where obj is not a sequence */
254static PyObject *
255getitem_idx(PyObject *obj, Py_ssize_t idx)
256{
257 PyObject *newobj;
Christian Heimes217cfd12007-12-02 14:31:20 +0000258 PyObject *idx_obj = PyLong_FromSsize_t(idx);
Eric Smith7ade6482007-08-26 22:27:13 +0000259 if (idx_obj == NULL)
260 return NULL;
261 newobj = PyObject_GetItem(obj, idx_obj);
262 Py_DECREF(idx_obj);
263 return newobj;
264}
265
266/* do the equivalent of obj[name] */
267static PyObject *
268getitem_str(PyObject *obj, SubString *name)
269{
270 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000271 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000272 if (str == NULL)
273 return NULL;
274 newobj = PyObject_GetItem(obj, str);
275 Py_DECREF(str);
276 return newobj;
277}
278
279typedef struct {
280 /* the entire string we're parsing. we assume that someone else
281 is managing its lifetime, and that it will exist for the
282 lifetime of the iterator. can be empty */
283 SubString str;
284
285 /* pointer to where we are inside field_name */
286 STRINGLIB_CHAR *ptr;
287} FieldNameIterator;
288
289
290static int
291FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
292 Py_ssize_t len)
293{
294 SubString_init(&self->str, ptr, len);
295 self->ptr = self->str.ptr;
296 return 1;
297}
298
299static int
300_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
301{
302 STRINGLIB_CHAR c;
303
304 name->ptr = self->ptr;
305
306 /* return everything until '.' or '[' */
307 while (self->ptr < self->str.end) {
308 switch (c = *self->ptr++) {
309 case '[':
310 case '.':
311 /* backup so that we this character will be seen next time */
312 self->ptr--;
313 break;
314 default:
315 continue;
316 }
317 break;
318 }
319 /* end of string is okay */
320 name->end = self->ptr;
321 return 1;
322}
323
324static int
325_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
326{
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000327 int bracket_seen = 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000328 STRINGLIB_CHAR c;
329
330 name->ptr = self->ptr;
331
332 /* return everything until ']' */
333 while (self->ptr < self->str.end) {
334 switch (c = *self->ptr++) {
335 case ']':
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000336 bracket_seen = 1;
Eric Smith7ade6482007-08-26 22:27:13 +0000337 break;
338 default:
339 continue;
340 }
341 break;
342 }
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000343 /* make sure we ended with a ']' */
344 if (!bracket_seen) {
345 PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
346 return 0;
347 }
348
Eric Smith7ade6482007-08-26 22:27:13 +0000349 /* end of string is okay */
350 /* don't include the ']' */
351 name->end = self->ptr-1;
352 return 1;
353}
354
355/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
356static int
357FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
358 Py_ssize_t *name_idx, SubString *name)
359{
360 /* check at end of input */
361 if (self->ptr >= self->str.end)
362 return 1;
363
364 switch (*self->ptr++) {
365 case '.':
366 *is_attribute = 1;
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000367 if (_FieldNameIterator_attr(self, name) == 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000368 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000369 *name_idx = -1;
370 break;
371 case '[':
372 *is_attribute = 0;
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000373 if (_FieldNameIterator_item(self, name) == 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000374 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000375 *name_idx = get_integer(name);
376 break;
377 default:
378 /* interal error, can't get here */
379 assert(0);
380 return 0;
381 }
382
383 /* empty string is an error */
384 if (name->ptr == name->end) {
385 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
386 return 0;
387 }
388
389 return 2;
390}
391
392
393/* input: field_name
394 output: 'first' points to the part before the first '[' or '.'
395 'first_idx' is -1 if 'first' is not an integer, otherwise
396 it's the value of first converted to an integer
397 'rest' is an iterator to return the rest
398*/
399static int
400field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
Eric Smith8ec90442009-03-14 12:29:34 +0000401 Py_ssize_t *first_idx, FieldNameIterator *rest,
402 AutoNumber *auto_number)
Eric Smith7ade6482007-08-26 22:27:13 +0000403{
404 STRINGLIB_CHAR c;
405 STRINGLIB_CHAR *p = ptr;
406 STRINGLIB_CHAR *end = ptr + len;
Eric Smith8ec90442009-03-14 12:29:34 +0000407 int field_name_is_empty;
408 int using_numeric_index;
Eric Smith7ade6482007-08-26 22:27:13 +0000409
410 /* find the part up until the first '.' or '[' */
411 while (p < end) {
412 switch (c = *p++) {
413 case '[':
414 case '.':
415 /* backup so that we this character is available to the
416 "rest" iterator */
417 p--;
418 break;
419 default:
420 continue;
421 }
422 break;
423 }
424
425 /* set up the return values */
426 SubString_init(first, ptr, p - ptr);
427 FieldNameIterator_init(rest, p, end - p);
428
429 /* see if "first" is an integer, in which case it's used as an index */
430 *first_idx = get_integer(first);
431
Eric Smith8ec90442009-03-14 12:29:34 +0000432 field_name_is_empty = first->ptr >= first->end;
433
434 /* If the field name is omitted or if we have a numeric index
435 specified, then we're doing numeric indexing into args. */
436 using_numeric_index = field_name_is_empty || *first_idx != -1;
437
438 /* We always get here exactly one time for each field we're
439 processing. And we get here in field order (counting by left
440 braces). So this is the perfect place to handle automatic field
441 numbering if the field name is omitted. */
442
443 /* Check if we need to do the auto-numbering. It's not needed if
444 we're called from string.Format routines, because it's handled
445 in that class by itself. */
446 if (auto_number) {
447 /* Initialize our auto numbering state if this is the first
448 time we're either auto-numbering or manually numbering. */
449 if (auto_number->an_state == ANS_INIT && using_numeric_index)
450 auto_number->an_state = field_name_is_empty ?
451 ANS_AUTO : ANS_MANUAL;
452
453 /* Make sure our state is consistent with what we're doing
454 this time through. Only check if we're using a numeric
455 index. */
456 if (using_numeric_index)
457 if (autonumber_state_error(auto_number->an_state,
458 field_name_is_empty))
459 return 0;
460 /* Zero length field means we want to do auto-numbering of the
461 fields. */
462 if (field_name_is_empty)
463 *first_idx = (auto_number->an_field_number)++;
Eric Smith7ade6482007-08-26 22:27:13 +0000464 }
465
466 return 1;
Eric Smith7ade6482007-08-26 22:27:13 +0000467}
468
469
Eric Smith8c663262007-08-25 02:26:07 +0000470/*
471 get_field_object returns the object inside {}, before the
472 format_spec. It handles getindex and getattr lookups and consumes
473 the entire input string.
474*/
475static PyObject *
Eric Smith8ec90442009-03-14 12:29:34 +0000476get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
477 AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000478{
Eric Smith7ade6482007-08-26 22:27:13 +0000479 PyObject *obj = NULL;
480 int ok;
481 int is_attribute;
482 SubString name;
483 SubString first;
Eric Smith8c663262007-08-25 02:26:07 +0000484 Py_ssize_t index;
Eric Smith7ade6482007-08-26 22:27:13 +0000485 FieldNameIterator rest;
Eric Smith8c663262007-08-25 02:26:07 +0000486
Eric Smith7ade6482007-08-26 22:27:13 +0000487 if (!field_name_split(input->ptr, input->end - input->ptr, &first,
Eric Smith8ec90442009-03-14 12:29:34 +0000488 &index, &rest, auto_number)) {
Eric Smith7ade6482007-08-26 22:27:13 +0000489 goto error;
490 }
Eric Smith8c663262007-08-25 02:26:07 +0000491
Eric Smith7ade6482007-08-26 22:27:13 +0000492 if (index == -1) {
493 /* look up in kwargs */
Eric Smith7a6dd292007-08-27 23:30:47 +0000494 PyObject *key = SubString_new_object(&first);
Eric Smith7ade6482007-08-26 22:27:13 +0000495 if (key == NULL)
496 goto error;
497 if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
Eric Smith11529192007-09-04 23:04:22 +0000498 PyErr_SetObject(PyExc_KeyError, key);
Eric Smith7ade6482007-08-26 22:27:13 +0000499 Py_DECREF(key);
500 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000501 }
Neal Norwitz8a4eb292007-08-27 07:24:17 +0000502 Py_DECREF(key);
Neal Norwitz247b5152007-08-27 03:22:50 +0000503 Py_INCREF(obj);
Eric Smith0cb431c2007-08-28 01:07:27 +0000504 }
505 else {
Eric Smith7ade6482007-08-26 22:27:13 +0000506 /* look up in args */
507 obj = PySequence_GetItem(args, index);
Eric Smith11529192007-09-04 23:04:22 +0000508 if (obj == NULL)
Eric Smith7ade6482007-08-26 22:27:13 +0000509 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000510 }
Eric Smith7ade6482007-08-26 22:27:13 +0000511
512 /* iterate over the rest of the field_name */
513 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
514 &name)) == 2) {
515 PyObject *tmp;
516
517 if (is_attribute)
518 /* getattr lookup "." */
519 tmp = getattr(obj, &name);
520 else
521 /* getitem lookup "[]" */
522 if (index == -1)
523 tmp = getitem_str(obj, &name);
524 else
525 if (PySequence_Check(obj))
526 tmp = getitem_sequence(obj, index);
527 else
528 /* not a sequence */
529 tmp = getitem_idx(obj, index);
530 if (tmp == NULL)
531 goto error;
532
533 /* assign to obj */
534 Py_DECREF(obj);
535 obj = tmp;
Eric Smith8c663262007-08-25 02:26:07 +0000536 }
Eric Smith7ade6482007-08-26 22:27:13 +0000537 /* end of iterator, this is the non-error case */
538 if (ok == 1)
539 return obj;
540error:
541 Py_XDECREF(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000542 return NULL;
543}
544
545/************************************************************************/
546/***************** Field rendering functions **************************/
547/************************************************************************/
548
549/*
550 render_field() is the main function in this section. It takes the
551 field object and field specification string generated by
552 get_field_and_spec, and renders the field into the output string.
553
Eric Smith8c663262007-08-25 02:26:07 +0000554 render_field calls fieldobj.__format__(format_spec) method, and
555 appends to the output.
556*/
557static int
558render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
559{
560 int ok = 0;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000561 PyObject *result = NULL;
Eric Smith1d138f12008-05-31 01:40:08 +0000562 PyObject *format_spec_object = NULL;
Eric Smithba8c0282008-06-02 14:57:32 +0000563 PyObject *(*formatter)(PyObject *, STRINGLIB_CHAR *, Py_ssize_t) = NULL;
Eric Smith1d138f12008-05-31 01:40:08 +0000564 STRINGLIB_CHAR* format_spec_start = format_spec->ptr ?
565 format_spec->ptr : NULL;
566 Py_ssize_t format_spec_len = format_spec->ptr ?
567 format_spec->end - format_spec->ptr : 0;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000568
Eric Smith1d138f12008-05-31 01:40:08 +0000569 /* If we know the type exactly, skip the lookup of __format__ and just
570 call the formatter directly. */
571 if (PyUnicode_CheckExact(fieldobj))
Eric Smithba8c0282008-06-02 14:57:32 +0000572 formatter = _PyUnicode_FormatAdvanced;
Eric Smith1d138f12008-05-31 01:40:08 +0000573 else if (PyLong_CheckExact(fieldobj))
Eric Smithba8c0282008-06-02 14:57:32 +0000574 formatter =_PyLong_FormatAdvanced;
Eric Smith1d138f12008-05-31 01:40:08 +0000575 else if (PyFloat_CheckExact(fieldobj))
Eric Smithba8c0282008-06-02 14:57:32 +0000576 formatter = _PyFloat_FormatAdvanced;
577
578 /* XXX: for 2.6, convert format_spec to the appropriate type
579 (unicode, str) */
580
581 if (formatter) {
582 /* we know exactly which formatter will be called when __format__ is
583 looked up, so call it directly, instead. */
584 result = formatter(fieldobj, format_spec_start, format_spec_len);
585 }
Eric Smith1d138f12008-05-31 01:40:08 +0000586 else {
587 /* We need to create an object out of the pointers we have, because
588 __format__ takes a string/unicode object for format_spec. */
589 format_spec_object = STRINGLIB_NEW(format_spec_start,
590 format_spec_len);
591 if (format_spec_object == NULL)
592 goto done;
593
594 result = PyObject_Format(fieldobj, format_spec_object);
595 }
Eric Smith8c663262007-08-25 02:26:07 +0000596 if (result == NULL)
597 goto done;
598
Eric Smith8a0217c2008-02-18 18:07:47 +0000599#if PY_VERSION_HEX >= 0x03000000
Eric Smithecbac8f2008-02-24 21:44:34 +0000600 assert(PyUnicode_Check(result));
Eric Smith8a0217c2008-02-18 18:07:47 +0000601#else
Christian Heimes72b710a2008-05-26 13:28:38 +0000602 assert(PyBytes_Check(result) || PyUnicode_Check(result));
Eric Smith8a0217c2008-02-18 18:07:47 +0000603
604 /* Convert result to our type. We could be str, and result could
605 be unicode */
606 {
607 PyObject *tmp = STRINGLIB_TOSTR(result);
608 if (tmp == NULL)
609 goto done;
610 Py_DECREF(result);
611 result = tmp;
612 }
613#endif
614
Eric Smith8c663262007-08-25 02:26:07 +0000615 ok = output_data(output,
616 STRINGLIB_STR(result), STRINGLIB_LEN(result));
617done:
Eric Smith1d138f12008-05-31 01:40:08 +0000618 Py_XDECREF(format_spec_object);
Eric Smith8c663262007-08-25 02:26:07 +0000619 Py_XDECREF(result);
620 return ok;
621}
622
623static int
624parse_field(SubString *str, SubString *field_name, SubString *format_spec,
625 STRINGLIB_CHAR *conversion)
626{
Eric Smith8ec90442009-03-14 12:29:34 +0000627 /* Note this function works if the field name is zero length,
628 which is good. Zero length field names are handled later, in
629 field_name_split. */
630
Eric Smith8c663262007-08-25 02:26:07 +0000631 STRINGLIB_CHAR c = 0;
632
633 /* initialize these, as they may be empty */
634 *conversion = '\0';
635 SubString_init(format_spec, NULL, 0);
636
Eric Smith8ec90442009-03-14 12:29:34 +0000637 /* Search for the field name. it's terminated by the end of
638 the string, or a ':' or '!' */
Eric Smith8c663262007-08-25 02:26:07 +0000639 field_name->ptr = str->ptr;
640 while (str->ptr < str->end) {
641 switch (c = *(str->ptr++)) {
642 case ':':
643 case '!':
644 break;
645 default:
646 continue;
647 }
648 break;
649 }
650
651 if (c == '!' || c == ':') {
652 /* we have a format specifier and/or a conversion */
653 /* don't include the last character */
654 field_name->end = str->ptr-1;
655
656 /* the format specifier is the rest of the string */
657 format_spec->ptr = str->ptr;
658 format_spec->end = str->end;
659
660 /* see if there's a conversion specifier */
661 if (c == '!') {
662 /* there must be another character present */
663 if (format_spec->ptr >= format_spec->end) {
664 PyErr_SetString(PyExc_ValueError,
665 "end of format while looking for conversion "
666 "specifier");
667 return 0;
668 }
669 *conversion = *(format_spec->ptr++);
670
671 /* if there is another character, it must be a colon */
672 if (format_spec->ptr < format_spec->end) {
673 c = *(format_spec->ptr++);
674 if (c != ':') {
675 PyErr_SetString(PyExc_ValueError,
676 "expected ':' after format specifier");
677 return 0;
678 }
679 }
680 }
Eric Smith0cb431c2007-08-28 01:07:27 +0000681 }
Eric Smith8ec90442009-03-14 12:29:34 +0000682 else
Eric Smith8c663262007-08-25 02:26:07 +0000683 /* end of string, there's no format_spec or conversion */
684 field_name->end = str->ptr;
Eric Smith8ec90442009-03-14 12:29:34 +0000685
686 return 1;
Eric Smith8c663262007-08-25 02:26:07 +0000687}
688
689/************************************************************************/
690/******* Output string allocation and escape-to-markup processing ******/
691/************************************************************************/
692
693/* MarkupIterator breaks the string into pieces of either literal
694 text, or things inside {} that need to be marked up. it is
695 designed to make it easy to wrap a Python iterator around it, for
696 use with the Formatter class */
697
698typedef struct {
699 SubString str;
Eric Smith8c663262007-08-25 02:26:07 +0000700} MarkupIterator;
701
702static int
703MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
704{
705 SubString_init(&self->str, ptr, len);
Eric Smith8c663262007-08-25 02:26:07 +0000706 return 1;
707}
708
709/* returns 0 on error, 1 on non-error termination, and 2 if it got a
710 string (or something to be expanded) */
711static int
Eric Smith625cbf22007-08-29 03:22:59 +0000712MarkupIterator_next(MarkupIterator *self, SubString *literal,
Eric Smith8ec90442009-03-14 12:29:34 +0000713 int *field_present, SubString *field_name,
714 SubString *format_spec, STRINGLIB_CHAR *conversion,
Eric Smith8c663262007-08-25 02:26:07 +0000715 int *format_spec_needs_expanding)
716{
717 int at_end;
718 STRINGLIB_CHAR c = 0;
719 STRINGLIB_CHAR *start;
720 int count;
721 Py_ssize_t len;
Eric Smith625cbf22007-08-29 03:22:59 +0000722 int markup_follows = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000723
Eric Smith625cbf22007-08-29 03:22:59 +0000724 /* initialize all of the output variables */
725 SubString_init(literal, NULL, 0);
726 SubString_init(field_name, NULL, 0);
727 SubString_init(format_spec, NULL, 0);
728 *conversion = '\0';
Eric Smith8c663262007-08-25 02:26:07 +0000729 *format_spec_needs_expanding = 0;
Eric Smith8ec90442009-03-14 12:29:34 +0000730 *field_present = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000731
Eric Smith625cbf22007-08-29 03:22:59 +0000732 /* No more input, end of iterator. This is the normal exit
733 path. */
Eric Smith8c663262007-08-25 02:26:07 +0000734 if (self->str.ptr >= self->str.end)
735 return 1;
736
Eric Smith8c663262007-08-25 02:26:07 +0000737 start = self->str.ptr;
738
Eric Smith625cbf22007-08-29 03:22:59 +0000739 /* First read any literal text. Read until the end of string, an
740 escaped '{' or '}', or an unescaped '{'. In order to never
741 allocate memory and so I can just pass pointers around, if
742 there's an escaped '{' or '}' then we'll return the literal
743 including the brace, but no format object. The next time
744 through, we'll return the rest of the literal, skipping past
745 the second consecutive brace. */
746 while (self->str.ptr < self->str.end) {
747 switch (c = *(self->str.ptr++)) {
748 case '{':
749 case '}':
750 markup_follows = 1;
751 break;
752 default:
753 continue;
Eric Smith8c663262007-08-25 02:26:07 +0000754 }
Eric Smith625cbf22007-08-29 03:22:59 +0000755 break;
Eric Smith0cb431c2007-08-28 01:07:27 +0000756 }
Eric Smith625cbf22007-08-29 03:22:59 +0000757
758 at_end = self->str.ptr >= self->str.end;
759 len = self->str.ptr - start;
760
761 if ((c == '}') && (at_end || (c != *self->str.ptr))) {
762 PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
763 "in format string");
764 return 0;
765 }
766 if (at_end && c == '{') {
767 PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
768 "in format string");
769 return 0;
770 }
771 if (!at_end) {
772 if (c == *self->str.ptr) {
773 /* escaped } or {, skip it in the input. there is no
774 markup object following us, just this literal text */
775 self->str.ptr++;
776 markup_follows = 0;
777 }
778 else
779 len--;
780 }
781
782 /* record the literal text */
783 literal->ptr = start;
784 literal->end = start + len;
785
786 if (!markup_follows)
787 return 2;
788
789 /* this is markup, find the end of the string by counting nested
790 braces. note that this prohibits escaped braces, so that
791 format_specs cannot have braces in them. */
Eric Smith8ec90442009-03-14 12:29:34 +0000792 *field_present = 1;
Eric Smith625cbf22007-08-29 03:22:59 +0000793 count = 1;
794
795 start = self->str.ptr;
796
797 /* we know we can't have a zero length string, so don't worry
798 about that case */
799 while (self->str.ptr < self->str.end) {
800 switch (c = *(self->str.ptr++)) {
801 case '{':
802 /* the format spec needs to be recursively expanded.
803 this is an optimization, and not strictly needed */
804 *format_spec_needs_expanding = 1;
805 count++;
806 break;
807 case '}':
808 count--;
809 if (count <= 0) {
810 /* we're done. parse and get out */
811 SubString s;
812
813 SubString_init(&s, start, self->str.ptr - 1 - start);
814 if (parse_field(&s, field_name, format_spec, conversion) == 0)
815 return 0;
816
Eric Smith625cbf22007-08-29 03:22:59 +0000817 /* success */
818 return 2;
Eric Smith8c663262007-08-25 02:26:07 +0000819 }
820 break;
821 }
Eric Smith8c663262007-08-25 02:26:07 +0000822 }
Eric Smith625cbf22007-08-29 03:22:59 +0000823
824 /* end of string while searching for matching '}' */
825 PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
826 return 0;
Eric Smith8c663262007-08-25 02:26:07 +0000827}
828
829
830/* do the !r or !s conversion on obj */
831static PyObject *
832do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
833{
834 /* XXX in pre-3.0, do we need to convert this to unicode, since it
835 might have returned a string? */
836 switch (conversion) {
837 case 'r':
838 return PyObject_Repr(obj);
839 case 's':
Eric Smith8fd3eba2008-02-17 19:48:00 +0000840 return STRINGLIB_TOSTR(obj);
Georg Brandl559e5d72008-06-11 18:37:52 +0000841#if PY_VERSION_HEX >= 0x03000000
842 case 'a':
843 return STRINGLIB_TOASCII(obj);
844#endif
Eric Smith8c663262007-08-25 02:26:07 +0000845 default:
Martin v. Löwis5a6f4582008-04-07 03:22:07 +0000846 if (conversion > 32 && conversion < 127) {
847 /* It's the ASCII subrange; casting to char is safe
848 (assuming the execution character set is an ASCII
849 superset). */
850 PyErr_Format(PyExc_ValueError,
851 "Unknown conversion specifier %c",
852 (char)conversion);
853 } else
854 PyErr_Format(PyExc_ValueError,
855 "Unknown conversion specifier \\x%x",
856 (unsigned int)conversion);
Eric Smith8c663262007-08-25 02:26:07 +0000857 return NULL;
858 }
859}
860
861/* given:
862
863 {field_name!conversion:format_spec}
864
865 compute the result and write it to output.
866 format_spec_needs_expanding is an optimization. if it's false,
867 just output the string directly, otherwise recursively expand the
Eric Smith8ec90442009-03-14 12:29:34 +0000868 format_spec string.
869
870 field_name is allowed to be zero length, in which case we
871 are doing auto field numbering.
872*/
Eric Smith8c663262007-08-25 02:26:07 +0000873
874static int
875output_markup(SubString *field_name, SubString *format_spec,
876 int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
877 OutputString *output, PyObject *args, PyObject *kwargs,
Eric Smith8ec90442009-03-14 12:29:34 +0000878 int recursion_depth, AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000879{
880 PyObject *tmp = NULL;
881 PyObject *fieldobj = NULL;
882 SubString expanded_format_spec;
883 SubString *actual_format_spec;
884 int result = 0;
885
886 /* convert field_name to an object */
Eric Smith8ec90442009-03-14 12:29:34 +0000887 fieldobj = get_field_object(field_name, args, kwargs, auto_number);
Eric Smith8c663262007-08-25 02:26:07 +0000888 if (fieldobj == NULL)
889 goto done;
890
891 if (conversion != '\0') {
892 tmp = do_conversion(fieldobj, conversion);
893 if (tmp == NULL)
894 goto done;
895
896 /* do the assignment, transferring ownership: fieldobj = tmp */
897 Py_DECREF(fieldobj);
898 fieldobj = tmp;
899 tmp = NULL;
900 }
901
902 /* if needed, recurively compute the format_spec */
903 if (format_spec_needs_expanding) {
Eric Smith8ec90442009-03-14 12:29:34 +0000904 tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
905 auto_number);
Eric Smith8c663262007-08-25 02:26:07 +0000906 if (tmp == NULL)
907 goto done;
908
909 /* note that in the case we're expanding the format string,
910 tmp must be kept around until after the call to
911 render_field. */
912 SubString_init(&expanded_format_spec,
913 STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
914 actual_format_spec = &expanded_format_spec;
Eric Smith0cb431c2007-08-28 01:07:27 +0000915 }
916 else
Eric Smith8c663262007-08-25 02:26:07 +0000917 actual_format_spec = format_spec;
918
919 if (render_field(fieldobj, actual_format_spec, output) == 0)
920 goto done;
921
922 result = 1;
923
924done:
925 Py_XDECREF(fieldobj);
926 Py_XDECREF(tmp);
927
928 return result;
929}
930
931/*
Eric Smith8fd3eba2008-02-17 19:48:00 +0000932 do_markup is the top-level loop for the format() method. It
Eric Smith8c663262007-08-25 02:26:07 +0000933 searches through the format string for escapes to markup codes, and
934 calls other functions to move non-markup text to the output,
935 and to perform the markup to the output.
936*/
937static int
938do_markup(SubString *input, PyObject *args, PyObject *kwargs,
Eric Smith8ec90442009-03-14 12:29:34 +0000939 OutputString *output, int recursion_depth, AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000940{
941 MarkupIterator iter;
Eric Smith8c663262007-08-25 02:26:07 +0000942 int format_spec_needs_expanding;
943 int result;
Eric Smith8ec90442009-03-14 12:29:34 +0000944 int field_present;
Eric Smith625cbf22007-08-29 03:22:59 +0000945 SubString literal;
Eric Smith8c663262007-08-25 02:26:07 +0000946 SubString field_name;
947 SubString format_spec;
948 STRINGLIB_CHAR conversion;
949
950 MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
Eric Smith8ec90442009-03-14 12:29:34 +0000951 while ((result = MarkupIterator_next(&iter, &literal, &field_present,
952 &field_name, &format_spec,
953 &conversion,
Eric Smith8c663262007-08-25 02:26:07 +0000954 &format_spec_needs_expanding)) == 2) {
Eric Smith625cbf22007-08-29 03:22:59 +0000955 if (!output_data(output, literal.ptr, literal.end - literal.ptr))
956 return 0;
Eric Smith8ec90442009-03-14 12:29:34 +0000957 if (field_present)
Eric Smith8c663262007-08-25 02:26:07 +0000958 if (!output_markup(&field_name, &format_spec,
959 format_spec_needs_expanding, conversion, output,
Eric Smith8ec90442009-03-14 12:29:34 +0000960 args, kwargs, recursion_depth, auto_number))
Eric Smith8c663262007-08-25 02:26:07 +0000961 return 0;
Eric Smith8c663262007-08-25 02:26:07 +0000962 }
963 return result;
964}
965
966
967/*
968 build_string allocates the output string and then
969 calls do_markup to do the heavy lifting.
970*/
971static PyObject *
972build_string(SubString *input, PyObject *args, PyObject *kwargs,
Eric Smith8ec90442009-03-14 12:29:34 +0000973 int recursion_depth, AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000974{
975 OutputString output;
976 PyObject *result = NULL;
977 Py_ssize_t count;
978
979 output.obj = NULL; /* needed so cleanup code always works */
980
981 /* check the recursion level */
Eric Smith45c07872007-09-05 02:02:43 +0000982 if (recursion_depth <= 0) {
Eric Smith8c663262007-08-25 02:26:07 +0000983 PyErr_SetString(PyExc_ValueError,
984 "Max string recursion exceeded");
985 goto done;
986 }
987
988 /* initial size is the length of the format string, plus the size
989 increment. seems like a reasonable default */
990 if (!output_initialize(&output,
991 input->end - input->ptr +
992 INITIAL_SIZE_INCREMENT))
993 goto done;
994
Eric Smith8ec90442009-03-14 12:29:34 +0000995 if (!do_markup(input, args, kwargs, &output, recursion_depth,
996 auto_number)) {
Eric Smith8c663262007-08-25 02:26:07 +0000997 goto done;
998 }
999
1000 count = output.ptr - STRINGLIB_STR(output.obj);
1001 if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
1002 goto done;
1003 }
1004
1005 /* transfer ownership to result */
1006 result = output.obj;
1007 output.obj = NULL;
1008
1009done:
Eric Smith8c663262007-08-25 02:26:07 +00001010 Py_XDECREF(output.obj);
1011 return result;
1012}
1013
1014/************************************************************************/
1015/*********** main routine ***********************************************/
1016/************************************************************************/
1017
1018/* this is the main entry point */
1019static PyObject *
1020do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
1021{
1022 SubString input;
1023
1024 /* PEP 3101 says only 2 levels, so that
1025 "{0:{1}}".format('abc', 's') # works
1026 "{0:{1:{2}}}".format('abc', 's', '') # fails
1027 */
Eric Smith45c07872007-09-05 02:02:43 +00001028 int recursion_depth = 2;
Eric Smith8c663262007-08-25 02:26:07 +00001029
Eric Smith8ec90442009-03-14 12:29:34 +00001030 AutoNumber auto_number;
1031
1032 AutoNumber_Init(&auto_number);
Eric Smith8c663262007-08-25 02:26:07 +00001033 SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
Eric Smith8ec90442009-03-14 12:29:34 +00001034 return build_string(&input, args, kwargs, recursion_depth, &auto_number);
Eric Smith8c663262007-08-25 02:26:07 +00001035}
Eric Smithf6db4092007-08-27 23:52:26 +00001036
1037
1038
1039/************************************************************************/
1040/*********** formatteriterator ******************************************/
1041/************************************************************************/
1042
1043/* This is used to implement string.Formatter.vparse(). It exists so
1044 Formatter can share code with the built in unicode.format() method.
1045 It's really just a wrapper around MarkupIterator that is callable
1046 from Python. */
1047
1048typedef struct {
1049 PyObject_HEAD
1050
Eric Smith8fd3eba2008-02-17 19:48:00 +00001051 STRINGLIB_OBJECT *str;
Eric Smithf6db4092007-08-27 23:52:26 +00001052
1053 MarkupIterator it_markup;
1054} formatteriterobject;
1055
1056static void
1057formatteriter_dealloc(formatteriterobject *it)
1058{
1059 Py_XDECREF(it->str);
1060 PyObject_FREE(it);
1061}
1062
1063/* returns a tuple:
Eric Smith625cbf22007-08-29 03:22:59 +00001064 (literal, field_name, format_spec, conversion)
1065
1066 literal is any literal text to output. might be zero length
1067 field_name is the string before the ':'. might be None
1068 format_spec is the string after the ':'. mibht be None
1069 conversion is either None, or the string after the '!'
Eric Smithf6db4092007-08-27 23:52:26 +00001070*/
1071static PyObject *
1072formatteriter_next(formatteriterobject *it)
1073{
1074 SubString literal;
1075 SubString field_name;
1076 SubString format_spec;
Eric Smith8fd3eba2008-02-17 19:48:00 +00001077 STRINGLIB_CHAR conversion;
Eric Smithf6db4092007-08-27 23:52:26 +00001078 int format_spec_needs_expanding;
Eric Smith8ec90442009-03-14 12:29:34 +00001079 int field_present;
1080 int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1081 &field_name, &format_spec, &conversion,
Eric Smithf6db4092007-08-27 23:52:26 +00001082 &format_spec_needs_expanding);
1083
1084 /* all of the SubString objects point into it->str, so no
1085 memory management needs to be done on them */
1086 assert(0 <= result && result <= 2);
Eric Smith0cb431c2007-08-28 01:07:27 +00001087 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +00001088 /* if 0, error has already been set, if 1, iterator is empty */
1089 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +00001090 else {
Eric Smithf6db4092007-08-27 23:52:26 +00001091 PyObject *literal_str = NULL;
1092 PyObject *field_name_str = NULL;
1093 PyObject *format_spec_str = NULL;
1094 PyObject *conversion_str = NULL;
1095 PyObject *tuple = NULL;
1096
Eric Smith625cbf22007-08-29 03:22:59 +00001097 literal_str = SubString_new_object(&literal);
1098 if (literal_str == NULL)
1099 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001100
Eric Smith625cbf22007-08-29 03:22:59 +00001101 field_name_str = SubString_new_object(&field_name);
1102 if (field_name_str == NULL)
1103 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001104
Eric Smith625cbf22007-08-29 03:22:59 +00001105 /* if field_name is non-zero length, return a string for
1106 format_spec (even if zero length), else return None */
Eric Smith8ec90442009-03-14 12:29:34 +00001107 format_spec_str = (field_present ?
Eric Smith625cbf22007-08-29 03:22:59 +00001108 SubString_new_object_or_empty :
1109 SubString_new_object)(&format_spec);
1110 if (format_spec_str == NULL)
1111 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001112
Eric Smith625cbf22007-08-29 03:22:59 +00001113 /* if the conversion is not specified, return a None,
1114 otherwise create a one length string with the conversion
1115 character */
1116 if (conversion == '\0') {
Eric Smithf6db4092007-08-27 23:52:26 +00001117 conversion_str = Py_None;
Eric Smithf6db4092007-08-27 23:52:26 +00001118 Py_INCREF(conversion_str);
1119 }
Eric Smith625cbf22007-08-29 03:22:59 +00001120 else
Eric Smith8fd3eba2008-02-17 19:48:00 +00001121 conversion_str = STRINGLIB_NEW(&conversion, 1);
Eric Smith625cbf22007-08-29 03:22:59 +00001122 if (conversion_str == NULL)
1123 goto done;
1124
Eric Smith9e7c8da2007-08-28 11:15:20 +00001125 tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
Eric Smithf6db4092007-08-27 23:52:26 +00001126 conversion_str);
Eric Smith625cbf22007-08-29 03:22:59 +00001127 done:
Eric Smithf6db4092007-08-27 23:52:26 +00001128 Py_XDECREF(literal_str);
1129 Py_XDECREF(field_name_str);
1130 Py_XDECREF(format_spec_str);
1131 Py_XDECREF(conversion_str);
1132 return tuple;
1133 }
1134}
1135
1136static PyMethodDef formatteriter_methods[] = {
1137 {NULL, NULL} /* sentinel */
1138};
1139
Eric Smith8fd3eba2008-02-17 19:48:00 +00001140static PyTypeObject PyFormatterIter_Type = {
Eric Smithf6db4092007-08-27 23:52:26 +00001141 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1142 "formatteriterator", /* tp_name */
1143 sizeof(formatteriterobject), /* tp_basicsize */
1144 0, /* tp_itemsize */
1145 /* methods */
1146 (destructor)formatteriter_dealloc, /* tp_dealloc */
1147 0, /* tp_print */
1148 0, /* tp_getattr */
1149 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00001150 0, /* tp_reserved */
Eric Smithf6db4092007-08-27 23:52:26 +00001151 0, /* tp_repr */
1152 0, /* tp_as_number */
1153 0, /* tp_as_sequence */
1154 0, /* tp_as_mapping */
1155 0, /* tp_hash */
1156 0, /* tp_call */
1157 0, /* tp_str */
1158 PyObject_GenericGetAttr, /* tp_getattro */
1159 0, /* tp_setattro */
1160 0, /* tp_as_buffer */
1161 Py_TPFLAGS_DEFAULT, /* tp_flags */
1162 0, /* tp_doc */
1163 0, /* tp_traverse */
1164 0, /* tp_clear */
1165 0, /* tp_richcompare */
1166 0, /* tp_weaklistoffset */
1167 PyObject_SelfIter, /* tp_iter */
1168 (iternextfunc)formatteriter_next, /* tp_iternext */
1169 formatteriter_methods, /* tp_methods */
1170 0,
1171};
1172
1173/* unicode_formatter_parser is used to implement
1174 string.Formatter.vformat. it parses a string and returns tuples
1175 describing the parsed elements. It's a wrapper around
1176 stringlib/string_format.h's MarkupIterator */
1177static PyObject *
Eric Smith8fd3eba2008-02-17 19:48:00 +00001178formatter_parser(STRINGLIB_OBJECT *self)
Eric Smithf6db4092007-08-27 23:52:26 +00001179{
1180 formatteriterobject *it;
1181
1182 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1183 if (it == NULL)
1184 return NULL;
1185
1186 /* take ownership, give the object to the iterator */
1187 Py_INCREF(self);
1188 it->str = self;
1189
1190 /* initialize the contained MarkupIterator */
1191 MarkupIterator_init(&it->it_markup,
Eric Smith8fd3eba2008-02-17 19:48:00 +00001192 STRINGLIB_STR(self),
1193 STRINGLIB_LEN(self));
Eric Smithf6db4092007-08-27 23:52:26 +00001194
1195 return (PyObject *)it;
1196}
1197
1198
1199/************************************************************************/
1200/*********** fieldnameiterator ******************************************/
1201/************************************************************************/
1202
1203
1204/* This is used to implement string.Formatter.vparse(). It parses the
1205 field name into attribute and item values. It's a Python-callable
1206 wrapper around FieldNameIterator */
1207
1208typedef struct {
1209 PyObject_HEAD
1210
Eric Smith8fd3eba2008-02-17 19:48:00 +00001211 STRINGLIB_OBJECT *str;
Eric Smithf6db4092007-08-27 23:52:26 +00001212
1213 FieldNameIterator it_field;
1214} fieldnameiterobject;
1215
1216static void
1217fieldnameiter_dealloc(fieldnameiterobject *it)
1218{
1219 Py_XDECREF(it->str);
1220 PyObject_FREE(it);
1221}
1222
1223/* returns a tuple:
1224 (is_attr, value)
1225 is_attr is true if we used attribute syntax (e.g., '.foo')
1226 false if we used index syntax (e.g., '[foo]')
1227 value is an integer or string
1228*/
1229static PyObject *
1230fieldnameiter_next(fieldnameiterobject *it)
1231{
1232 int result;
1233 int is_attr;
1234 Py_ssize_t idx;
1235 SubString name;
1236
1237 result = FieldNameIterator_next(&it->it_field, &is_attr,
1238 &idx, &name);
Eric Smith0cb431c2007-08-28 01:07:27 +00001239 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +00001240 /* if 0, error has already been set, if 1, iterator is empty */
1241 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +00001242 else {
Eric Smithf6db4092007-08-27 23:52:26 +00001243 PyObject* result = NULL;
1244 PyObject* is_attr_obj = NULL;
1245 PyObject* obj = NULL;
1246
1247 is_attr_obj = PyBool_FromLong(is_attr);
1248 if (is_attr_obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001249 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001250
1251 /* either an integer or a string */
1252 if (idx != -1)
Christian Heimes217cfd12007-12-02 14:31:20 +00001253 obj = PyLong_FromSsize_t(idx);
Eric Smithf6db4092007-08-27 23:52:26 +00001254 else
1255 obj = SubString_new_object(&name);
1256 if (obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001257 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001258
1259 /* return a tuple of values */
1260 result = PyTuple_Pack(2, is_attr_obj, obj);
Eric Smithf6db4092007-08-27 23:52:26 +00001261
Eric Smith625cbf22007-08-29 03:22:59 +00001262 done:
Eric Smithf6db4092007-08-27 23:52:26 +00001263 Py_XDECREF(is_attr_obj);
1264 Py_XDECREF(obj);
Eric Smith625cbf22007-08-29 03:22:59 +00001265 return result;
Eric Smithf6db4092007-08-27 23:52:26 +00001266 }
Eric Smithf6db4092007-08-27 23:52:26 +00001267}
1268
1269static PyMethodDef fieldnameiter_methods[] = {
1270 {NULL, NULL} /* sentinel */
1271};
1272
1273static PyTypeObject PyFieldNameIter_Type = {
1274 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1275 "fieldnameiterator", /* tp_name */
1276 sizeof(fieldnameiterobject), /* tp_basicsize */
1277 0, /* tp_itemsize */
1278 /* methods */
1279 (destructor)fieldnameiter_dealloc, /* tp_dealloc */
1280 0, /* tp_print */
1281 0, /* tp_getattr */
1282 0, /* tp_setattr */
Mark Dickinsone94c6792009-02-02 20:36:42 +00001283 0, /* tp_reserved */
Eric Smithf6db4092007-08-27 23:52:26 +00001284 0, /* tp_repr */
1285 0, /* tp_as_number */
1286 0, /* tp_as_sequence */
1287 0, /* tp_as_mapping */
1288 0, /* tp_hash */
1289 0, /* tp_call */
1290 0, /* tp_str */
1291 PyObject_GenericGetAttr, /* tp_getattro */
1292 0, /* tp_setattro */
1293 0, /* tp_as_buffer */
1294 Py_TPFLAGS_DEFAULT, /* tp_flags */
1295 0, /* tp_doc */
1296 0, /* tp_traverse */
1297 0, /* tp_clear */
1298 0, /* tp_richcompare */
1299 0, /* tp_weaklistoffset */
1300 PyObject_SelfIter, /* tp_iter */
1301 (iternextfunc)fieldnameiter_next, /* tp_iternext */
1302 fieldnameiter_methods, /* tp_methods */
1303 0};
1304
1305/* unicode_formatter_field_name_split is used to implement
1306 string.Formatter.vformat. it takes an PEP 3101 "field name", and
1307 returns a tuple of (first, rest): "first", the part before the
1308 first '.' or '['; and "rest", an iterator for the rest of the field
1309 name. it's a wrapper around stringlib/string_format.h's
1310 field_name_split. The iterator it returns is a
1311 FieldNameIterator */
1312static PyObject *
Eric Smith8fd3eba2008-02-17 19:48:00 +00001313formatter_field_name_split(STRINGLIB_OBJECT *self)
Eric Smithf6db4092007-08-27 23:52:26 +00001314{
1315 SubString first;
1316 Py_ssize_t first_idx;
1317 fieldnameiterobject *it;
1318
1319 PyObject *first_obj = NULL;
1320 PyObject *result = NULL;
1321
1322 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1323 if (it == NULL)
1324 return NULL;
1325
1326 /* take ownership, give the object to the iterator. this is
1327 just to keep the field_name alive */
1328 Py_INCREF(self);
1329 it->str = self;
1330
Eric Smith8ec90442009-03-14 12:29:34 +00001331 /* Pass in auto_number = NULL. We'll return an empty string for
1332 first_obj in that case. */
Eric Smithf6db4092007-08-27 23:52:26 +00001333 if (!field_name_split(STRINGLIB_STR(self),
1334 STRINGLIB_LEN(self),
Eric Smith8ec90442009-03-14 12:29:34 +00001335 &first, &first_idx, &it->it_field, NULL))
Eric Smith625cbf22007-08-29 03:22:59 +00001336 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001337
Eric Smith0cb431c2007-08-28 01:07:27 +00001338 /* first becomes an integer, if possible; else a string */
Eric Smithf6db4092007-08-27 23:52:26 +00001339 if (first_idx != -1)
Christian Heimes217cfd12007-12-02 14:31:20 +00001340 first_obj = PyLong_FromSsize_t(first_idx);
Eric Smithf6db4092007-08-27 23:52:26 +00001341 else
1342 /* convert "first" into a string object */
1343 first_obj = SubString_new_object(&first);
1344 if (first_obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001345 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001346
1347 /* return a tuple of values */
1348 result = PyTuple_Pack(2, first_obj, it);
1349
Eric Smith625cbf22007-08-29 03:22:59 +00001350done:
Eric Smithf6db4092007-08-27 23:52:26 +00001351 Py_XDECREF(it);
1352 Py_XDECREF(first_obj);
1353 return result;
1354}