/*
    string_format.h -- implementation of string.format().

    It uses the Objects/stringlib conventions, so that it can be
    compiled for both unicode and string objects.
*/
7
8
/* Tuning constants for growing the output buffer.  A new buffer starts
   with INITIAL_SIZE_INCREMENT characters of slack; output_extend
   multiplies the slack by SIZE_MULTIPLIER after each reallocation for
   as long as it is still below MAX_SIZE_INCREMENT. */
#define INITIAL_SIZE_INCREMENT 100
#define SIZE_MULTIPLIER 2
#define MAX_SIZE_INCREMENT 3200
13
14
15/************************************************************************/
16/*********** Global data structures and forward declarations *********/
17/************************************************************************/
18
19/*
20 A SubString consists of the characters between two string or
21 unicode pointers.
22*/
23typedef struct {
24 STRINGLIB_CHAR *ptr;
25 STRINGLIB_CHAR *end;
26} SubString;
27
28
29/* forward declaration for recursion */
30static PyObject *
31build_string(SubString *input, PyObject *args, PyObject *kwargs,
Eric Smith45c07872007-09-05 02:02:43 +000032 int recursion_depth);
Eric Smith8c663262007-08-25 02:26:07 +000033
34
35
36/************************************************************************/
37/************************** Utility functions ************************/
38/************************************************************************/
39
40/* fill in a SubString from a pointer and length */
41Py_LOCAL_INLINE(void)
42SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
43{
44 str->ptr = p;
45 if (p == NULL)
46 str->end = NULL;
47 else
48 str->end = str->ptr + len;
49}
50
Eric Smith625cbf22007-08-29 03:22:59 +000051/* return a new string. if str->ptr is NULL, return None */
Eric Smith8c663262007-08-25 02:26:07 +000052Py_LOCAL_INLINE(PyObject *)
53SubString_new_object(SubString *str)
54{
Eric Smith625cbf22007-08-29 03:22:59 +000055 if (str->ptr == NULL) {
56 Py_INCREF(Py_None);
57 return Py_None;
58 }
59 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
60}
61
62/* return a new string. if str->ptr is NULL, return None */
63Py_LOCAL_INLINE(PyObject *)
64SubString_new_object_or_empty(SubString *str)
65{
66 if (str->ptr == NULL) {
67 return STRINGLIB_NEW(NULL, 0);
68 }
Eric Smith8c663262007-08-25 02:26:07 +000069 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
70}
71
72/************************************************************************/
Eric Smith8c663262007-08-25 02:26:07 +000073/*********** Output string management functions ****************/
74/************************************************************************/
75
76typedef struct {
77 STRINGLIB_CHAR *ptr;
78 STRINGLIB_CHAR *end;
79 PyObject *obj;
80 Py_ssize_t size_increment;
81} OutputString;
82
83/* initialize an OutputString object, reserving size characters */
84static int
85output_initialize(OutputString *output, Py_ssize_t size)
86{
87 output->obj = STRINGLIB_NEW(NULL, size);
88 if (output->obj == NULL)
89 return 0;
90
91 output->ptr = STRINGLIB_STR(output->obj);
92 output->end = STRINGLIB_LEN(output->obj) + output->ptr;
93 output->size_increment = INITIAL_SIZE_INCREMENT;
94
95 return 1;
96}
97
98/*
99 output_extend reallocates the output string buffer.
100 It returns a status: 0 for a failed reallocation,
101 1 for success.
102*/
103
104static int
105output_extend(OutputString *output, Py_ssize_t count)
106{
107 STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
108 Py_ssize_t curlen = output->ptr - startptr;
109 Py_ssize_t maxlen = curlen + count + output->size_increment;
110
111 if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
112 return 0;
113 startptr = STRINGLIB_STR(output->obj);
114 output->ptr = startptr + curlen;
115 output->end = startptr + maxlen;
116 if (output->size_increment < MAX_SIZE_INCREMENT)
117 output->size_increment *= SIZE_MULTIPLIER;
118 return 1;
119}
120
121/*
122 output_data dumps characters into our output string
123 buffer.
124
125 In some cases, it has to reallocate the string.
126
127 It returns a status: 0 for a failed reallocation,
128 1 for success.
129*/
130static int
131output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
132{
133 if ((count > output->end - output->ptr) && !output_extend(output, count))
134 return 0;
135 memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
136 output->ptr += count;
137 return 1;
138}
139
140/************************************************************************/
141/*********** Format string parsing -- integers and identifiers *********/
142/************************************************************************/
143
Eric Smith7ade6482007-08-26 22:27:13 +0000144static Py_ssize_t
145get_integer(const SubString *str)
Eric Smith8c663262007-08-25 02:26:07 +0000146{
Eric Smith7ade6482007-08-26 22:27:13 +0000147 Py_ssize_t accumulator = 0;
148 Py_ssize_t digitval;
149 Py_ssize_t oldaccumulator;
150 STRINGLIB_CHAR *p;
Eric Smith8c663262007-08-25 02:26:07 +0000151
Eric Smith7ade6482007-08-26 22:27:13 +0000152 /* empty string is an error */
153 if (str->ptr >= str->end)
154 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000155
Eric Smith7ade6482007-08-26 22:27:13 +0000156 for (p = str->ptr; p < str->end; p++) {
157 digitval = STRINGLIB_TODECIMAL(*p);
Eric Smith8c663262007-08-25 02:26:07 +0000158 if (digitval < 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000159 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000160 /*
161 This trick was copied from old Unicode format code. It's cute,
162 but would really suck on an old machine with a slow divide
163 implementation. Fortunately, in the normal case we do not
164 expect too many digits.
165 */
166 oldaccumulator = accumulator;
167 accumulator *= 10;
168 if ((accumulator+10)/10 != oldaccumulator+1) {
169 PyErr_Format(PyExc_ValueError,
170 "Too many decimal digits in format string");
171 return -1;
172 }
173 accumulator += digitval;
174 }
Eric Smith7ade6482007-08-26 22:27:13 +0000175 return accumulator;
Eric Smith8c663262007-08-25 02:26:07 +0000176}
177
178/************************************************************************/
179/******** Functions to get field objects and specification strings ******/
180/************************************************************************/
181
Eric Smith7ade6482007-08-26 22:27:13 +0000182/* do the equivalent of obj.name */
Eric Smith8c663262007-08-25 02:26:07 +0000183static PyObject *
Eric Smith7ade6482007-08-26 22:27:13 +0000184getattr(PyObject *obj, SubString *name)
Eric Smith8c663262007-08-25 02:26:07 +0000185{
Eric Smith7ade6482007-08-26 22:27:13 +0000186 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000187 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000188 if (str == NULL)
189 return NULL;
190 newobj = PyObject_GetAttr(obj, str);
191 Py_DECREF(str);
192 return newobj;
Eric Smith8c663262007-08-25 02:26:07 +0000193}
194
Eric Smith7ade6482007-08-26 22:27:13 +0000195/* do the equivalent of obj[idx], where obj is a sequence */
196static PyObject *
197getitem_sequence(PyObject *obj, Py_ssize_t idx)
198{
199 return PySequence_GetItem(obj, idx);
200}
201
202/* do the equivalent of obj[idx], where obj is not a sequence */
203static PyObject *
204getitem_idx(PyObject *obj, Py_ssize_t idx)
205{
206 PyObject *newobj;
Christian Heimes217cfd12007-12-02 14:31:20 +0000207 PyObject *idx_obj = PyLong_FromSsize_t(idx);
Eric Smith7ade6482007-08-26 22:27:13 +0000208 if (idx_obj == NULL)
209 return NULL;
210 newobj = PyObject_GetItem(obj, idx_obj);
211 Py_DECREF(idx_obj);
212 return newobj;
213}
214
215/* do the equivalent of obj[name] */
216static PyObject *
217getitem_str(PyObject *obj, SubString *name)
218{
219 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000220 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000221 if (str == NULL)
222 return NULL;
223 newobj = PyObject_GetItem(obj, str);
224 Py_DECREF(str);
225 return newobj;
226}
227
228typedef struct {
229 /* the entire string we're parsing. we assume that someone else
230 is managing its lifetime, and that it will exist for the
231 lifetime of the iterator. can be empty */
232 SubString str;
233
234 /* pointer to where we are inside field_name */
235 STRINGLIB_CHAR *ptr;
236} FieldNameIterator;
237
238
239static int
240FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
241 Py_ssize_t len)
242{
243 SubString_init(&self->str, ptr, len);
244 self->ptr = self->str.ptr;
245 return 1;
246}
247
248static int
249_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
250{
251 STRINGLIB_CHAR c;
252
253 name->ptr = self->ptr;
254
255 /* return everything until '.' or '[' */
256 while (self->ptr < self->str.end) {
257 switch (c = *self->ptr++) {
258 case '[':
259 case '.':
260 /* backup so that we this character will be seen next time */
261 self->ptr--;
262 break;
263 default:
264 continue;
265 }
266 break;
267 }
268 /* end of string is okay */
269 name->end = self->ptr;
270 return 1;
271}
272
273static int
274_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
275{
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000276 int bracket_seen = 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000277 STRINGLIB_CHAR c;
278
279 name->ptr = self->ptr;
280
281 /* return everything until ']' */
282 while (self->ptr < self->str.end) {
283 switch (c = *self->ptr++) {
284 case ']':
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000285 bracket_seen = 1;
Eric Smith7ade6482007-08-26 22:27:13 +0000286 break;
287 default:
288 continue;
289 }
290 break;
291 }
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000292 /* make sure we ended with a ']' */
293 if (!bracket_seen) {
294 PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
295 return 0;
296 }
297
Eric Smith7ade6482007-08-26 22:27:13 +0000298 /* end of string is okay */
299 /* don't include the ']' */
300 name->end = self->ptr-1;
301 return 1;
302}
303
304/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
305static int
306FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
307 Py_ssize_t *name_idx, SubString *name)
308{
309 /* check at end of input */
310 if (self->ptr >= self->str.end)
311 return 1;
312
313 switch (*self->ptr++) {
314 case '.':
315 *is_attribute = 1;
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000316 if (_FieldNameIterator_attr(self, name) == 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000317 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000318 *name_idx = -1;
319 break;
320 case '[':
321 *is_attribute = 0;
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000322 if (_FieldNameIterator_item(self, name) == 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000323 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000324 *name_idx = get_integer(name);
325 break;
326 default:
327 /* interal error, can't get here */
328 assert(0);
329 return 0;
330 }
331
332 /* empty string is an error */
333 if (name->ptr == name->end) {
334 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
335 return 0;
336 }
337
338 return 2;
339}
340
341
342/* input: field_name
343 output: 'first' points to the part before the first '[' or '.'
344 'first_idx' is -1 if 'first' is not an integer, otherwise
345 it's the value of first converted to an integer
346 'rest' is an iterator to return the rest
347*/
348static int
349field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
350 Py_ssize_t *first_idx, FieldNameIterator *rest)
351{
352 STRINGLIB_CHAR c;
353 STRINGLIB_CHAR *p = ptr;
354 STRINGLIB_CHAR *end = ptr + len;
355
356 /* find the part up until the first '.' or '[' */
357 while (p < end) {
358 switch (c = *p++) {
359 case '[':
360 case '.':
361 /* backup so that we this character is available to the
362 "rest" iterator */
363 p--;
364 break;
365 default:
366 continue;
367 }
368 break;
369 }
370
371 /* set up the return values */
372 SubString_init(first, ptr, p - ptr);
373 FieldNameIterator_init(rest, p, end - p);
374
375 /* see if "first" is an integer, in which case it's used as an index */
376 *first_idx = get_integer(first);
377
378 /* zero length string is an error */
379 if (first->ptr >= first->end) {
380 PyErr_SetString(PyExc_ValueError, "empty field name");
381 goto error;
382 }
383
384 return 1;
385error:
386 return 0;
387}
388
389
Eric Smith8c663262007-08-25 02:26:07 +0000390/*
391 get_field_object returns the object inside {}, before the
392 format_spec. It handles getindex and getattr lookups and consumes
393 the entire input string.
394*/
395static PyObject *
396get_field_object(SubString *input, PyObject *args, PyObject *kwargs)
397{
Eric Smith7ade6482007-08-26 22:27:13 +0000398 PyObject *obj = NULL;
399 int ok;
400 int is_attribute;
401 SubString name;
402 SubString first;
Eric Smith8c663262007-08-25 02:26:07 +0000403 Py_ssize_t index;
Eric Smith7ade6482007-08-26 22:27:13 +0000404 FieldNameIterator rest;
Eric Smith8c663262007-08-25 02:26:07 +0000405
Eric Smith7ade6482007-08-26 22:27:13 +0000406 if (!field_name_split(input->ptr, input->end - input->ptr, &first,
407 &index, &rest)) {
408 goto error;
409 }
Eric Smith8c663262007-08-25 02:26:07 +0000410
Eric Smith7ade6482007-08-26 22:27:13 +0000411 if (index == -1) {
412 /* look up in kwargs */
Eric Smith7a6dd292007-08-27 23:30:47 +0000413 PyObject *key = SubString_new_object(&first);
Eric Smith7ade6482007-08-26 22:27:13 +0000414 if (key == NULL)
415 goto error;
416 if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
Eric Smith11529192007-09-04 23:04:22 +0000417 PyErr_SetObject(PyExc_KeyError, key);
Eric Smith7ade6482007-08-26 22:27:13 +0000418 Py_DECREF(key);
419 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000420 }
Neal Norwitz8a4eb292007-08-27 07:24:17 +0000421 Py_DECREF(key);
Neal Norwitz247b5152007-08-27 03:22:50 +0000422 Py_INCREF(obj);
Eric Smith0cb431c2007-08-28 01:07:27 +0000423 }
424 else {
Eric Smith7ade6482007-08-26 22:27:13 +0000425 /* look up in args */
426 obj = PySequence_GetItem(args, index);
Eric Smith11529192007-09-04 23:04:22 +0000427 if (obj == NULL)
Eric Smith7ade6482007-08-26 22:27:13 +0000428 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000429 }
Eric Smith7ade6482007-08-26 22:27:13 +0000430
431 /* iterate over the rest of the field_name */
432 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
433 &name)) == 2) {
434 PyObject *tmp;
435
436 if (is_attribute)
437 /* getattr lookup "." */
438 tmp = getattr(obj, &name);
439 else
440 /* getitem lookup "[]" */
441 if (index == -1)
442 tmp = getitem_str(obj, &name);
443 else
444 if (PySequence_Check(obj))
445 tmp = getitem_sequence(obj, index);
446 else
447 /* not a sequence */
448 tmp = getitem_idx(obj, index);
449 if (tmp == NULL)
450 goto error;
451
452 /* assign to obj */
453 Py_DECREF(obj);
454 obj = tmp;
Eric Smith8c663262007-08-25 02:26:07 +0000455 }
Eric Smith7ade6482007-08-26 22:27:13 +0000456 /* end of iterator, this is the non-error case */
457 if (ok == 1)
458 return obj;
459error:
460 Py_XDECREF(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000461 return NULL;
462}
463
464/************************************************************************/
465/***************** Field rendering functions **************************/
466/************************************************************************/
467
468/*
469 render_field() is the main function in this section. It takes the
470 field object and field specification string generated by
471 get_field_and_spec, and renders the field into the output string.
472
473 format() does the actual calling of the objects __format__ method.
474*/
475
476
477/* returns fieldobj.__format__(format_spec) */
478static PyObject *
479format(PyObject *fieldobj, SubString *format_spec)
480{
481 static PyObject *format_str = NULL;
482 PyObject *meth;
483 PyObject *spec = NULL;
484 PyObject *result = NULL;
485
486 /* Initialize cached value */
487 if (format_str == NULL) {
488 /* Initialize static variable needed by _PyType_Lookup */
489 format_str = PyUnicode_FromString("__format__");
490 if (format_str == NULL)
491 return NULL;
492 }
493
494 /* Make sure the type is initialized. float gets initialized late */
495 if (Py_Type(fieldobj)->tp_dict == NULL)
496 if (PyType_Ready(Py_Type(fieldobj)) < 0)
497 return NULL;
498
499 /* we need to create an object out of the pointers we have */
Eric Smith625cbf22007-08-29 03:22:59 +0000500 spec = SubString_new_object_or_empty(format_spec);
Eric Smith8c663262007-08-25 02:26:07 +0000501 if (spec == NULL)
502 goto done;
503
504 /* Find the (unbound!) __format__ method (a borrowed reference) */
505 meth = _PyType_Lookup(Py_Type(fieldobj), format_str);
506 if (meth == NULL) {
507 PyErr_Format(PyExc_TypeError,
508 "Type %.100s doesn't define __format__",
509 Py_Type(fieldobj)->tp_name);
510 goto done;
511 }
512
513 /* And call it, binding it to the value */
514 result = PyObject_CallFunctionObjArgs(meth, fieldobj, spec, NULL);
515 if (result == NULL)
516 goto done;
517
518 if (!STRINGLIB_CHECK(result)) {
519 PyErr_SetString(PyExc_TypeError,
520 "__format__ method did not return "
521 STRINGLIB_TYPE_NAME);
522 Py_DECREF(result);
523 result = NULL;
524 goto done;
525 }
526
527done:
528 Py_XDECREF(spec);
529 return result;
530}
531
532/*
533 render_field calls fieldobj.__format__(format_spec) method, and
534 appends to the output.
535*/
536static int
537render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
538{
539 int ok = 0;
540 PyObject *result = format(fieldobj, format_spec);
541
542 if (result == NULL)
543 goto done;
544
545 ok = output_data(output,
546 STRINGLIB_STR(result), STRINGLIB_LEN(result));
547done:
548 Py_XDECREF(result);
549 return ok;
550}
551
552static int
553parse_field(SubString *str, SubString *field_name, SubString *format_spec,
554 STRINGLIB_CHAR *conversion)
555{
556 STRINGLIB_CHAR c = 0;
557
558 /* initialize these, as they may be empty */
559 *conversion = '\0';
560 SubString_init(format_spec, NULL, 0);
561
562 /* search for the field name. it's terminated by the end of the
563 string, or a ':' or '!' */
564 field_name->ptr = str->ptr;
565 while (str->ptr < str->end) {
566 switch (c = *(str->ptr++)) {
567 case ':':
568 case '!':
569 break;
570 default:
571 continue;
572 }
573 break;
574 }
575
576 if (c == '!' || c == ':') {
577 /* we have a format specifier and/or a conversion */
578 /* don't include the last character */
579 field_name->end = str->ptr-1;
580
581 /* the format specifier is the rest of the string */
582 format_spec->ptr = str->ptr;
583 format_spec->end = str->end;
584
585 /* see if there's a conversion specifier */
586 if (c == '!') {
587 /* there must be another character present */
588 if (format_spec->ptr >= format_spec->end) {
589 PyErr_SetString(PyExc_ValueError,
590 "end of format while looking for conversion "
591 "specifier");
592 return 0;
593 }
594 *conversion = *(format_spec->ptr++);
595
596 /* if there is another character, it must be a colon */
597 if (format_spec->ptr < format_spec->end) {
598 c = *(format_spec->ptr++);
599 if (c != ':') {
600 PyErr_SetString(PyExc_ValueError,
601 "expected ':' after format specifier");
602 return 0;
603 }
604 }
605 }
606
607 return 1;
608
Eric Smith0cb431c2007-08-28 01:07:27 +0000609 }
610 else {
Eric Smith8c663262007-08-25 02:26:07 +0000611 /* end of string, there's no format_spec or conversion */
612 field_name->end = str->ptr;
613 return 1;
614 }
615}
616
617/************************************************************************/
618/******* Output string allocation and escape-to-markup processing ******/
619/************************************************************************/
620
621/* MarkupIterator breaks the string into pieces of either literal
622 text, or things inside {} that need to be marked up. it is
623 designed to make it easy to wrap a Python iterator around it, for
624 use with the Formatter class */
625
626typedef struct {
627 SubString str;
Eric Smith8c663262007-08-25 02:26:07 +0000628} MarkupIterator;
629
630static int
631MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
632{
633 SubString_init(&self->str, ptr, len);
Eric Smith8c663262007-08-25 02:26:07 +0000634 return 1;
635}
636
637/* returns 0 on error, 1 on non-error termination, and 2 if it got a
638 string (or something to be expanded) */
639static int
Eric Smith625cbf22007-08-29 03:22:59 +0000640MarkupIterator_next(MarkupIterator *self, SubString *literal,
Eric Smith8c663262007-08-25 02:26:07 +0000641 SubString *field_name, SubString *format_spec,
642 STRINGLIB_CHAR *conversion,
643 int *format_spec_needs_expanding)
644{
645 int at_end;
646 STRINGLIB_CHAR c = 0;
647 STRINGLIB_CHAR *start;
648 int count;
649 Py_ssize_t len;
Eric Smith625cbf22007-08-29 03:22:59 +0000650 int markup_follows = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000651
Eric Smith625cbf22007-08-29 03:22:59 +0000652 /* initialize all of the output variables */
653 SubString_init(literal, NULL, 0);
654 SubString_init(field_name, NULL, 0);
655 SubString_init(format_spec, NULL, 0);
656 *conversion = '\0';
Eric Smith8c663262007-08-25 02:26:07 +0000657 *format_spec_needs_expanding = 0;
658
Eric Smith625cbf22007-08-29 03:22:59 +0000659 /* No more input, end of iterator. This is the normal exit
660 path. */
Eric Smith8c663262007-08-25 02:26:07 +0000661 if (self->str.ptr >= self->str.end)
662 return 1;
663
Eric Smith8c663262007-08-25 02:26:07 +0000664 start = self->str.ptr;
665
Eric Smith625cbf22007-08-29 03:22:59 +0000666 /* First read any literal text. Read until the end of string, an
667 escaped '{' or '}', or an unescaped '{'. In order to never
668 allocate memory and so I can just pass pointers around, if
669 there's an escaped '{' or '}' then we'll return the literal
670 including the brace, but no format object. The next time
671 through, we'll return the rest of the literal, skipping past
672 the second consecutive brace. */
673 while (self->str.ptr < self->str.end) {
674 switch (c = *(self->str.ptr++)) {
675 case '{':
676 case '}':
677 markup_follows = 1;
678 break;
679 default:
680 continue;
Eric Smith8c663262007-08-25 02:26:07 +0000681 }
Eric Smith625cbf22007-08-29 03:22:59 +0000682 break;
Eric Smith0cb431c2007-08-28 01:07:27 +0000683 }
Eric Smith625cbf22007-08-29 03:22:59 +0000684
685 at_end = self->str.ptr >= self->str.end;
686 len = self->str.ptr - start;
687
688 if ((c == '}') && (at_end || (c != *self->str.ptr))) {
689 PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
690 "in format string");
691 return 0;
692 }
693 if (at_end && c == '{') {
694 PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
695 "in format string");
696 return 0;
697 }
698 if (!at_end) {
699 if (c == *self->str.ptr) {
700 /* escaped } or {, skip it in the input. there is no
701 markup object following us, just this literal text */
702 self->str.ptr++;
703 markup_follows = 0;
704 }
705 else
706 len--;
707 }
708
709 /* record the literal text */
710 literal->ptr = start;
711 literal->end = start + len;
712
713 if (!markup_follows)
714 return 2;
715
716 /* this is markup, find the end of the string by counting nested
717 braces. note that this prohibits escaped braces, so that
718 format_specs cannot have braces in them. */
719 count = 1;
720
721 start = self->str.ptr;
722
723 /* we know we can't have a zero length string, so don't worry
724 about that case */
725 while (self->str.ptr < self->str.end) {
726 switch (c = *(self->str.ptr++)) {
727 case '{':
728 /* the format spec needs to be recursively expanded.
729 this is an optimization, and not strictly needed */
730 *format_spec_needs_expanding = 1;
731 count++;
732 break;
733 case '}':
734 count--;
735 if (count <= 0) {
736 /* we're done. parse and get out */
737 SubString s;
738
739 SubString_init(&s, start, self->str.ptr - 1 - start);
740 if (parse_field(&s, field_name, format_spec, conversion) == 0)
741 return 0;
742
743 /* a zero length field_name is an error */
744 if (field_name->ptr == field_name->end) {
745 PyErr_SetString(PyExc_ValueError, "zero length field name "
746 "in format");
747 return 0;
748 }
749
750 /* success */
751 return 2;
Eric Smith8c663262007-08-25 02:26:07 +0000752 }
753 break;
754 }
Eric Smith8c663262007-08-25 02:26:07 +0000755 }
Eric Smith625cbf22007-08-29 03:22:59 +0000756
757 /* end of string while searching for matching '}' */
758 PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
759 return 0;
Eric Smith8c663262007-08-25 02:26:07 +0000760}
761
762
763/* do the !r or !s conversion on obj */
764static PyObject *
765do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
766{
767 /* XXX in pre-3.0, do we need to convert this to unicode, since it
768 might have returned a string? */
769 switch (conversion) {
770 case 'r':
771 return PyObject_Repr(obj);
772 case 's':
Thomas Heller519a0422007-11-15 20:48:54 +0000773 return PyObject_Str(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000774 default:
775 PyErr_Format(PyExc_ValueError,
776 "Unknown converion specifier %c",
777 conversion);
778 return NULL;
779 }
780}
781
782/* given:
783
784 {field_name!conversion:format_spec}
785
786 compute the result and write it to output.
787 format_spec_needs_expanding is an optimization. if it's false,
788 just output the string directly, otherwise recursively expand the
789 format_spec string. */
790
791static int
792output_markup(SubString *field_name, SubString *format_spec,
793 int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
794 OutputString *output, PyObject *args, PyObject *kwargs,
Eric Smith45c07872007-09-05 02:02:43 +0000795 int recursion_depth)
Eric Smith8c663262007-08-25 02:26:07 +0000796{
797 PyObject *tmp = NULL;
798 PyObject *fieldobj = NULL;
799 SubString expanded_format_spec;
800 SubString *actual_format_spec;
801 int result = 0;
802
803 /* convert field_name to an object */
804 fieldobj = get_field_object(field_name, args, kwargs);
805 if (fieldobj == NULL)
806 goto done;
807
808 if (conversion != '\0') {
809 tmp = do_conversion(fieldobj, conversion);
810 if (tmp == NULL)
811 goto done;
812
813 /* do the assignment, transferring ownership: fieldobj = tmp */
814 Py_DECREF(fieldobj);
815 fieldobj = tmp;
816 tmp = NULL;
817 }
818
819 /* if needed, recurively compute the format_spec */
820 if (format_spec_needs_expanding) {
Eric Smith45c07872007-09-05 02:02:43 +0000821 tmp = build_string(format_spec, args, kwargs, recursion_depth-1);
Eric Smith8c663262007-08-25 02:26:07 +0000822 if (tmp == NULL)
823 goto done;
824
825 /* note that in the case we're expanding the format string,
826 tmp must be kept around until after the call to
827 render_field. */
828 SubString_init(&expanded_format_spec,
829 STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
830 actual_format_spec = &expanded_format_spec;
Eric Smith0cb431c2007-08-28 01:07:27 +0000831 }
832 else
Eric Smith8c663262007-08-25 02:26:07 +0000833 actual_format_spec = format_spec;
834
835 if (render_field(fieldobj, actual_format_spec, output) == 0)
836 goto done;
837
838 result = 1;
839
840done:
841 Py_XDECREF(fieldobj);
842 Py_XDECREF(tmp);
843
844 return result;
845}
846
847/*
848 do_markup is the top-level loop for the format() function. It
849 searches through the format string for escapes to markup codes, and
850 calls other functions to move non-markup text to the output,
851 and to perform the markup to the output.
852*/
853static int
854do_markup(SubString *input, PyObject *args, PyObject *kwargs,
Eric Smith45c07872007-09-05 02:02:43 +0000855 OutputString *output, int recursion_depth)
Eric Smith8c663262007-08-25 02:26:07 +0000856{
857 MarkupIterator iter;
Eric Smith8c663262007-08-25 02:26:07 +0000858 int format_spec_needs_expanding;
859 int result;
Eric Smith625cbf22007-08-29 03:22:59 +0000860 SubString literal;
Eric Smith8c663262007-08-25 02:26:07 +0000861 SubString field_name;
862 SubString format_spec;
863 STRINGLIB_CHAR conversion;
864
865 MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
Eric Smith625cbf22007-08-29 03:22:59 +0000866 while ((result = MarkupIterator_next(&iter, &literal, &field_name,
Eric Smith8c663262007-08-25 02:26:07 +0000867 &format_spec, &conversion,
868 &format_spec_needs_expanding)) == 2) {
Eric Smith625cbf22007-08-29 03:22:59 +0000869 if (!output_data(output, literal.ptr, literal.end - literal.ptr))
870 return 0;
871 if (field_name.ptr != field_name.end)
Eric Smith8c663262007-08-25 02:26:07 +0000872 if (!output_markup(&field_name, &format_spec,
873 format_spec_needs_expanding, conversion, output,
Eric Smith45c07872007-09-05 02:02:43 +0000874 args, kwargs, recursion_depth))
Eric Smith8c663262007-08-25 02:26:07 +0000875 return 0;
Eric Smith8c663262007-08-25 02:26:07 +0000876 }
877 return result;
878}
879
880
881/*
882 build_string allocates the output string and then
883 calls do_markup to do the heavy lifting.
884*/
885static PyObject *
886build_string(SubString *input, PyObject *args, PyObject *kwargs,
Eric Smith45c07872007-09-05 02:02:43 +0000887 int recursion_depth)
Eric Smith8c663262007-08-25 02:26:07 +0000888{
889 OutputString output;
890 PyObject *result = NULL;
891 Py_ssize_t count;
892
893 output.obj = NULL; /* needed so cleanup code always works */
894
895 /* check the recursion level */
Eric Smith45c07872007-09-05 02:02:43 +0000896 if (recursion_depth <= 0) {
Eric Smith8c663262007-08-25 02:26:07 +0000897 PyErr_SetString(PyExc_ValueError,
898 "Max string recursion exceeded");
899 goto done;
900 }
901
902 /* initial size is the length of the format string, plus the size
903 increment. seems like a reasonable default */
904 if (!output_initialize(&output,
905 input->end - input->ptr +
906 INITIAL_SIZE_INCREMENT))
907 goto done;
908
Eric Smith45c07872007-09-05 02:02:43 +0000909 if (!do_markup(input, args, kwargs, &output, recursion_depth)) {
Eric Smith8c663262007-08-25 02:26:07 +0000910 goto done;
911 }
912
913 count = output.ptr - STRINGLIB_STR(output.obj);
914 if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
915 goto done;
916 }
917
918 /* transfer ownership to result */
919 result = output.obj;
920 output.obj = NULL;
921
922done:
Eric Smith8c663262007-08-25 02:26:07 +0000923 Py_XDECREF(output.obj);
924 return result;
925}
926
927/************************************************************************/
928/*********** main routine ***********************************************/
929/************************************************************************/
930
931/* this is the main entry point */
932static PyObject *
933do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
934{
935 SubString input;
936
937 /* PEP 3101 says only 2 levels, so that
938 "{0:{1}}".format('abc', 's') # works
939 "{0:{1:{2}}}".format('abc', 's', '') # fails
940 */
Eric Smith45c07872007-09-05 02:02:43 +0000941 int recursion_depth = 2;
Eric Smith8c663262007-08-25 02:26:07 +0000942
943 SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
Eric Smith45c07872007-09-05 02:02:43 +0000944 return build_string(&input, args, kwargs, recursion_depth);
Eric Smith8c663262007-08-25 02:26:07 +0000945}
Eric Smithf6db4092007-08-27 23:52:26 +0000946
947
948
949/************************************************************************/
950/*********** formatteriterator ******************************************/
951/************************************************************************/
952
953/* This is used to implement string.Formatter.vparse(). It exists so
954 Formatter can share code with the built in unicode.format() method.
955 It's really just a wrapper around MarkupIterator that is callable
956 from Python. */
957
958typedef struct {
959 PyObject_HEAD
960
961 PyUnicodeObject *str;
962
963 MarkupIterator it_markup;
964} formatteriterobject;
965
966static void
967formatteriter_dealloc(formatteriterobject *it)
968{
969 Py_XDECREF(it->str);
970 PyObject_FREE(it);
971}
972
973/* returns a tuple:
Eric Smith625cbf22007-08-29 03:22:59 +0000974 (literal, field_name, format_spec, conversion)
975
976 literal is any literal text to output. might be zero length
977 field_name is the string before the ':'. might be None
978 format_spec is the string after the ':'. mibht be None
979 conversion is either None, or the string after the '!'
Eric Smithf6db4092007-08-27 23:52:26 +0000980*/
981static PyObject *
982formatteriter_next(formatteriterobject *it)
983{
984 SubString literal;
985 SubString field_name;
986 SubString format_spec;
987 Py_UNICODE conversion;
Eric Smithf6db4092007-08-27 23:52:26 +0000988 int format_spec_needs_expanding;
Eric Smith625cbf22007-08-29 03:22:59 +0000989 int result = MarkupIterator_next(&it->it_markup, &literal, &field_name,
990 &format_spec, &conversion,
Eric Smithf6db4092007-08-27 23:52:26 +0000991 &format_spec_needs_expanding);
992
993 /* all of the SubString objects point into it->str, so no
994 memory management needs to be done on them */
995 assert(0 <= result && result <= 2);
Eric Smith0cb431c2007-08-28 01:07:27 +0000996 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +0000997 /* if 0, error has already been set, if 1, iterator is empty */
998 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +0000999 else {
Eric Smithf6db4092007-08-27 23:52:26 +00001000 PyObject *literal_str = NULL;
1001 PyObject *field_name_str = NULL;
1002 PyObject *format_spec_str = NULL;
1003 PyObject *conversion_str = NULL;
1004 PyObject *tuple = NULL;
Eric Smith625cbf22007-08-29 03:22:59 +00001005 int has_field = field_name.ptr != field_name.end;
Eric Smithf6db4092007-08-27 23:52:26 +00001006
Eric Smith625cbf22007-08-29 03:22:59 +00001007 literal_str = SubString_new_object(&literal);
1008 if (literal_str == NULL)
1009 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001010
Eric Smith625cbf22007-08-29 03:22:59 +00001011 field_name_str = SubString_new_object(&field_name);
1012 if (field_name_str == NULL)
1013 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001014
Eric Smith625cbf22007-08-29 03:22:59 +00001015 /* if field_name is non-zero length, return a string for
1016 format_spec (even if zero length), else return None */
1017 format_spec_str = (has_field ?
1018 SubString_new_object_or_empty :
1019 SubString_new_object)(&format_spec);
1020 if (format_spec_str == NULL)
1021 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001022
Eric Smith625cbf22007-08-29 03:22:59 +00001023 /* if the conversion is not specified, return a None,
1024 otherwise create a one length string with the conversion
1025 character */
1026 if (conversion == '\0') {
Eric Smithf6db4092007-08-27 23:52:26 +00001027 conversion_str = Py_None;
Eric Smithf6db4092007-08-27 23:52:26 +00001028 Py_INCREF(conversion_str);
1029 }
Eric Smith625cbf22007-08-29 03:22:59 +00001030 else
1031 conversion_str = PyUnicode_FromUnicode(&conversion, 1);
1032 if (conversion_str == NULL)
1033 goto done;
1034
Eric Smith9e7c8da2007-08-28 11:15:20 +00001035 tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
Eric Smithf6db4092007-08-27 23:52:26 +00001036 conversion_str);
Eric Smith625cbf22007-08-29 03:22:59 +00001037 done:
Eric Smithf6db4092007-08-27 23:52:26 +00001038 Py_XDECREF(literal_str);
1039 Py_XDECREF(field_name_str);
1040 Py_XDECREF(format_spec_str);
1041 Py_XDECREF(conversion_str);
1042 return tuple;
1043 }
1044}
1045
1046static PyMethodDef formatteriter_methods[] = {
1047 {NULL, NULL} /* sentinel */
1048};
1049
1050PyTypeObject PyFormatterIter_Type = {
1051 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1052 "formatteriterator", /* tp_name */
1053 sizeof(formatteriterobject), /* tp_basicsize */
1054 0, /* tp_itemsize */
1055 /* methods */
1056 (destructor)formatteriter_dealloc, /* tp_dealloc */
1057 0, /* tp_print */
1058 0, /* tp_getattr */
1059 0, /* tp_setattr */
1060 0, /* tp_compare */
1061 0, /* tp_repr */
1062 0, /* tp_as_number */
1063 0, /* tp_as_sequence */
1064 0, /* tp_as_mapping */
1065 0, /* tp_hash */
1066 0, /* tp_call */
1067 0, /* tp_str */
1068 PyObject_GenericGetAttr, /* tp_getattro */
1069 0, /* tp_setattro */
1070 0, /* tp_as_buffer */
1071 Py_TPFLAGS_DEFAULT, /* tp_flags */
1072 0, /* tp_doc */
1073 0, /* tp_traverse */
1074 0, /* tp_clear */
1075 0, /* tp_richcompare */
1076 0, /* tp_weaklistoffset */
1077 PyObject_SelfIter, /* tp_iter */
1078 (iternextfunc)formatteriter_next, /* tp_iternext */
1079 formatteriter_methods, /* tp_methods */
1080 0,
1081};
1082
1083/* unicode_formatter_parser is used to implement
1084 string.Formatter.vformat. it parses a string and returns tuples
1085 describing the parsed elements. It's a wrapper around
1086 stringlib/string_format.h's MarkupIterator */
1087static PyObject *
1088formatter_parser(PyUnicodeObject *self)
1089{
1090 formatteriterobject *it;
1091
1092 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1093 if (it == NULL)
1094 return NULL;
1095
1096 /* take ownership, give the object to the iterator */
1097 Py_INCREF(self);
1098 it->str = self;
1099
1100 /* initialize the contained MarkupIterator */
1101 MarkupIterator_init(&it->it_markup,
1102 PyUnicode_AS_UNICODE(self),
1103 PyUnicode_GET_SIZE(self));
1104
1105 return (PyObject *)it;
1106}
1107
1108
1109/************************************************************************/
1110/*********** fieldnameiterator ******************************************/
1111/************************************************************************/
1112
1113
1114/* This is used to implement string.Formatter.vparse(). It parses the
1115 field name into attribute and item values. It's a Python-callable
1116 wrapper around FieldNameIterator */
1117
1118typedef struct {
1119 PyObject_HEAD
1120
1121 PyUnicodeObject *str;
1122
1123 FieldNameIterator it_field;
1124} fieldnameiterobject;
1125
1126static void
1127fieldnameiter_dealloc(fieldnameiterobject *it)
1128{
1129 Py_XDECREF(it->str);
1130 PyObject_FREE(it);
1131}
1132
1133/* returns a tuple:
1134 (is_attr, value)
1135 is_attr is true if we used attribute syntax (e.g., '.foo')
1136 false if we used index syntax (e.g., '[foo]')
1137 value is an integer or string
1138*/
1139static PyObject *
1140fieldnameiter_next(fieldnameiterobject *it)
1141{
1142 int result;
1143 int is_attr;
1144 Py_ssize_t idx;
1145 SubString name;
1146
1147 result = FieldNameIterator_next(&it->it_field, &is_attr,
1148 &idx, &name);
Eric Smith0cb431c2007-08-28 01:07:27 +00001149 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +00001150 /* if 0, error has already been set, if 1, iterator is empty */
1151 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +00001152 else {
Eric Smithf6db4092007-08-27 23:52:26 +00001153 PyObject* result = NULL;
1154 PyObject* is_attr_obj = NULL;
1155 PyObject* obj = NULL;
1156
1157 is_attr_obj = PyBool_FromLong(is_attr);
1158 if (is_attr_obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001159 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001160
1161 /* either an integer or a string */
1162 if (idx != -1)
Christian Heimes217cfd12007-12-02 14:31:20 +00001163 obj = PyLong_FromSsize_t(idx);
Eric Smithf6db4092007-08-27 23:52:26 +00001164 else
1165 obj = SubString_new_object(&name);
1166 if (obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001167 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001168
1169 /* return a tuple of values */
1170 result = PyTuple_Pack(2, is_attr_obj, obj);
Eric Smithf6db4092007-08-27 23:52:26 +00001171
Eric Smith625cbf22007-08-29 03:22:59 +00001172 done:
Eric Smithf6db4092007-08-27 23:52:26 +00001173 Py_XDECREF(is_attr_obj);
1174 Py_XDECREF(obj);
Eric Smith625cbf22007-08-29 03:22:59 +00001175 return result;
Eric Smithf6db4092007-08-27 23:52:26 +00001176 }
Eric Smithf6db4092007-08-27 23:52:26 +00001177}
1178
1179static PyMethodDef fieldnameiter_methods[] = {
1180 {NULL, NULL} /* sentinel */
1181};
1182
1183static PyTypeObject PyFieldNameIter_Type = {
1184 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1185 "fieldnameiterator", /* tp_name */
1186 sizeof(fieldnameiterobject), /* tp_basicsize */
1187 0, /* tp_itemsize */
1188 /* methods */
1189 (destructor)fieldnameiter_dealloc, /* tp_dealloc */
1190 0, /* tp_print */
1191 0, /* tp_getattr */
1192 0, /* tp_setattr */
1193 0, /* tp_compare */
1194 0, /* tp_repr */
1195 0, /* tp_as_number */
1196 0, /* tp_as_sequence */
1197 0, /* tp_as_mapping */
1198 0, /* tp_hash */
1199 0, /* tp_call */
1200 0, /* tp_str */
1201 PyObject_GenericGetAttr, /* tp_getattro */
1202 0, /* tp_setattro */
1203 0, /* tp_as_buffer */
1204 Py_TPFLAGS_DEFAULT, /* tp_flags */
1205 0, /* tp_doc */
1206 0, /* tp_traverse */
1207 0, /* tp_clear */
1208 0, /* tp_richcompare */
1209 0, /* tp_weaklistoffset */
1210 PyObject_SelfIter, /* tp_iter */
1211 (iternextfunc)fieldnameiter_next, /* tp_iternext */
1212 fieldnameiter_methods, /* tp_methods */
1213 0};
1214
1215/* unicode_formatter_field_name_split is used to implement
1216 string.Formatter.vformat. it takes an PEP 3101 "field name", and
1217 returns a tuple of (first, rest): "first", the part before the
1218 first '.' or '['; and "rest", an iterator for the rest of the field
1219 name. it's a wrapper around stringlib/string_format.h's
1220 field_name_split. The iterator it returns is a
1221 FieldNameIterator */
1222static PyObject *
1223formatter_field_name_split(PyUnicodeObject *self)
1224{
1225 SubString first;
1226 Py_ssize_t first_idx;
1227 fieldnameiterobject *it;
1228
1229 PyObject *first_obj = NULL;
1230 PyObject *result = NULL;
1231
1232 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1233 if (it == NULL)
1234 return NULL;
1235
1236 /* take ownership, give the object to the iterator. this is
1237 just to keep the field_name alive */
1238 Py_INCREF(self);
1239 it->str = self;
1240
1241 if (!field_name_split(STRINGLIB_STR(self),
1242 STRINGLIB_LEN(self),
1243 &first, &first_idx, &it->it_field))
Eric Smith625cbf22007-08-29 03:22:59 +00001244 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001245
Eric Smith0cb431c2007-08-28 01:07:27 +00001246 /* first becomes an integer, if possible; else a string */
Eric Smithf6db4092007-08-27 23:52:26 +00001247 if (first_idx != -1)
Christian Heimes217cfd12007-12-02 14:31:20 +00001248 first_obj = PyLong_FromSsize_t(first_idx);
Eric Smithf6db4092007-08-27 23:52:26 +00001249 else
1250 /* convert "first" into a string object */
1251 first_obj = SubString_new_object(&first);
1252 if (first_obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001253 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001254
1255 /* return a tuple of values */
1256 result = PyTuple_Pack(2, first_obj, it);
1257
Eric Smith625cbf22007-08-29 03:22:59 +00001258done:
Eric Smithf6db4092007-08-27 23:52:26 +00001259 Py_XDECREF(it);
1260 Py_XDECREF(first_obj);
1261 return result;
1262}