blob: dbd01d3e7eec624cd7f0343e6f2a4d4beaa82840 [file] [log] [blame]
Eric Smith8c663262007-08-25 02:26:07 +00001/*
2 string_format.h -- implementation of string.format().
3
4 It uses the Objects/stringlib conventions, so that it can be
5 compiled for both unicode and string objects.
6*/
7
8
/* Tuning constants for growing the output buffer.  Every reallocation
   over-allocates by the current size increment.  The increment starts
   at INITIAL_SIZE_INCREMENT and is multiplied by SIZE_MULTIPLIER after
   each reallocation for as long as it is below MAX_SIZE_INCREMENT. */
#define INITIAL_SIZE_INCREMENT 100
#define SIZE_MULTIPLIER 2
#define MAX_SIZE_INCREMENT 3200
13
14
15/************************************************************************/
16/*********** Global data structures and forward declarations *********/
17/************************************************************************/
18
19/*
20 A SubString consists of the characters between two string or
21 unicode pointers.
22*/
23typedef struct {
24 STRINGLIB_CHAR *ptr;
25 STRINGLIB_CHAR *end;
26} SubString;
27
28
29/* forward declaration for recursion */
30static PyObject *
31build_string(SubString *input, PyObject *args, PyObject *kwargs,
32 int *recursion_level);
33
34
35
36/************************************************************************/
37/************************** Utility functions ************************/
38/************************************************************************/
39
40/* fill in a SubString from a pointer and length */
41Py_LOCAL_INLINE(void)
42SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
43{
44 str->ptr = p;
45 if (p == NULL)
46 str->end = NULL;
47 else
48 str->end = str->ptr + len;
49}
50
Eric Smith625cbf22007-08-29 03:22:59 +000051/* return a new string. if str->ptr is NULL, return None */
Eric Smith8c663262007-08-25 02:26:07 +000052Py_LOCAL_INLINE(PyObject *)
53SubString_new_object(SubString *str)
54{
Eric Smith625cbf22007-08-29 03:22:59 +000055 if (str->ptr == NULL) {
56 Py_INCREF(Py_None);
57 return Py_None;
58 }
59 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
60}
61
62/* return a new string. if str->ptr is NULL, return None */
63Py_LOCAL_INLINE(PyObject *)
64SubString_new_object_or_empty(SubString *str)
65{
66 if (str->ptr == NULL) {
67 return STRINGLIB_NEW(NULL, 0);
68 }
Eric Smith8c663262007-08-25 02:26:07 +000069 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
70}
71
72/************************************************************************/
Eric Smith8c663262007-08-25 02:26:07 +000073/*********** Output string management functions ****************/
74/************************************************************************/
75
76typedef struct {
77 STRINGLIB_CHAR *ptr;
78 STRINGLIB_CHAR *end;
79 PyObject *obj;
80 Py_ssize_t size_increment;
81} OutputString;
82
83/* initialize an OutputString object, reserving size characters */
84static int
85output_initialize(OutputString *output, Py_ssize_t size)
86{
87 output->obj = STRINGLIB_NEW(NULL, size);
88 if (output->obj == NULL)
89 return 0;
90
91 output->ptr = STRINGLIB_STR(output->obj);
92 output->end = STRINGLIB_LEN(output->obj) + output->ptr;
93 output->size_increment = INITIAL_SIZE_INCREMENT;
94
95 return 1;
96}
97
98/*
99 output_extend reallocates the output string buffer.
100 It returns a status: 0 for a failed reallocation,
101 1 for success.
102*/
103
104static int
105output_extend(OutputString *output, Py_ssize_t count)
106{
107 STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
108 Py_ssize_t curlen = output->ptr - startptr;
109 Py_ssize_t maxlen = curlen + count + output->size_increment;
110
111 if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
112 return 0;
113 startptr = STRINGLIB_STR(output->obj);
114 output->ptr = startptr + curlen;
115 output->end = startptr + maxlen;
116 if (output->size_increment < MAX_SIZE_INCREMENT)
117 output->size_increment *= SIZE_MULTIPLIER;
118 return 1;
119}
120
121/*
122 output_data dumps characters into our output string
123 buffer.
124
125 In some cases, it has to reallocate the string.
126
127 It returns a status: 0 for a failed reallocation,
128 1 for success.
129*/
130static int
131output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
132{
133 if ((count > output->end - output->ptr) && !output_extend(output, count))
134 return 0;
135 memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
136 output->ptr += count;
137 return 1;
138}
139
140/************************************************************************/
141/*********** Format string parsing -- integers and identifiers *********/
142/************************************************************************/
143
Eric Smith7ade6482007-08-26 22:27:13 +0000144static Py_ssize_t
145get_integer(const SubString *str)
Eric Smith8c663262007-08-25 02:26:07 +0000146{
Eric Smith7ade6482007-08-26 22:27:13 +0000147 Py_ssize_t accumulator = 0;
148 Py_ssize_t digitval;
149 Py_ssize_t oldaccumulator;
150 STRINGLIB_CHAR *p;
Eric Smith8c663262007-08-25 02:26:07 +0000151
Eric Smith7ade6482007-08-26 22:27:13 +0000152 /* empty string is an error */
153 if (str->ptr >= str->end)
154 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000155
Eric Smith7ade6482007-08-26 22:27:13 +0000156 for (p = str->ptr; p < str->end; p++) {
157 digitval = STRINGLIB_TODECIMAL(*p);
Eric Smith8c663262007-08-25 02:26:07 +0000158 if (digitval < 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000159 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000160 /*
161 This trick was copied from old Unicode format code. It's cute,
162 but would really suck on an old machine with a slow divide
163 implementation. Fortunately, in the normal case we do not
164 expect too many digits.
165 */
166 oldaccumulator = accumulator;
167 accumulator *= 10;
168 if ((accumulator+10)/10 != oldaccumulator+1) {
169 PyErr_Format(PyExc_ValueError,
170 "Too many decimal digits in format string");
171 return -1;
172 }
173 accumulator += digitval;
174 }
Eric Smith7ade6482007-08-26 22:27:13 +0000175 return accumulator;
Eric Smith8c663262007-08-25 02:26:07 +0000176}
177
178/************************************************************************/
179/******** Functions to get field objects and specification strings ******/
180/************************************************************************/
181
Eric Smith7ade6482007-08-26 22:27:13 +0000182/* do the equivalent of obj.name */
Eric Smith8c663262007-08-25 02:26:07 +0000183static PyObject *
Eric Smith7ade6482007-08-26 22:27:13 +0000184getattr(PyObject *obj, SubString *name)
Eric Smith8c663262007-08-25 02:26:07 +0000185{
Eric Smith7ade6482007-08-26 22:27:13 +0000186 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000187 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000188 if (str == NULL)
189 return NULL;
190 newobj = PyObject_GetAttr(obj, str);
191 Py_DECREF(str);
192 return newobj;
Eric Smith8c663262007-08-25 02:26:07 +0000193}
194
Eric Smith7ade6482007-08-26 22:27:13 +0000195/* do the equivalent of obj[idx], where obj is a sequence */
196static PyObject *
197getitem_sequence(PyObject *obj, Py_ssize_t idx)
198{
199 return PySequence_GetItem(obj, idx);
200}
201
202/* do the equivalent of obj[idx], where obj is not a sequence */
203static PyObject *
204getitem_idx(PyObject *obj, Py_ssize_t idx)
205{
206 PyObject *newobj;
207 PyObject *idx_obj = PyInt_FromSsize_t(idx);
208 if (idx_obj == NULL)
209 return NULL;
210 newobj = PyObject_GetItem(obj, idx_obj);
211 Py_DECREF(idx_obj);
212 return newobj;
213}
214
215/* do the equivalent of obj[name] */
216static PyObject *
217getitem_str(PyObject *obj, SubString *name)
218{
219 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000220 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000221 if (str == NULL)
222 return NULL;
223 newobj = PyObject_GetItem(obj, str);
224 Py_DECREF(str);
225 return newobj;
226}
227
228typedef struct {
229 /* the entire string we're parsing. we assume that someone else
230 is managing its lifetime, and that it will exist for the
231 lifetime of the iterator. can be empty */
232 SubString str;
233
234 /* pointer to where we are inside field_name */
235 STRINGLIB_CHAR *ptr;
236} FieldNameIterator;
237
238
239static int
240FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
241 Py_ssize_t len)
242{
243 SubString_init(&self->str, ptr, len);
244 self->ptr = self->str.ptr;
245 return 1;
246}
247
248static int
249_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
250{
251 STRINGLIB_CHAR c;
252
253 name->ptr = self->ptr;
254
255 /* return everything until '.' or '[' */
256 while (self->ptr < self->str.end) {
257 switch (c = *self->ptr++) {
258 case '[':
259 case '.':
260 /* backup so that we this character will be seen next time */
261 self->ptr--;
262 break;
263 default:
264 continue;
265 }
266 break;
267 }
268 /* end of string is okay */
269 name->end = self->ptr;
270 return 1;
271}
272
273static int
274_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
275{
276 STRINGLIB_CHAR c;
277
278 name->ptr = self->ptr;
279
280 /* return everything until ']' */
281 while (self->ptr < self->str.end) {
282 switch (c = *self->ptr++) {
283 case ']':
284 break;
285 default:
286 continue;
287 }
288 break;
289 }
290 /* end of string is okay */
291 /* don't include the ']' */
292 name->end = self->ptr-1;
293 return 1;
294}
295
296/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
297static int
298FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
299 Py_ssize_t *name_idx, SubString *name)
300{
301 /* check at end of input */
302 if (self->ptr >= self->str.end)
303 return 1;
304
305 switch (*self->ptr++) {
306 case '.':
307 *is_attribute = 1;
308 if (_FieldNameIterator_attr(self, name) == 0) {
309 return 0;
310 }
311 *name_idx = -1;
312 break;
313 case '[':
314 *is_attribute = 0;
315 if (_FieldNameIterator_item(self, name) == 0) {
316 return 0;
317 }
318 *name_idx = get_integer(name);
319 break;
320 default:
321 /* interal error, can't get here */
322 assert(0);
323 return 0;
324 }
325
326 /* empty string is an error */
327 if (name->ptr == name->end) {
328 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
329 return 0;
330 }
331
332 return 2;
333}
334
335
336/* input: field_name
337 output: 'first' points to the part before the first '[' or '.'
338 'first_idx' is -1 if 'first' is not an integer, otherwise
339 it's the value of first converted to an integer
340 'rest' is an iterator to return the rest
341*/
342static int
343field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
344 Py_ssize_t *first_idx, FieldNameIterator *rest)
345{
346 STRINGLIB_CHAR c;
347 STRINGLIB_CHAR *p = ptr;
348 STRINGLIB_CHAR *end = ptr + len;
349
350 /* find the part up until the first '.' or '[' */
351 while (p < end) {
352 switch (c = *p++) {
353 case '[':
354 case '.':
355 /* backup so that we this character is available to the
356 "rest" iterator */
357 p--;
358 break;
359 default:
360 continue;
361 }
362 break;
363 }
364
365 /* set up the return values */
366 SubString_init(first, ptr, p - ptr);
367 FieldNameIterator_init(rest, p, end - p);
368
369 /* see if "first" is an integer, in which case it's used as an index */
370 *first_idx = get_integer(first);
371
372 /* zero length string is an error */
373 if (first->ptr >= first->end) {
374 PyErr_SetString(PyExc_ValueError, "empty field name");
375 goto error;
376 }
377
378 return 1;
379error:
380 return 0;
381}
382
383
Eric Smith8c663262007-08-25 02:26:07 +0000384/*
385 get_field_object returns the object inside {}, before the
386 format_spec. It handles getindex and getattr lookups and consumes
387 the entire input string.
388*/
389static PyObject *
390get_field_object(SubString *input, PyObject *args, PyObject *kwargs)
391{
Eric Smith7ade6482007-08-26 22:27:13 +0000392 PyObject *obj = NULL;
393 int ok;
394 int is_attribute;
395 SubString name;
396 SubString first;
Eric Smith8c663262007-08-25 02:26:07 +0000397 Py_ssize_t index;
Eric Smith7ade6482007-08-26 22:27:13 +0000398 FieldNameIterator rest;
Eric Smith8c663262007-08-25 02:26:07 +0000399
Eric Smith7ade6482007-08-26 22:27:13 +0000400 if (!field_name_split(input->ptr, input->end - input->ptr, &first,
401 &index, &rest)) {
402 goto error;
403 }
Eric Smith8c663262007-08-25 02:26:07 +0000404
Eric Smith7ade6482007-08-26 22:27:13 +0000405 if (index == -1) {
406 /* look up in kwargs */
Eric Smith7a6dd292007-08-27 23:30:47 +0000407 PyObject *key = SubString_new_object(&first);
Eric Smith7ade6482007-08-26 22:27:13 +0000408 if (key == NULL)
409 goto error;
410 if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
411 PyErr_SetString(PyExc_ValueError, "Keyword argument not found "
412 "in format string");
413 Py_DECREF(key);
414 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000415 }
Neal Norwitz8a4eb292007-08-27 07:24:17 +0000416 Py_DECREF(key);
Neal Norwitz247b5152007-08-27 03:22:50 +0000417 Py_INCREF(obj);
Eric Smith0cb431c2007-08-28 01:07:27 +0000418 }
419 else {
Eric Smith7ade6482007-08-26 22:27:13 +0000420 /* look up in args */
421 obj = PySequence_GetItem(args, index);
422 if (obj == NULL) {
423 /* translate IndexError to a ValueError */
424 PyErr_SetString(PyExc_ValueError, "Not enough positional arguments "
425 "in format string");
426 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000427 }
428 }
Eric Smith7ade6482007-08-26 22:27:13 +0000429
430 /* iterate over the rest of the field_name */
431 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
432 &name)) == 2) {
433 PyObject *tmp;
434
435 if (is_attribute)
436 /* getattr lookup "." */
437 tmp = getattr(obj, &name);
438 else
439 /* getitem lookup "[]" */
440 if (index == -1)
441 tmp = getitem_str(obj, &name);
442 else
443 if (PySequence_Check(obj))
444 tmp = getitem_sequence(obj, index);
445 else
446 /* not a sequence */
447 tmp = getitem_idx(obj, index);
448 if (tmp == NULL)
449 goto error;
450
451 /* assign to obj */
452 Py_DECREF(obj);
453 obj = tmp;
Eric Smith8c663262007-08-25 02:26:07 +0000454 }
Eric Smith7ade6482007-08-26 22:27:13 +0000455 /* end of iterator, this is the non-error case */
456 if (ok == 1)
457 return obj;
458error:
459 Py_XDECREF(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000460 return NULL;
461}
462
463/************************************************************************/
464/***************** Field rendering functions **************************/
465/************************************************************************/
466
467/*
468 render_field() is the main function in this section. It takes the
469 field object and field specification string generated by
470 get_field_and_spec, and renders the field into the output string.
471
472 format() does the actual calling of the objects __format__ method.
473*/
474
475
476/* returns fieldobj.__format__(format_spec) */
477static PyObject *
478format(PyObject *fieldobj, SubString *format_spec)
479{
480 static PyObject *format_str = NULL;
481 PyObject *meth;
482 PyObject *spec = NULL;
483 PyObject *result = NULL;
484
485 /* Initialize cached value */
486 if (format_str == NULL) {
487 /* Initialize static variable needed by _PyType_Lookup */
488 format_str = PyUnicode_FromString("__format__");
489 if (format_str == NULL)
490 return NULL;
491 }
492
493 /* Make sure the type is initialized. float gets initialized late */
494 if (Py_Type(fieldobj)->tp_dict == NULL)
495 if (PyType_Ready(Py_Type(fieldobj)) < 0)
496 return NULL;
497
498 /* we need to create an object out of the pointers we have */
Eric Smith625cbf22007-08-29 03:22:59 +0000499 spec = SubString_new_object_or_empty(format_spec);
Eric Smith8c663262007-08-25 02:26:07 +0000500 if (spec == NULL)
501 goto done;
502
503 /* Find the (unbound!) __format__ method (a borrowed reference) */
504 meth = _PyType_Lookup(Py_Type(fieldobj), format_str);
505 if (meth == NULL) {
506 PyErr_Format(PyExc_TypeError,
507 "Type %.100s doesn't define __format__",
508 Py_Type(fieldobj)->tp_name);
509 goto done;
510 }
511
512 /* And call it, binding it to the value */
513 result = PyObject_CallFunctionObjArgs(meth, fieldobj, spec, NULL);
514 if (result == NULL)
515 goto done;
516
517 if (!STRINGLIB_CHECK(result)) {
518 PyErr_SetString(PyExc_TypeError,
519 "__format__ method did not return "
520 STRINGLIB_TYPE_NAME);
521 Py_DECREF(result);
522 result = NULL;
523 goto done;
524 }
525
526done:
527 Py_XDECREF(spec);
528 return result;
529}
530
531/*
532 render_field calls fieldobj.__format__(format_spec) method, and
533 appends to the output.
534*/
535static int
536render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
537{
538 int ok = 0;
539 PyObject *result = format(fieldobj, format_spec);
540
541 if (result == NULL)
542 goto done;
543
544 ok = output_data(output,
545 STRINGLIB_STR(result), STRINGLIB_LEN(result));
546done:
547 Py_XDECREF(result);
548 return ok;
549}
550
551static int
552parse_field(SubString *str, SubString *field_name, SubString *format_spec,
553 STRINGLIB_CHAR *conversion)
554{
555 STRINGLIB_CHAR c = 0;
556
557 /* initialize these, as they may be empty */
558 *conversion = '\0';
559 SubString_init(format_spec, NULL, 0);
560
561 /* search for the field name. it's terminated by the end of the
562 string, or a ':' or '!' */
563 field_name->ptr = str->ptr;
564 while (str->ptr < str->end) {
565 switch (c = *(str->ptr++)) {
566 case ':':
567 case '!':
568 break;
569 default:
570 continue;
571 }
572 break;
573 }
574
575 if (c == '!' || c == ':') {
576 /* we have a format specifier and/or a conversion */
577 /* don't include the last character */
578 field_name->end = str->ptr-1;
579
580 /* the format specifier is the rest of the string */
581 format_spec->ptr = str->ptr;
582 format_spec->end = str->end;
583
584 /* see if there's a conversion specifier */
585 if (c == '!') {
586 /* there must be another character present */
587 if (format_spec->ptr >= format_spec->end) {
588 PyErr_SetString(PyExc_ValueError,
589 "end of format while looking for conversion "
590 "specifier");
591 return 0;
592 }
593 *conversion = *(format_spec->ptr++);
594
595 /* if there is another character, it must be a colon */
596 if (format_spec->ptr < format_spec->end) {
597 c = *(format_spec->ptr++);
598 if (c != ':') {
599 PyErr_SetString(PyExc_ValueError,
600 "expected ':' after format specifier");
601 return 0;
602 }
603 }
604 }
605
606 return 1;
607
Eric Smith0cb431c2007-08-28 01:07:27 +0000608 }
609 else {
Eric Smith8c663262007-08-25 02:26:07 +0000610 /* end of string, there's no format_spec or conversion */
611 field_name->end = str->ptr;
612 return 1;
613 }
614}
615
616/************************************************************************/
617/******* Output string allocation and escape-to-markup processing ******/
618/************************************************************************/
619
620/* MarkupIterator breaks the string into pieces of either literal
621 text, or things inside {} that need to be marked up. it is
622 designed to make it easy to wrap a Python iterator around it, for
623 use with the Formatter class */
624
625typedef struct {
626 SubString str;
Eric Smith8c663262007-08-25 02:26:07 +0000627} MarkupIterator;
628
629static int
630MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
631{
632 SubString_init(&self->str, ptr, len);
Eric Smith8c663262007-08-25 02:26:07 +0000633 return 1;
634}
635
636/* returns 0 on error, 1 on non-error termination, and 2 if it got a
637 string (or something to be expanded) */
638static int
Eric Smith625cbf22007-08-29 03:22:59 +0000639MarkupIterator_next(MarkupIterator *self, SubString *literal,
Eric Smith8c663262007-08-25 02:26:07 +0000640 SubString *field_name, SubString *format_spec,
641 STRINGLIB_CHAR *conversion,
642 int *format_spec_needs_expanding)
643{
644 int at_end;
645 STRINGLIB_CHAR c = 0;
646 STRINGLIB_CHAR *start;
647 int count;
648 Py_ssize_t len;
Eric Smith625cbf22007-08-29 03:22:59 +0000649 int markup_follows = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000650
Eric Smith625cbf22007-08-29 03:22:59 +0000651 /* initialize all of the output variables */
652 SubString_init(literal, NULL, 0);
653 SubString_init(field_name, NULL, 0);
654 SubString_init(format_spec, NULL, 0);
655 *conversion = '\0';
Eric Smith8c663262007-08-25 02:26:07 +0000656 *format_spec_needs_expanding = 0;
657
Eric Smith625cbf22007-08-29 03:22:59 +0000658 /* No more input, end of iterator. This is the normal exit
659 path. */
Eric Smith8c663262007-08-25 02:26:07 +0000660 if (self->str.ptr >= self->str.end)
661 return 1;
662
Eric Smith8c663262007-08-25 02:26:07 +0000663 start = self->str.ptr;
664
Eric Smith625cbf22007-08-29 03:22:59 +0000665 /* First read any literal text. Read until the end of string, an
666 escaped '{' or '}', or an unescaped '{'. In order to never
667 allocate memory and so I can just pass pointers around, if
668 there's an escaped '{' or '}' then we'll return the literal
669 including the brace, but no format object. The next time
670 through, we'll return the rest of the literal, skipping past
671 the second consecutive brace. */
672 while (self->str.ptr < self->str.end) {
673 switch (c = *(self->str.ptr++)) {
674 case '{':
675 case '}':
676 markup_follows = 1;
677 break;
678 default:
679 continue;
Eric Smith8c663262007-08-25 02:26:07 +0000680 }
Eric Smith625cbf22007-08-29 03:22:59 +0000681 break;
Eric Smith0cb431c2007-08-28 01:07:27 +0000682 }
Eric Smith625cbf22007-08-29 03:22:59 +0000683
684 at_end = self->str.ptr >= self->str.end;
685 len = self->str.ptr - start;
686
687 if ((c == '}') && (at_end || (c != *self->str.ptr))) {
688 PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
689 "in format string");
690 return 0;
691 }
692 if (at_end && c == '{') {
693 PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
694 "in format string");
695 return 0;
696 }
697 if (!at_end) {
698 if (c == *self->str.ptr) {
699 /* escaped } or {, skip it in the input. there is no
700 markup object following us, just this literal text */
701 self->str.ptr++;
702 markup_follows = 0;
703 }
704 else
705 len--;
706 }
707
708 /* record the literal text */
709 literal->ptr = start;
710 literal->end = start + len;
711
712 if (!markup_follows)
713 return 2;
714
715 /* this is markup, find the end of the string by counting nested
716 braces. note that this prohibits escaped braces, so that
717 format_specs cannot have braces in them. */
718 count = 1;
719
720 start = self->str.ptr;
721
722 /* we know we can't have a zero length string, so don't worry
723 about that case */
724 while (self->str.ptr < self->str.end) {
725 switch (c = *(self->str.ptr++)) {
726 case '{':
727 /* the format spec needs to be recursively expanded.
728 this is an optimization, and not strictly needed */
729 *format_spec_needs_expanding = 1;
730 count++;
731 break;
732 case '}':
733 count--;
734 if (count <= 0) {
735 /* we're done. parse and get out */
736 SubString s;
737
738 SubString_init(&s, start, self->str.ptr - 1 - start);
739 if (parse_field(&s, field_name, format_spec, conversion) == 0)
740 return 0;
741
742 /* a zero length field_name is an error */
743 if (field_name->ptr == field_name->end) {
744 PyErr_SetString(PyExc_ValueError, "zero length field name "
745 "in format");
746 return 0;
747 }
748
749 /* success */
750 return 2;
Eric Smith8c663262007-08-25 02:26:07 +0000751 }
752 break;
753 }
Eric Smith8c663262007-08-25 02:26:07 +0000754 }
Eric Smith625cbf22007-08-29 03:22:59 +0000755
756 /* end of string while searching for matching '}' */
757 PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
758 return 0;
Eric Smith8c663262007-08-25 02:26:07 +0000759}
760
761
762/* do the !r or !s conversion on obj */
763static PyObject *
764do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
765{
766 /* XXX in pre-3.0, do we need to convert this to unicode, since it
767 might have returned a string? */
768 switch (conversion) {
769 case 'r':
770 return PyObject_Repr(obj);
771 case 's':
772 return PyObject_Unicode(obj);
773 default:
774 PyErr_Format(PyExc_ValueError,
775 "Unknown converion specifier %c",
776 conversion);
777 return NULL;
778 }
779}
780
781/* given:
782
783 {field_name!conversion:format_spec}
784
785 compute the result and write it to output.
786 format_spec_needs_expanding is an optimization. if it's false,
787 just output the string directly, otherwise recursively expand the
788 format_spec string. */
789
790static int
791output_markup(SubString *field_name, SubString *format_spec,
792 int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
793 OutputString *output, PyObject *args, PyObject *kwargs,
794 int *recursion_level)
795{
796 PyObject *tmp = NULL;
797 PyObject *fieldobj = NULL;
798 SubString expanded_format_spec;
799 SubString *actual_format_spec;
800 int result = 0;
801
802 /* convert field_name to an object */
803 fieldobj = get_field_object(field_name, args, kwargs);
804 if (fieldobj == NULL)
805 goto done;
806
807 if (conversion != '\0') {
808 tmp = do_conversion(fieldobj, conversion);
809 if (tmp == NULL)
810 goto done;
811
812 /* do the assignment, transferring ownership: fieldobj = tmp */
813 Py_DECREF(fieldobj);
814 fieldobj = tmp;
815 tmp = NULL;
816 }
817
818 /* if needed, recurively compute the format_spec */
819 if (format_spec_needs_expanding) {
820 tmp = build_string(format_spec, args, kwargs, recursion_level);
821 if (tmp == NULL)
822 goto done;
823
824 /* note that in the case we're expanding the format string,
825 tmp must be kept around until after the call to
826 render_field. */
827 SubString_init(&expanded_format_spec,
828 STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
829 actual_format_spec = &expanded_format_spec;
Eric Smith0cb431c2007-08-28 01:07:27 +0000830 }
831 else
Eric Smith8c663262007-08-25 02:26:07 +0000832 actual_format_spec = format_spec;
833
834 if (render_field(fieldobj, actual_format_spec, output) == 0)
835 goto done;
836
837 result = 1;
838
839done:
840 Py_XDECREF(fieldobj);
841 Py_XDECREF(tmp);
842
843 return result;
844}
845
846/*
847 do_markup is the top-level loop for the format() function. It
848 searches through the format string for escapes to markup codes, and
849 calls other functions to move non-markup text to the output,
850 and to perform the markup to the output.
851*/
852static int
853do_markup(SubString *input, PyObject *args, PyObject *kwargs,
854 OutputString *output, int *recursion_level)
855{
856 MarkupIterator iter;
Eric Smith8c663262007-08-25 02:26:07 +0000857 int format_spec_needs_expanding;
858 int result;
Eric Smith625cbf22007-08-29 03:22:59 +0000859 SubString literal;
Eric Smith8c663262007-08-25 02:26:07 +0000860 SubString field_name;
861 SubString format_spec;
862 STRINGLIB_CHAR conversion;
863
864 MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
Eric Smith625cbf22007-08-29 03:22:59 +0000865 while ((result = MarkupIterator_next(&iter, &literal, &field_name,
Eric Smith8c663262007-08-25 02:26:07 +0000866 &format_spec, &conversion,
867 &format_spec_needs_expanding)) == 2) {
Eric Smith625cbf22007-08-29 03:22:59 +0000868 if (!output_data(output, literal.ptr, literal.end - literal.ptr))
869 return 0;
870 if (field_name.ptr != field_name.end)
Eric Smith8c663262007-08-25 02:26:07 +0000871 if (!output_markup(&field_name, &format_spec,
872 format_spec_needs_expanding, conversion, output,
873 args, kwargs, recursion_level))
874 return 0;
Eric Smith8c663262007-08-25 02:26:07 +0000875 }
876 return result;
877}
878
879
880/*
881 build_string allocates the output string and then
882 calls do_markup to do the heavy lifting.
883*/
884static PyObject *
885build_string(SubString *input, PyObject *args, PyObject *kwargs,
886 int *recursion_level)
887{
888 OutputString output;
889 PyObject *result = NULL;
890 Py_ssize_t count;
891
892 output.obj = NULL; /* needed so cleanup code always works */
893
894 /* check the recursion level */
895 (*recursion_level)--;
896 if (*recursion_level < 0) {
897 PyErr_SetString(PyExc_ValueError,
898 "Max string recursion exceeded");
899 goto done;
900 }
901
902 /* initial size is the length of the format string, plus the size
903 increment. seems like a reasonable default */
904 if (!output_initialize(&output,
905 input->end - input->ptr +
906 INITIAL_SIZE_INCREMENT))
907 goto done;
908
909 if (!do_markup(input, args, kwargs, &output, recursion_level)) {
910 goto done;
911 }
912
913 count = output.ptr - STRINGLIB_STR(output.obj);
914 if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
915 goto done;
916 }
917
918 /* transfer ownership to result */
919 result = output.obj;
920 output.obj = NULL;
921
922done:
923 (*recursion_level)++;
924 Py_XDECREF(output.obj);
925 return result;
926}
927
928/************************************************************************/
929/*********** main routine ***********************************************/
930/************************************************************************/
931
932/* this is the main entry point */
933static PyObject *
934do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
935{
936 SubString input;
937
938 /* PEP 3101 says only 2 levels, so that
939 "{0:{1}}".format('abc', 's') # works
940 "{0:{1:{2}}}".format('abc', 's', '') # fails
941 */
942 int recursion_level = 2;
943
944 SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
945 return build_string(&input, args, kwargs, &recursion_level);
946}
Eric Smithf6db4092007-08-27 23:52:26 +0000947
948
949
950/************************************************************************/
951/*********** formatteriterator ******************************************/
952/************************************************************************/
953
954/* This is used to implement string.Formatter.vparse(). It exists so
955 Formatter can share code with the built in unicode.format() method.
956 It's really just a wrapper around MarkupIterator that is callable
957 from Python. */
958
959typedef struct {
960 PyObject_HEAD
961
962 PyUnicodeObject *str;
963
964 MarkupIterator it_markup;
965} formatteriterobject;
966
967static void
968formatteriter_dealloc(formatteriterobject *it)
969{
970 Py_XDECREF(it->str);
971 PyObject_FREE(it);
972}
973
974/* returns a tuple:
Eric Smith625cbf22007-08-29 03:22:59 +0000975 (literal, field_name, format_spec, conversion)
976
977 literal is any literal text to output. might be zero length
978 field_name is the string before the ':'. might be None
979 format_spec is the string after the ':'. mibht be None
980 conversion is either None, or the string after the '!'
Eric Smithf6db4092007-08-27 23:52:26 +0000981*/
982static PyObject *
983formatteriter_next(formatteriterobject *it)
984{
985 SubString literal;
986 SubString field_name;
987 SubString format_spec;
988 Py_UNICODE conversion;
Eric Smithf6db4092007-08-27 23:52:26 +0000989 int format_spec_needs_expanding;
Eric Smith625cbf22007-08-29 03:22:59 +0000990 int result = MarkupIterator_next(&it->it_markup, &literal, &field_name,
991 &format_spec, &conversion,
Eric Smithf6db4092007-08-27 23:52:26 +0000992 &format_spec_needs_expanding);
993
994 /* all of the SubString objects point into it->str, so no
995 memory management needs to be done on them */
996 assert(0 <= result && result <= 2);
Eric Smith0cb431c2007-08-28 01:07:27 +0000997 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +0000998 /* if 0, error has already been set, if 1, iterator is empty */
999 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +00001000 else {
Eric Smithf6db4092007-08-27 23:52:26 +00001001 PyObject *literal_str = NULL;
1002 PyObject *field_name_str = NULL;
1003 PyObject *format_spec_str = NULL;
1004 PyObject *conversion_str = NULL;
1005 PyObject *tuple = NULL;
Eric Smith625cbf22007-08-29 03:22:59 +00001006 int has_field = field_name.ptr != field_name.end;
Eric Smithf6db4092007-08-27 23:52:26 +00001007
Eric Smith625cbf22007-08-29 03:22:59 +00001008 literal_str = SubString_new_object(&literal);
1009 if (literal_str == NULL)
1010 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001011
Eric Smith625cbf22007-08-29 03:22:59 +00001012 field_name_str = SubString_new_object(&field_name);
1013 if (field_name_str == NULL)
1014 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001015
Eric Smith625cbf22007-08-29 03:22:59 +00001016 /* if field_name is non-zero length, return a string for
1017 format_spec (even if zero length), else return None */
1018 format_spec_str = (has_field ?
1019 SubString_new_object_or_empty :
1020 SubString_new_object)(&format_spec);
1021 if (format_spec_str == NULL)
1022 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001023
Eric Smith625cbf22007-08-29 03:22:59 +00001024 /* if the conversion is not specified, return a None,
1025 otherwise create a one length string with the conversion
1026 character */
1027 if (conversion == '\0') {
Eric Smithf6db4092007-08-27 23:52:26 +00001028 conversion_str = Py_None;
Eric Smithf6db4092007-08-27 23:52:26 +00001029 Py_INCREF(conversion_str);
1030 }
Eric Smith625cbf22007-08-29 03:22:59 +00001031 else
1032 conversion_str = PyUnicode_FromUnicode(&conversion, 1);
1033 if (conversion_str == NULL)
1034 goto done;
1035
Eric Smith9e7c8da2007-08-28 11:15:20 +00001036 tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
Eric Smithf6db4092007-08-27 23:52:26 +00001037 conversion_str);
Eric Smith625cbf22007-08-29 03:22:59 +00001038 done:
Eric Smithf6db4092007-08-27 23:52:26 +00001039 Py_XDECREF(literal_str);
1040 Py_XDECREF(field_name_str);
1041 Py_XDECREF(format_spec_str);
1042 Py_XDECREF(conversion_str);
1043 return tuple;
1044 }
1045}
1046
1047static PyMethodDef formatteriter_methods[] = {
1048 {NULL, NULL} /* sentinel */
1049};
1050
1051PyTypeObject PyFormatterIter_Type = {
1052 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1053 "formatteriterator", /* tp_name */
1054 sizeof(formatteriterobject), /* tp_basicsize */
1055 0, /* tp_itemsize */
1056 /* methods */
1057 (destructor)formatteriter_dealloc, /* tp_dealloc */
1058 0, /* tp_print */
1059 0, /* tp_getattr */
1060 0, /* tp_setattr */
1061 0, /* tp_compare */
1062 0, /* tp_repr */
1063 0, /* tp_as_number */
1064 0, /* tp_as_sequence */
1065 0, /* tp_as_mapping */
1066 0, /* tp_hash */
1067 0, /* tp_call */
1068 0, /* tp_str */
1069 PyObject_GenericGetAttr, /* tp_getattro */
1070 0, /* tp_setattro */
1071 0, /* tp_as_buffer */
1072 Py_TPFLAGS_DEFAULT, /* tp_flags */
1073 0, /* tp_doc */
1074 0, /* tp_traverse */
1075 0, /* tp_clear */
1076 0, /* tp_richcompare */
1077 0, /* tp_weaklistoffset */
1078 PyObject_SelfIter, /* tp_iter */
1079 (iternextfunc)formatteriter_next, /* tp_iternext */
1080 formatteriter_methods, /* tp_methods */
1081 0,
1082};
1083
1084/* unicode_formatter_parser is used to implement
1085 string.Formatter.vformat. it parses a string and returns tuples
1086 describing the parsed elements. It's a wrapper around
1087 stringlib/string_format.h's MarkupIterator */
1088static PyObject *
1089formatter_parser(PyUnicodeObject *self)
1090{
1091 formatteriterobject *it;
1092
1093 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1094 if (it == NULL)
1095 return NULL;
1096
1097 /* take ownership, give the object to the iterator */
1098 Py_INCREF(self);
1099 it->str = self;
1100
1101 /* initialize the contained MarkupIterator */
1102 MarkupIterator_init(&it->it_markup,
1103 PyUnicode_AS_UNICODE(self),
1104 PyUnicode_GET_SIZE(self));
1105
1106 return (PyObject *)it;
1107}
1108
1109
1110/************************************************************************/
1111/*********** fieldnameiterator ******************************************/
1112/************************************************************************/
1113
1114
1115/* This is used to implement string.Formatter.vparse(). It parses the
1116 field name into attribute and item values. It's a Python-callable
1117 wrapper around FieldNameIterator */
1118
1119typedef struct {
1120 PyObject_HEAD
1121
1122 PyUnicodeObject *str;
1123
1124 FieldNameIterator it_field;
1125} fieldnameiterobject;
1126
1127static void
1128fieldnameiter_dealloc(fieldnameiterobject *it)
1129{
1130 Py_XDECREF(it->str);
1131 PyObject_FREE(it);
1132}
1133
1134/* returns a tuple:
1135 (is_attr, value)
1136 is_attr is true if we used attribute syntax (e.g., '.foo')
1137 false if we used index syntax (e.g., '[foo]')
1138 value is an integer or string
1139*/
1140static PyObject *
1141fieldnameiter_next(fieldnameiterobject *it)
1142{
1143 int result;
1144 int is_attr;
1145 Py_ssize_t idx;
1146 SubString name;
1147
1148 result = FieldNameIterator_next(&it->it_field, &is_attr,
1149 &idx, &name);
Eric Smith0cb431c2007-08-28 01:07:27 +00001150 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +00001151 /* if 0, error has already been set, if 1, iterator is empty */
1152 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +00001153 else {
Eric Smithf6db4092007-08-27 23:52:26 +00001154 PyObject* result = NULL;
1155 PyObject* is_attr_obj = NULL;
1156 PyObject* obj = NULL;
1157
1158 is_attr_obj = PyBool_FromLong(is_attr);
1159 if (is_attr_obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001160 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001161
1162 /* either an integer or a string */
1163 if (idx != -1)
1164 obj = PyInt_FromSsize_t(idx);
1165 else
1166 obj = SubString_new_object(&name);
1167 if (obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001168 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001169
1170 /* return a tuple of values */
1171 result = PyTuple_Pack(2, is_attr_obj, obj);
Eric Smithf6db4092007-08-27 23:52:26 +00001172
Eric Smith625cbf22007-08-29 03:22:59 +00001173 done:
Eric Smithf6db4092007-08-27 23:52:26 +00001174 Py_XDECREF(is_attr_obj);
1175 Py_XDECREF(obj);
Eric Smith625cbf22007-08-29 03:22:59 +00001176 return result;
Eric Smithf6db4092007-08-27 23:52:26 +00001177 }
Eric Smithf6db4092007-08-27 23:52:26 +00001178}
1179
1180static PyMethodDef fieldnameiter_methods[] = {
1181 {NULL, NULL} /* sentinel */
1182};
1183
1184static PyTypeObject PyFieldNameIter_Type = {
1185 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1186 "fieldnameiterator", /* tp_name */
1187 sizeof(fieldnameiterobject), /* tp_basicsize */
1188 0, /* tp_itemsize */
1189 /* methods */
1190 (destructor)fieldnameiter_dealloc, /* tp_dealloc */
1191 0, /* tp_print */
1192 0, /* tp_getattr */
1193 0, /* tp_setattr */
1194 0, /* tp_compare */
1195 0, /* tp_repr */
1196 0, /* tp_as_number */
1197 0, /* tp_as_sequence */
1198 0, /* tp_as_mapping */
1199 0, /* tp_hash */
1200 0, /* tp_call */
1201 0, /* tp_str */
1202 PyObject_GenericGetAttr, /* tp_getattro */
1203 0, /* tp_setattro */
1204 0, /* tp_as_buffer */
1205 Py_TPFLAGS_DEFAULT, /* tp_flags */
1206 0, /* tp_doc */
1207 0, /* tp_traverse */
1208 0, /* tp_clear */
1209 0, /* tp_richcompare */
1210 0, /* tp_weaklistoffset */
1211 PyObject_SelfIter, /* tp_iter */
1212 (iternextfunc)fieldnameiter_next, /* tp_iternext */
1213 fieldnameiter_methods, /* tp_methods */
1214 0};
1215
1216/* unicode_formatter_field_name_split is used to implement
1217 string.Formatter.vformat. it takes an PEP 3101 "field name", and
1218 returns a tuple of (first, rest): "first", the part before the
1219 first '.' or '['; and "rest", an iterator for the rest of the field
1220 name. it's a wrapper around stringlib/string_format.h's
1221 field_name_split. The iterator it returns is a
1222 FieldNameIterator */
1223static PyObject *
1224formatter_field_name_split(PyUnicodeObject *self)
1225{
1226 SubString first;
1227 Py_ssize_t first_idx;
1228 fieldnameiterobject *it;
1229
1230 PyObject *first_obj = NULL;
1231 PyObject *result = NULL;
1232
1233 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1234 if (it == NULL)
1235 return NULL;
1236
1237 /* take ownership, give the object to the iterator. this is
1238 just to keep the field_name alive */
1239 Py_INCREF(self);
1240 it->str = self;
1241
1242 if (!field_name_split(STRINGLIB_STR(self),
1243 STRINGLIB_LEN(self),
1244 &first, &first_idx, &it->it_field))
Eric Smith625cbf22007-08-29 03:22:59 +00001245 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001246
Eric Smith0cb431c2007-08-28 01:07:27 +00001247 /* first becomes an integer, if possible; else a string */
Eric Smithf6db4092007-08-27 23:52:26 +00001248 if (first_idx != -1)
1249 first_obj = PyInt_FromSsize_t(first_idx);
1250 else
1251 /* convert "first" into a string object */
1252 first_obj = SubString_new_object(&first);
1253 if (first_obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001254 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001255
1256 /* return a tuple of values */
1257 result = PyTuple_Pack(2, first_obj, it);
1258
Eric Smith625cbf22007-08-29 03:22:59 +00001259done:
Eric Smithf6db4092007-08-27 23:52:26 +00001260 Py_XDECREF(it);
1261 Py_XDECREF(first_obj);
1262 return result;
1263}