blob: 44c5442e988bf887b25b2ab3fcfdebd5790f56ae [file] [log] [blame]
Eric Smith8c663262007-08-25 02:26:07 +00001/*
2 string_format.h -- implementation of string.format().
3
4 It uses the Objects/stringlib conventions, so that it can be
5 compiled for both unicode and string objects.
6*/
7
8
/* Defines for more efficiently reallocating the string buffer.

   Each reallocation in output_extend() reserves the current size increment
   on top of what is actually needed.  The increment starts at
   INITIAL_SIZE_INCREMENT and is multiplied by SIZE_MULTIPLIER after every
   reallocation for as long as it is still below MAX_SIZE_INCREMENT. */
#define INITIAL_SIZE_INCREMENT 100
#define SIZE_MULTIPLIER 2
#define MAX_SIZE_INCREMENT 3200
13
14
15/************************************************************************/
16/*********** Global data structures and forward declarations *********/
17/************************************************************************/
18
/*
   A SubString consists of the characters between two string or
   unicode pointers.  It is the half-open range [ptr, end): its length is
   end - ptr, and it does not own the memory it points into.
*/
typedef struct {
    STRINGLIB_CHAR *ptr;    /* first character of the range (may be NULL) */
    STRINGLIB_CHAR *end;    /* one past the last character of the range */
} SubString;
27
28
/* forward declaration for recursion: output_markup() calls build_string()
   to expand a nested format_spec, and build_string() in turn reaches
   output_markup() through do_markup(). */
static PyObject *
build_string(SubString *input, PyObject *args, PyObject *kwargs,
             int *recursion_level);
33
34
35
36/************************************************************************/
37/************************** Utility functions ************************/
38/************************************************************************/
39
40/* fill in a SubString from a pointer and length */
41Py_LOCAL_INLINE(void)
42SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
43{
44 str->ptr = p;
45 if (p == NULL)
46 str->end = NULL;
47 else
48 str->end = str->ptr + len;
49}
50
51Py_LOCAL_INLINE(PyObject *)
52SubString_new_object(SubString *str)
53{
54 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
55}
56
57/************************************************************************/
Eric Smith8c663262007-08-25 02:26:07 +000058/*********** Output string management functions ****************/
59/************************************************************************/
60
/* Growable output buffer backed by a string object. */
typedef struct {
    STRINGLIB_CHAR *ptr;        /* next position to write to inside obj */
    STRINGLIB_CHAR *end;        /* one past the last usable slot in obj */
    PyObject *obj;              /* the string object being filled in */
    Py_ssize_t size_increment;  /* extra room added by the next
                                   output_extend() call */
} OutputString;
67
68/* initialize an OutputString object, reserving size characters */
69static int
70output_initialize(OutputString *output, Py_ssize_t size)
71{
72 output->obj = STRINGLIB_NEW(NULL, size);
73 if (output->obj == NULL)
74 return 0;
75
76 output->ptr = STRINGLIB_STR(output->obj);
77 output->end = STRINGLIB_LEN(output->obj) + output->ptr;
78 output->size_increment = INITIAL_SIZE_INCREMENT;
79
80 return 1;
81}
82
83/*
84 output_extend reallocates the output string buffer.
85 It returns a status: 0 for a failed reallocation,
86 1 for success.
87*/
88
89static int
90output_extend(OutputString *output, Py_ssize_t count)
91{
92 STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
93 Py_ssize_t curlen = output->ptr - startptr;
94 Py_ssize_t maxlen = curlen + count + output->size_increment;
95
96 if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
97 return 0;
98 startptr = STRINGLIB_STR(output->obj);
99 output->ptr = startptr + curlen;
100 output->end = startptr + maxlen;
101 if (output->size_increment < MAX_SIZE_INCREMENT)
102 output->size_increment *= SIZE_MULTIPLIER;
103 return 1;
104}
105
106/*
107 output_data dumps characters into our output string
108 buffer.
109
110 In some cases, it has to reallocate the string.
111
112 It returns a status: 0 for a failed reallocation,
113 1 for success.
114*/
115static int
116output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
117{
118 if ((count > output->end - output->ptr) && !output_extend(output, count))
119 return 0;
120 memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
121 output->ptr += count;
122 return 1;
123}
124
125/************************************************************************/
126/*********** Format string parsing -- integers and identifiers *********/
127/************************************************************************/
128
Eric Smith7ade6482007-08-26 22:27:13 +0000129static Py_ssize_t
130get_integer(const SubString *str)
Eric Smith8c663262007-08-25 02:26:07 +0000131{
Eric Smith7ade6482007-08-26 22:27:13 +0000132 Py_ssize_t accumulator = 0;
133 Py_ssize_t digitval;
134 Py_ssize_t oldaccumulator;
135 STRINGLIB_CHAR *p;
Eric Smith8c663262007-08-25 02:26:07 +0000136
Eric Smith7ade6482007-08-26 22:27:13 +0000137 /* empty string is an error */
138 if (str->ptr >= str->end)
139 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000140
Eric Smith7ade6482007-08-26 22:27:13 +0000141 for (p = str->ptr; p < str->end; p++) {
142 digitval = STRINGLIB_TODECIMAL(*p);
Eric Smith8c663262007-08-25 02:26:07 +0000143 if (digitval < 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000144 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000145 /*
146 This trick was copied from old Unicode format code. It's cute,
147 but would really suck on an old machine with a slow divide
148 implementation. Fortunately, in the normal case we do not
149 expect too many digits.
150 */
151 oldaccumulator = accumulator;
152 accumulator *= 10;
153 if ((accumulator+10)/10 != oldaccumulator+1) {
154 PyErr_Format(PyExc_ValueError,
155 "Too many decimal digits in format string");
156 return -1;
157 }
158 accumulator += digitval;
159 }
Eric Smith7ade6482007-08-26 22:27:13 +0000160 return accumulator;
Eric Smith8c663262007-08-25 02:26:07 +0000161}
162
163/************************************************************************/
164/******** Functions to get field objects and specification strings ******/
165/************************************************************************/
166
Eric Smith7ade6482007-08-26 22:27:13 +0000167/* do the equivalent of obj.name */
Eric Smith8c663262007-08-25 02:26:07 +0000168static PyObject *
Eric Smith7ade6482007-08-26 22:27:13 +0000169getattr(PyObject *obj, SubString *name)
Eric Smith8c663262007-08-25 02:26:07 +0000170{
Eric Smith7ade6482007-08-26 22:27:13 +0000171 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000172 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000173 if (str == NULL)
174 return NULL;
175 newobj = PyObject_GetAttr(obj, str);
176 Py_DECREF(str);
177 return newobj;
Eric Smith8c663262007-08-25 02:26:07 +0000178}
179
Eric Smith7ade6482007-08-26 22:27:13 +0000180/* do the equivalent of obj[idx], where obj is a sequence */
181static PyObject *
182getitem_sequence(PyObject *obj, Py_ssize_t idx)
183{
184 return PySequence_GetItem(obj, idx);
185}
186
187/* do the equivalent of obj[idx], where obj is not a sequence */
188static PyObject *
189getitem_idx(PyObject *obj, Py_ssize_t idx)
190{
191 PyObject *newobj;
192 PyObject *idx_obj = PyInt_FromSsize_t(idx);
193 if (idx_obj == NULL)
194 return NULL;
195 newobj = PyObject_GetItem(obj, idx_obj);
196 Py_DECREF(idx_obj);
197 return newobj;
198}
199
200/* do the equivalent of obj[name] */
201static PyObject *
202getitem_str(PyObject *obj, SubString *name)
203{
204 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000205 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000206 if (str == NULL)
207 return NULL;
208 newobj = PyObject_GetItem(obj, str);
209 Py_DECREF(str);
210 return newobj;
211}
212
/* Iteration state for walking the ".attr" and "[item]" components of a
   field name. */
typedef struct {
    /* the entire string we're parsing.  we assume that someone else
       is managing its lifetime, and that it will exist for the
       lifetime of the iterator.  can be empty */
    SubString str;

    /* pointer to where we are inside field_name; advanced as components
       are consumed */
    STRINGLIB_CHAR *ptr;
} FieldNameIterator;
222
223
224static int
225FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
226 Py_ssize_t len)
227{
228 SubString_init(&self->str, ptr, len);
229 self->ptr = self->str.ptr;
230 return 1;
231}
232
233static int
234_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
235{
236 STRINGLIB_CHAR c;
237
238 name->ptr = self->ptr;
239
240 /* return everything until '.' or '[' */
241 while (self->ptr < self->str.end) {
242 switch (c = *self->ptr++) {
243 case '[':
244 case '.':
245 /* backup so that we this character will be seen next time */
246 self->ptr--;
247 break;
248 default:
249 continue;
250 }
251 break;
252 }
253 /* end of string is okay */
254 name->end = self->ptr;
255 return 1;
256}
257
258static int
259_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
260{
261 STRINGLIB_CHAR c;
262
263 name->ptr = self->ptr;
264
265 /* return everything until ']' */
266 while (self->ptr < self->str.end) {
267 switch (c = *self->ptr++) {
268 case ']':
269 break;
270 default:
271 continue;
272 }
273 break;
274 }
275 /* end of string is okay */
276 /* don't include the ']' */
277 name->end = self->ptr-1;
278 return 1;
279}
280
281/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
282static int
283FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
284 Py_ssize_t *name_idx, SubString *name)
285{
286 /* check at end of input */
287 if (self->ptr >= self->str.end)
288 return 1;
289
290 switch (*self->ptr++) {
291 case '.':
292 *is_attribute = 1;
293 if (_FieldNameIterator_attr(self, name) == 0) {
294 return 0;
295 }
296 *name_idx = -1;
297 break;
298 case '[':
299 *is_attribute = 0;
300 if (_FieldNameIterator_item(self, name) == 0) {
301 return 0;
302 }
303 *name_idx = get_integer(name);
304 break;
305 default:
306 /* interal error, can't get here */
307 assert(0);
308 return 0;
309 }
310
311 /* empty string is an error */
312 if (name->ptr == name->end) {
313 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
314 return 0;
315 }
316
317 return 2;
318}
319
320
321/* input: field_name
322 output: 'first' points to the part before the first '[' or '.'
323 'first_idx' is -1 if 'first' is not an integer, otherwise
324 it's the value of first converted to an integer
325 'rest' is an iterator to return the rest
326*/
327static int
328field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
329 Py_ssize_t *first_idx, FieldNameIterator *rest)
330{
331 STRINGLIB_CHAR c;
332 STRINGLIB_CHAR *p = ptr;
333 STRINGLIB_CHAR *end = ptr + len;
334
335 /* find the part up until the first '.' or '[' */
336 while (p < end) {
337 switch (c = *p++) {
338 case '[':
339 case '.':
340 /* backup so that we this character is available to the
341 "rest" iterator */
342 p--;
343 break;
344 default:
345 continue;
346 }
347 break;
348 }
349
350 /* set up the return values */
351 SubString_init(first, ptr, p - ptr);
352 FieldNameIterator_init(rest, p, end - p);
353
354 /* see if "first" is an integer, in which case it's used as an index */
355 *first_idx = get_integer(first);
356
357 /* zero length string is an error */
358 if (first->ptr >= first->end) {
359 PyErr_SetString(PyExc_ValueError, "empty field name");
360 goto error;
361 }
362
363 return 1;
364error:
365 return 0;
366}
367
368
Eric Smith8c663262007-08-25 02:26:07 +0000369/*
370 get_field_object returns the object inside {}, before the
371 format_spec. It handles getindex and getattr lookups and consumes
372 the entire input string.
373*/
374static PyObject *
375get_field_object(SubString *input, PyObject *args, PyObject *kwargs)
376{
Eric Smith7ade6482007-08-26 22:27:13 +0000377 PyObject *obj = NULL;
378 int ok;
379 int is_attribute;
380 SubString name;
381 SubString first;
Eric Smith8c663262007-08-25 02:26:07 +0000382 Py_ssize_t index;
Eric Smith7ade6482007-08-26 22:27:13 +0000383 FieldNameIterator rest;
Eric Smith8c663262007-08-25 02:26:07 +0000384
Eric Smith7ade6482007-08-26 22:27:13 +0000385 if (!field_name_split(input->ptr, input->end - input->ptr, &first,
386 &index, &rest)) {
387 goto error;
388 }
Eric Smith8c663262007-08-25 02:26:07 +0000389
Eric Smith7ade6482007-08-26 22:27:13 +0000390 if (index == -1) {
391 /* look up in kwargs */
Eric Smith7a6dd292007-08-27 23:30:47 +0000392 PyObject *key = SubString_new_object(&first);
Eric Smith7ade6482007-08-26 22:27:13 +0000393 if (key == NULL)
394 goto error;
395 if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
396 PyErr_SetString(PyExc_ValueError, "Keyword argument not found "
397 "in format string");
398 Py_DECREF(key);
399 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000400 }
Neal Norwitz8a4eb292007-08-27 07:24:17 +0000401 Py_DECREF(key);
Neal Norwitz247b5152007-08-27 03:22:50 +0000402 Py_INCREF(obj);
Eric Smith0cb431c2007-08-28 01:07:27 +0000403 }
404 else {
Eric Smith7ade6482007-08-26 22:27:13 +0000405 /* look up in args */
406 obj = PySequence_GetItem(args, index);
407 if (obj == NULL) {
408 /* translate IndexError to a ValueError */
409 PyErr_SetString(PyExc_ValueError, "Not enough positional arguments "
410 "in format string");
411 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000412 }
413 }
Eric Smith7ade6482007-08-26 22:27:13 +0000414
415 /* iterate over the rest of the field_name */
416 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
417 &name)) == 2) {
418 PyObject *tmp;
419
420 if (is_attribute)
421 /* getattr lookup "." */
422 tmp = getattr(obj, &name);
423 else
424 /* getitem lookup "[]" */
425 if (index == -1)
426 tmp = getitem_str(obj, &name);
427 else
428 if (PySequence_Check(obj))
429 tmp = getitem_sequence(obj, index);
430 else
431 /* not a sequence */
432 tmp = getitem_idx(obj, index);
433 if (tmp == NULL)
434 goto error;
435
436 /* assign to obj */
437 Py_DECREF(obj);
438 obj = tmp;
Eric Smith8c663262007-08-25 02:26:07 +0000439 }
Eric Smith7ade6482007-08-26 22:27:13 +0000440 /* end of iterator, this is the non-error case */
441 if (ok == 1)
442 return obj;
443error:
444 Py_XDECREF(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000445 return NULL;
446}
447
448/************************************************************************/
449/***************** Field rendering functions **************************/
450/************************************************************************/
451
452/*
453 render_field() is the main function in this section. It takes the
454 field object and field specification string generated by
455 get_field_and_spec, and renders the field into the output string.
456
457 format() does the actual calling of the objects __format__ method.
458*/
459
460
461/* returns fieldobj.__format__(format_spec) */
462static PyObject *
463format(PyObject *fieldobj, SubString *format_spec)
464{
465 static PyObject *format_str = NULL;
466 PyObject *meth;
467 PyObject *spec = NULL;
468 PyObject *result = NULL;
469
470 /* Initialize cached value */
471 if (format_str == NULL) {
472 /* Initialize static variable needed by _PyType_Lookup */
473 format_str = PyUnicode_FromString("__format__");
474 if (format_str == NULL)
475 return NULL;
476 }
477
478 /* Make sure the type is initialized. float gets initialized late */
479 if (Py_Type(fieldobj)->tp_dict == NULL)
480 if (PyType_Ready(Py_Type(fieldobj)) < 0)
481 return NULL;
482
483 /* we need to create an object out of the pointers we have */
484 spec = SubString_new_object(format_spec);
485 if (spec == NULL)
486 goto done;
487
488 /* Find the (unbound!) __format__ method (a borrowed reference) */
489 meth = _PyType_Lookup(Py_Type(fieldobj), format_str);
490 if (meth == NULL) {
491 PyErr_Format(PyExc_TypeError,
492 "Type %.100s doesn't define __format__",
493 Py_Type(fieldobj)->tp_name);
494 goto done;
495 }
496
497 /* And call it, binding it to the value */
498 result = PyObject_CallFunctionObjArgs(meth, fieldobj, spec, NULL);
499 if (result == NULL)
500 goto done;
501
502 if (!STRINGLIB_CHECK(result)) {
503 PyErr_SetString(PyExc_TypeError,
504 "__format__ method did not return "
505 STRINGLIB_TYPE_NAME);
506 Py_DECREF(result);
507 result = NULL;
508 goto done;
509 }
510
511done:
512 Py_XDECREF(spec);
513 return result;
514}
515
516/*
517 render_field calls fieldobj.__format__(format_spec) method, and
518 appends to the output.
519*/
520static int
521render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
522{
523 int ok = 0;
524 PyObject *result = format(fieldobj, format_spec);
525
526 if (result == NULL)
527 goto done;
528
529 ok = output_data(output,
530 STRINGLIB_STR(result), STRINGLIB_LEN(result));
531done:
532 Py_XDECREF(result);
533 return ok;
534}
535
536static int
537parse_field(SubString *str, SubString *field_name, SubString *format_spec,
538 STRINGLIB_CHAR *conversion)
539{
540 STRINGLIB_CHAR c = 0;
541
542 /* initialize these, as they may be empty */
543 *conversion = '\0';
544 SubString_init(format_spec, NULL, 0);
545
546 /* search for the field name. it's terminated by the end of the
547 string, or a ':' or '!' */
548 field_name->ptr = str->ptr;
549 while (str->ptr < str->end) {
550 switch (c = *(str->ptr++)) {
551 case ':':
552 case '!':
553 break;
554 default:
555 continue;
556 }
557 break;
558 }
559
560 if (c == '!' || c == ':') {
561 /* we have a format specifier and/or a conversion */
562 /* don't include the last character */
563 field_name->end = str->ptr-1;
564
565 /* the format specifier is the rest of the string */
566 format_spec->ptr = str->ptr;
567 format_spec->end = str->end;
568
569 /* see if there's a conversion specifier */
570 if (c == '!') {
571 /* there must be another character present */
572 if (format_spec->ptr >= format_spec->end) {
573 PyErr_SetString(PyExc_ValueError,
574 "end of format while looking for conversion "
575 "specifier");
576 return 0;
577 }
578 *conversion = *(format_spec->ptr++);
579
580 /* if there is another character, it must be a colon */
581 if (format_spec->ptr < format_spec->end) {
582 c = *(format_spec->ptr++);
583 if (c != ':') {
584 PyErr_SetString(PyExc_ValueError,
585 "expected ':' after format specifier");
586 return 0;
587 }
588 }
589 }
590
591 return 1;
592
Eric Smith0cb431c2007-08-28 01:07:27 +0000593 }
594 else {
Eric Smith8c663262007-08-25 02:26:07 +0000595 /* end of string, there's no format_spec or conversion */
596 field_name->end = str->ptr;
597 return 1;
598 }
599}
600
601/************************************************************************/
602/******* Output string allocation and escape-to-markup processing ******/
603/************************************************************************/
604
/* MarkupIterator breaks the string into pieces of either literal
   text, or things inside {} that need to be marked up.  it is
   designed to make it easy to wrap a Python iterator around it, for
   use with the Formatter class */

typedef struct {
    SubString str;      /* the input still to be consumed; str.ptr is
                           advanced by MarkupIterator_next() */
    int in_markup;      /* nonzero when the next piece is the inside of a
                           {} rather than literal text */
} MarkupIterator;
614
615static int
616MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
617{
618 SubString_init(&self->str, ptr, len);
619 self->in_markup = 0;
620 return 1;
621}
622
623/* returns 0 on error, 1 on non-error termination, and 2 if it got a
624 string (or something to be expanded) */
625static int
626MarkupIterator_next(MarkupIterator *self, int *is_markup, SubString *literal,
627 SubString *field_name, SubString *format_spec,
628 STRINGLIB_CHAR *conversion,
629 int *format_spec_needs_expanding)
630{
631 int at_end;
632 STRINGLIB_CHAR c = 0;
633 STRINGLIB_CHAR *start;
634 int count;
635 Py_ssize_t len;
636
637 *format_spec_needs_expanding = 0;
638
639 /* no more input, end of iterator */
640 if (self->str.ptr >= self->str.end)
641 return 1;
642
643 *is_markup = self->in_markup;
644 start = self->str.ptr;
645
646 if (self->in_markup) {
647
648 /* prepare for next iteration */
649 self->in_markup = 0;
650
651 /* this is markup, find the end of the string by counting nested
652 braces. note that this prohibits escaped braces, so that
653 format_specs cannot have braces in them. */
654 count = 1;
655
656 /* we know we can't have a zero length string, so don't worry
657 about that case */
658 while (self->str.ptr < self->str.end) {
659 switch (c = *(self->str.ptr++)) {
660 case '{':
661 /* the format spec needs to be recursively expanded.
662 this is an optimization, and not strictly needed */
663 *format_spec_needs_expanding = 1;
664 count++;
665 break;
666 case '}':
667 count--;
668 if (count <= 0) {
669 /* we're done. parse and get out */
670 literal->ptr = start;
671 literal->end = self->str.ptr-1;
672
673 if (parse_field(literal, field_name, format_spec,
674 conversion) == 0)
675 return 0;
676
677 /* success */
678 return 2;
679 }
680 break;
681 }
682 }
683 /* end of string while searching for matching '}' */
684 PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
685 return 0;
686
Eric Smith0cb431c2007-08-28 01:07:27 +0000687 }
688 else {
Eric Smith8c663262007-08-25 02:26:07 +0000689 /* literal text, read until the end of string, an escaped { or },
690 or an unescaped { */
691 while (self->str.ptr < self->str.end) {
692 switch (c = *(self->str.ptr++)) {
693 case '{':
694 case '}':
695 self->in_markup = 1;
696 break;
697 default:
698 continue;
699 }
700 break;
701 }
702
703 at_end = self->str.ptr >= self->str.end;
704 len = self->str.ptr - start;
705
Neal Norwitz3ef6a572007-08-25 17:08:59 +0000706 if ((c == '}') && (at_end || (c != *self->str.ptr))) {
Eric Smith7a6dd292007-08-27 23:30:47 +0000707 PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
708 "in format string");
Neal Norwitz3ef6a572007-08-25 17:08:59 +0000709 return 0;
710 }
711 if (at_end && c == '{') {
Eric Smith7a6dd292007-08-27 23:30:47 +0000712 PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
713 "in format string");
Neal Norwitz3ef6a572007-08-25 17:08:59 +0000714 return 0;
715 }
Eric Smith8c663262007-08-25 02:26:07 +0000716 if (!at_end) {
717 if (c == *self->str.ptr) {
718 /* escaped } or {, skip it in the input */
719 self->str.ptr++;
720 self->in_markup = 0;
Eric Smith0cb431c2007-08-28 01:07:27 +0000721 }
722 else
Eric Smith8c663262007-08-25 02:26:07 +0000723 len--;
724 }
725
726 /* this is just plain text, return it */
727 literal->ptr = start;
728 literal->end = start + len;
729 return 2;
730 }
731}
732
733
734/* do the !r or !s conversion on obj */
735static PyObject *
736do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
737{
738 /* XXX in pre-3.0, do we need to convert this to unicode, since it
739 might have returned a string? */
740 switch (conversion) {
741 case 'r':
742 return PyObject_Repr(obj);
743 case 's':
744 return PyObject_Unicode(obj);
745 default:
746 PyErr_Format(PyExc_ValueError,
747 "Unknown converion specifier %c",
748 conversion);
749 return NULL;
750 }
751}
752
753/* given:
754
755 {field_name!conversion:format_spec}
756
757 compute the result and write it to output.
758 format_spec_needs_expanding is an optimization. if it's false,
759 just output the string directly, otherwise recursively expand the
760 format_spec string. */
761
762static int
763output_markup(SubString *field_name, SubString *format_spec,
764 int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
765 OutputString *output, PyObject *args, PyObject *kwargs,
766 int *recursion_level)
767{
768 PyObject *tmp = NULL;
769 PyObject *fieldobj = NULL;
770 SubString expanded_format_spec;
771 SubString *actual_format_spec;
772 int result = 0;
773
774 /* convert field_name to an object */
775 fieldobj = get_field_object(field_name, args, kwargs);
776 if (fieldobj == NULL)
777 goto done;
778
779 if (conversion != '\0') {
780 tmp = do_conversion(fieldobj, conversion);
781 if (tmp == NULL)
782 goto done;
783
784 /* do the assignment, transferring ownership: fieldobj = tmp */
785 Py_DECREF(fieldobj);
786 fieldobj = tmp;
787 tmp = NULL;
788 }
789
790 /* if needed, recurively compute the format_spec */
791 if (format_spec_needs_expanding) {
792 tmp = build_string(format_spec, args, kwargs, recursion_level);
793 if (tmp == NULL)
794 goto done;
795
796 /* note that in the case we're expanding the format string,
797 tmp must be kept around until after the call to
798 render_field. */
799 SubString_init(&expanded_format_spec,
800 STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
801 actual_format_spec = &expanded_format_spec;
Eric Smith0cb431c2007-08-28 01:07:27 +0000802 }
803 else
Eric Smith8c663262007-08-25 02:26:07 +0000804 actual_format_spec = format_spec;
805
806 if (render_field(fieldobj, actual_format_spec, output) == 0)
807 goto done;
808
809 result = 1;
810
811done:
812 Py_XDECREF(fieldobj);
813 Py_XDECREF(tmp);
814
815 return result;
816}
817
818/*
819 do_markup is the top-level loop for the format() function. It
820 searches through the format string for escapes to markup codes, and
821 calls other functions to move non-markup text to the output,
822 and to perform the markup to the output.
823*/
824static int
825do_markup(SubString *input, PyObject *args, PyObject *kwargs,
826 OutputString *output, int *recursion_level)
827{
828 MarkupIterator iter;
829 int is_markup;
830 int format_spec_needs_expanding;
831 int result;
832 SubString str;
833 SubString field_name;
834 SubString format_spec;
835 STRINGLIB_CHAR conversion;
836
837 MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
838 while ((result = MarkupIterator_next(&iter, &is_markup, &str, &field_name,
839 &format_spec, &conversion,
840 &format_spec_needs_expanding)) == 2) {
841 if (is_markup) {
842 if (!output_markup(&field_name, &format_spec,
843 format_spec_needs_expanding, conversion, output,
844 args, kwargs, recursion_level))
845 return 0;
Eric Smith0cb431c2007-08-28 01:07:27 +0000846 }
847 else
Eric Smith8c663262007-08-25 02:26:07 +0000848 if (!output_data(output, str.ptr, str.end-str.ptr))
849 return 0;
Eric Smith8c663262007-08-25 02:26:07 +0000850 }
851 return result;
852}
853
854
855/*
856 build_string allocates the output string and then
857 calls do_markup to do the heavy lifting.
858*/
859static PyObject *
860build_string(SubString *input, PyObject *args, PyObject *kwargs,
861 int *recursion_level)
862{
863 OutputString output;
864 PyObject *result = NULL;
865 Py_ssize_t count;
866
867 output.obj = NULL; /* needed so cleanup code always works */
868
869 /* check the recursion level */
870 (*recursion_level)--;
871 if (*recursion_level < 0) {
872 PyErr_SetString(PyExc_ValueError,
873 "Max string recursion exceeded");
874 goto done;
875 }
876
877 /* initial size is the length of the format string, plus the size
878 increment. seems like a reasonable default */
879 if (!output_initialize(&output,
880 input->end - input->ptr +
881 INITIAL_SIZE_INCREMENT))
882 goto done;
883
884 if (!do_markup(input, args, kwargs, &output, recursion_level)) {
885 goto done;
886 }
887
888 count = output.ptr - STRINGLIB_STR(output.obj);
889 if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
890 goto done;
891 }
892
893 /* transfer ownership to result */
894 result = output.obj;
895 output.obj = NULL;
896
897done:
898 (*recursion_level)++;
899 Py_XDECREF(output.obj);
900 return result;
901}
902
903/************************************************************************/
904/*********** main routine ***********************************************/
905/************************************************************************/
906
907/* this is the main entry point */
908static PyObject *
909do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
910{
911 SubString input;
912
913 /* PEP 3101 says only 2 levels, so that
914 "{0:{1}}".format('abc', 's') # works
915 "{0:{1:{2}}}".format('abc', 's', '') # fails
916 */
917 int recursion_level = 2;
918
919 SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
920 return build_string(&input, args, kwargs, &recursion_level);
921}
Eric Smithf6db4092007-08-27 23:52:26 +0000922
923
924
925/************************************************************************/
926/*********** formatteriterator ******************************************/
927/************************************************************************/
928
/* This is used to implement string.Formatter.vparse().  It exists so
   Formatter can share code with the built in unicode.format() method.
   It's really just a wrapper around MarkupIterator that is callable
   from Python. */

typedef struct {
    PyObject_HEAD

    /* the string being parsed; a reference is taken in formatter_parser()
       and released in formatteriter_dealloc() */
    PyUnicodeObject *str;

    /* parsing state; initialized over the characters of str */
    MarkupIterator it_markup;
} formatteriterobject;
941
/* Destructor for formatteriterobject: drops the reference to the string
   being parsed and frees the iterator itself. */
static void
formatteriter_dealloc(formatteriterobject *it)
{
    Py_XDECREF(it->str);
    PyObject_FREE(it);
}
948
949/* returns a tuple:
950 (is_markup, literal, field_name, format_spec, conversion)
951 if is_markup == True:
952 literal is None
953 field_name is the string before the ':'
954 format_spec is the string after the ':'
955 conversion is either None, or the string after the '!'
956 if is_markup == False:
957 literal is the literal string
958 field_name is None
959 format_spec is None
960 conversion is None
961*/
962static PyObject *
963formatteriter_next(formatteriterobject *it)
964{
965 SubString literal;
966 SubString field_name;
967 SubString format_spec;
968 Py_UNICODE conversion;
969 int is_markup;
970 int format_spec_needs_expanding;
971 int result = MarkupIterator_next(&it->it_markup, &is_markup, &literal,
972 &field_name, &format_spec, &conversion,
973 &format_spec_needs_expanding);
974
975 /* all of the SubString objects point into it->str, so no
976 memory management needs to be done on them */
977 assert(0 <= result && result <= 2);
Eric Smith0cb431c2007-08-28 01:07:27 +0000978 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +0000979 /* if 0, error has already been set, if 1, iterator is empty */
980 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +0000981 else {
Eric Smithf6db4092007-08-27 23:52:26 +0000982 PyObject *is_markup_bool = NULL;
983 PyObject *literal_str = NULL;
984 PyObject *field_name_str = NULL;
985 PyObject *format_spec_str = NULL;
986 PyObject *conversion_str = NULL;
987 PyObject *tuple = NULL;
988
989 is_markup_bool = PyBool_FromLong(is_markup);
990 if (!is_markup_bool)
991 return NULL;
992
993 if (is_markup) {
994 /* field_name, format_spec, and conversion are returned */
995 literal_str = Py_None;
996 Py_INCREF(literal_str);
997
998 field_name_str = SubString_new_object(&field_name);
999 if (field_name_str == NULL)
1000 goto error;
1001
1002 format_spec_str = SubString_new_object(&format_spec);
1003 if (format_spec_str == NULL)
1004 goto error;
1005
1006 /* if the conversion is not specified, return a None,
1007 otherwise create a one length string with the
1008 conversion characater */
1009 if (conversion == '\0') {
1010 conversion_str = Py_None;
1011 Py_INCREF(conversion_str);
Eric Smith0cb431c2007-08-28 01:07:27 +00001012 }
1013 else
Eric Smithf6db4092007-08-27 23:52:26 +00001014 conversion_str = PyUnicode_FromUnicode(&conversion,
1015 1);
1016 if (conversion_str == NULL)
1017 goto error;
Eric Smith0cb431c2007-08-28 01:07:27 +00001018 }
1019 else {
Eric Smithf6db4092007-08-27 23:52:26 +00001020 /* only literal is returned */
1021 literal_str = SubString_new_object(&literal);
1022 if (literal_str == NULL)
1023 goto error;
1024
1025 field_name_str = Py_None;
1026 format_spec_str = Py_None;
1027 conversion_str = Py_None;
1028
1029 Py_INCREF(field_name_str);
1030 Py_INCREF(format_spec_str);
1031 Py_INCREF(conversion_str);
1032 }
1033 tuple = PyTuple_Pack(5, is_markup_bool, literal_str,
1034 field_name_str, format_spec_str,
1035 conversion_str);
1036 error:
1037 Py_XDECREF(is_markup_bool);
1038 Py_XDECREF(literal_str);
1039 Py_XDECREF(field_name_str);
1040 Py_XDECREF(format_spec_str);
1041 Py_XDECREF(conversion_str);
1042 return tuple;
1043 }
1044}
1045
/* Method table for formatteriterator objects; it holds nothing but the
   terminating sentinel entry. */
static PyMethodDef formatteriter_methods[] = {
    {NULL,              NULL}           /* sentinel */
};
1049
/* Type object for formatteriterator.  Besides deallocation and generic
   attribute access, it only fills in the iterator slots: tp_iter returns
   the object itself and tp_iternext is formatteriter_next(). */
PyTypeObject PyFormatterIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "formatteriterator",                /* tp_name */
    sizeof(formatteriterobject),        /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /* methods */
    (destructor)formatteriter_dealloc,  /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    0,                                  /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    PyObject_SelfIter,                  /* tp_iter */
    (iternextfunc)formatteriter_next,   /* tp_iternext */
    formatteriter_methods,              /* tp_methods */
    0,
};
1082
1083/* unicode_formatter_parser is used to implement
1084 string.Formatter.vformat. it parses a string and returns tuples
1085 describing the parsed elements. It's a wrapper around
1086 stringlib/string_format.h's MarkupIterator */
1087static PyObject *
1088formatter_parser(PyUnicodeObject *self)
1089{
1090 formatteriterobject *it;
1091
1092 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1093 if (it == NULL)
1094 return NULL;
1095
1096 /* take ownership, give the object to the iterator */
1097 Py_INCREF(self);
1098 it->str = self;
1099
1100 /* initialize the contained MarkupIterator */
1101 MarkupIterator_init(&it->it_markup,
1102 PyUnicode_AS_UNICODE(self),
1103 PyUnicode_GET_SIZE(self));
1104
1105 return (PyObject *)it;
1106}
1107
1108
1109/************************************************************************/
1110/*********** fieldnameiterator ******************************************/
1111/************************************************************************/
1112
1113
1114/* This is used to implement string.Formatter.vparse(). It parses the
1115 field name into attribute and item values. It's a Python-callable
1116 wrapper around FieldNameIterator */
1117
1118typedef struct {
1119 PyObject_HEAD
1120
1121 PyUnicodeObject *str;
1122
1123 FieldNameIterator it_field;
1124} fieldnameiterobject;
1125
1126static void
1127fieldnameiter_dealloc(fieldnameiterobject *it)
1128{
1129 Py_XDECREF(it->str);
1130 PyObject_FREE(it);
1131}
1132
1133/* returns a tuple:
1134 (is_attr, value)
1135 is_attr is true if we used attribute syntax (e.g., '.foo')
1136 false if we used index syntax (e.g., '[foo]')
1137 value is an integer or string
1138*/
1139static PyObject *
1140fieldnameiter_next(fieldnameiterobject *it)
1141{
1142 int result;
1143 int is_attr;
1144 Py_ssize_t idx;
1145 SubString name;
1146
1147 result = FieldNameIterator_next(&it->it_field, &is_attr,
1148 &idx, &name);
Eric Smith0cb431c2007-08-28 01:07:27 +00001149 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +00001150 /* if 0, error has already been set, if 1, iterator is empty */
1151 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +00001152 else {
Eric Smithf6db4092007-08-27 23:52:26 +00001153 PyObject* result = NULL;
1154 PyObject* is_attr_obj = NULL;
1155 PyObject* obj = NULL;
1156
1157 is_attr_obj = PyBool_FromLong(is_attr);
1158 if (is_attr_obj == NULL)
1159 goto error;
1160
1161 /* either an integer or a string */
1162 if (idx != -1)
1163 obj = PyInt_FromSsize_t(idx);
1164 else
1165 obj = SubString_new_object(&name);
1166 if (obj == NULL)
1167 goto error;
1168
1169 /* return a tuple of values */
1170 result = PyTuple_Pack(2, is_attr_obj, obj);
1171 if (result == NULL)
1172 goto error;
1173
1174 return result;
1175
1176 error:
1177 Py_XDECREF(result);
1178 Py_XDECREF(is_attr_obj);
1179 Py_XDECREF(obj);
1180 return NULL;
1181 }
1182 return NULL;
1183}
1184
1185static PyMethodDef fieldnameiter_methods[] = {
1186 {NULL, NULL} /* sentinel */
1187};
1188
1189static PyTypeObject PyFieldNameIter_Type = {
1190 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1191 "fieldnameiterator", /* tp_name */
1192 sizeof(fieldnameiterobject), /* tp_basicsize */
1193 0, /* tp_itemsize */
1194 /* methods */
1195 (destructor)fieldnameiter_dealloc, /* tp_dealloc */
1196 0, /* tp_print */
1197 0, /* tp_getattr */
1198 0, /* tp_setattr */
1199 0, /* tp_compare */
1200 0, /* tp_repr */
1201 0, /* tp_as_number */
1202 0, /* tp_as_sequence */
1203 0, /* tp_as_mapping */
1204 0, /* tp_hash */
1205 0, /* tp_call */
1206 0, /* tp_str */
1207 PyObject_GenericGetAttr, /* tp_getattro */
1208 0, /* tp_setattro */
1209 0, /* tp_as_buffer */
1210 Py_TPFLAGS_DEFAULT, /* tp_flags */
1211 0, /* tp_doc */
1212 0, /* tp_traverse */
1213 0, /* tp_clear */
1214 0, /* tp_richcompare */
1215 0, /* tp_weaklistoffset */
1216 PyObject_SelfIter, /* tp_iter */
1217 (iternextfunc)fieldnameiter_next, /* tp_iternext */
1218 fieldnameiter_methods, /* tp_methods */
1219 0};
1220
1221/* unicode_formatter_field_name_split is used to implement
1222 string.Formatter.vformat. it takes an PEP 3101 "field name", and
1223 returns a tuple of (first, rest): "first", the part before the
1224 first '.' or '['; and "rest", an iterator for the rest of the field
1225 name. it's a wrapper around stringlib/string_format.h's
1226 field_name_split. The iterator it returns is a
1227 FieldNameIterator */
1228static PyObject *
1229formatter_field_name_split(PyUnicodeObject *self)
1230{
1231 SubString first;
1232 Py_ssize_t first_idx;
1233 fieldnameiterobject *it;
1234
1235 PyObject *first_obj = NULL;
1236 PyObject *result = NULL;
1237
1238 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1239 if (it == NULL)
1240 return NULL;
1241
1242 /* take ownership, give the object to the iterator. this is
1243 just to keep the field_name alive */
1244 Py_INCREF(self);
1245 it->str = self;
1246
1247 if (!field_name_split(STRINGLIB_STR(self),
1248 STRINGLIB_LEN(self),
1249 &first, &first_idx, &it->it_field))
1250 goto error;
1251
Eric Smith0cb431c2007-08-28 01:07:27 +00001252 /* first becomes an integer, if possible; else a string */
Eric Smithf6db4092007-08-27 23:52:26 +00001253 if (first_idx != -1)
1254 first_obj = PyInt_FromSsize_t(first_idx);
1255 else
1256 /* convert "first" into a string object */
1257 first_obj = SubString_new_object(&first);
1258 if (first_obj == NULL)
1259 goto error;
1260
1261 /* return a tuple of values */
1262 result = PyTuple_Pack(2, first_obj, it);
1263
1264error:
1265 Py_XDECREF(it);
1266 Py_XDECREF(first_obj);
1267 return result;
1268}