blob: edbd5dd682d335a21fd7b71fd0180dcddc0be489 [file] [log] [blame]
Eric Smith8c663262007-08-25 02:26:07 +00001/*
2 string_format.h -- implementation of string.format().
3
4 It uses the Objects/stringlib conventions, so that it can be
5 compiled for both unicode and string objects.
6*/
7
8
/* Defines for more efficiently reallocating the string buffer.
   output_initialize() seeds each OutputString's size_increment with
   INITIAL_SIZE_INCREMENT (build_string() also adds it to the initial
   buffer size), and output_extend() multiplies the increment by
   SIZE_MULTIPLIER after each reallocation for as long as it is still
   below MAX_SIZE_INCREMENT. */
#define INITIAL_SIZE_INCREMENT 100
#define SIZE_MULTIPLIER 2
#define MAX_SIZE_INCREMENT 3200
13
14
15/************************************************************************/
16/*********** Global data structures and forward declarations *********/
17/************************************************************************/
18
/*
   A SubString consists of the characters between two string or
   unicode pointers.  It describes the half-open range [ptr, end):
   SubString_init() sets end to ptr + len, and SubString_new_object()
   uses end - ptr as the length.  The struct only stores the two
   pointers; it never allocates or frees the characters.
*/
typedef struct {
    STRINGLIB_CHAR *ptr;    /* first character of the range */
    STRINGLIB_CHAR *end;    /* one past the last character of the range */
} SubString;
27
28
29/* forward declaration for recursion */
30static PyObject *
31build_string(SubString *input, PyObject *args, PyObject *kwargs,
32 int *recursion_level);
33
34
35
36/************************************************************************/
37/************************** Utility functions ************************/
38/************************************************************************/
39
40/* fill in a SubString from a pointer and length */
41Py_LOCAL_INLINE(void)
42SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
43{
44 str->ptr = p;
45 if (p == NULL)
46 str->end = NULL;
47 else
48 str->end = str->ptr + len;
49}
50
51Py_LOCAL_INLINE(PyObject *)
52SubString_new_object(SubString *str)
53{
54 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
55}
56
57/************************************************************************/
Eric Smith8c663262007-08-25 02:26:07 +000058/*********** Output string management functions ****************/
59/************************************************************************/
60
/* Growable output buffer used while building the formatted string.
   The characters live inside obj; ptr and end are pointers into
   obj's character storage and are recomputed by output_extend()
   whenever obj is resized. */
typedef struct {
    STRINGLIB_CHAR *ptr;        /* where output_data() writes next */
    STRINGLIB_CHAR *end;        /* one past the last allocated character */
    PyObject *obj;              /* the string object being filled in */
    Py_ssize_t size_increment;  /* extra slack requested by the next
                                   output_extend() call */
} OutputString;
67
68/* initialize an OutputString object, reserving size characters */
69static int
70output_initialize(OutputString *output, Py_ssize_t size)
71{
72 output->obj = STRINGLIB_NEW(NULL, size);
73 if (output->obj == NULL)
74 return 0;
75
76 output->ptr = STRINGLIB_STR(output->obj);
77 output->end = STRINGLIB_LEN(output->obj) + output->ptr;
78 output->size_increment = INITIAL_SIZE_INCREMENT;
79
80 return 1;
81}
82
83/*
84 output_extend reallocates the output string buffer.
85 It returns a status: 0 for a failed reallocation,
86 1 for success.
87*/
88
89static int
90output_extend(OutputString *output, Py_ssize_t count)
91{
92 STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
93 Py_ssize_t curlen = output->ptr - startptr;
94 Py_ssize_t maxlen = curlen + count + output->size_increment;
95
96 if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
97 return 0;
98 startptr = STRINGLIB_STR(output->obj);
99 output->ptr = startptr + curlen;
100 output->end = startptr + maxlen;
101 if (output->size_increment < MAX_SIZE_INCREMENT)
102 output->size_increment *= SIZE_MULTIPLIER;
103 return 1;
104}
105
106/*
107 output_data dumps characters into our output string
108 buffer.
109
110 In some cases, it has to reallocate the string.
111
112 It returns a status: 0 for a failed reallocation,
113 1 for success.
114*/
115static int
116output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
117{
118 if ((count > output->end - output->ptr) && !output_extend(output, count))
119 return 0;
120 memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
121 output->ptr += count;
122 return 1;
123}
124
125/************************************************************************/
126/*********** Format string parsing -- integers and identifiers *********/
127/************************************************************************/
128
Eric Smith7ade6482007-08-26 22:27:13 +0000129static Py_ssize_t
130get_integer(const SubString *str)
Eric Smith8c663262007-08-25 02:26:07 +0000131{
Eric Smith7ade6482007-08-26 22:27:13 +0000132 Py_ssize_t accumulator = 0;
133 Py_ssize_t digitval;
134 Py_ssize_t oldaccumulator;
135 STRINGLIB_CHAR *p;
Eric Smith8c663262007-08-25 02:26:07 +0000136
Eric Smith7ade6482007-08-26 22:27:13 +0000137 /* empty string is an error */
138 if (str->ptr >= str->end)
139 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000140
Eric Smith7ade6482007-08-26 22:27:13 +0000141 for (p = str->ptr; p < str->end; p++) {
142 digitval = STRINGLIB_TODECIMAL(*p);
Eric Smith8c663262007-08-25 02:26:07 +0000143 if (digitval < 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000144 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000145 /*
146 This trick was copied from old Unicode format code. It's cute,
147 but would really suck on an old machine with a slow divide
148 implementation. Fortunately, in the normal case we do not
149 expect too many digits.
150 */
151 oldaccumulator = accumulator;
152 accumulator *= 10;
153 if ((accumulator+10)/10 != oldaccumulator+1) {
154 PyErr_Format(PyExc_ValueError,
155 "Too many decimal digits in format string");
156 return -1;
157 }
158 accumulator += digitval;
159 }
Eric Smith7ade6482007-08-26 22:27:13 +0000160 return accumulator;
Eric Smith8c663262007-08-25 02:26:07 +0000161}
162
163/************************************************************************/
164/******** Functions to get field objects and specification strings ******/
165/************************************************************************/
166
Eric Smith7ade6482007-08-26 22:27:13 +0000167/* do the equivalent of obj.name */
Eric Smith8c663262007-08-25 02:26:07 +0000168static PyObject *
Eric Smith7ade6482007-08-26 22:27:13 +0000169getattr(PyObject *obj, SubString *name)
Eric Smith8c663262007-08-25 02:26:07 +0000170{
Eric Smith7ade6482007-08-26 22:27:13 +0000171 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000172 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000173 if (str == NULL)
174 return NULL;
175 newobj = PyObject_GetAttr(obj, str);
176 Py_DECREF(str);
177 return newobj;
Eric Smith8c663262007-08-25 02:26:07 +0000178}
179
Eric Smith7ade6482007-08-26 22:27:13 +0000180/* do the equivalent of obj[idx], where obj is a sequence */
181static PyObject *
182getitem_sequence(PyObject *obj, Py_ssize_t idx)
183{
184 return PySequence_GetItem(obj, idx);
185}
186
187/* do the equivalent of obj[idx], where obj is not a sequence */
188static PyObject *
189getitem_idx(PyObject *obj, Py_ssize_t idx)
190{
191 PyObject *newobj;
192 PyObject *idx_obj = PyInt_FromSsize_t(idx);
193 if (idx_obj == NULL)
194 return NULL;
195 newobj = PyObject_GetItem(obj, idx_obj);
196 Py_DECREF(idx_obj);
197 return newobj;
198}
199
200/* do the equivalent of obj[name] */
201static PyObject *
202getitem_str(PyObject *obj, SubString *name)
203{
204 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000205 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000206 if (str == NULL)
207 return NULL;
208 newobj = PyObject_GetItem(obj, str);
209 Py_DECREF(str);
210 return newobj;
211}
212
/* Iteration state for walking the ".attr" and "[item]" parts of a
   field name.  Initialized by FieldNameIterator_init() and advanced
   by FieldNameIterator_next(). */
typedef struct {
    /* the entire string we're parsing.  we assume that someone else
       is managing its lifetime, and that it will exist for the
       lifetime of the iterator.  can be empty */
    SubString str;

    /* pointer to where we are inside field_name; starts at str.ptr
       and moves towards str.end as parts are consumed */
    STRINGLIB_CHAR *ptr;
} FieldNameIterator;
222
223
224static int
225FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
226 Py_ssize_t len)
227{
228 SubString_init(&self->str, ptr, len);
229 self->ptr = self->str.ptr;
230 return 1;
231}
232
233static int
234_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
235{
236 STRINGLIB_CHAR c;
237
238 name->ptr = self->ptr;
239
240 /* return everything until '.' or '[' */
241 while (self->ptr < self->str.end) {
242 switch (c = *self->ptr++) {
243 case '[':
244 case '.':
245 /* backup so that we this character will be seen next time */
246 self->ptr--;
247 break;
248 default:
249 continue;
250 }
251 break;
252 }
253 /* end of string is okay */
254 name->end = self->ptr;
255 return 1;
256}
257
258static int
259_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
260{
261 STRINGLIB_CHAR c;
262
263 name->ptr = self->ptr;
264
265 /* return everything until ']' */
266 while (self->ptr < self->str.end) {
267 switch (c = *self->ptr++) {
268 case ']':
269 break;
270 default:
271 continue;
272 }
273 break;
274 }
275 /* end of string is okay */
276 /* don't include the ']' */
277 name->end = self->ptr-1;
278 return 1;
279}
280
281/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
282static int
283FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
284 Py_ssize_t *name_idx, SubString *name)
285{
286 /* check at end of input */
287 if (self->ptr >= self->str.end)
288 return 1;
289
290 switch (*self->ptr++) {
291 case '.':
292 *is_attribute = 1;
293 if (_FieldNameIterator_attr(self, name) == 0) {
294 return 0;
295 }
296 *name_idx = -1;
297 break;
298 case '[':
299 *is_attribute = 0;
300 if (_FieldNameIterator_item(self, name) == 0) {
301 return 0;
302 }
303 *name_idx = get_integer(name);
304 break;
305 default:
306 /* interal error, can't get here */
307 assert(0);
308 return 0;
309 }
310
311 /* empty string is an error */
312 if (name->ptr == name->end) {
313 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
314 return 0;
315 }
316
317 return 2;
318}
319
320
321/* input: field_name
322 output: 'first' points to the part before the first '[' or '.'
323 'first_idx' is -1 if 'first' is not an integer, otherwise
324 it's the value of first converted to an integer
325 'rest' is an iterator to return the rest
326*/
327static int
328field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
329 Py_ssize_t *first_idx, FieldNameIterator *rest)
330{
331 STRINGLIB_CHAR c;
332 STRINGLIB_CHAR *p = ptr;
333 STRINGLIB_CHAR *end = ptr + len;
334
335 /* find the part up until the first '.' or '[' */
336 while (p < end) {
337 switch (c = *p++) {
338 case '[':
339 case '.':
340 /* backup so that we this character is available to the
341 "rest" iterator */
342 p--;
343 break;
344 default:
345 continue;
346 }
347 break;
348 }
349
350 /* set up the return values */
351 SubString_init(first, ptr, p - ptr);
352 FieldNameIterator_init(rest, p, end - p);
353
354 /* see if "first" is an integer, in which case it's used as an index */
355 *first_idx = get_integer(first);
356
357 /* zero length string is an error */
358 if (first->ptr >= first->end) {
359 PyErr_SetString(PyExc_ValueError, "empty field name");
360 goto error;
361 }
362
363 return 1;
364error:
365 return 0;
366}
367
368
Eric Smith8c663262007-08-25 02:26:07 +0000369/*
370 get_field_object returns the object inside {}, before the
371 format_spec. It handles getindex and getattr lookups and consumes
372 the entire input string.
373*/
374static PyObject *
375get_field_object(SubString *input, PyObject *args, PyObject *kwargs)
376{
Eric Smith7ade6482007-08-26 22:27:13 +0000377 PyObject *obj = NULL;
378 int ok;
379 int is_attribute;
380 SubString name;
381 SubString first;
Eric Smith8c663262007-08-25 02:26:07 +0000382 Py_ssize_t index;
Eric Smith7ade6482007-08-26 22:27:13 +0000383 FieldNameIterator rest;
Eric Smith8c663262007-08-25 02:26:07 +0000384
Eric Smith7ade6482007-08-26 22:27:13 +0000385 if (!field_name_split(input->ptr, input->end - input->ptr, &first,
386 &index, &rest)) {
387 goto error;
388 }
Eric Smith8c663262007-08-25 02:26:07 +0000389
Eric Smith7ade6482007-08-26 22:27:13 +0000390 if (index == -1) {
391 /* look up in kwargs */
Eric Smith7a6dd292007-08-27 23:30:47 +0000392 PyObject *key = SubString_new_object(&first);
Eric Smith7ade6482007-08-26 22:27:13 +0000393 if (key == NULL)
394 goto error;
395 if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
396 PyErr_SetString(PyExc_ValueError, "Keyword argument not found "
397 "in format string");
398 Py_DECREF(key);
399 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000400 }
Neal Norwitz8a4eb292007-08-27 07:24:17 +0000401 Py_DECREF(key);
Neal Norwitz247b5152007-08-27 03:22:50 +0000402 Py_INCREF(obj);
Eric Smith7ade6482007-08-26 22:27:13 +0000403 } else {
404 /* look up in args */
405 obj = PySequence_GetItem(args, index);
406 if (obj == NULL) {
407 /* translate IndexError to a ValueError */
408 PyErr_SetString(PyExc_ValueError, "Not enough positional arguments "
409 "in format string");
410 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000411 }
412 }
Eric Smith7ade6482007-08-26 22:27:13 +0000413
414 /* iterate over the rest of the field_name */
415 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
416 &name)) == 2) {
417 PyObject *tmp;
418
419 if (is_attribute)
420 /* getattr lookup "." */
421 tmp = getattr(obj, &name);
422 else
423 /* getitem lookup "[]" */
424 if (index == -1)
425 tmp = getitem_str(obj, &name);
426 else
427 if (PySequence_Check(obj))
428 tmp = getitem_sequence(obj, index);
429 else
430 /* not a sequence */
431 tmp = getitem_idx(obj, index);
432 if (tmp == NULL)
433 goto error;
434
435 /* assign to obj */
436 Py_DECREF(obj);
437 obj = tmp;
Eric Smith8c663262007-08-25 02:26:07 +0000438 }
Eric Smith7ade6482007-08-26 22:27:13 +0000439 /* end of iterator, this is the non-error case */
440 if (ok == 1)
441 return obj;
442error:
443 Py_XDECREF(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000444 return NULL;
445}
446
447/************************************************************************/
448/***************** Field rendering functions **************************/
449/************************************************************************/
450
451/*
452 render_field() is the main function in this section. It takes the
453 field object and field specification string generated by
454 get_field_and_spec, and renders the field into the output string.
455
456 format() does the actual calling of the objects __format__ method.
457*/
458
459
460/* returns fieldobj.__format__(format_spec) */
461static PyObject *
462format(PyObject *fieldobj, SubString *format_spec)
463{
464 static PyObject *format_str = NULL;
465 PyObject *meth;
466 PyObject *spec = NULL;
467 PyObject *result = NULL;
468
469 /* Initialize cached value */
470 if (format_str == NULL) {
471 /* Initialize static variable needed by _PyType_Lookup */
472 format_str = PyUnicode_FromString("__format__");
473 if (format_str == NULL)
474 return NULL;
475 }
476
477 /* Make sure the type is initialized. float gets initialized late */
478 if (Py_Type(fieldobj)->tp_dict == NULL)
479 if (PyType_Ready(Py_Type(fieldobj)) < 0)
480 return NULL;
481
482 /* we need to create an object out of the pointers we have */
483 spec = SubString_new_object(format_spec);
484 if (spec == NULL)
485 goto done;
486
487 /* Find the (unbound!) __format__ method (a borrowed reference) */
488 meth = _PyType_Lookup(Py_Type(fieldobj), format_str);
489 if (meth == NULL) {
490 PyErr_Format(PyExc_TypeError,
491 "Type %.100s doesn't define __format__",
492 Py_Type(fieldobj)->tp_name);
493 goto done;
494 }
495
496 /* And call it, binding it to the value */
497 result = PyObject_CallFunctionObjArgs(meth, fieldobj, spec, NULL);
498 if (result == NULL)
499 goto done;
500
501 if (!STRINGLIB_CHECK(result)) {
502 PyErr_SetString(PyExc_TypeError,
503 "__format__ method did not return "
504 STRINGLIB_TYPE_NAME);
505 Py_DECREF(result);
506 result = NULL;
507 goto done;
508 }
509
510done:
511 Py_XDECREF(spec);
512 return result;
513}
514
515/*
516 render_field calls fieldobj.__format__(format_spec) method, and
517 appends to the output.
518*/
519static int
520render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
521{
522 int ok = 0;
523 PyObject *result = format(fieldobj, format_spec);
524
525 if (result == NULL)
526 goto done;
527
528 ok = output_data(output,
529 STRINGLIB_STR(result), STRINGLIB_LEN(result));
530done:
531 Py_XDECREF(result);
532 return ok;
533}
534
535static int
536parse_field(SubString *str, SubString *field_name, SubString *format_spec,
537 STRINGLIB_CHAR *conversion)
538{
539 STRINGLIB_CHAR c = 0;
540
541 /* initialize these, as they may be empty */
542 *conversion = '\0';
543 SubString_init(format_spec, NULL, 0);
544
545 /* search for the field name. it's terminated by the end of the
546 string, or a ':' or '!' */
547 field_name->ptr = str->ptr;
548 while (str->ptr < str->end) {
549 switch (c = *(str->ptr++)) {
550 case ':':
551 case '!':
552 break;
553 default:
554 continue;
555 }
556 break;
557 }
558
559 if (c == '!' || c == ':') {
560 /* we have a format specifier and/or a conversion */
561 /* don't include the last character */
562 field_name->end = str->ptr-1;
563
564 /* the format specifier is the rest of the string */
565 format_spec->ptr = str->ptr;
566 format_spec->end = str->end;
567
568 /* see if there's a conversion specifier */
569 if (c == '!') {
570 /* there must be another character present */
571 if (format_spec->ptr >= format_spec->end) {
572 PyErr_SetString(PyExc_ValueError,
573 "end of format while looking for conversion "
574 "specifier");
575 return 0;
576 }
577 *conversion = *(format_spec->ptr++);
578
579 /* if there is another character, it must be a colon */
580 if (format_spec->ptr < format_spec->end) {
581 c = *(format_spec->ptr++);
582 if (c != ':') {
583 PyErr_SetString(PyExc_ValueError,
584 "expected ':' after format specifier");
585 return 0;
586 }
587 }
588 }
589
590 return 1;
591
592 } else {
593 /* end of string, there's no format_spec or conversion */
594 field_name->end = str->ptr;
595 return 1;
596 }
597}
598
599/************************************************************************/
600/******* Output string allocation and escape-to-markup processing ******/
601/************************************************************************/
602
/* MarkupIterator breaks the string into pieces of either literal
   text, or things inside {} that need to be marked up.  it is
   designed to make it easy to wrap a Python iterator around it, for
   use with the Formatter class */

typedef struct {
    /* the text that has not been consumed yet; str.ptr is advanced by
       MarkupIterator_next() */
    SubString str;
    /* non-zero when the next piece to return is the inside of a {}
       field rather than literal text */
    int in_markup;
} MarkupIterator;
612
613static int
614MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
615{
616 SubString_init(&self->str, ptr, len);
617 self->in_markup = 0;
618 return 1;
619}
620
621/* returns 0 on error, 1 on non-error termination, and 2 if it got a
622 string (or something to be expanded) */
623static int
624MarkupIterator_next(MarkupIterator *self, int *is_markup, SubString *literal,
625 SubString *field_name, SubString *format_spec,
626 STRINGLIB_CHAR *conversion,
627 int *format_spec_needs_expanding)
628{
629 int at_end;
630 STRINGLIB_CHAR c = 0;
631 STRINGLIB_CHAR *start;
632 int count;
633 Py_ssize_t len;
634
635 *format_spec_needs_expanding = 0;
636
637 /* no more input, end of iterator */
638 if (self->str.ptr >= self->str.end)
639 return 1;
640
641 *is_markup = self->in_markup;
642 start = self->str.ptr;
643
644 if (self->in_markup) {
645
646 /* prepare for next iteration */
647 self->in_markup = 0;
648
649 /* this is markup, find the end of the string by counting nested
650 braces. note that this prohibits escaped braces, so that
651 format_specs cannot have braces in them. */
652 count = 1;
653
654 /* we know we can't have a zero length string, so don't worry
655 about that case */
656 while (self->str.ptr < self->str.end) {
657 switch (c = *(self->str.ptr++)) {
658 case '{':
659 /* the format spec needs to be recursively expanded.
660 this is an optimization, and not strictly needed */
661 *format_spec_needs_expanding = 1;
662 count++;
663 break;
664 case '}':
665 count--;
666 if (count <= 0) {
667 /* we're done. parse and get out */
668 literal->ptr = start;
669 literal->end = self->str.ptr-1;
670
671 if (parse_field(literal, field_name, format_spec,
672 conversion) == 0)
673 return 0;
674
675 /* success */
676 return 2;
677 }
678 break;
679 }
680 }
681 /* end of string while searching for matching '}' */
682 PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
683 return 0;
684
685 } else {
686 /* literal text, read until the end of string, an escaped { or },
687 or an unescaped { */
688 while (self->str.ptr < self->str.end) {
689 switch (c = *(self->str.ptr++)) {
690 case '{':
691 case '}':
692 self->in_markup = 1;
693 break;
694 default:
695 continue;
696 }
697 break;
698 }
699
700 at_end = self->str.ptr >= self->str.end;
701 len = self->str.ptr - start;
702
Neal Norwitz3ef6a572007-08-25 17:08:59 +0000703 if ((c == '}') && (at_end || (c != *self->str.ptr))) {
Eric Smith7a6dd292007-08-27 23:30:47 +0000704 PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
705 "in format string");
Neal Norwitz3ef6a572007-08-25 17:08:59 +0000706 return 0;
707 }
708 if (at_end && c == '{') {
Eric Smith7a6dd292007-08-27 23:30:47 +0000709 PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
710 "in format string");
Neal Norwitz3ef6a572007-08-25 17:08:59 +0000711 return 0;
712 }
Eric Smith8c663262007-08-25 02:26:07 +0000713 if (!at_end) {
714 if (c == *self->str.ptr) {
715 /* escaped } or {, skip it in the input */
716 self->str.ptr++;
717 self->in_markup = 0;
718 } else
719 len--;
720 }
721
722 /* this is just plain text, return it */
723 literal->ptr = start;
724 literal->end = start + len;
725 return 2;
726 }
727}
728
729
730/* do the !r or !s conversion on obj */
731static PyObject *
732do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
733{
734 /* XXX in pre-3.0, do we need to convert this to unicode, since it
735 might have returned a string? */
736 switch (conversion) {
737 case 'r':
738 return PyObject_Repr(obj);
739 case 's':
740 return PyObject_Unicode(obj);
741 default:
742 PyErr_Format(PyExc_ValueError,
743 "Unknown converion specifier %c",
744 conversion);
745 return NULL;
746 }
747}
748
749/* given:
750
751 {field_name!conversion:format_spec}
752
753 compute the result and write it to output.
754 format_spec_needs_expanding is an optimization. if it's false,
755 just output the string directly, otherwise recursively expand the
756 format_spec string. */
757
758static int
759output_markup(SubString *field_name, SubString *format_spec,
760 int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
761 OutputString *output, PyObject *args, PyObject *kwargs,
762 int *recursion_level)
763{
764 PyObject *tmp = NULL;
765 PyObject *fieldobj = NULL;
766 SubString expanded_format_spec;
767 SubString *actual_format_spec;
768 int result = 0;
769
770 /* convert field_name to an object */
771 fieldobj = get_field_object(field_name, args, kwargs);
772 if (fieldobj == NULL)
773 goto done;
774
775 if (conversion != '\0') {
776 tmp = do_conversion(fieldobj, conversion);
777 if (tmp == NULL)
778 goto done;
779
780 /* do the assignment, transferring ownership: fieldobj = tmp */
781 Py_DECREF(fieldobj);
782 fieldobj = tmp;
783 tmp = NULL;
784 }
785
786 /* if needed, recurively compute the format_spec */
787 if (format_spec_needs_expanding) {
788 tmp = build_string(format_spec, args, kwargs, recursion_level);
789 if (tmp == NULL)
790 goto done;
791
792 /* note that in the case we're expanding the format string,
793 tmp must be kept around until after the call to
794 render_field. */
795 SubString_init(&expanded_format_spec,
796 STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
797 actual_format_spec = &expanded_format_spec;
798 } else
799 actual_format_spec = format_spec;
800
801 if (render_field(fieldobj, actual_format_spec, output) == 0)
802 goto done;
803
804 result = 1;
805
806done:
807 Py_XDECREF(fieldobj);
808 Py_XDECREF(tmp);
809
810 return result;
811}
812
813/*
814 do_markup is the top-level loop for the format() function. It
815 searches through the format string for escapes to markup codes, and
816 calls other functions to move non-markup text to the output,
817 and to perform the markup to the output.
818*/
819static int
820do_markup(SubString *input, PyObject *args, PyObject *kwargs,
821 OutputString *output, int *recursion_level)
822{
823 MarkupIterator iter;
824 int is_markup;
825 int format_spec_needs_expanding;
826 int result;
827 SubString str;
828 SubString field_name;
829 SubString format_spec;
830 STRINGLIB_CHAR conversion;
831
832 MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
833 while ((result = MarkupIterator_next(&iter, &is_markup, &str, &field_name,
834 &format_spec, &conversion,
835 &format_spec_needs_expanding)) == 2) {
836 if (is_markup) {
837 if (!output_markup(&field_name, &format_spec,
838 format_spec_needs_expanding, conversion, output,
839 args, kwargs, recursion_level))
840 return 0;
841 } else {
842 if (!output_data(output, str.ptr, str.end-str.ptr))
843 return 0;
844 }
845 }
846 return result;
847}
848
849
850/*
851 build_string allocates the output string and then
852 calls do_markup to do the heavy lifting.
853*/
854static PyObject *
855build_string(SubString *input, PyObject *args, PyObject *kwargs,
856 int *recursion_level)
857{
858 OutputString output;
859 PyObject *result = NULL;
860 Py_ssize_t count;
861
862 output.obj = NULL; /* needed so cleanup code always works */
863
864 /* check the recursion level */
865 (*recursion_level)--;
866 if (*recursion_level < 0) {
867 PyErr_SetString(PyExc_ValueError,
868 "Max string recursion exceeded");
869 goto done;
870 }
871
872 /* initial size is the length of the format string, plus the size
873 increment. seems like a reasonable default */
874 if (!output_initialize(&output,
875 input->end - input->ptr +
876 INITIAL_SIZE_INCREMENT))
877 goto done;
878
879 if (!do_markup(input, args, kwargs, &output, recursion_level)) {
880 goto done;
881 }
882
883 count = output.ptr - STRINGLIB_STR(output.obj);
884 if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
885 goto done;
886 }
887
888 /* transfer ownership to result */
889 result = output.obj;
890 output.obj = NULL;
891
892done:
893 (*recursion_level)++;
894 Py_XDECREF(output.obj);
895 return result;
896}
897
898/************************************************************************/
899/*********** main routine ***********************************************/
900/************************************************************************/
901
902/* this is the main entry point */
903static PyObject *
904do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
905{
906 SubString input;
907
908 /* PEP 3101 says only 2 levels, so that
909 "{0:{1}}".format('abc', 's') # works
910 "{0:{1:{2}}}".format('abc', 's', '') # fails
911 */
912 int recursion_level = 2;
913
914 SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
915 return build_string(&input, args, kwargs, &recursion_level);
916}
Eric Smithf6db4092007-08-27 23:52:26 +0000917
918
919
920/************************************************************************/
921/*********** formatteriterator ******************************************/
922/************************************************************************/
923
/* This is used to implement string.Formatter.vparse().  It exists so
   Formatter can share code with the built in unicode.format() method.
   It's really just a wrapper around MarkupIterator that is callable
   from Python. */

typedef struct {
    PyObject_HEAD

    /* the string being parsed; a reference is taken in
       formatter_parser() and released in formatteriter_dealloc() */
    PyUnicodeObject *str;

    /* parsing state; its pointers refer to the characters of str */
    MarkupIterator it_markup;
} formatteriterobject;
936
/* Deallocator for formatteriterobject: release the reference to the
   string being parsed, then free the iterator itself. */
static void
formatteriter_dealloc(formatteriterobject *it)
{
    Py_XDECREF(it->str);
    PyObject_FREE(it);
}
943
944/* returns a tuple:
945 (is_markup, literal, field_name, format_spec, conversion)
946 if is_markup == True:
947 literal is None
948 field_name is the string before the ':'
949 format_spec is the string after the ':'
950 conversion is either None, or the string after the '!'
951 if is_markup == False:
952 literal is the literal string
953 field_name is None
954 format_spec is None
955 conversion is None
956*/
957static PyObject *
958formatteriter_next(formatteriterobject *it)
959{
960 SubString literal;
961 SubString field_name;
962 SubString format_spec;
963 Py_UNICODE conversion;
964 int is_markup;
965 int format_spec_needs_expanding;
966 int result = MarkupIterator_next(&it->it_markup, &is_markup, &literal,
967 &field_name, &format_spec, &conversion,
968 &format_spec_needs_expanding);
969
970 /* all of the SubString objects point into it->str, so no
971 memory management needs to be done on them */
972 assert(0 <= result && result <= 2);
973 if (result == 0 || result == 1) {
974 /* if 0, error has already been set, if 1, iterator is empty */
975 return NULL;
976 } else {
977 PyObject *is_markup_bool = NULL;
978 PyObject *literal_str = NULL;
979 PyObject *field_name_str = NULL;
980 PyObject *format_spec_str = NULL;
981 PyObject *conversion_str = NULL;
982 PyObject *tuple = NULL;
983
984 is_markup_bool = PyBool_FromLong(is_markup);
985 if (!is_markup_bool)
986 return NULL;
987
988 if (is_markup) {
989 /* field_name, format_spec, and conversion are returned */
990 literal_str = Py_None;
991 Py_INCREF(literal_str);
992
993 field_name_str = SubString_new_object(&field_name);
994 if (field_name_str == NULL)
995 goto error;
996
997 format_spec_str = SubString_new_object(&format_spec);
998 if (format_spec_str == NULL)
999 goto error;
1000
1001 /* if the conversion is not specified, return a None,
1002 otherwise create a one length string with the
1003 conversion characater */
1004 if (conversion == '\0') {
1005 conversion_str = Py_None;
1006 Py_INCREF(conversion_str);
1007 } else
1008 conversion_str = PyUnicode_FromUnicode(&conversion,
1009 1);
1010 if (conversion_str == NULL)
1011 goto error;
1012 } else {
1013 /* only literal is returned */
1014 literal_str = SubString_new_object(&literal);
1015 if (literal_str == NULL)
1016 goto error;
1017
1018 field_name_str = Py_None;
1019 format_spec_str = Py_None;
1020 conversion_str = Py_None;
1021
1022 Py_INCREF(field_name_str);
1023 Py_INCREF(format_spec_str);
1024 Py_INCREF(conversion_str);
1025 }
1026 tuple = PyTuple_Pack(5, is_markup_bool, literal_str,
1027 field_name_str, format_spec_str,
1028 conversion_str);
1029 error:
1030 Py_XDECREF(is_markup_bool);
1031 Py_XDECREF(literal_str);
1032 Py_XDECREF(field_name_str);
1033 Py_XDECREF(format_spec_str);
1034 Py_XDECREF(conversion_str);
1035 return tuple;
1036 }
1037}
1038
/* Method table for PyFormatterIter_Type.  It holds nothing but the
   terminating sentinel entry. */
static PyMethodDef formatteriter_methods[] = {
    {NULL,              NULL}           /* sentinel */
};
1042
/* Type object for formatteriterobject.  Iterating returns the object
   itself (PyObject_SelfIter) and each step calls formatteriter_next;
   instances are created by formatter_parser() and destroyed by
   formatteriter_dealloc. */
PyTypeObject PyFormatterIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "formatteriterator",                /* tp_name */
    sizeof(formatteriterobject),        /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /* methods */
    (destructor)formatteriter_dealloc,  /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    0,                                  /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    PyObject_SelfIter,                  /* tp_iter */
    (iternextfunc)formatteriter_next,   /* tp_iternext */
    formatteriter_methods,              /* tp_methods */
    0,  /* slot following tp_methods; presumably tp_members -- confirm
           against the PyTypeObject layout in use */
};
1075
1076/* unicode_formatter_parser is used to implement
1077 string.Formatter.vformat. it parses a string and returns tuples
1078 describing the parsed elements. It's a wrapper around
1079 stringlib/string_format.h's MarkupIterator */
1080static PyObject *
1081formatter_parser(PyUnicodeObject *self)
1082{
1083 formatteriterobject *it;
1084
1085 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1086 if (it == NULL)
1087 return NULL;
1088
1089 /* take ownership, give the object to the iterator */
1090 Py_INCREF(self);
1091 it->str = self;
1092
1093 /* initialize the contained MarkupIterator */
1094 MarkupIterator_init(&it->it_markup,
1095 PyUnicode_AS_UNICODE(self),
1096 PyUnicode_GET_SIZE(self));
1097
1098 return (PyObject *)it;
1099}
1100
1101
1102/************************************************************************/
1103/*********** fieldnameiterator ******************************************/
1104/************************************************************************/
1105
1106
1107/* This is used to implement string.Formatter.vparse(). It parses the
1108 field name into attribute and item values. It's a Python-callable
1109 wrapper around FieldNameIterator */
1110
1111typedef struct {
1112 PyObject_HEAD
1113
1114 PyUnicodeObject *str;
1115
1116 FieldNameIterator it_field;
1117} fieldnameiterobject;
1118
1119static void
1120fieldnameiter_dealloc(fieldnameiterobject *it)
1121{
1122 Py_XDECREF(it->str);
1123 PyObject_FREE(it);
1124}
1125
1126/* returns a tuple:
1127 (is_attr, value)
1128 is_attr is true if we used attribute syntax (e.g., '.foo')
1129 false if we used index syntax (e.g., '[foo]')
1130 value is an integer or string
1131*/
1132static PyObject *
1133fieldnameiter_next(fieldnameiterobject *it)
1134{
1135 int result;
1136 int is_attr;
1137 Py_ssize_t idx;
1138 SubString name;
1139
1140 result = FieldNameIterator_next(&it->it_field, &is_attr,
1141 &idx, &name);
1142 if (result == 0 || result == 1) {
1143 /* if 0, error has already been set, if 1, iterator is empty */
1144 return NULL;
1145 } else {
1146 PyObject* result = NULL;
1147 PyObject* is_attr_obj = NULL;
1148 PyObject* obj = NULL;
1149
1150 is_attr_obj = PyBool_FromLong(is_attr);
1151 if (is_attr_obj == NULL)
1152 goto error;
1153
1154 /* either an integer or a string */
1155 if (idx != -1)
1156 obj = PyInt_FromSsize_t(idx);
1157 else
1158 obj = SubString_new_object(&name);
1159 if (obj == NULL)
1160 goto error;
1161
1162 /* return a tuple of values */
1163 result = PyTuple_Pack(2, is_attr_obj, obj);
1164 if (result == NULL)
1165 goto error;
1166
1167 return result;
1168
1169 error:
1170 Py_XDECREF(result);
1171 Py_XDECREF(is_attr_obj);
1172 Py_XDECREF(obj);
1173 return NULL;
1174 }
1175 return NULL;
1176}
1177
1178static PyMethodDef fieldnameiter_methods[] = {
1179 {NULL, NULL} /* sentinel */
1180};
1181
1182static PyTypeObject PyFieldNameIter_Type = {
1183 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1184 "fieldnameiterator", /* tp_name */
1185 sizeof(fieldnameiterobject), /* tp_basicsize */
1186 0, /* tp_itemsize */
1187 /* methods */
1188 (destructor)fieldnameiter_dealloc, /* tp_dealloc */
1189 0, /* tp_print */
1190 0, /* tp_getattr */
1191 0, /* tp_setattr */
1192 0, /* tp_compare */
1193 0, /* tp_repr */
1194 0, /* tp_as_number */
1195 0, /* tp_as_sequence */
1196 0, /* tp_as_mapping */
1197 0, /* tp_hash */
1198 0, /* tp_call */
1199 0, /* tp_str */
1200 PyObject_GenericGetAttr, /* tp_getattro */
1201 0, /* tp_setattro */
1202 0, /* tp_as_buffer */
1203 Py_TPFLAGS_DEFAULT, /* tp_flags */
1204 0, /* tp_doc */
1205 0, /* tp_traverse */
1206 0, /* tp_clear */
1207 0, /* tp_richcompare */
1208 0, /* tp_weaklistoffset */
1209 PyObject_SelfIter, /* tp_iter */
1210 (iternextfunc)fieldnameiter_next, /* tp_iternext */
1211 fieldnameiter_methods, /* tp_methods */
1212 0};
1213
1214/* unicode_formatter_field_name_split is used to implement
1215 string.Formatter.vformat. it takes an PEP 3101 "field name", and
1216 returns a tuple of (first, rest): "first", the part before the
1217 first '.' or '['; and "rest", an iterator for the rest of the field
1218 name. it's a wrapper around stringlib/string_format.h's
1219 field_name_split. The iterator it returns is a
1220 FieldNameIterator */
1221static PyObject *
1222formatter_field_name_split(PyUnicodeObject *self)
1223{
1224 SubString first;
1225 Py_ssize_t first_idx;
1226 fieldnameiterobject *it;
1227
1228 PyObject *first_obj = NULL;
1229 PyObject *result = NULL;
1230
1231 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1232 if (it == NULL)
1233 return NULL;
1234
1235 /* take ownership, give the object to the iterator. this is
1236 just to keep the field_name alive */
1237 Py_INCREF(self);
1238 it->str = self;
1239
1240 if (!field_name_split(STRINGLIB_STR(self),
1241 STRINGLIB_LEN(self),
1242 &first, &first_idx, &it->it_field))
1243 goto error;
1244
1245 /* first becomes an integer, if possible, else a string */
1246 if (first_idx != -1)
1247 first_obj = PyInt_FromSsize_t(first_idx);
1248 else
1249 /* convert "first" into a string object */
1250 first_obj = SubString_new_object(&first);
1251 if (first_obj == NULL)
1252 goto error;
1253
1254 /* return a tuple of values */
1255 result = PyTuple_Pack(2, first_obj, it);
1256
1257error:
1258 Py_XDECREF(it);
1259 Py_XDECREF(first_obj);
1260 return result;
1261}