blob: de700f618bd4326804ed2a8b8d9112d86e700dc4 [file] [log] [blame]
Eric Smith8c663262007-08-25 02:26:07 +00001/*
2 string_format.h -- implementation of string.format().
3
4 It uses the Objects/stringlib conventions, so that it can be
5 compiled for both unicode and string objects.
6*/
7
8
/* Tuning constants for growing the output string buffer:
     INITIAL_SIZE_INCREMENT  starting amount of slack (in characters)
     SIZE_MULTIPLIER         factor applied to the slack after a
                             reallocation
     MAX_SIZE_INCREMENT      the slack stops growing once it reaches
                             this value */
#define INITIAL_SIZE_INCREMENT 100
#define SIZE_MULTIPLIER 2
#define MAX_SIZE_INCREMENT 3200
13
14
15/************************************************************************/
16/*********** Global data structures and forward declarations *********/
17/************************************************************************/
18
19/*
20 A SubString consists of the characters between two string or
21 unicode pointers.
22*/
23typedef struct {
24 STRINGLIB_CHAR *ptr;
25 STRINGLIB_CHAR *end;
26} SubString;
27
28
29/* forward declaration for recursion */
30static PyObject *
31build_string(SubString *input, PyObject *args, PyObject *kwargs,
32 int *recursion_level);
33
34
35
36/************************************************************************/
37/************************** Utility functions ************************/
38/************************************************************************/
39
40/* fill in a SubString from a pointer and length */
41Py_LOCAL_INLINE(void)
42SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
43{
44 str->ptr = p;
45 if (p == NULL)
46 str->end = NULL;
47 else
48 str->end = str->ptr + len;
49}
50
Eric Smith625cbf22007-08-29 03:22:59 +000051/* return a new string. if str->ptr is NULL, return None */
Eric Smith8c663262007-08-25 02:26:07 +000052Py_LOCAL_INLINE(PyObject *)
53SubString_new_object(SubString *str)
54{
Eric Smith625cbf22007-08-29 03:22:59 +000055 if (str->ptr == NULL) {
56 Py_INCREF(Py_None);
57 return Py_None;
58 }
59 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
60}
61
62/* return a new string. if str->ptr is NULL, return None */
63Py_LOCAL_INLINE(PyObject *)
64SubString_new_object_or_empty(SubString *str)
65{
66 if (str->ptr == NULL) {
67 return STRINGLIB_NEW(NULL, 0);
68 }
Eric Smith8c663262007-08-25 02:26:07 +000069 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
70}
71
72/************************************************************************/
Eric Smith8c663262007-08-25 02:26:07 +000073/*********** Output string management functions ****************/
74/************************************************************************/
75
76typedef struct {
77 STRINGLIB_CHAR *ptr;
78 STRINGLIB_CHAR *end;
79 PyObject *obj;
80 Py_ssize_t size_increment;
81} OutputString;
82
83/* initialize an OutputString object, reserving size characters */
84static int
85output_initialize(OutputString *output, Py_ssize_t size)
86{
87 output->obj = STRINGLIB_NEW(NULL, size);
88 if (output->obj == NULL)
89 return 0;
90
91 output->ptr = STRINGLIB_STR(output->obj);
92 output->end = STRINGLIB_LEN(output->obj) + output->ptr;
93 output->size_increment = INITIAL_SIZE_INCREMENT;
94
95 return 1;
96}
97
98/*
99 output_extend reallocates the output string buffer.
100 It returns a status: 0 for a failed reallocation,
101 1 for success.
102*/
103
104static int
105output_extend(OutputString *output, Py_ssize_t count)
106{
107 STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
108 Py_ssize_t curlen = output->ptr - startptr;
109 Py_ssize_t maxlen = curlen + count + output->size_increment;
110
111 if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
112 return 0;
113 startptr = STRINGLIB_STR(output->obj);
114 output->ptr = startptr + curlen;
115 output->end = startptr + maxlen;
116 if (output->size_increment < MAX_SIZE_INCREMENT)
117 output->size_increment *= SIZE_MULTIPLIER;
118 return 1;
119}
120
121/*
122 output_data dumps characters into our output string
123 buffer.
124
125 In some cases, it has to reallocate the string.
126
127 It returns a status: 0 for a failed reallocation,
128 1 for success.
129*/
130static int
131output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
132{
133 if ((count > output->end - output->ptr) && !output_extend(output, count))
134 return 0;
135 memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
136 output->ptr += count;
137 return 1;
138}
139
140/************************************************************************/
141/*********** Format string parsing -- integers and identifiers *********/
142/************************************************************************/
143
Eric Smith7ade6482007-08-26 22:27:13 +0000144static Py_ssize_t
145get_integer(const SubString *str)
Eric Smith8c663262007-08-25 02:26:07 +0000146{
Eric Smith7ade6482007-08-26 22:27:13 +0000147 Py_ssize_t accumulator = 0;
148 Py_ssize_t digitval;
149 Py_ssize_t oldaccumulator;
150 STRINGLIB_CHAR *p;
Eric Smith8c663262007-08-25 02:26:07 +0000151
Eric Smith7ade6482007-08-26 22:27:13 +0000152 /* empty string is an error */
153 if (str->ptr >= str->end)
154 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000155
Eric Smith7ade6482007-08-26 22:27:13 +0000156 for (p = str->ptr; p < str->end; p++) {
157 digitval = STRINGLIB_TODECIMAL(*p);
Eric Smith8c663262007-08-25 02:26:07 +0000158 if (digitval < 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000159 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000160 /*
161 This trick was copied from old Unicode format code. It's cute,
162 but would really suck on an old machine with a slow divide
163 implementation. Fortunately, in the normal case we do not
164 expect too many digits.
165 */
166 oldaccumulator = accumulator;
167 accumulator *= 10;
168 if ((accumulator+10)/10 != oldaccumulator+1) {
169 PyErr_Format(PyExc_ValueError,
170 "Too many decimal digits in format string");
171 return -1;
172 }
173 accumulator += digitval;
174 }
Eric Smith7ade6482007-08-26 22:27:13 +0000175 return accumulator;
Eric Smith8c663262007-08-25 02:26:07 +0000176}
177
178/************************************************************************/
179/******** Functions to get field objects and specification strings ******/
180/************************************************************************/
181
Eric Smith7ade6482007-08-26 22:27:13 +0000182/* do the equivalent of obj.name */
Eric Smith8c663262007-08-25 02:26:07 +0000183static PyObject *
Eric Smith7ade6482007-08-26 22:27:13 +0000184getattr(PyObject *obj, SubString *name)
Eric Smith8c663262007-08-25 02:26:07 +0000185{
Eric Smith7ade6482007-08-26 22:27:13 +0000186 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000187 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000188 if (str == NULL)
189 return NULL;
190 newobj = PyObject_GetAttr(obj, str);
191 Py_DECREF(str);
192 return newobj;
Eric Smith8c663262007-08-25 02:26:07 +0000193}
194
Eric Smith7ade6482007-08-26 22:27:13 +0000195/* do the equivalent of obj[idx], where obj is a sequence */
196static PyObject *
197getitem_sequence(PyObject *obj, Py_ssize_t idx)
198{
199 return PySequence_GetItem(obj, idx);
200}
201
202/* do the equivalent of obj[idx], where obj is not a sequence */
203static PyObject *
204getitem_idx(PyObject *obj, Py_ssize_t idx)
205{
206 PyObject *newobj;
207 PyObject *idx_obj = PyInt_FromSsize_t(idx);
208 if (idx_obj == NULL)
209 return NULL;
210 newobj = PyObject_GetItem(obj, idx_obj);
211 Py_DECREF(idx_obj);
212 return newobj;
213}
214
215/* do the equivalent of obj[name] */
216static PyObject *
217getitem_str(PyObject *obj, SubString *name)
218{
219 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000220 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000221 if (str == NULL)
222 return NULL;
223 newobj = PyObject_GetItem(obj, str);
224 Py_DECREF(str);
225 return newobj;
226}
227
228typedef struct {
229 /* the entire string we're parsing. we assume that someone else
230 is managing its lifetime, and that it will exist for the
231 lifetime of the iterator. can be empty */
232 SubString str;
233
234 /* pointer to where we are inside field_name */
235 STRINGLIB_CHAR *ptr;
236} FieldNameIterator;
237
238
239static int
240FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
241 Py_ssize_t len)
242{
243 SubString_init(&self->str, ptr, len);
244 self->ptr = self->str.ptr;
245 return 1;
246}
247
248static int
249_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
250{
251 STRINGLIB_CHAR c;
252
253 name->ptr = self->ptr;
254
255 /* return everything until '.' or '[' */
256 while (self->ptr < self->str.end) {
257 switch (c = *self->ptr++) {
258 case '[':
259 case '.':
260 /* backup so that we this character will be seen next time */
261 self->ptr--;
262 break;
263 default:
264 continue;
265 }
266 break;
267 }
268 /* end of string is okay */
269 name->end = self->ptr;
270 return 1;
271}
272
273static int
274_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
275{
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000276 int bracket_seen = 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000277 STRINGLIB_CHAR c;
278
279 name->ptr = self->ptr;
280
281 /* return everything until ']' */
282 while (self->ptr < self->str.end) {
283 switch (c = *self->ptr++) {
284 case ']':
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000285 bracket_seen = 1;
Eric Smith7ade6482007-08-26 22:27:13 +0000286 break;
287 default:
288 continue;
289 }
290 break;
291 }
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000292 /* make sure we ended with a ']' */
293 if (!bracket_seen) {
294 PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
295 return 0;
296 }
297
Eric Smith7ade6482007-08-26 22:27:13 +0000298 /* end of string is okay */
299 /* don't include the ']' */
300 name->end = self->ptr-1;
301 return 1;
302}
303
304/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
305static int
306FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
307 Py_ssize_t *name_idx, SubString *name)
308{
309 /* check at end of input */
310 if (self->ptr >= self->str.end)
311 return 1;
312
313 switch (*self->ptr++) {
314 case '.':
315 *is_attribute = 1;
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000316 if (_FieldNameIterator_attr(self, name) == 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000317 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000318 *name_idx = -1;
319 break;
320 case '[':
321 *is_attribute = 0;
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000322 if (_FieldNameIterator_item(self, name) == 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000323 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000324 *name_idx = get_integer(name);
325 break;
326 default:
327 /* interal error, can't get here */
328 assert(0);
329 return 0;
330 }
331
332 /* empty string is an error */
333 if (name->ptr == name->end) {
334 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
335 return 0;
336 }
337
338 return 2;
339}
340
341
342/* input: field_name
343 output: 'first' points to the part before the first '[' or '.'
344 'first_idx' is -1 if 'first' is not an integer, otherwise
345 it's the value of first converted to an integer
346 'rest' is an iterator to return the rest
347*/
348static int
349field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
350 Py_ssize_t *first_idx, FieldNameIterator *rest)
351{
352 STRINGLIB_CHAR c;
353 STRINGLIB_CHAR *p = ptr;
354 STRINGLIB_CHAR *end = ptr + len;
355
356 /* find the part up until the first '.' or '[' */
357 while (p < end) {
358 switch (c = *p++) {
359 case '[':
360 case '.':
361 /* backup so that we this character is available to the
362 "rest" iterator */
363 p--;
364 break;
365 default:
366 continue;
367 }
368 break;
369 }
370
371 /* set up the return values */
372 SubString_init(first, ptr, p - ptr);
373 FieldNameIterator_init(rest, p, end - p);
374
375 /* see if "first" is an integer, in which case it's used as an index */
376 *first_idx = get_integer(first);
377
378 /* zero length string is an error */
379 if (first->ptr >= first->end) {
380 PyErr_SetString(PyExc_ValueError, "empty field name");
381 goto error;
382 }
383
384 return 1;
385error:
386 return 0;
387}
388
389
Eric Smith8c663262007-08-25 02:26:07 +0000390/*
391 get_field_object returns the object inside {}, before the
392 format_spec. It handles getindex and getattr lookups and consumes
393 the entire input string.
394*/
395static PyObject *
396get_field_object(SubString *input, PyObject *args, PyObject *kwargs)
397{
Eric Smith7ade6482007-08-26 22:27:13 +0000398 PyObject *obj = NULL;
399 int ok;
400 int is_attribute;
401 SubString name;
402 SubString first;
Eric Smith8c663262007-08-25 02:26:07 +0000403 Py_ssize_t index;
Eric Smith7ade6482007-08-26 22:27:13 +0000404 FieldNameIterator rest;
Eric Smith8c663262007-08-25 02:26:07 +0000405
Eric Smith7ade6482007-08-26 22:27:13 +0000406 if (!field_name_split(input->ptr, input->end - input->ptr, &first,
407 &index, &rest)) {
408 goto error;
409 }
Eric Smith8c663262007-08-25 02:26:07 +0000410
Eric Smith7ade6482007-08-26 22:27:13 +0000411 if (index == -1) {
412 /* look up in kwargs */
Eric Smith7a6dd292007-08-27 23:30:47 +0000413 PyObject *key = SubString_new_object(&first);
Eric Smith7ade6482007-08-26 22:27:13 +0000414 if (key == NULL)
415 goto error;
416 if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
417 PyErr_SetString(PyExc_ValueError, "Keyword argument not found "
418 "in format string");
419 Py_DECREF(key);
420 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000421 }
Neal Norwitz8a4eb292007-08-27 07:24:17 +0000422 Py_DECREF(key);
Neal Norwitz247b5152007-08-27 03:22:50 +0000423 Py_INCREF(obj);
Eric Smith0cb431c2007-08-28 01:07:27 +0000424 }
425 else {
Eric Smith7ade6482007-08-26 22:27:13 +0000426 /* look up in args */
427 obj = PySequence_GetItem(args, index);
428 if (obj == NULL) {
429 /* translate IndexError to a ValueError */
430 PyErr_SetString(PyExc_ValueError, "Not enough positional arguments "
431 "in format string");
432 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000433 }
434 }
Eric Smith7ade6482007-08-26 22:27:13 +0000435
436 /* iterate over the rest of the field_name */
437 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
438 &name)) == 2) {
439 PyObject *tmp;
440
441 if (is_attribute)
442 /* getattr lookup "." */
443 tmp = getattr(obj, &name);
444 else
445 /* getitem lookup "[]" */
446 if (index == -1)
447 tmp = getitem_str(obj, &name);
448 else
449 if (PySequence_Check(obj))
450 tmp = getitem_sequence(obj, index);
451 else
452 /* not a sequence */
453 tmp = getitem_idx(obj, index);
454 if (tmp == NULL)
455 goto error;
456
457 /* assign to obj */
458 Py_DECREF(obj);
459 obj = tmp;
Eric Smith8c663262007-08-25 02:26:07 +0000460 }
Eric Smith7ade6482007-08-26 22:27:13 +0000461 /* end of iterator, this is the non-error case */
462 if (ok == 1)
463 return obj;
464error:
465 Py_XDECREF(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000466 return NULL;
467}
468
469/************************************************************************/
470/***************** Field rendering functions **************************/
471/************************************************************************/
472
473/*
474 render_field() is the main function in this section. It takes the
475 field object and field specification string generated by
476 get_field_and_spec, and renders the field into the output string.
477
478 format() does the actual calling of the objects __format__ method.
479*/
480
481
482/* returns fieldobj.__format__(format_spec) */
483static PyObject *
484format(PyObject *fieldobj, SubString *format_spec)
485{
486 static PyObject *format_str = NULL;
487 PyObject *meth;
488 PyObject *spec = NULL;
489 PyObject *result = NULL;
490
491 /* Initialize cached value */
492 if (format_str == NULL) {
493 /* Initialize static variable needed by _PyType_Lookup */
494 format_str = PyUnicode_FromString("__format__");
495 if (format_str == NULL)
496 return NULL;
497 }
498
499 /* Make sure the type is initialized. float gets initialized late */
500 if (Py_Type(fieldobj)->tp_dict == NULL)
501 if (PyType_Ready(Py_Type(fieldobj)) < 0)
502 return NULL;
503
504 /* we need to create an object out of the pointers we have */
Eric Smith625cbf22007-08-29 03:22:59 +0000505 spec = SubString_new_object_or_empty(format_spec);
Eric Smith8c663262007-08-25 02:26:07 +0000506 if (spec == NULL)
507 goto done;
508
509 /* Find the (unbound!) __format__ method (a borrowed reference) */
510 meth = _PyType_Lookup(Py_Type(fieldobj), format_str);
511 if (meth == NULL) {
512 PyErr_Format(PyExc_TypeError,
513 "Type %.100s doesn't define __format__",
514 Py_Type(fieldobj)->tp_name);
515 goto done;
516 }
517
518 /* And call it, binding it to the value */
519 result = PyObject_CallFunctionObjArgs(meth, fieldobj, spec, NULL);
520 if (result == NULL)
521 goto done;
522
523 if (!STRINGLIB_CHECK(result)) {
524 PyErr_SetString(PyExc_TypeError,
525 "__format__ method did not return "
526 STRINGLIB_TYPE_NAME);
527 Py_DECREF(result);
528 result = NULL;
529 goto done;
530 }
531
532done:
533 Py_XDECREF(spec);
534 return result;
535}
536
537/*
538 render_field calls fieldobj.__format__(format_spec) method, and
539 appends to the output.
540*/
541static int
542render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
543{
544 int ok = 0;
545 PyObject *result = format(fieldobj, format_spec);
546
547 if (result == NULL)
548 goto done;
549
550 ok = output_data(output,
551 STRINGLIB_STR(result), STRINGLIB_LEN(result));
552done:
553 Py_XDECREF(result);
554 return ok;
555}
556
557static int
558parse_field(SubString *str, SubString *field_name, SubString *format_spec,
559 STRINGLIB_CHAR *conversion)
560{
561 STRINGLIB_CHAR c = 0;
562
563 /* initialize these, as they may be empty */
564 *conversion = '\0';
565 SubString_init(format_spec, NULL, 0);
566
567 /* search for the field name. it's terminated by the end of the
568 string, or a ':' or '!' */
569 field_name->ptr = str->ptr;
570 while (str->ptr < str->end) {
571 switch (c = *(str->ptr++)) {
572 case ':':
573 case '!':
574 break;
575 default:
576 continue;
577 }
578 break;
579 }
580
581 if (c == '!' || c == ':') {
582 /* we have a format specifier and/or a conversion */
583 /* don't include the last character */
584 field_name->end = str->ptr-1;
585
586 /* the format specifier is the rest of the string */
587 format_spec->ptr = str->ptr;
588 format_spec->end = str->end;
589
590 /* see if there's a conversion specifier */
591 if (c == '!') {
592 /* there must be another character present */
593 if (format_spec->ptr >= format_spec->end) {
594 PyErr_SetString(PyExc_ValueError,
595 "end of format while looking for conversion "
596 "specifier");
597 return 0;
598 }
599 *conversion = *(format_spec->ptr++);
600
601 /* if there is another character, it must be a colon */
602 if (format_spec->ptr < format_spec->end) {
603 c = *(format_spec->ptr++);
604 if (c != ':') {
605 PyErr_SetString(PyExc_ValueError,
606 "expected ':' after format specifier");
607 return 0;
608 }
609 }
610 }
611
612 return 1;
613
Eric Smith0cb431c2007-08-28 01:07:27 +0000614 }
615 else {
Eric Smith8c663262007-08-25 02:26:07 +0000616 /* end of string, there's no format_spec or conversion */
617 field_name->end = str->ptr;
618 return 1;
619 }
620}
621
622/************************************************************************/
623/******* Output string allocation and escape-to-markup processing ******/
624/************************************************************************/
625
626/* MarkupIterator breaks the string into pieces of either literal
627 text, or things inside {} that need to be marked up. it is
628 designed to make it easy to wrap a Python iterator around it, for
629 use with the Formatter class */
630
631typedef struct {
632 SubString str;
Eric Smith8c663262007-08-25 02:26:07 +0000633} MarkupIterator;
634
635static int
636MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
637{
638 SubString_init(&self->str, ptr, len);
Eric Smith8c663262007-08-25 02:26:07 +0000639 return 1;
640}
641
642/* returns 0 on error, 1 on non-error termination, and 2 if it got a
643 string (or something to be expanded) */
644static int
Eric Smith625cbf22007-08-29 03:22:59 +0000645MarkupIterator_next(MarkupIterator *self, SubString *literal,
Eric Smith8c663262007-08-25 02:26:07 +0000646 SubString *field_name, SubString *format_spec,
647 STRINGLIB_CHAR *conversion,
648 int *format_spec_needs_expanding)
649{
650 int at_end;
651 STRINGLIB_CHAR c = 0;
652 STRINGLIB_CHAR *start;
653 int count;
654 Py_ssize_t len;
Eric Smith625cbf22007-08-29 03:22:59 +0000655 int markup_follows = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000656
Eric Smith625cbf22007-08-29 03:22:59 +0000657 /* initialize all of the output variables */
658 SubString_init(literal, NULL, 0);
659 SubString_init(field_name, NULL, 0);
660 SubString_init(format_spec, NULL, 0);
661 *conversion = '\0';
Eric Smith8c663262007-08-25 02:26:07 +0000662 *format_spec_needs_expanding = 0;
663
Eric Smith625cbf22007-08-29 03:22:59 +0000664 /* No more input, end of iterator. This is the normal exit
665 path. */
Eric Smith8c663262007-08-25 02:26:07 +0000666 if (self->str.ptr >= self->str.end)
667 return 1;
668
Eric Smith8c663262007-08-25 02:26:07 +0000669 start = self->str.ptr;
670
Eric Smith625cbf22007-08-29 03:22:59 +0000671 /* First read any literal text. Read until the end of string, an
672 escaped '{' or '}', or an unescaped '{'. In order to never
673 allocate memory and so I can just pass pointers around, if
674 there's an escaped '{' or '}' then we'll return the literal
675 including the brace, but no format object. The next time
676 through, we'll return the rest of the literal, skipping past
677 the second consecutive brace. */
678 while (self->str.ptr < self->str.end) {
679 switch (c = *(self->str.ptr++)) {
680 case '{':
681 case '}':
682 markup_follows = 1;
683 break;
684 default:
685 continue;
Eric Smith8c663262007-08-25 02:26:07 +0000686 }
Eric Smith625cbf22007-08-29 03:22:59 +0000687 break;
Eric Smith0cb431c2007-08-28 01:07:27 +0000688 }
Eric Smith625cbf22007-08-29 03:22:59 +0000689
690 at_end = self->str.ptr >= self->str.end;
691 len = self->str.ptr - start;
692
693 if ((c == '}') && (at_end || (c != *self->str.ptr))) {
694 PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
695 "in format string");
696 return 0;
697 }
698 if (at_end && c == '{') {
699 PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
700 "in format string");
701 return 0;
702 }
703 if (!at_end) {
704 if (c == *self->str.ptr) {
705 /* escaped } or {, skip it in the input. there is no
706 markup object following us, just this literal text */
707 self->str.ptr++;
708 markup_follows = 0;
709 }
710 else
711 len--;
712 }
713
714 /* record the literal text */
715 literal->ptr = start;
716 literal->end = start + len;
717
718 if (!markup_follows)
719 return 2;
720
721 /* this is markup, find the end of the string by counting nested
722 braces. note that this prohibits escaped braces, so that
723 format_specs cannot have braces in them. */
724 count = 1;
725
726 start = self->str.ptr;
727
728 /* we know we can't have a zero length string, so don't worry
729 about that case */
730 while (self->str.ptr < self->str.end) {
731 switch (c = *(self->str.ptr++)) {
732 case '{':
733 /* the format spec needs to be recursively expanded.
734 this is an optimization, and not strictly needed */
735 *format_spec_needs_expanding = 1;
736 count++;
737 break;
738 case '}':
739 count--;
740 if (count <= 0) {
741 /* we're done. parse and get out */
742 SubString s;
743
744 SubString_init(&s, start, self->str.ptr - 1 - start);
745 if (parse_field(&s, field_name, format_spec, conversion) == 0)
746 return 0;
747
748 /* a zero length field_name is an error */
749 if (field_name->ptr == field_name->end) {
750 PyErr_SetString(PyExc_ValueError, "zero length field name "
751 "in format");
752 return 0;
753 }
754
755 /* success */
756 return 2;
Eric Smith8c663262007-08-25 02:26:07 +0000757 }
758 break;
759 }
Eric Smith8c663262007-08-25 02:26:07 +0000760 }
Eric Smith625cbf22007-08-29 03:22:59 +0000761
762 /* end of string while searching for matching '}' */
763 PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
764 return 0;
Eric Smith8c663262007-08-25 02:26:07 +0000765}
766
767
768/* do the !r or !s conversion on obj */
769static PyObject *
770do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
771{
772 /* XXX in pre-3.0, do we need to convert this to unicode, since it
773 might have returned a string? */
774 switch (conversion) {
775 case 'r':
776 return PyObject_Repr(obj);
777 case 's':
778 return PyObject_Unicode(obj);
779 default:
780 PyErr_Format(PyExc_ValueError,
781 "Unknown converion specifier %c",
782 conversion);
783 return NULL;
784 }
785}
786
787/* given:
788
789 {field_name!conversion:format_spec}
790
791 compute the result and write it to output.
792 format_spec_needs_expanding is an optimization. if it's false,
793 just output the string directly, otherwise recursively expand the
794 format_spec string. */
795
796static int
797output_markup(SubString *field_name, SubString *format_spec,
798 int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
799 OutputString *output, PyObject *args, PyObject *kwargs,
800 int *recursion_level)
801{
802 PyObject *tmp = NULL;
803 PyObject *fieldobj = NULL;
804 SubString expanded_format_spec;
805 SubString *actual_format_spec;
806 int result = 0;
807
808 /* convert field_name to an object */
809 fieldobj = get_field_object(field_name, args, kwargs);
810 if (fieldobj == NULL)
811 goto done;
812
813 if (conversion != '\0') {
814 tmp = do_conversion(fieldobj, conversion);
815 if (tmp == NULL)
816 goto done;
817
818 /* do the assignment, transferring ownership: fieldobj = tmp */
819 Py_DECREF(fieldobj);
820 fieldobj = tmp;
821 tmp = NULL;
822 }
823
824 /* if needed, recurively compute the format_spec */
825 if (format_spec_needs_expanding) {
826 tmp = build_string(format_spec, args, kwargs, recursion_level);
827 if (tmp == NULL)
828 goto done;
829
830 /* note that in the case we're expanding the format string,
831 tmp must be kept around until after the call to
832 render_field. */
833 SubString_init(&expanded_format_spec,
834 STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
835 actual_format_spec = &expanded_format_spec;
Eric Smith0cb431c2007-08-28 01:07:27 +0000836 }
837 else
Eric Smith8c663262007-08-25 02:26:07 +0000838 actual_format_spec = format_spec;
839
840 if (render_field(fieldobj, actual_format_spec, output) == 0)
841 goto done;
842
843 result = 1;
844
845done:
846 Py_XDECREF(fieldobj);
847 Py_XDECREF(tmp);
848
849 return result;
850}
851
852/*
853 do_markup is the top-level loop for the format() function. It
854 searches through the format string for escapes to markup codes, and
855 calls other functions to move non-markup text to the output,
856 and to perform the markup to the output.
857*/
858static int
859do_markup(SubString *input, PyObject *args, PyObject *kwargs,
860 OutputString *output, int *recursion_level)
861{
862 MarkupIterator iter;
Eric Smith8c663262007-08-25 02:26:07 +0000863 int format_spec_needs_expanding;
864 int result;
Eric Smith625cbf22007-08-29 03:22:59 +0000865 SubString literal;
Eric Smith8c663262007-08-25 02:26:07 +0000866 SubString field_name;
867 SubString format_spec;
868 STRINGLIB_CHAR conversion;
869
870 MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
Eric Smith625cbf22007-08-29 03:22:59 +0000871 while ((result = MarkupIterator_next(&iter, &literal, &field_name,
Eric Smith8c663262007-08-25 02:26:07 +0000872 &format_spec, &conversion,
873 &format_spec_needs_expanding)) == 2) {
Eric Smith625cbf22007-08-29 03:22:59 +0000874 if (!output_data(output, literal.ptr, literal.end - literal.ptr))
875 return 0;
876 if (field_name.ptr != field_name.end)
Eric Smith8c663262007-08-25 02:26:07 +0000877 if (!output_markup(&field_name, &format_spec,
878 format_spec_needs_expanding, conversion, output,
879 args, kwargs, recursion_level))
880 return 0;
Eric Smith8c663262007-08-25 02:26:07 +0000881 }
882 return result;
883}
884
885
886/*
887 build_string allocates the output string and then
888 calls do_markup to do the heavy lifting.
889*/
890static PyObject *
891build_string(SubString *input, PyObject *args, PyObject *kwargs,
892 int *recursion_level)
893{
894 OutputString output;
895 PyObject *result = NULL;
896 Py_ssize_t count;
897
898 output.obj = NULL; /* needed so cleanup code always works */
899
900 /* check the recursion level */
901 (*recursion_level)--;
902 if (*recursion_level < 0) {
903 PyErr_SetString(PyExc_ValueError,
904 "Max string recursion exceeded");
905 goto done;
906 }
907
908 /* initial size is the length of the format string, plus the size
909 increment. seems like a reasonable default */
910 if (!output_initialize(&output,
911 input->end - input->ptr +
912 INITIAL_SIZE_INCREMENT))
913 goto done;
914
915 if (!do_markup(input, args, kwargs, &output, recursion_level)) {
916 goto done;
917 }
918
919 count = output.ptr - STRINGLIB_STR(output.obj);
920 if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
921 goto done;
922 }
923
924 /* transfer ownership to result */
925 result = output.obj;
926 output.obj = NULL;
927
928done:
929 (*recursion_level)++;
930 Py_XDECREF(output.obj);
931 return result;
932}
933
934/************************************************************************/
935/*********** main routine ***********************************************/
936/************************************************************************/
937
938/* this is the main entry point */
939static PyObject *
940do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
941{
942 SubString input;
943
944 /* PEP 3101 says only 2 levels, so that
945 "{0:{1}}".format('abc', 's') # works
946 "{0:{1:{2}}}".format('abc', 's', '') # fails
947 */
948 int recursion_level = 2;
949
950 SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
951 return build_string(&input, args, kwargs, &recursion_level);
952}
Eric Smithf6db4092007-08-27 23:52:26 +0000953
954
955
956/************************************************************************/
957/*********** formatteriterator ******************************************/
958/************************************************************************/
959
960/* This is used to implement string.Formatter.vparse(). It exists so
961 Formatter can share code with the built in unicode.format() method.
962 It's really just a wrapper around MarkupIterator that is callable
963 from Python. */
964
965typedef struct {
966 PyObject_HEAD
967
968 PyUnicodeObject *str;
969
970 MarkupIterator it_markup;
971} formatteriterobject;
972
973static void
974formatteriter_dealloc(formatteriterobject *it)
975{
976 Py_XDECREF(it->str);
977 PyObject_FREE(it);
978}
979
980/* returns a tuple:
Eric Smith625cbf22007-08-29 03:22:59 +0000981 (literal, field_name, format_spec, conversion)
982
983 literal is any literal text to output. might be zero length
984 field_name is the string before the ':'. might be None
985 format_spec is the string after the ':'. might be None
986 conversion is either None, or the string after the '!'
Eric Smithf6db4092007-08-27 23:52:26 +0000987*/
988static PyObject *
989formatteriter_next(formatteriterobject *it)
990{
991 SubString literal;
992 SubString field_name;
993 SubString format_spec;
994 Py_UNICODE conversion;
Eric Smithf6db4092007-08-27 23:52:26 +0000995 int format_spec_needs_expanding;
Eric Smith625cbf22007-08-29 03:22:59 +0000996 int result = MarkupIterator_next(&it->it_markup, &literal, &field_name,
997 &format_spec, &conversion,
Eric Smithf6db4092007-08-27 23:52:26 +0000998 &format_spec_needs_expanding);
999
1000 /* all of the SubString objects point into it->str, so no
1001 memory management needs to be done on them */
1002 assert(0 <= result && result <= 2);
Eric Smith0cb431c2007-08-28 01:07:27 +00001003 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +00001004 /* if 0, error has already been set, if 1, iterator is empty */
1005 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +00001006 else {
Eric Smithf6db4092007-08-27 23:52:26 +00001007 PyObject *literal_str = NULL;
1008 PyObject *field_name_str = NULL;
1009 PyObject *format_spec_str = NULL;
1010 PyObject *conversion_str = NULL;
1011 PyObject *tuple = NULL;
Eric Smith625cbf22007-08-29 03:22:59 +00001012 int has_field = field_name.ptr != field_name.end;
Eric Smithf6db4092007-08-27 23:52:26 +00001013
Eric Smith625cbf22007-08-29 03:22:59 +00001014 literal_str = SubString_new_object(&literal);
1015 if (literal_str == NULL)
1016 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001017
Eric Smith625cbf22007-08-29 03:22:59 +00001018 field_name_str = SubString_new_object(&field_name);
1019 if (field_name_str == NULL)
1020 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001021
Eric Smith625cbf22007-08-29 03:22:59 +00001022 /* if field_name is non-zero length, return a string for
1023 format_spec (even if zero length), else return None */
1024 format_spec_str = (has_field ?
1025 SubString_new_object_or_empty :
1026 SubString_new_object)(&format_spec);
1027 if (format_spec_str == NULL)
1028 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001029
Eric Smith625cbf22007-08-29 03:22:59 +00001030 /* if the conversion is not specified, return a None,
1031 otherwise create a one length string with the conversion
1032 character */
1033 if (conversion == '\0') {
Eric Smithf6db4092007-08-27 23:52:26 +00001034 conversion_str = Py_None;
Eric Smithf6db4092007-08-27 23:52:26 +00001035 Py_INCREF(conversion_str);
1036 }
Eric Smith625cbf22007-08-29 03:22:59 +00001037 else
1038 conversion_str = PyUnicode_FromUnicode(&conversion, 1);
1039 if (conversion_str == NULL)
1040 goto done;
1041
Eric Smith9e7c8da2007-08-28 11:15:20 +00001042 tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
Eric Smithf6db4092007-08-27 23:52:26 +00001043 conversion_str);
Eric Smith625cbf22007-08-29 03:22:59 +00001044 done:
Eric Smithf6db4092007-08-27 23:52:26 +00001045 Py_XDECREF(literal_str);
1046 Py_XDECREF(field_name_str);
1047 Py_XDECREF(format_spec_str);
1048 Py_XDECREF(conversion_str);
1049 return tuple;
1050 }
1051}
1052
/* Method table for formatteriterator.  It contains only the
   terminating sentinel entry, i.e. the type defines no methods here. */
static PyMethodDef formatteriter_methods[] = {
    {NULL, NULL} /* sentinel */
};
1056
/* Type object for formatteriterator.  The slots are filled in
   positionally, so the order of the entries below is significant.
   Only deallocation, attribute lookup, tp_iter and tp_iternext are
   provided; every other slot is left as 0.

   NOTE(review): unlike PyFieldNameIter_Type, this object is not
   declared static -- confirm whether external linkage is intended. */
PyTypeObject PyFormatterIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "formatteriterator",                /* tp_name */
    sizeof(formatteriterobject),        /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /* methods */
    (destructor)formatteriter_dealloc,  /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    0,                                  /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    PyObject_SelfIter,                  /* tp_iter: the object is its own iterator */
    (iternextfunc)formatteriter_next,   /* tp_iternext */
    formatteriter_methods,              /* tp_methods */
    0,                                  /* next slot, unlabeled in the original */
};
1089
1090/* unicode_formatter_parser is used to implement
1091 string.Formatter.vformat. it parses a string and returns tuples
1092 describing the parsed elements. It's a wrapper around
1093 stringlib/string_format.h's MarkupIterator */
1094static PyObject *
1095formatter_parser(PyUnicodeObject *self)
1096{
1097 formatteriterobject *it;
1098
1099 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1100 if (it == NULL)
1101 return NULL;
1102
1103 /* take ownership, give the object to the iterator */
1104 Py_INCREF(self);
1105 it->str = self;
1106
1107 /* initialize the contained MarkupIterator */
1108 MarkupIterator_init(&it->it_markup,
1109 PyUnicode_AS_UNICODE(self),
1110 PyUnicode_GET_SIZE(self));
1111
1112 return (PyObject *)it;
1113}
1114
1115
1116/************************************************************************/
1117/*********** fieldnameiterator ******************************************/
1118/************************************************************************/
1119
1120
/* This is used to implement string.Formatter.vparse().  It parses the
   field name into attribute and item values.  It's a Python-callable
   wrapper around FieldNameIterator. */
1124
/* Python-level iterator object that wraps a FieldNameIterator. */
typedef struct {
    PyObject_HEAD

    /* The field-name string being parsed.  A reference is held for the
       lifetime of the iterator to keep the field name alive. */
    PyUnicodeObject *str;

    /* The wrapped iterator state, stored inline in the object. */
    FieldNameIterator it_field;
} fieldnameiterobject;
1132
1133static void
1134fieldnameiter_dealloc(fieldnameiterobject *it)
1135{
1136 Py_XDECREF(it->str);
1137 PyObject_FREE(it);
1138}
1139
1140/* returns a tuple:
1141 (is_attr, value)
1142 is_attr is true if we used attribute syntax (e.g., '.foo')
1143 false if we used index syntax (e.g., '[foo]')
1144 value is an integer or string
1145*/
1146static PyObject *
1147fieldnameiter_next(fieldnameiterobject *it)
1148{
1149 int result;
1150 int is_attr;
1151 Py_ssize_t idx;
1152 SubString name;
1153
1154 result = FieldNameIterator_next(&it->it_field, &is_attr,
1155 &idx, &name);
Eric Smith0cb431c2007-08-28 01:07:27 +00001156 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +00001157 /* if 0, error has already been set, if 1, iterator is empty */
1158 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +00001159 else {
Eric Smithf6db4092007-08-27 23:52:26 +00001160 PyObject* result = NULL;
1161 PyObject* is_attr_obj = NULL;
1162 PyObject* obj = NULL;
1163
1164 is_attr_obj = PyBool_FromLong(is_attr);
1165 if (is_attr_obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001166 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001167
1168 /* either an integer or a string */
1169 if (idx != -1)
1170 obj = PyInt_FromSsize_t(idx);
1171 else
1172 obj = SubString_new_object(&name);
1173 if (obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001174 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001175
1176 /* return a tuple of values */
1177 result = PyTuple_Pack(2, is_attr_obj, obj);
Eric Smithf6db4092007-08-27 23:52:26 +00001178
Eric Smith625cbf22007-08-29 03:22:59 +00001179 done:
Eric Smithf6db4092007-08-27 23:52:26 +00001180 Py_XDECREF(is_attr_obj);
1181 Py_XDECREF(obj);
Eric Smith625cbf22007-08-29 03:22:59 +00001182 return result;
Eric Smithf6db4092007-08-27 23:52:26 +00001183 }
Eric Smithf6db4092007-08-27 23:52:26 +00001184}
1185
/* Method table for fieldnameiterator.  It contains only the
   terminating sentinel entry, i.e. the type defines no methods here. */
static PyMethodDef fieldnameiter_methods[] = {
    {NULL, NULL} /* sentinel */
};
1189
/* Type object for fieldnameiterator.  The slots are filled in
   positionally, so the order of the entries below is significant.
   Only deallocation, attribute lookup, tp_iter and tp_iternext are
   provided; every other slot is left as 0. */
static PyTypeObject PyFieldNameIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "fieldnameiterator",                /* tp_name */
    sizeof(fieldnameiterobject),        /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /* methods */
    (destructor)fieldnameiter_dealloc,  /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    0,                                  /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    PyObject_SelfIter,                  /* tp_iter: the object is its own iterator */
    (iternextfunc)fieldnameiter_next,   /* tp_iternext */
    fieldnameiter_methods,              /* tp_methods */
    0};
1221
1222/* unicode_formatter_field_name_split is used to implement
1223 string.Formatter.vformat. it takes an PEP 3101 "field name", and
1224 returns a tuple of (first, rest): "first", the part before the
1225 first '.' or '['; and "rest", an iterator for the rest of the field
1226 name. it's a wrapper around stringlib/string_format.h's
1227 field_name_split. The iterator it returns is a
1228 FieldNameIterator */
1229static PyObject *
1230formatter_field_name_split(PyUnicodeObject *self)
1231{
1232 SubString first;
1233 Py_ssize_t first_idx;
1234 fieldnameiterobject *it;
1235
1236 PyObject *first_obj = NULL;
1237 PyObject *result = NULL;
1238
1239 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1240 if (it == NULL)
1241 return NULL;
1242
1243 /* take ownership, give the object to the iterator. this is
1244 just to keep the field_name alive */
1245 Py_INCREF(self);
1246 it->str = self;
1247
1248 if (!field_name_split(STRINGLIB_STR(self),
1249 STRINGLIB_LEN(self),
1250 &first, &first_idx, &it->it_field))
Eric Smith625cbf22007-08-29 03:22:59 +00001251 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001252
Eric Smith0cb431c2007-08-28 01:07:27 +00001253 /* first becomes an integer, if possible; else a string */
Eric Smithf6db4092007-08-27 23:52:26 +00001254 if (first_idx != -1)
1255 first_obj = PyInt_FromSsize_t(first_idx);
1256 else
1257 /* convert "first" into a string object */
1258 first_obj = SubString_new_object(&first);
1259 if (first_obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001260 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001261
1262 /* return a tuple of values */
1263 result = PyTuple_Pack(2, first_obj, it);
1264
Eric Smith625cbf22007-08-29 03:22:59 +00001265done:
Eric Smithf6db4092007-08-27 23:52:26 +00001266 Py_XDECREF(it);
1267 Py_XDECREF(first_obj);
1268 return result;
1269}