blob: ea8b0e72f6a577461a290dce32e1ab0dc7869c21 [file] [log] [blame]
Eric Smith8c663262007-08-25 02:26:07 +00001/*
2 string_format.h -- implementation of string.format().
3
4 It uses the Objects/stringlib conventions, so that it can be
5 compiled for both unicode and string objects.
6*/
7
8
/* Defines for more efficiently reallocating the string buffer.
   Each reallocation adds some slack beyond what was asked for: the
   slack starts at INITIAL_SIZE_INCREMENT and is multiplied by
   SIZE_MULTIPLIER after every reallocation for as long as it is still
   below MAX_SIZE_INCREMENT. */
#define INITIAL_SIZE_INCREMENT 100
#define SIZE_MULTIPLIER 2
#define MAX_SIZE_INCREMENT 3200
13
14
15/************************************************************************/
16/*********** Global data structures and forward declarations *********/
17/************************************************************************/
18
/*
   A SubString consists of the characters between two string or
   unicode pointers.  It does not own the characters it refers to.
*/
typedef struct {
    STRINGLIB_CHAR *ptr;    /* first character, or NULL for "no string" */
    STRINGLIB_CHAR *end;    /* one past the last character */
} SubString;
27
28
29/* forward declaration for recursion */
30static PyObject *
31build_string(SubString *input, PyObject *args, PyObject *kwargs,
32 int *recursion_level);
33
34
35
36/************************************************************************/
37/************************** Utility functions ************************/
38/************************************************************************/
39
40/* fill in a SubString from a pointer and length */
41Py_LOCAL_INLINE(void)
42SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
43{
44 str->ptr = p;
45 if (p == NULL)
46 str->end = NULL;
47 else
48 str->end = str->ptr + len;
49}
50
Eric Smith625cbf22007-08-29 03:22:59 +000051/* return a new string. if str->ptr is NULL, return None */
Eric Smith8c663262007-08-25 02:26:07 +000052Py_LOCAL_INLINE(PyObject *)
53SubString_new_object(SubString *str)
54{
Eric Smith625cbf22007-08-29 03:22:59 +000055 if (str->ptr == NULL) {
56 Py_INCREF(Py_None);
57 return Py_None;
58 }
59 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
60}
61
62/* return a new string. if str->ptr is NULL, return None */
63Py_LOCAL_INLINE(PyObject *)
64SubString_new_object_or_empty(SubString *str)
65{
66 if (str->ptr == NULL) {
67 return STRINGLIB_NEW(NULL, 0);
68 }
Eric Smith8c663262007-08-25 02:26:07 +000069 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
70}
71
72/************************************************************************/
Eric Smith8c663262007-08-25 02:26:07 +000073/*********** Output string management functions ****************/
74/************************************************************************/
75
/* A growable output buffer backed by a string object. */
typedef struct {
    STRINGLIB_CHAR *ptr;        /* where the next character is written */
    STRINGLIB_CHAR *end;        /* one past the end of the buffer in obj */
    PyObject *obj;              /* string object whose buffer is written to */
    Py_ssize_t size_increment;  /* slack added by the next output_extend */
} OutputString;
82
83/* initialize an OutputString object, reserving size characters */
84static int
85output_initialize(OutputString *output, Py_ssize_t size)
86{
87 output->obj = STRINGLIB_NEW(NULL, size);
88 if (output->obj == NULL)
89 return 0;
90
91 output->ptr = STRINGLIB_STR(output->obj);
92 output->end = STRINGLIB_LEN(output->obj) + output->ptr;
93 output->size_increment = INITIAL_SIZE_INCREMENT;
94
95 return 1;
96}
97
98/*
99 output_extend reallocates the output string buffer.
100 It returns a status: 0 for a failed reallocation,
101 1 for success.
102*/
103
104static int
105output_extend(OutputString *output, Py_ssize_t count)
106{
107 STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
108 Py_ssize_t curlen = output->ptr - startptr;
109 Py_ssize_t maxlen = curlen + count + output->size_increment;
110
111 if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
112 return 0;
113 startptr = STRINGLIB_STR(output->obj);
114 output->ptr = startptr + curlen;
115 output->end = startptr + maxlen;
116 if (output->size_increment < MAX_SIZE_INCREMENT)
117 output->size_increment *= SIZE_MULTIPLIER;
118 return 1;
119}
120
121/*
122 output_data dumps characters into our output string
123 buffer.
124
125 In some cases, it has to reallocate the string.
126
127 It returns a status: 0 for a failed reallocation,
128 1 for success.
129*/
130static int
131output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
132{
133 if ((count > output->end - output->ptr) && !output_extend(output, count))
134 return 0;
135 memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
136 output->ptr += count;
137 return 1;
138}
139
140/************************************************************************/
141/*********** Format string parsing -- integers and identifiers *********/
142/************************************************************************/
143
Eric Smith7ade6482007-08-26 22:27:13 +0000144static Py_ssize_t
145get_integer(const SubString *str)
Eric Smith8c663262007-08-25 02:26:07 +0000146{
Eric Smith7ade6482007-08-26 22:27:13 +0000147 Py_ssize_t accumulator = 0;
148 Py_ssize_t digitval;
149 Py_ssize_t oldaccumulator;
150 STRINGLIB_CHAR *p;
Eric Smith8c663262007-08-25 02:26:07 +0000151
Eric Smith7ade6482007-08-26 22:27:13 +0000152 /* empty string is an error */
153 if (str->ptr >= str->end)
154 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000155
Eric Smith7ade6482007-08-26 22:27:13 +0000156 for (p = str->ptr; p < str->end; p++) {
157 digitval = STRINGLIB_TODECIMAL(*p);
Eric Smith8c663262007-08-25 02:26:07 +0000158 if (digitval < 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000159 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000160 /*
161 This trick was copied from old Unicode format code. It's cute,
162 but would really suck on an old machine with a slow divide
163 implementation. Fortunately, in the normal case we do not
164 expect too many digits.
165 */
166 oldaccumulator = accumulator;
167 accumulator *= 10;
168 if ((accumulator+10)/10 != oldaccumulator+1) {
169 PyErr_Format(PyExc_ValueError,
170 "Too many decimal digits in format string");
171 return -1;
172 }
173 accumulator += digitval;
174 }
Eric Smith7ade6482007-08-26 22:27:13 +0000175 return accumulator;
Eric Smith8c663262007-08-25 02:26:07 +0000176}
177
178/************************************************************************/
179/******** Functions to get field objects and specification strings ******/
180/************************************************************************/
181
Eric Smith7ade6482007-08-26 22:27:13 +0000182/* do the equivalent of obj.name */
Eric Smith8c663262007-08-25 02:26:07 +0000183static PyObject *
Eric Smith7ade6482007-08-26 22:27:13 +0000184getattr(PyObject *obj, SubString *name)
Eric Smith8c663262007-08-25 02:26:07 +0000185{
Eric Smith7ade6482007-08-26 22:27:13 +0000186 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000187 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000188 if (str == NULL)
189 return NULL;
190 newobj = PyObject_GetAttr(obj, str);
191 Py_DECREF(str);
192 return newobj;
Eric Smith8c663262007-08-25 02:26:07 +0000193}
194
Eric Smith7ade6482007-08-26 22:27:13 +0000195/* do the equivalent of obj[idx], where obj is a sequence */
196static PyObject *
197getitem_sequence(PyObject *obj, Py_ssize_t idx)
198{
199 return PySequence_GetItem(obj, idx);
200}
201
202/* do the equivalent of obj[idx], where obj is not a sequence */
203static PyObject *
204getitem_idx(PyObject *obj, Py_ssize_t idx)
205{
206 PyObject *newobj;
207 PyObject *idx_obj = PyInt_FromSsize_t(idx);
208 if (idx_obj == NULL)
209 return NULL;
210 newobj = PyObject_GetItem(obj, idx_obj);
211 Py_DECREF(idx_obj);
212 return newobj;
213}
214
215/* do the equivalent of obj[name] */
216static PyObject *
217getitem_str(PyObject *obj, SubString *name)
218{
219 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000220 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000221 if (str == NULL)
222 return NULL;
223 newobj = PyObject_GetItem(obj, str);
224 Py_DECREF(str);
225 return newobj;
226}
227
/* Iterates over the ".attr" and "[item]" components of a field name. */
typedef struct {
    /* the entire string we're parsing.  we assume that someone else
       is managing its lifetime, and that it will exist for the
       lifetime of the iterator.  can be empty */
    SubString str;

    /* pointer to where we are inside str; everything before it has
       already been consumed */
    STRINGLIB_CHAR *ptr;
} FieldNameIterator;
237
238
239static int
240FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
241 Py_ssize_t len)
242{
243 SubString_init(&self->str, ptr, len);
244 self->ptr = self->str.ptr;
245 return 1;
246}
247
248static int
249_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
250{
251 STRINGLIB_CHAR c;
252
253 name->ptr = self->ptr;
254
255 /* return everything until '.' or '[' */
256 while (self->ptr < self->str.end) {
257 switch (c = *self->ptr++) {
258 case '[':
259 case '.':
260 /* backup so that we this character will be seen next time */
261 self->ptr--;
262 break;
263 default:
264 continue;
265 }
266 break;
267 }
268 /* end of string is okay */
269 name->end = self->ptr;
270 return 1;
271}
272
273static int
274_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
275{
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000276 int bracket_seen = 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000277 STRINGLIB_CHAR c;
278
279 name->ptr = self->ptr;
280
281 /* return everything until ']' */
282 while (self->ptr < self->str.end) {
283 switch (c = *self->ptr++) {
284 case ']':
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000285 bracket_seen = 1;
Eric Smith7ade6482007-08-26 22:27:13 +0000286 break;
287 default:
288 continue;
289 }
290 break;
291 }
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000292 /* make sure we ended with a ']' */
293 if (!bracket_seen) {
294 PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
295 return 0;
296 }
297
Eric Smith7ade6482007-08-26 22:27:13 +0000298 /* end of string is okay */
299 /* don't include the ']' */
300 name->end = self->ptr-1;
301 return 1;
302}
303
304/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
305static int
306FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
307 Py_ssize_t *name_idx, SubString *name)
308{
309 /* check at end of input */
310 if (self->ptr >= self->str.end)
311 return 1;
312
313 switch (*self->ptr++) {
314 case '.':
315 *is_attribute = 1;
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000316 if (_FieldNameIterator_attr(self, name) == 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000317 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000318 *name_idx = -1;
319 break;
320 case '[':
321 *is_attribute = 0;
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000322 if (_FieldNameIterator_item(self, name) == 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000323 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000324 *name_idx = get_integer(name);
325 break;
326 default:
327 /* interal error, can't get here */
328 assert(0);
329 return 0;
330 }
331
332 /* empty string is an error */
333 if (name->ptr == name->end) {
334 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
335 return 0;
336 }
337
338 return 2;
339}
340
341
342/* input: field_name
343 output: 'first' points to the part before the first '[' or '.'
344 'first_idx' is -1 if 'first' is not an integer, otherwise
345 it's the value of first converted to an integer
346 'rest' is an iterator to return the rest
347*/
348static int
349field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
350 Py_ssize_t *first_idx, FieldNameIterator *rest)
351{
352 STRINGLIB_CHAR c;
353 STRINGLIB_CHAR *p = ptr;
354 STRINGLIB_CHAR *end = ptr + len;
355
356 /* find the part up until the first '.' or '[' */
357 while (p < end) {
358 switch (c = *p++) {
359 case '[':
360 case '.':
361 /* backup so that we this character is available to the
362 "rest" iterator */
363 p--;
364 break;
365 default:
366 continue;
367 }
368 break;
369 }
370
371 /* set up the return values */
372 SubString_init(first, ptr, p - ptr);
373 FieldNameIterator_init(rest, p, end - p);
374
375 /* see if "first" is an integer, in which case it's used as an index */
376 *first_idx = get_integer(first);
377
378 /* zero length string is an error */
379 if (first->ptr >= first->end) {
380 PyErr_SetString(PyExc_ValueError, "empty field name");
381 goto error;
382 }
383
384 return 1;
385error:
386 return 0;
387}
388
389
Eric Smith8c663262007-08-25 02:26:07 +0000390/*
391 get_field_object returns the object inside {}, before the
392 format_spec. It handles getindex and getattr lookups and consumes
393 the entire input string.
394*/
395static PyObject *
396get_field_object(SubString *input, PyObject *args, PyObject *kwargs)
397{
Eric Smith7ade6482007-08-26 22:27:13 +0000398 PyObject *obj = NULL;
399 int ok;
400 int is_attribute;
401 SubString name;
402 SubString first;
Eric Smith8c663262007-08-25 02:26:07 +0000403 Py_ssize_t index;
Eric Smith7ade6482007-08-26 22:27:13 +0000404 FieldNameIterator rest;
Eric Smith8c663262007-08-25 02:26:07 +0000405
Eric Smith7ade6482007-08-26 22:27:13 +0000406 if (!field_name_split(input->ptr, input->end - input->ptr, &first,
407 &index, &rest)) {
408 goto error;
409 }
Eric Smith8c663262007-08-25 02:26:07 +0000410
Eric Smith7ade6482007-08-26 22:27:13 +0000411 if (index == -1) {
412 /* look up in kwargs */
Eric Smith7a6dd292007-08-27 23:30:47 +0000413 PyObject *key = SubString_new_object(&first);
Eric Smith7ade6482007-08-26 22:27:13 +0000414 if (key == NULL)
415 goto error;
416 if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
Eric Smith11529192007-09-04 23:04:22 +0000417 PyErr_SetObject(PyExc_KeyError, key);
Eric Smith7ade6482007-08-26 22:27:13 +0000418 Py_DECREF(key);
419 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000420 }
Neal Norwitz8a4eb292007-08-27 07:24:17 +0000421 Py_DECREF(key);
Neal Norwitz247b5152007-08-27 03:22:50 +0000422 Py_INCREF(obj);
Eric Smith0cb431c2007-08-28 01:07:27 +0000423 }
424 else {
Eric Smith7ade6482007-08-26 22:27:13 +0000425 /* look up in args */
426 obj = PySequence_GetItem(args, index);
Eric Smith11529192007-09-04 23:04:22 +0000427 if (obj == NULL)
Eric Smith7ade6482007-08-26 22:27:13 +0000428 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000429 }
Eric Smith7ade6482007-08-26 22:27:13 +0000430
431 /* iterate over the rest of the field_name */
432 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
433 &name)) == 2) {
434 PyObject *tmp;
435
436 if (is_attribute)
437 /* getattr lookup "." */
438 tmp = getattr(obj, &name);
439 else
440 /* getitem lookup "[]" */
441 if (index == -1)
442 tmp = getitem_str(obj, &name);
443 else
444 if (PySequence_Check(obj))
445 tmp = getitem_sequence(obj, index);
446 else
447 /* not a sequence */
448 tmp = getitem_idx(obj, index);
449 if (tmp == NULL)
450 goto error;
451
452 /* assign to obj */
453 Py_DECREF(obj);
454 obj = tmp;
Eric Smith8c663262007-08-25 02:26:07 +0000455 }
Eric Smith7ade6482007-08-26 22:27:13 +0000456 /* end of iterator, this is the non-error case */
457 if (ok == 1)
458 return obj;
459error:
460 Py_XDECREF(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000461 return NULL;
462}
463
464/************************************************************************/
465/***************** Field rendering functions **************************/
466/************************************************************************/
467
468/*
469 render_field() is the main function in this section. It takes the
470 field object and field specification string generated by
471 get_field_and_spec, and renders the field into the output string.
472
473 format() does the actual calling of the objects __format__ method.
474*/
475
476
477/* returns fieldobj.__format__(format_spec) */
478static PyObject *
479format(PyObject *fieldobj, SubString *format_spec)
480{
481 static PyObject *format_str = NULL;
482 PyObject *meth;
483 PyObject *spec = NULL;
484 PyObject *result = NULL;
485
486 /* Initialize cached value */
487 if (format_str == NULL) {
488 /* Initialize static variable needed by _PyType_Lookup */
489 format_str = PyUnicode_FromString("__format__");
490 if (format_str == NULL)
491 return NULL;
492 }
493
494 /* Make sure the type is initialized. float gets initialized late */
495 if (Py_Type(fieldobj)->tp_dict == NULL)
496 if (PyType_Ready(Py_Type(fieldobj)) < 0)
497 return NULL;
498
499 /* we need to create an object out of the pointers we have */
Eric Smith625cbf22007-08-29 03:22:59 +0000500 spec = SubString_new_object_or_empty(format_spec);
Eric Smith8c663262007-08-25 02:26:07 +0000501 if (spec == NULL)
502 goto done;
503
504 /* Find the (unbound!) __format__ method (a borrowed reference) */
505 meth = _PyType_Lookup(Py_Type(fieldobj), format_str);
506 if (meth == NULL) {
507 PyErr_Format(PyExc_TypeError,
508 "Type %.100s doesn't define __format__",
509 Py_Type(fieldobj)->tp_name);
510 goto done;
511 }
512
513 /* And call it, binding it to the value */
514 result = PyObject_CallFunctionObjArgs(meth, fieldobj, spec, NULL);
515 if (result == NULL)
516 goto done;
517
518 if (!STRINGLIB_CHECK(result)) {
519 PyErr_SetString(PyExc_TypeError,
520 "__format__ method did not return "
521 STRINGLIB_TYPE_NAME);
522 Py_DECREF(result);
523 result = NULL;
524 goto done;
525 }
526
527done:
528 Py_XDECREF(spec);
529 return result;
530}
531
532/*
533 render_field calls fieldobj.__format__(format_spec) method, and
534 appends to the output.
535*/
536static int
537render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
538{
539 int ok = 0;
540 PyObject *result = format(fieldobj, format_spec);
541
542 if (result == NULL)
543 goto done;
544
545 ok = output_data(output,
546 STRINGLIB_STR(result), STRINGLIB_LEN(result));
547done:
548 Py_XDECREF(result);
549 return ok;
550}
551
552static int
553parse_field(SubString *str, SubString *field_name, SubString *format_spec,
554 STRINGLIB_CHAR *conversion)
555{
556 STRINGLIB_CHAR c = 0;
557
558 /* initialize these, as they may be empty */
559 *conversion = '\0';
560 SubString_init(format_spec, NULL, 0);
561
562 /* search for the field name. it's terminated by the end of the
563 string, or a ':' or '!' */
564 field_name->ptr = str->ptr;
565 while (str->ptr < str->end) {
566 switch (c = *(str->ptr++)) {
567 case ':':
568 case '!':
569 break;
570 default:
571 continue;
572 }
573 break;
574 }
575
576 if (c == '!' || c == ':') {
577 /* we have a format specifier and/or a conversion */
578 /* don't include the last character */
579 field_name->end = str->ptr-1;
580
581 /* the format specifier is the rest of the string */
582 format_spec->ptr = str->ptr;
583 format_spec->end = str->end;
584
585 /* see if there's a conversion specifier */
586 if (c == '!') {
587 /* there must be another character present */
588 if (format_spec->ptr >= format_spec->end) {
589 PyErr_SetString(PyExc_ValueError,
590 "end of format while looking for conversion "
591 "specifier");
592 return 0;
593 }
594 *conversion = *(format_spec->ptr++);
595
596 /* if there is another character, it must be a colon */
597 if (format_spec->ptr < format_spec->end) {
598 c = *(format_spec->ptr++);
599 if (c != ':') {
600 PyErr_SetString(PyExc_ValueError,
601 "expected ':' after format specifier");
602 return 0;
603 }
604 }
605 }
606
607 return 1;
608
Eric Smith0cb431c2007-08-28 01:07:27 +0000609 }
610 else {
Eric Smith8c663262007-08-25 02:26:07 +0000611 /* end of string, there's no format_spec or conversion */
612 field_name->end = str->ptr;
613 return 1;
614 }
615}
616
617/************************************************************************/
618/******* Output string allocation and escape-to-markup processing ******/
619/************************************************************************/
620
/* MarkupIterator breaks the string into pieces of either literal
   text, or things inside {} that need to be marked up.  it is
   designed to make it easy to wrap a Python iterator around it, for
   use with the Formatter class */

typedef struct {
    /* the input not yet consumed; str.ptr advances as pieces are
       returned */
    SubString str;
} MarkupIterator;
629
630static int
631MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
632{
633 SubString_init(&self->str, ptr, len);
Eric Smith8c663262007-08-25 02:26:07 +0000634 return 1;
635}
636
637/* returns 0 on error, 1 on non-error termination, and 2 if it got a
638 string (or something to be expanded) */
639static int
Eric Smith625cbf22007-08-29 03:22:59 +0000640MarkupIterator_next(MarkupIterator *self, SubString *literal,
Eric Smith8c663262007-08-25 02:26:07 +0000641 SubString *field_name, SubString *format_spec,
642 STRINGLIB_CHAR *conversion,
643 int *format_spec_needs_expanding)
644{
645 int at_end;
646 STRINGLIB_CHAR c = 0;
647 STRINGLIB_CHAR *start;
648 int count;
649 Py_ssize_t len;
Eric Smith625cbf22007-08-29 03:22:59 +0000650 int markup_follows = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000651
Eric Smith625cbf22007-08-29 03:22:59 +0000652 /* initialize all of the output variables */
653 SubString_init(literal, NULL, 0);
654 SubString_init(field_name, NULL, 0);
655 SubString_init(format_spec, NULL, 0);
656 *conversion = '\0';
Eric Smith8c663262007-08-25 02:26:07 +0000657 *format_spec_needs_expanding = 0;
658
Eric Smith625cbf22007-08-29 03:22:59 +0000659 /* No more input, end of iterator. This is the normal exit
660 path. */
Eric Smith8c663262007-08-25 02:26:07 +0000661 if (self->str.ptr >= self->str.end)
662 return 1;
663
Eric Smith8c663262007-08-25 02:26:07 +0000664 start = self->str.ptr;
665
Eric Smith625cbf22007-08-29 03:22:59 +0000666 /* First read any literal text. Read until the end of string, an
667 escaped '{' or '}', or an unescaped '{'. In order to never
668 allocate memory and so I can just pass pointers around, if
669 there's an escaped '{' or '}' then we'll return the literal
670 including the brace, but no format object. The next time
671 through, we'll return the rest of the literal, skipping past
672 the second consecutive brace. */
673 while (self->str.ptr < self->str.end) {
674 switch (c = *(self->str.ptr++)) {
675 case '{':
676 case '}':
677 markup_follows = 1;
678 break;
679 default:
680 continue;
Eric Smith8c663262007-08-25 02:26:07 +0000681 }
Eric Smith625cbf22007-08-29 03:22:59 +0000682 break;
Eric Smith0cb431c2007-08-28 01:07:27 +0000683 }
Eric Smith625cbf22007-08-29 03:22:59 +0000684
685 at_end = self->str.ptr >= self->str.end;
686 len = self->str.ptr - start;
687
688 if ((c == '}') && (at_end || (c != *self->str.ptr))) {
689 PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
690 "in format string");
691 return 0;
692 }
693 if (at_end && c == '{') {
694 PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
695 "in format string");
696 return 0;
697 }
698 if (!at_end) {
699 if (c == *self->str.ptr) {
700 /* escaped } or {, skip it in the input. there is no
701 markup object following us, just this literal text */
702 self->str.ptr++;
703 markup_follows = 0;
704 }
705 else
706 len--;
707 }
708
709 /* record the literal text */
710 literal->ptr = start;
711 literal->end = start + len;
712
713 if (!markup_follows)
714 return 2;
715
716 /* this is markup, find the end of the string by counting nested
717 braces. note that this prohibits escaped braces, so that
718 format_specs cannot have braces in them. */
719 count = 1;
720
721 start = self->str.ptr;
722
723 /* we know we can't have a zero length string, so don't worry
724 about that case */
725 while (self->str.ptr < self->str.end) {
726 switch (c = *(self->str.ptr++)) {
727 case '{':
728 /* the format spec needs to be recursively expanded.
729 this is an optimization, and not strictly needed */
730 *format_spec_needs_expanding = 1;
731 count++;
732 break;
733 case '}':
734 count--;
735 if (count <= 0) {
736 /* we're done. parse and get out */
737 SubString s;
738
739 SubString_init(&s, start, self->str.ptr - 1 - start);
740 if (parse_field(&s, field_name, format_spec, conversion) == 0)
741 return 0;
742
743 /* a zero length field_name is an error */
744 if (field_name->ptr == field_name->end) {
745 PyErr_SetString(PyExc_ValueError, "zero length field name "
746 "in format");
747 return 0;
748 }
749
750 /* success */
751 return 2;
Eric Smith8c663262007-08-25 02:26:07 +0000752 }
753 break;
754 }
Eric Smith8c663262007-08-25 02:26:07 +0000755 }
Eric Smith625cbf22007-08-29 03:22:59 +0000756
757 /* end of string while searching for matching '}' */
758 PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
759 return 0;
Eric Smith8c663262007-08-25 02:26:07 +0000760}
761
762
763/* do the !r or !s conversion on obj */
764static PyObject *
765do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
766{
767 /* XXX in pre-3.0, do we need to convert this to unicode, since it
768 might have returned a string? */
769 switch (conversion) {
770 case 'r':
771 return PyObject_Repr(obj);
772 case 's':
773 return PyObject_Unicode(obj);
774 default:
775 PyErr_Format(PyExc_ValueError,
776 "Unknown converion specifier %c",
777 conversion);
778 return NULL;
779 }
780}
781
782/* given:
783
784 {field_name!conversion:format_spec}
785
786 compute the result and write it to output.
787 format_spec_needs_expanding is an optimization. if it's false,
788 just output the string directly, otherwise recursively expand the
789 format_spec string. */
790
791static int
792output_markup(SubString *field_name, SubString *format_spec,
793 int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
794 OutputString *output, PyObject *args, PyObject *kwargs,
795 int *recursion_level)
796{
797 PyObject *tmp = NULL;
798 PyObject *fieldobj = NULL;
799 SubString expanded_format_spec;
800 SubString *actual_format_spec;
801 int result = 0;
802
803 /* convert field_name to an object */
804 fieldobj = get_field_object(field_name, args, kwargs);
805 if (fieldobj == NULL)
806 goto done;
807
808 if (conversion != '\0') {
809 tmp = do_conversion(fieldobj, conversion);
810 if (tmp == NULL)
811 goto done;
812
813 /* do the assignment, transferring ownership: fieldobj = tmp */
814 Py_DECREF(fieldobj);
815 fieldobj = tmp;
816 tmp = NULL;
817 }
818
819 /* if needed, recurively compute the format_spec */
820 if (format_spec_needs_expanding) {
821 tmp = build_string(format_spec, args, kwargs, recursion_level);
822 if (tmp == NULL)
823 goto done;
824
825 /* note that in the case we're expanding the format string,
826 tmp must be kept around until after the call to
827 render_field. */
828 SubString_init(&expanded_format_spec,
829 STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
830 actual_format_spec = &expanded_format_spec;
Eric Smith0cb431c2007-08-28 01:07:27 +0000831 }
832 else
Eric Smith8c663262007-08-25 02:26:07 +0000833 actual_format_spec = format_spec;
834
835 if (render_field(fieldobj, actual_format_spec, output) == 0)
836 goto done;
837
838 result = 1;
839
840done:
841 Py_XDECREF(fieldobj);
842 Py_XDECREF(tmp);
843
844 return result;
845}
846
847/*
848 do_markup is the top-level loop for the format() function. It
849 searches through the format string for escapes to markup codes, and
850 calls other functions to move non-markup text to the output,
851 and to perform the markup to the output.
852*/
853static int
854do_markup(SubString *input, PyObject *args, PyObject *kwargs,
855 OutputString *output, int *recursion_level)
856{
857 MarkupIterator iter;
Eric Smith8c663262007-08-25 02:26:07 +0000858 int format_spec_needs_expanding;
859 int result;
Eric Smith625cbf22007-08-29 03:22:59 +0000860 SubString literal;
Eric Smith8c663262007-08-25 02:26:07 +0000861 SubString field_name;
862 SubString format_spec;
863 STRINGLIB_CHAR conversion;
864
865 MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
Eric Smith625cbf22007-08-29 03:22:59 +0000866 while ((result = MarkupIterator_next(&iter, &literal, &field_name,
Eric Smith8c663262007-08-25 02:26:07 +0000867 &format_spec, &conversion,
868 &format_spec_needs_expanding)) == 2) {
Eric Smith625cbf22007-08-29 03:22:59 +0000869 if (!output_data(output, literal.ptr, literal.end - literal.ptr))
870 return 0;
871 if (field_name.ptr != field_name.end)
Eric Smith8c663262007-08-25 02:26:07 +0000872 if (!output_markup(&field_name, &format_spec,
873 format_spec_needs_expanding, conversion, output,
874 args, kwargs, recursion_level))
875 return 0;
Eric Smith8c663262007-08-25 02:26:07 +0000876 }
877 return result;
878}
879
880
881/*
882 build_string allocates the output string and then
883 calls do_markup to do the heavy lifting.
884*/
885static PyObject *
886build_string(SubString *input, PyObject *args, PyObject *kwargs,
887 int *recursion_level)
888{
889 OutputString output;
890 PyObject *result = NULL;
891 Py_ssize_t count;
892
893 output.obj = NULL; /* needed so cleanup code always works */
894
895 /* check the recursion level */
896 (*recursion_level)--;
897 if (*recursion_level < 0) {
898 PyErr_SetString(PyExc_ValueError,
899 "Max string recursion exceeded");
900 goto done;
901 }
902
903 /* initial size is the length of the format string, plus the size
904 increment. seems like a reasonable default */
905 if (!output_initialize(&output,
906 input->end - input->ptr +
907 INITIAL_SIZE_INCREMENT))
908 goto done;
909
910 if (!do_markup(input, args, kwargs, &output, recursion_level)) {
911 goto done;
912 }
913
914 count = output.ptr - STRINGLIB_STR(output.obj);
915 if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
916 goto done;
917 }
918
919 /* transfer ownership to result */
920 result = output.obj;
921 output.obj = NULL;
922
923done:
924 (*recursion_level)++;
925 Py_XDECREF(output.obj);
926 return result;
927}
928
929/************************************************************************/
930/*********** main routine ***********************************************/
931/************************************************************************/
932
933/* this is the main entry point */
934static PyObject *
935do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
936{
937 SubString input;
938
939 /* PEP 3101 says only 2 levels, so that
940 "{0:{1}}".format('abc', 's') # works
941 "{0:{1:{2}}}".format('abc', 's', '') # fails
942 */
943 int recursion_level = 2;
944
945 SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
946 return build_string(&input, args, kwargs, &recursion_level);
947}
Eric Smithf6db4092007-08-27 23:52:26 +0000948
949
950
951/************************************************************************/
952/*********** formatteriterator ******************************************/
953/************************************************************************/
954
955/* This is used to implement string.Formatter.vparse(). It exists so
956 Formatter can share code with the built in unicode.format() method.
957 It's really just a wrapper around MarkupIterator that is callable
958 from Python. */
959
/* Python-level iterator object wrapping a MarkupIterator. */
typedef struct {
    PyObject_HEAD

    /* the string being parsed; released in formatteriter_dealloc */
    PyUnicodeObject *str;

    /* the underlying iterator state */
    MarkupIterator it_markup;
} formatteriterobject;
967
968static void
969formatteriter_dealloc(formatteriterobject *it)
970{
971 Py_XDECREF(it->str);
972 PyObject_FREE(it);
973}
974
975/* returns a tuple:
Eric Smith625cbf22007-08-29 03:22:59 +0000976 (literal, field_name, format_spec, conversion)
977
978 literal is any literal text to output. might be zero length
979 field_name is the string before the ':'. might be None
 format_spec is the string after the ':'. might be None
981 conversion is either None, or the string after the '!'
Eric Smithf6db4092007-08-27 23:52:26 +0000982*/
983static PyObject *
984formatteriter_next(formatteriterobject *it)
985{
986 SubString literal;
987 SubString field_name;
988 SubString format_spec;
989 Py_UNICODE conversion;
Eric Smithf6db4092007-08-27 23:52:26 +0000990 int format_spec_needs_expanding;
Eric Smith625cbf22007-08-29 03:22:59 +0000991 int result = MarkupIterator_next(&it->it_markup, &literal, &field_name,
992 &format_spec, &conversion,
Eric Smithf6db4092007-08-27 23:52:26 +0000993 &format_spec_needs_expanding);
994
995 /* all of the SubString objects point into it->str, so no
996 memory management needs to be done on them */
997 assert(0 <= result && result <= 2);
Eric Smith0cb431c2007-08-28 01:07:27 +0000998 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +0000999 /* if 0, error has already been set, if 1, iterator is empty */
1000 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +00001001 else {
Eric Smithf6db4092007-08-27 23:52:26 +00001002 PyObject *literal_str = NULL;
1003 PyObject *field_name_str = NULL;
1004 PyObject *format_spec_str = NULL;
1005 PyObject *conversion_str = NULL;
1006 PyObject *tuple = NULL;
Eric Smith625cbf22007-08-29 03:22:59 +00001007 int has_field = field_name.ptr != field_name.end;
Eric Smithf6db4092007-08-27 23:52:26 +00001008
Eric Smith625cbf22007-08-29 03:22:59 +00001009 literal_str = SubString_new_object(&literal);
1010 if (literal_str == NULL)
1011 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001012
Eric Smith625cbf22007-08-29 03:22:59 +00001013 field_name_str = SubString_new_object(&field_name);
1014 if (field_name_str == NULL)
1015 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001016
Eric Smith625cbf22007-08-29 03:22:59 +00001017 /* if field_name is non-zero length, return a string for
1018 format_spec (even if zero length), else return None */
1019 format_spec_str = (has_field ?
1020 SubString_new_object_or_empty :
1021 SubString_new_object)(&format_spec);
1022 if (format_spec_str == NULL)
1023 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001024
Eric Smith625cbf22007-08-29 03:22:59 +00001025 /* if the conversion is not specified, return a None,
1026 otherwise create a one length string with the conversion
1027 character */
1028 if (conversion == '\0') {
Eric Smithf6db4092007-08-27 23:52:26 +00001029 conversion_str = Py_None;
Eric Smithf6db4092007-08-27 23:52:26 +00001030 Py_INCREF(conversion_str);
1031 }
Eric Smith625cbf22007-08-29 03:22:59 +00001032 else
1033 conversion_str = PyUnicode_FromUnicode(&conversion, 1);
1034 if (conversion_str == NULL)
1035 goto done;
1036
Eric Smith9e7c8da2007-08-28 11:15:20 +00001037 tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
Eric Smithf6db4092007-08-27 23:52:26 +00001038 conversion_str);
Eric Smith625cbf22007-08-29 03:22:59 +00001039 done:
Eric Smithf6db4092007-08-27 23:52:26 +00001040 Py_XDECREF(literal_str);
1041 Py_XDECREF(field_name_str);
1042 Py_XDECREF(format_spec_str);
1043 Py_XDECREF(conversion_str);
1044 return tuple;
1045 }
1046}
1047
1048static PyMethodDef formatteriter_methods[] = {
1049 {NULL, NULL} /* sentinel */
1050};
1051
1052PyTypeObject PyFormatterIter_Type = {
1053 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1054 "formatteriterator", /* tp_name */
1055 sizeof(formatteriterobject), /* tp_basicsize */
1056 0, /* tp_itemsize */
1057 /* methods */
1058 (destructor)formatteriter_dealloc, /* tp_dealloc */
1059 0, /* tp_print */
1060 0, /* tp_getattr */
1061 0, /* tp_setattr */
1062 0, /* tp_compare */
1063 0, /* tp_repr */
1064 0, /* tp_as_number */
1065 0, /* tp_as_sequence */
1066 0, /* tp_as_mapping */
1067 0, /* tp_hash */
1068 0, /* tp_call */
1069 0, /* tp_str */
1070 PyObject_GenericGetAttr, /* tp_getattro */
1071 0, /* tp_setattro */
1072 0, /* tp_as_buffer */
1073 Py_TPFLAGS_DEFAULT, /* tp_flags */
1074 0, /* tp_doc */
1075 0, /* tp_traverse */
1076 0, /* tp_clear */
1077 0, /* tp_richcompare */
1078 0, /* tp_weaklistoffset */
1079 PyObject_SelfIter, /* tp_iter */
1080 (iternextfunc)formatteriter_next, /* tp_iternext */
1081 formatteriter_methods, /* tp_methods */
1082 0,
1083};
1084
1085/* unicode_formatter_parser is used to implement
1086 string.Formatter.vformat. it parses a string and returns tuples
1087 describing the parsed elements. It's a wrapper around
1088 stringlib/string_format.h's MarkupIterator */
1089static PyObject *
1090formatter_parser(PyUnicodeObject *self)
1091{
1092 formatteriterobject *it;
1093
1094 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1095 if (it == NULL)
1096 return NULL;
1097
1098 /* take ownership, give the object to the iterator */
1099 Py_INCREF(self);
1100 it->str = self;
1101
1102 /* initialize the contained MarkupIterator */
1103 MarkupIterator_init(&it->it_markup,
1104 PyUnicode_AS_UNICODE(self),
1105 PyUnicode_GET_SIZE(self));
1106
1107 return (PyObject *)it;
1108}
1109
1110
1111/************************************************************************/
1112/*********** fieldnameiterator ******************************************/
1113/************************************************************************/
1114
1115
/* This is used to implement string.Formatter.vparse().  It parses the
   field name into attribute and item values.  It's a Python-callable
   wrapper around FieldNameIterator. */
1119
1120typedef struct {
1121 PyObject_HEAD
1122
1123 PyUnicodeObject *str;
1124
1125 FieldNameIterator it_field;
1126} fieldnameiterobject;
1127
1128static void
1129fieldnameiter_dealloc(fieldnameiterobject *it)
1130{
1131 Py_XDECREF(it->str);
1132 PyObject_FREE(it);
1133}
1134
1135/* returns a tuple:
1136 (is_attr, value)
1137 is_attr is true if we used attribute syntax (e.g., '.foo')
1138 false if we used index syntax (e.g., '[foo]')
1139 value is an integer or string
1140*/
1141static PyObject *
1142fieldnameiter_next(fieldnameiterobject *it)
1143{
1144 int result;
1145 int is_attr;
1146 Py_ssize_t idx;
1147 SubString name;
1148
1149 result = FieldNameIterator_next(&it->it_field, &is_attr,
1150 &idx, &name);
Eric Smith0cb431c2007-08-28 01:07:27 +00001151 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +00001152 /* if 0, error has already been set, if 1, iterator is empty */
1153 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +00001154 else {
Eric Smithf6db4092007-08-27 23:52:26 +00001155 PyObject* result = NULL;
1156 PyObject* is_attr_obj = NULL;
1157 PyObject* obj = NULL;
1158
1159 is_attr_obj = PyBool_FromLong(is_attr);
1160 if (is_attr_obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001161 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001162
1163 /* either an integer or a string */
1164 if (idx != -1)
1165 obj = PyInt_FromSsize_t(idx);
1166 else
1167 obj = SubString_new_object(&name);
1168 if (obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001169 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001170
1171 /* return a tuple of values */
1172 result = PyTuple_Pack(2, is_attr_obj, obj);
Eric Smithf6db4092007-08-27 23:52:26 +00001173
Eric Smith625cbf22007-08-29 03:22:59 +00001174 done:
Eric Smithf6db4092007-08-27 23:52:26 +00001175 Py_XDECREF(is_attr_obj);
1176 Py_XDECREF(obj);
Eric Smith625cbf22007-08-29 03:22:59 +00001177 return result;
Eric Smithf6db4092007-08-27 23:52:26 +00001178 }
Eric Smithf6db4092007-08-27 23:52:26 +00001179}
1180
1181static PyMethodDef fieldnameiter_methods[] = {
1182 {NULL, NULL} /* sentinel */
1183};
1184
1185static PyTypeObject PyFieldNameIter_Type = {
1186 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1187 "fieldnameiterator", /* tp_name */
1188 sizeof(fieldnameiterobject), /* tp_basicsize */
1189 0, /* tp_itemsize */
1190 /* methods */
1191 (destructor)fieldnameiter_dealloc, /* tp_dealloc */
1192 0, /* tp_print */
1193 0, /* tp_getattr */
1194 0, /* tp_setattr */
1195 0, /* tp_compare */
1196 0, /* tp_repr */
1197 0, /* tp_as_number */
1198 0, /* tp_as_sequence */
1199 0, /* tp_as_mapping */
1200 0, /* tp_hash */
1201 0, /* tp_call */
1202 0, /* tp_str */
1203 PyObject_GenericGetAttr, /* tp_getattro */
1204 0, /* tp_setattro */
1205 0, /* tp_as_buffer */
1206 Py_TPFLAGS_DEFAULT, /* tp_flags */
1207 0, /* tp_doc */
1208 0, /* tp_traverse */
1209 0, /* tp_clear */
1210 0, /* tp_richcompare */
1211 0, /* tp_weaklistoffset */
1212 PyObject_SelfIter, /* tp_iter */
1213 (iternextfunc)fieldnameiter_next, /* tp_iternext */
1214 fieldnameiter_methods, /* tp_methods */
1215 0};
1216
1217/* unicode_formatter_field_name_split is used to implement
1218 string.Formatter.vformat. it takes an PEP 3101 "field name", and
1219 returns a tuple of (first, rest): "first", the part before the
1220 first '.' or '['; and "rest", an iterator for the rest of the field
1221 name. it's a wrapper around stringlib/string_format.h's
1222 field_name_split. The iterator it returns is a
1223 FieldNameIterator */
1224static PyObject *
1225formatter_field_name_split(PyUnicodeObject *self)
1226{
1227 SubString first;
1228 Py_ssize_t first_idx;
1229 fieldnameiterobject *it;
1230
1231 PyObject *first_obj = NULL;
1232 PyObject *result = NULL;
1233
1234 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1235 if (it == NULL)
1236 return NULL;
1237
1238 /* take ownership, give the object to the iterator. this is
1239 just to keep the field_name alive */
1240 Py_INCREF(self);
1241 it->str = self;
1242
1243 if (!field_name_split(STRINGLIB_STR(self),
1244 STRINGLIB_LEN(self),
1245 &first, &first_idx, &it->it_field))
Eric Smith625cbf22007-08-29 03:22:59 +00001246 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001247
Eric Smith0cb431c2007-08-28 01:07:27 +00001248 /* first becomes an integer, if possible; else a string */
Eric Smithf6db4092007-08-27 23:52:26 +00001249 if (first_idx != -1)
1250 first_obj = PyInt_FromSsize_t(first_idx);
1251 else
1252 /* convert "first" into a string object */
1253 first_obj = SubString_new_object(&first);
1254 if (first_obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001255 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001256
1257 /* return a tuple of values */
1258 result = PyTuple_Pack(2, first_obj, it);
1259
Eric Smith625cbf22007-08-29 03:22:59 +00001260done:
Eric Smithf6db4092007-08-27 23:52:26 +00001261 Py_XDECREF(it);
1262 Py_XDECREF(first_obj);
1263 return result;
1264}