blob: 70f8f13aea22006236435412dec6b4655a130835 [file] [log] [blame]
Eric Smith8c663262007-08-25 02:26:07 +00001/*
2 string_format.h -- implementation of string.format().
3
4 It uses the Objects/stringlib conventions, so that it can be
5 compiled for both unicode and string objects.
6*/
7
8
/* Python 2.6 compatibility: when building for a version older than 3.0,
   map the name PyLong_FromSsize_t onto _PyLong_FromSsize_t. */
#if PY_VERSION_HEX < 0x03000000
#define PyLong_FromSsize_t _PyLong_FromSsize_t
#endif
13
/* Tuning knobs for growing the output buffer.  Each OutputString starts
   with a slack of INITIAL_SIZE_INCREMENT characters; every reallocation
   multiplies that slack by SIZE_MULTIPLIER for as long as it is still
   below MAX_SIZE_INCREMENT. */
#define INITIAL_SIZE_INCREMENT 100
#define SIZE_MULTIPLIER 2
#define MAX_SIZE_INCREMENT 3200
18
19
20/************************************************************************/
21/*********** Global data structures and forward declarations *********/
22/************************************************************************/
23
24/*
25 A SubString consists of the characters between two string or
26 unicode pointers.
27*/
28typedef struct {
29 STRINGLIB_CHAR *ptr;
30 STRINGLIB_CHAR *end;
31} SubString;
32
33
34/* forward declaration for recursion */
35static PyObject *
36build_string(SubString *input, PyObject *args, PyObject *kwargs,
Eric Smith45c07872007-09-05 02:02:43 +000037 int recursion_depth);
Eric Smith8c663262007-08-25 02:26:07 +000038
39
40
41/************************************************************************/
42/************************** Utility functions ************************/
43/************************************************************************/
44
45/* fill in a SubString from a pointer and length */
46Py_LOCAL_INLINE(void)
47SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
48{
49 str->ptr = p;
50 if (p == NULL)
51 str->end = NULL;
52 else
53 str->end = str->ptr + len;
54}
55
Eric Smith625cbf22007-08-29 03:22:59 +000056/* return a new string. if str->ptr is NULL, return None */
Eric Smith8c663262007-08-25 02:26:07 +000057Py_LOCAL_INLINE(PyObject *)
58SubString_new_object(SubString *str)
59{
Eric Smith625cbf22007-08-29 03:22:59 +000060 if (str->ptr == NULL) {
61 Py_INCREF(Py_None);
62 return Py_None;
63 }
64 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
65}
66
67/* return a new string. if str->ptr is NULL, return None */
68Py_LOCAL_INLINE(PyObject *)
69SubString_new_object_or_empty(SubString *str)
70{
71 if (str->ptr == NULL) {
72 return STRINGLIB_NEW(NULL, 0);
73 }
Eric Smith8c663262007-08-25 02:26:07 +000074 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
75}
76
77/************************************************************************/
Eric Smith8c663262007-08-25 02:26:07 +000078/*********** Output string management functions ****************/
79/************************************************************************/
80
81typedef struct {
82 STRINGLIB_CHAR *ptr;
83 STRINGLIB_CHAR *end;
84 PyObject *obj;
85 Py_ssize_t size_increment;
86} OutputString;
87
88/* initialize an OutputString object, reserving size characters */
89static int
90output_initialize(OutputString *output, Py_ssize_t size)
91{
92 output->obj = STRINGLIB_NEW(NULL, size);
93 if (output->obj == NULL)
94 return 0;
95
96 output->ptr = STRINGLIB_STR(output->obj);
97 output->end = STRINGLIB_LEN(output->obj) + output->ptr;
98 output->size_increment = INITIAL_SIZE_INCREMENT;
99
100 return 1;
101}
102
103/*
104 output_extend reallocates the output string buffer.
105 It returns a status: 0 for a failed reallocation,
106 1 for success.
107*/
108
109static int
110output_extend(OutputString *output, Py_ssize_t count)
111{
112 STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
113 Py_ssize_t curlen = output->ptr - startptr;
114 Py_ssize_t maxlen = curlen + count + output->size_increment;
115
116 if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
117 return 0;
118 startptr = STRINGLIB_STR(output->obj);
119 output->ptr = startptr + curlen;
120 output->end = startptr + maxlen;
121 if (output->size_increment < MAX_SIZE_INCREMENT)
122 output->size_increment *= SIZE_MULTIPLIER;
123 return 1;
124}
125
126/*
127 output_data dumps characters into our output string
128 buffer.
129
130 In some cases, it has to reallocate the string.
131
132 It returns a status: 0 for a failed reallocation,
133 1 for success.
134*/
135static int
136output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
137{
138 if ((count > output->end - output->ptr) && !output_extend(output, count))
139 return 0;
140 memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
141 output->ptr += count;
142 return 1;
143}
144
145/************************************************************************/
146/*********** Format string parsing -- integers and identifiers *********/
147/************************************************************************/
148
Eric Smith7ade6482007-08-26 22:27:13 +0000149static Py_ssize_t
150get_integer(const SubString *str)
Eric Smith8c663262007-08-25 02:26:07 +0000151{
Eric Smith7ade6482007-08-26 22:27:13 +0000152 Py_ssize_t accumulator = 0;
153 Py_ssize_t digitval;
154 Py_ssize_t oldaccumulator;
155 STRINGLIB_CHAR *p;
Eric Smith8c663262007-08-25 02:26:07 +0000156
Eric Smith7ade6482007-08-26 22:27:13 +0000157 /* empty string is an error */
158 if (str->ptr >= str->end)
159 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000160
Eric Smith7ade6482007-08-26 22:27:13 +0000161 for (p = str->ptr; p < str->end; p++) {
162 digitval = STRINGLIB_TODECIMAL(*p);
Eric Smith8c663262007-08-25 02:26:07 +0000163 if (digitval < 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000164 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000165 /*
166 This trick was copied from old Unicode format code. It's cute,
167 but would really suck on an old machine with a slow divide
168 implementation. Fortunately, in the normal case we do not
169 expect too many digits.
170 */
171 oldaccumulator = accumulator;
172 accumulator *= 10;
173 if ((accumulator+10)/10 != oldaccumulator+1) {
174 PyErr_Format(PyExc_ValueError,
175 "Too many decimal digits in format string");
176 return -1;
177 }
178 accumulator += digitval;
179 }
Eric Smith7ade6482007-08-26 22:27:13 +0000180 return accumulator;
Eric Smith8c663262007-08-25 02:26:07 +0000181}
182
183/************************************************************************/
184/******** Functions to get field objects and specification strings ******/
185/************************************************************************/
186
Eric Smith7ade6482007-08-26 22:27:13 +0000187/* do the equivalent of obj.name */
Eric Smith8c663262007-08-25 02:26:07 +0000188static PyObject *
Eric Smith7ade6482007-08-26 22:27:13 +0000189getattr(PyObject *obj, SubString *name)
Eric Smith8c663262007-08-25 02:26:07 +0000190{
Eric Smith7ade6482007-08-26 22:27:13 +0000191 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000192 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000193 if (str == NULL)
194 return NULL;
195 newobj = PyObject_GetAttr(obj, str);
196 Py_DECREF(str);
197 return newobj;
Eric Smith8c663262007-08-25 02:26:07 +0000198}
199
Eric Smith7ade6482007-08-26 22:27:13 +0000200/* do the equivalent of obj[idx], where obj is a sequence */
201static PyObject *
202getitem_sequence(PyObject *obj, Py_ssize_t idx)
203{
204 return PySequence_GetItem(obj, idx);
205}
206
207/* do the equivalent of obj[idx], where obj is not a sequence */
208static PyObject *
209getitem_idx(PyObject *obj, Py_ssize_t idx)
210{
211 PyObject *newobj;
Christian Heimes217cfd12007-12-02 14:31:20 +0000212 PyObject *idx_obj = PyLong_FromSsize_t(idx);
Eric Smith7ade6482007-08-26 22:27:13 +0000213 if (idx_obj == NULL)
214 return NULL;
215 newobj = PyObject_GetItem(obj, idx_obj);
216 Py_DECREF(idx_obj);
217 return newobj;
218}
219
220/* do the equivalent of obj[name] */
221static PyObject *
222getitem_str(PyObject *obj, SubString *name)
223{
224 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000225 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000226 if (str == NULL)
227 return NULL;
228 newobj = PyObject_GetItem(obj, str);
229 Py_DECREF(str);
230 return newobj;
231}
232
233typedef struct {
234 /* the entire string we're parsing. we assume that someone else
235 is managing its lifetime, and that it will exist for the
236 lifetime of the iterator. can be empty */
237 SubString str;
238
239 /* pointer to where we are inside field_name */
240 STRINGLIB_CHAR *ptr;
241} FieldNameIterator;
242
243
244static int
245FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
246 Py_ssize_t len)
247{
248 SubString_init(&self->str, ptr, len);
249 self->ptr = self->str.ptr;
250 return 1;
251}
252
253static int
254_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
255{
256 STRINGLIB_CHAR c;
257
258 name->ptr = self->ptr;
259
260 /* return everything until '.' or '[' */
261 while (self->ptr < self->str.end) {
262 switch (c = *self->ptr++) {
263 case '[':
264 case '.':
265 /* backup so that we this character will be seen next time */
266 self->ptr--;
267 break;
268 default:
269 continue;
270 }
271 break;
272 }
273 /* end of string is okay */
274 name->end = self->ptr;
275 return 1;
276}
277
278static int
279_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
280{
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000281 int bracket_seen = 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000282 STRINGLIB_CHAR c;
283
284 name->ptr = self->ptr;
285
286 /* return everything until ']' */
287 while (self->ptr < self->str.end) {
288 switch (c = *self->ptr++) {
289 case ']':
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000290 bracket_seen = 1;
Eric Smith7ade6482007-08-26 22:27:13 +0000291 break;
292 default:
293 continue;
294 }
295 break;
296 }
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000297 /* make sure we ended with a ']' */
298 if (!bracket_seen) {
299 PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
300 return 0;
301 }
302
Eric Smith7ade6482007-08-26 22:27:13 +0000303 /* end of string is okay */
304 /* don't include the ']' */
305 name->end = self->ptr-1;
306 return 1;
307}
308
309/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
310static int
311FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
312 Py_ssize_t *name_idx, SubString *name)
313{
314 /* check at end of input */
315 if (self->ptr >= self->str.end)
316 return 1;
317
318 switch (*self->ptr++) {
319 case '.':
320 *is_attribute = 1;
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000321 if (_FieldNameIterator_attr(self, name) == 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000322 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000323 *name_idx = -1;
324 break;
325 case '[':
326 *is_attribute = 0;
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000327 if (_FieldNameIterator_item(self, name) == 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000328 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000329 *name_idx = get_integer(name);
330 break;
331 default:
332 /* interal error, can't get here */
333 assert(0);
334 return 0;
335 }
336
337 /* empty string is an error */
338 if (name->ptr == name->end) {
339 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
340 return 0;
341 }
342
343 return 2;
344}
345
346
347/* input: field_name
348 output: 'first' points to the part before the first '[' or '.'
349 'first_idx' is -1 if 'first' is not an integer, otherwise
350 it's the value of first converted to an integer
351 'rest' is an iterator to return the rest
352*/
353static int
354field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
355 Py_ssize_t *first_idx, FieldNameIterator *rest)
356{
357 STRINGLIB_CHAR c;
358 STRINGLIB_CHAR *p = ptr;
359 STRINGLIB_CHAR *end = ptr + len;
360
361 /* find the part up until the first '.' or '[' */
362 while (p < end) {
363 switch (c = *p++) {
364 case '[':
365 case '.':
366 /* backup so that we this character is available to the
367 "rest" iterator */
368 p--;
369 break;
370 default:
371 continue;
372 }
373 break;
374 }
375
376 /* set up the return values */
377 SubString_init(first, ptr, p - ptr);
378 FieldNameIterator_init(rest, p, end - p);
379
380 /* see if "first" is an integer, in which case it's used as an index */
381 *first_idx = get_integer(first);
382
383 /* zero length string is an error */
384 if (first->ptr >= first->end) {
385 PyErr_SetString(PyExc_ValueError, "empty field name");
386 goto error;
387 }
388
389 return 1;
390error:
391 return 0;
392}
393
394
Eric Smith8c663262007-08-25 02:26:07 +0000395/*
396 get_field_object returns the object inside {}, before the
397 format_spec. It handles getindex and getattr lookups and consumes
398 the entire input string.
399*/
400static PyObject *
401get_field_object(SubString *input, PyObject *args, PyObject *kwargs)
402{
Eric Smith7ade6482007-08-26 22:27:13 +0000403 PyObject *obj = NULL;
404 int ok;
405 int is_attribute;
406 SubString name;
407 SubString first;
Eric Smith8c663262007-08-25 02:26:07 +0000408 Py_ssize_t index;
Eric Smith7ade6482007-08-26 22:27:13 +0000409 FieldNameIterator rest;
Eric Smith8c663262007-08-25 02:26:07 +0000410
Eric Smith7ade6482007-08-26 22:27:13 +0000411 if (!field_name_split(input->ptr, input->end - input->ptr, &first,
412 &index, &rest)) {
413 goto error;
414 }
Eric Smith8c663262007-08-25 02:26:07 +0000415
Eric Smith7ade6482007-08-26 22:27:13 +0000416 if (index == -1) {
417 /* look up in kwargs */
Eric Smith7a6dd292007-08-27 23:30:47 +0000418 PyObject *key = SubString_new_object(&first);
Eric Smith7ade6482007-08-26 22:27:13 +0000419 if (key == NULL)
420 goto error;
421 if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
Eric Smith11529192007-09-04 23:04:22 +0000422 PyErr_SetObject(PyExc_KeyError, key);
Eric Smith7ade6482007-08-26 22:27:13 +0000423 Py_DECREF(key);
424 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000425 }
Neal Norwitz8a4eb292007-08-27 07:24:17 +0000426 Py_DECREF(key);
Neal Norwitz247b5152007-08-27 03:22:50 +0000427 Py_INCREF(obj);
Eric Smith0cb431c2007-08-28 01:07:27 +0000428 }
429 else {
Eric Smith7ade6482007-08-26 22:27:13 +0000430 /* look up in args */
431 obj = PySequence_GetItem(args, index);
Eric Smith11529192007-09-04 23:04:22 +0000432 if (obj == NULL)
Eric Smith7ade6482007-08-26 22:27:13 +0000433 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000434 }
Eric Smith7ade6482007-08-26 22:27:13 +0000435
436 /* iterate over the rest of the field_name */
437 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
438 &name)) == 2) {
439 PyObject *tmp;
440
441 if (is_attribute)
442 /* getattr lookup "." */
443 tmp = getattr(obj, &name);
444 else
445 /* getitem lookup "[]" */
446 if (index == -1)
447 tmp = getitem_str(obj, &name);
448 else
449 if (PySequence_Check(obj))
450 tmp = getitem_sequence(obj, index);
451 else
452 /* not a sequence */
453 tmp = getitem_idx(obj, index);
454 if (tmp == NULL)
455 goto error;
456
457 /* assign to obj */
458 Py_DECREF(obj);
459 obj = tmp;
Eric Smith8c663262007-08-25 02:26:07 +0000460 }
Eric Smith7ade6482007-08-26 22:27:13 +0000461 /* end of iterator, this is the non-error case */
462 if (ok == 1)
463 return obj;
464error:
465 Py_XDECREF(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000466 return NULL;
467}
468
469/************************************************************************/
470/***************** Field rendering functions **************************/
471/************************************************************************/
472
473/*
474 render_field() is the main function in this section. It takes the
475 field object and field specification string generated by
476 get_field_and_spec, and renders the field into the output string.
477
Eric Smith8c663262007-08-25 02:26:07 +0000478 render_field calls fieldobj.__format__(format_spec) method, and
479 appends to the output.
480*/
481static int
482render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
483{
484 int ok = 0;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000485 PyObject *result = NULL;
Eric Smith8c663262007-08-25 02:26:07 +0000486
Eric Smith8fd3eba2008-02-17 19:48:00 +0000487 /* we need to create an object out of the pointers we have */
488 PyObject *format_spec_object = SubString_new_object_or_empty(format_spec);
489 if (format_spec_object == NULL)
490 goto done;
491
492 result = PyObject_Format(fieldobj, format_spec_object);
Eric Smith8c663262007-08-25 02:26:07 +0000493 if (result == NULL)
494 goto done;
495
496 ok = output_data(output,
497 STRINGLIB_STR(result), STRINGLIB_LEN(result));
498done:
Eric Smith8fd3eba2008-02-17 19:48:00 +0000499 Py_DECREF(format_spec_object);
Eric Smith8c663262007-08-25 02:26:07 +0000500 Py_XDECREF(result);
501 return ok;
502}
503
504static int
505parse_field(SubString *str, SubString *field_name, SubString *format_spec,
506 STRINGLIB_CHAR *conversion)
507{
508 STRINGLIB_CHAR c = 0;
509
510 /* initialize these, as they may be empty */
511 *conversion = '\0';
512 SubString_init(format_spec, NULL, 0);
513
514 /* search for the field name. it's terminated by the end of the
515 string, or a ':' or '!' */
516 field_name->ptr = str->ptr;
517 while (str->ptr < str->end) {
518 switch (c = *(str->ptr++)) {
519 case ':':
520 case '!':
521 break;
522 default:
523 continue;
524 }
525 break;
526 }
527
528 if (c == '!' || c == ':') {
529 /* we have a format specifier and/or a conversion */
530 /* don't include the last character */
531 field_name->end = str->ptr-1;
532
533 /* the format specifier is the rest of the string */
534 format_spec->ptr = str->ptr;
535 format_spec->end = str->end;
536
537 /* see if there's a conversion specifier */
538 if (c == '!') {
539 /* there must be another character present */
540 if (format_spec->ptr >= format_spec->end) {
541 PyErr_SetString(PyExc_ValueError,
542 "end of format while looking for conversion "
543 "specifier");
544 return 0;
545 }
546 *conversion = *(format_spec->ptr++);
547
548 /* if there is another character, it must be a colon */
549 if (format_spec->ptr < format_spec->end) {
550 c = *(format_spec->ptr++);
551 if (c != ':') {
552 PyErr_SetString(PyExc_ValueError,
553 "expected ':' after format specifier");
554 return 0;
555 }
556 }
557 }
558
559 return 1;
560
Eric Smith0cb431c2007-08-28 01:07:27 +0000561 }
562 else {
Eric Smith8c663262007-08-25 02:26:07 +0000563 /* end of string, there's no format_spec or conversion */
564 field_name->end = str->ptr;
565 return 1;
566 }
567}
568
569/************************************************************************/
570/******* Output string allocation and escape-to-markup processing ******/
571/************************************************************************/
572
573/* MarkupIterator breaks the string into pieces of either literal
574 text, or things inside {} that need to be marked up. it is
575 designed to make it easy to wrap a Python iterator around it, for
576 use with the Formatter class */
577
578typedef struct {
579 SubString str;
Eric Smith8c663262007-08-25 02:26:07 +0000580} MarkupIterator;
581
582static int
583MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
584{
585 SubString_init(&self->str, ptr, len);
Eric Smith8c663262007-08-25 02:26:07 +0000586 return 1;
587}
588
589/* returns 0 on error, 1 on non-error termination, and 2 if it got a
590 string (or something to be expanded) */
591static int
Eric Smith625cbf22007-08-29 03:22:59 +0000592MarkupIterator_next(MarkupIterator *self, SubString *literal,
Eric Smith8c663262007-08-25 02:26:07 +0000593 SubString *field_name, SubString *format_spec,
594 STRINGLIB_CHAR *conversion,
595 int *format_spec_needs_expanding)
596{
597 int at_end;
598 STRINGLIB_CHAR c = 0;
599 STRINGLIB_CHAR *start;
600 int count;
601 Py_ssize_t len;
Eric Smith625cbf22007-08-29 03:22:59 +0000602 int markup_follows = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000603
Eric Smith625cbf22007-08-29 03:22:59 +0000604 /* initialize all of the output variables */
605 SubString_init(literal, NULL, 0);
606 SubString_init(field_name, NULL, 0);
607 SubString_init(format_spec, NULL, 0);
608 *conversion = '\0';
Eric Smith8c663262007-08-25 02:26:07 +0000609 *format_spec_needs_expanding = 0;
610
Eric Smith625cbf22007-08-29 03:22:59 +0000611 /* No more input, end of iterator. This is the normal exit
612 path. */
Eric Smith8c663262007-08-25 02:26:07 +0000613 if (self->str.ptr >= self->str.end)
614 return 1;
615
Eric Smith8c663262007-08-25 02:26:07 +0000616 start = self->str.ptr;
617
Eric Smith625cbf22007-08-29 03:22:59 +0000618 /* First read any literal text. Read until the end of string, an
619 escaped '{' or '}', or an unescaped '{'. In order to never
620 allocate memory and so I can just pass pointers around, if
621 there's an escaped '{' or '}' then we'll return the literal
622 including the brace, but no format object. The next time
623 through, we'll return the rest of the literal, skipping past
624 the second consecutive brace. */
625 while (self->str.ptr < self->str.end) {
626 switch (c = *(self->str.ptr++)) {
627 case '{':
628 case '}':
629 markup_follows = 1;
630 break;
631 default:
632 continue;
Eric Smith8c663262007-08-25 02:26:07 +0000633 }
Eric Smith625cbf22007-08-29 03:22:59 +0000634 break;
Eric Smith0cb431c2007-08-28 01:07:27 +0000635 }
Eric Smith625cbf22007-08-29 03:22:59 +0000636
637 at_end = self->str.ptr >= self->str.end;
638 len = self->str.ptr - start;
639
640 if ((c == '}') && (at_end || (c != *self->str.ptr))) {
641 PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
642 "in format string");
643 return 0;
644 }
645 if (at_end && c == '{') {
646 PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
647 "in format string");
648 return 0;
649 }
650 if (!at_end) {
651 if (c == *self->str.ptr) {
652 /* escaped } or {, skip it in the input. there is no
653 markup object following us, just this literal text */
654 self->str.ptr++;
655 markup_follows = 0;
656 }
657 else
658 len--;
659 }
660
661 /* record the literal text */
662 literal->ptr = start;
663 literal->end = start + len;
664
665 if (!markup_follows)
666 return 2;
667
668 /* this is markup, find the end of the string by counting nested
669 braces. note that this prohibits escaped braces, so that
670 format_specs cannot have braces in them. */
671 count = 1;
672
673 start = self->str.ptr;
674
675 /* we know we can't have a zero length string, so don't worry
676 about that case */
677 while (self->str.ptr < self->str.end) {
678 switch (c = *(self->str.ptr++)) {
679 case '{':
680 /* the format spec needs to be recursively expanded.
681 this is an optimization, and not strictly needed */
682 *format_spec_needs_expanding = 1;
683 count++;
684 break;
685 case '}':
686 count--;
687 if (count <= 0) {
688 /* we're done. parse and get out */
689 SubString s;
690
691 SubString_init(&s, start, self->str.ptr - 1 - start);
692 if (parse_field(&s, field_name, format_spec, conversion) == 0)
693 return 0;
694
695 /* a zero length field_name is an error */
696 if (field_name->ptr == field_name->end) {
697 PyErr_SetString(PyExc_ValueError, "zero length field name "
698 "in format");
699 return 0;
700 }
701
702 /* success */
703 return 2;
Eric Smith8c663262007-08-25 02:26:07 +0000704 }
705 break;
706 }
Eric Smith8c663262007-08-25 02:26:07 +0000707 }
Eric Smith625cbf22007-08-29 03:22:59 +0000708
709 /* end of string while searching for matching '}' */
710 PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
711 return 0;
Eric Smith8c663262007-08-25 02:26:07 +0000712}
713
714
715/* do the !r or !s conversion on obj */
716static PyObject *
717do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
718{
719 /* XXX in pre-3.0, do we need to convert this to unicode, since it
720 might have returned a string? */
721 switch (conversion) {
722 case 'r':
723 return PyObject_Repr(obj);
724 case 's':
Eric Smith8fd3eba2008-02-17 19:48:00 +0000725 return STRINGLIB_TOSTR(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000726 default:
727 PyErr_Format(PyExc_ValueError,
728 "Unknown converion specifier %c",
729 conversion);
730 return NULL;
731 }
732}
733
734/* given:
735
736 {field_name!conversion:format_spec}
737
738 compute the result and write it to output.
739 format_spec_needs_expanding is an optimization. if it's false,
740 just output the string directly, otherwise recursively expand the
741 format_spec string. */
742
743static int
744output_markup(SubString *field_name, SubString *format_spec,
745 int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
746 OutputString *output, PyObject *args, PyObject *kwargs,
Eric Smith45c07872007-09-05 02:02:43 +0000747 int recursion_depth)
Eric Smith8c663262007-08-25 02:26:07 +0000748{
749 PyObject *tmp = NULL;
750 PyObject *fieldobj = NULL;
751 SubString expanded_format_spec;
752 SubString *actual_format_spec;
753 int result = 0;
754
755 /* convert field_name to an object */
756 fieldobj = get_field_object(field_name, args, kwargs);
757 if (fieldobj == NULL)
758 goto done;
759
760 if (conversion != '\0') {
761 tmp = do_conversion(fieldobj, conversion);
762 if (tmp == NULL)
763 goto done;
764
765 /* do the assignment, transferring ownership: fieldobj = tmp */
766 Py_DECREF(fieldobj);
767 fieldobj = tmp;
768 tmp = NULL;
769 }
770
771 /* if needed, recurively compute the format_spec */
772 if (format_spec_needs_expanding) {
Eric Smith45c07872007-09-05 02:02:43 +0000773 tmp = build_string(format_spec, args, kwargs, recursion_depth-1);
Eric Smith8c663262007-08-25 02:26:07 +0000774 if (tmp == NULL)
775 goto done;
776
777 /* note that in the case we're expanding the format string,
778 tmp must be kept around until after the call to
779 render_field. */
780 SubString_init(&expanded_format_spec,
781 STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
782 actual_format_spec = &expanded_format_spec;
Eric Smith0cb431c2007-08-28 01:07:27 +0000783 }
784 else
Eric Smith8c663262007-08-25 02:26:07 +0000785 actual_format_spec = format_spec;
786
787 if (render_field(fieldobj, actual_format_spec, output) == 0)
788 goto done;
789
790 result = 1;
791
792done:
793 Py_XDECREF(fieldobj);
794 Py_XDECREF(tmp);
795
796 return result;
797}
798
799/*
Eric Smith8fd3eba2008-02-17 19:48:00 +0000800 do_markup is the top-level loop for the format() method. It
Eric Smith8c663262007-08-25 02:26:07 +0000801 searches through the format string for escapes to markup codes, and
802 calls other functions to move non-markup text to the output,
803 and to perform the markup to the output.
804*/
805static int
806do_markup(SubString *input, PyObject *args, PyObject *kwargs,
Eric Smith45c07872007-09-05 02:02:43 +0000807 OutputString *output, int recursion_depth)
Eric Smith8c663262007-08-25 02:26:07 +0000808{
809 MarkupIterator iter;
Eric Smith8c663262007-08-25 02:26:07 +0000810 int format_spec_needs_expanding;
811 int result;
Eric Smith625cbf22007-08-29 03:22:59 +0000812 SubString literal;
Eric Smith8c663262007-08-25 02:26:07 +0000813 SubString field_name;
814 SubString format_spec;
815 STRINGLIB_CHAR conversion;
816
817 MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
Eric Smith625cbf22007-08-29 03:22:59 +0000818 while ((result = MarkupIterator_next(&iter, &literal, &field_name,
Eric Smith8c663262007-08-25 02:26:07 +0000819 &format_spec, &conversion,
820 &format_spec_needs_expanding)) == 2) {
Eric Smith625cbf22007-08-29 03:22:59 +0000821 if (!output_data(output, literal.ptr, literal.end - literal.ptr))
822 return 0;
823 if (field_name.ptr != field_name.end)
Eric Smith8c663262007-08-25 02:26:07 +0000824 if (!output_markup(&field_name, &format_spec,
825 format_spec_needs_expanding, conversion, output,
Eric Smith45c07872007-09-05 02:02:43 +0000826 args, kwargs, recursion_depth))
Eric Smith8c663262007-08-25 02:26:07 +0000827 return 0;
Eric Smith8c663262007-08-25 02:26:07 +0000828 }
829 return result;
830}
831
832
833/*
834 build_string allocates the output string and then
835 calls do_markup to do the heavy lifting.
836*/
837static PyObject *
838build_string(SubString *input, PyObject *args, PyObject *kwargs,
Eric Smith45c07872007-09-05 02:02:43 +0000839 int recursion_depth)
Eric Smith8c663262007-08-25 02:26:07 +0000840{
841 OutputString output;
842 PyObject *result = NULL;
843 Py_ssize_t count;
844
845 output.obj = NULL; /* needed so cleanup code always works */
846
847 /* check the recursion level */
Eric Smith45c07872007-09-05 02:02:43 +0000848 if (recursion_depth <= 0) {
Eric Smith8c663262007-08-25 02:26:07 +0000849 PyErr_SetString(PyExc_ValueError,
850 "Max string recursion exceeded");
851 goto done;
852 }
853
854 /* initial size is the length of the format string, plus the size
855 increment. seems like a reasonable default */
856 if (!output_initialize(&output,
857 input->end - input->ptr +
858 INITIAL_SIZE_INCREMENT))
859 goto done;
860
Eric Smith45c07872007-09-05 02:02:43 +0000861 if (!do_markup(input, args, kwargs, &output, recursion_depth)) {
Eric Smith8c663262007-08-25 02:26:07 +0000862 goto done;
863 }
864
865 count = output.ptr - STRINGLIB_STR(output.obj);
866 if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
867 goto done;
868 }
869
870 /* transfer ownership to result */
871 result = output.obj;
872 output.obj = NULL;
873
874done:
Eric Smith8c663262007-08-25 02:26:07 +0000875 Py_XDECREF(output.obj);
876 return result;
877}
878
879/************************************************************************/
880/*********** main routine ***********************************************/
881/************************************************************************/
882
883/* this is the main entry point */
884static PyObject *
885do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
886{
887 SubString input;
888
889 /* PEP 3101 says only 2 levels, so that
890 "{0:{1}}".format('abc', 's') # works
891 "{0:{1:{2}}}".format('abc', 's', '') # fails
892 */
Eric Smith45c07872007-09-05 02:02:43 +0000893 int recursion_depth = 2;
Eric Smith8c663262007-08-25 02:26:07 +0000894
895 SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
Eric Smith45c07872007-09-05 02:02:43 +0000896 return build_string(&input, args, kwargs, recursion_depth);
Eric Smith8c663262007-08-25 02:26:07 +0000897}
Eric Smithf6db4092007-08-27 23:52:26 +0000898
899
900
901/************************************************************************/
902/*********** formatteriterator ******************************************/
903/************************************************************************/
904
905/* This is used to implement string.Formatter.vparse(). It exists so
906 Formatter can share code with the built in unicode.format() method.
907 It's really just a wrapper around MarkupIterator that is callable
908 from Python. */
909
910typedef struct {
911 PyObject_HEAD
912
Eric Smith8fd3eba2008-02-17 19:48:00 +0000913 STRINGLIB_OBJECT *str;
Eric Smithf6db4092007-08-27 23:52:26 +0000914
915 MarkupIterator it_markup;
916} formatteriterobject;
917
918static void
919formatteriter_dealloc(formatteriterobject *it)
920{
921 Py_XDECREF(it->str);
922 PyObject_FREE(it);
923}
924
925/* returns a tuple:
Eric Smith625cbf22007-08-29 03:22:59 +0000926 (literal, field_name, format_spec, conversion)
927
928 literal is any literal text to output. might be zero length
929 field_name is the string before the ':'. might be None
930 format_spec is the string after the ':'. might be None
931 conversion is either None, or the string after the '!'
Eric Smithf6db4092007-08-27 23:52:26 +0000932*/
933static PyObject *
934formatteriter_next(formatteriterobject *it)
935{
936 SubString literal;
937 SubString field_name;
938 SubString format_spec;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000939 STRINGLIB_CHAR conversion;
Eric Smithf6db4092007-08-27 23:52:26 +0000940 int format_spec_needs_expanding;
Eric Smith625cbf22007-08-29 03:22:59 +0000941 int result = MarkupIterator_next(&it->it_markup, &literal, &field_name,
942 &format_spec, &conversion,
Eric Smithf6db4092007-08-27 23:52:26 +0000943 &format_spec_needs_expanding);
944
945 /* all of the SubString objects point into it->str, so no
946 memory management needs to be done on them */
947 assert(0 <= result && result <= 2);
Eric Smith0cb431c2007-08-28 01:07:27 +0000948 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +0000949 /* if 0, error has already been set, if 1, iterator is empty */
950 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +0000951 else {
Eric Smithf6db4092007-08-27 23:52:26 +0000952 PyObject *literal_str = NULL;
953 PyObject *field_name_str = NULL;
954 PyObject *format_spec_str = NULL;
955 PyObject *conversion_str = NULL;
956 PyObject *tuple = NULL;
Eric Smith625cbf22007-08-29 03:22:59 +0000957 int has_field = field_name.ptr != field_name.end;
Eric Smithf6db4092007-08-27 23:52:26 +0000958
Eric Smith625cbf22007-08-29 03:22:59 +0000959 literal_str = SubString_new_object(&literal);
960 if (literal_str == NULL)
961 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +0000962
Eric Smith625cbf22007-08-29 03:22:59 +0000963 field_name_str = SubString_new_object(&field_name);
964 if (field_name_str == NULL)
965 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +0000966
Eric Smith625cbf22007-08-29 03:22:59 +0000967 /* if field_name is non-zero length, return a string for
968 format_spec (even if zero length), else return None */
969 format_spec_str = (has_field ?
970 SubString_new_object_or_empty :
971 SubString_new_object)(&format_spec);
972 if (format_spec_str == NULL)
973 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +0000974
Eric Smith625cbf22007-08-29 03:22:59 +0000975 /* if the conversion is not specified, return a None,
976 otherwise create a one length string with the conversion
977 character */
978 if (conversion == '\0') {
Eric Smithf6db4092007-08-27 23:52:26 +0000979 conversion_str = Py_None;
Eric Smithf6db4092007-08-27 23:52:26 +0000980 Py_INCREF(conversion_str);
981 }
Eric Smith625cbf22007-08-29 03:22:59 +0000982 else
Eric Smith8fd3eba2008-02-17 19:48:00 +0000983 conversion_str = STRINGLIB_NEW(&conversion, 1);
Eric Smith625cbf22007-08-29 03:22:59 +0000984 if (conversion_str == NULL)
985 goto done;
986
Eric Smith9e7c8da2007-08-28 11:15:20 +0000987 tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
Eric Smithf6db4092007-08-27 23:52:26 +0000988 conversion_str);
Eric Smith625cbf22007-08-29 03:22:59 +0000989 done:
Eric Smithf6db4092007-08-27 23:52:26 +0000990 Py_XDECREF(literal_str);
991 Py_XDECREF(field_name_str);
992 Py_XDECREF(format_spec_str);
993 Py_XDECREF(conversion_str);
994 return tuple;
995 }
996}
997
998static PyMethodDef formatteriter_methods[] = {
999 {NULL, NULL} /* sentinel */
1000};
1001
Eric Smith8fd3eba2008-02-17 19:48:00 +00001002static PyTypeObject PyFormatterIter_Type = {
Eric Smithf6db4092007-08-27 23:52:26 +00001003 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1004 "formatteriterator", /* tp_name */
1005 sizeof(formatteriterobject), /* tp_basicsize */
1006 0, /* tp_itemsize */
1007 /* methods */
1008 (destructor)formatteriter_dealloc, /* tp_dealloc */
1009 0, /* tp_print */
1010 0, /* tp_getattr */
1011 0, /* tp_setattr */
1012 0, /* tp_compare */
1013 0, /* tp_repr */
1014 0, /* tp_as_number */
1015 0, /* tp_as_sequence */
1016 0, /* tp_as_mapping */
1017 0, /* tp_hash */
1018 0, /* tp_call */
1019 0, /* tp_str */
1020 PyObject_GenericGetAttr, /* tp_getattro */
1021 0, /* tp_setattro */
1022 0, /* tp_as_buffer */
1023 Py_TPFLAGS_DEFAULT, /* tp_flags */
1024 0, /* tp_doc */
1025 0, /* tp_traverse */
1026 0, /* tp_clear */
1027 0, /* tp_richcompare */
1028 0, /* tp_weaklistoffset */
1029 PyObject_SelfIter, /* tp_iter */
1030 (iternextfunc)formatteriter_next, /* tp_iternext */
1031 formatteriter_methods, /* tp_methods */
1032 0,
1033};
1034
1035/* unicode_formatter_parser is used to implement
1036 string.Formatter.vformat. it parses a string and returns tuples
1037 describing the parsed elements. It's a wrapper around
1038 stringlib/string_format.h's MarkupIterator */
1039static PyObject *
Eric Smith8fd3eba2008-02-17 19:48:00 +00001040formatter_parser(STRINGLIB_OBJECT *self)
Eric Smithf6db4092007-08-27 23:52:26 +00001041{
1042 formatteriterobject *it;
1043
1044 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1045 if (it == NULL)
1046 return NULL;
1047
1048 /* take ownership, give the object to the iterator */
1049 Py_INCREF(self);
1050 it->str = self;
1051
1052 /* initialize the contained MarkupIterator */
1053 MarkupIterator_init(&it->it_markup,
Eric Smith8fd3eba2008-02-17 19:48:00 +00001054 STRINGLIB_STR(self),
1055 STRINGLIB_LEN(self));
Eric Smithf6db4092007-08-27 23:52:26 +00001056
1057 return (PyObject *)it;
1058}
1059
1060
1061/************************************************************************/
1062/*********** fieldnameiterator ******************************************/
1063/************************************************************************/
1064
1065
1066/* This is used to implement string.Formatter.vparse(). It parses the
1067 field name into attribute and item values. It's a Python-callable
1068 wrapper around FieldNameIterator */
1069
1070typedef struct {
1071 PyObject_HEAD
1072
Eric Smith8fd3eba2008-02-17 19:48:00 +00001073 STRINGLIB_OBJECT *str;
Eric Smithf6db4092007-08-27 23:52:26 +00001074
1075 FieldNameIterator it_field;
1076} fieldnameiterobject;
1077
1078static void
1079fieldnameiter_dealloc(fieldnameiterobject *it)
1080{
1081 Py_XDECREF(it->str);
1082 PyObject_FREE(it);
1083}
1084
1085/* returns a tuple:
1086 (is_attr, value)
1087 is_attr is true if we used attribute syntax (e.g., '.foo')
1088 false if we used index syntax (e.g., '[foo]')
1089 value is an integer or string
1090*/
1091static PyObject *
1092fieldnameiter_next(fieldnameiterobject *it)
1093{
1094 int result;
1095 int is_attr;
1096 Py_ssize_t idx;
1097 SubString name;
1098
1099 result = FieldNameIterator_next(&it->it_field, &is_attr,
1100 &idx, &name);
Eric Smith0cb431c2007-08-28 01:07:27 +00001101 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +00001102 /* if 0, error has already been set, if 1, iterator is empty */
1103 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +00001104 else {
Eric Smithf6db4092007-08-27 23:52:26 +00001105 PyObject* result = NULL;
1106 PyObject* is_attr_obj = NULL;
1107 PyObject* obj = NULL;
1108
1109 is_attr_obj = PyBool_FromLong(is_attr);
1110 if (is_attr_obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001111 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001112
1113 /* either an integer or a string */
1114 if (idx != -1)
Christian Heimes217cfd12007-12-02 14:31:20 +00001115 obj = PyLong_FromSsize_t(idx);
Eric Smithf6db4092007-08-27 23:52:26 +00001116 else
1117 obj = SubString_new_object(&name);
1118 if (obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001119 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001120
1121 /* return a tuple of values */
1122 result = PyTuple_Pack(2, is_attr_obj, obj);
Eric Smithf6db4092007-08-27 23:52:26 +00001123
Eric Smith625cbf22007-08-29 03:22:59 +00001124 done:
Eric Smithf6db4092007-08-27 23:52:26 +00001125 Py_XDECREF(is_attr_obj);
1126 Py_XDECREF(obj);
Eric Smith625cbf22007-08-29 03:22:59 +00001127 return result;
Eric Smithf6db4092007-08-27 23:52:26 +00001128 }
Eric Smithf6db4092007-08-27 23:52:26 +00001129}
1130
1131static PyMethodDef fieldnameiter_methods[] = {
1132 {NULL, NULL} /* sentinel */
1133};
1134
1135static PyTypeObject PyFieldNameIter_Type = {
1136 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1137 "fieldnameiterator", /* tp_name */
1138 sizeof(fieldnameiterobject), /* tp_basicsize */
1139 0, /* tp_itemsize */
1140 /* methods */
1141 (destructor)fieldnameiter_dealloc, /* tp_dealloc */
1142 0, /* tp_print */
1143 0, /* tp_getattr */
1144 0, /* tp_setattr */
1145 0, /* tp_compare */
1146 0, /* tp_repr */
1147 0, /* tp_as_number */
1148 0, /* tp_as_sequence */
1149 0, /* tp_as_mapping */
1150 0, /* tp_hash */
1151 0, /* tp_call */
1152 0, /* tp_str */
1153 PyObject_GenericGetAttr, /* tp_getattro */
1154 0, /* tp_setattro */
1155 0, /* tp_as_buffer */
1156 Py_TPFLAGS_DEFAULT, /* tp_flags */
1157 0, /* tp_doc */
1158 0, /* tp_traverse */
1159 0, /* tp_clear */
1160 0, /* tp_richcompare */
1161 0, /* tp_weaklistoffset */
1162 PyObject_SelfIter, /* tp_iter */
1163 (iternextfunc)fieldnameiter_next, /* tp_iternext */
1164 fieldnameiter_methods, /* tp_methods */
1165 0};
1166
1167/* unicode_formatter_field_name_split is used to implement
1168 string.Formatter.vformat. it takes an PEP 3101 "field name", and
1169 returns a tuple of (first, rest): "first", the part before the
1170 first '.' or '['; and "rest", an iterator for the rest of the field
1171 name. it's a wrapper around stringlib/string_format.h's
1172 field_name_split. The iterator it returns is a
1173 FieldNameIterator */
1174static PyObject *
Eric Smith8fd3eba2008-02-17 19:48:00 +00001175formatter_field_name_split(STRINGLIB_OBJECT *self)
Eric Smithf6db4092007-08-27 23:52:26 +00001176{
1177 SubString first;
1178 Py_ssize_t first_idx;
1179 fieldnameiterobject *it;
1180
1181 PyObject *first_obj = NULL;
1182 PyObject *result = NULL;
1183
1184 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1185 if (it == NULL)
1186 return NULL;
1187
1188 /* take ownership, give the object to the iterator. this is
1189 just to keep the field_name alive */
1190 Py_INCREF(self);
1191 it->str = self;
1192
1193 if (!field_name_split(STRINGLIB_STR(self),
1194 STRINGLIB_LEN(self),
1195 &first, &first_idx, &it->it_field))
Eric Smith625cbf22007-08-29 03:22:59 +00001196 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001197
Eric Smith0cb431c2007-08-28 01:07:27 +00001198 /* first becomes an integer, if possible; else a string */
Eric Smithf6db4092007-08-27 23:52:26 +00001199 if (first_idx != -1)
Christian Heimes217cfd12007-12-02 14:31:20 +00001200 first_obj = PyLong_FromSsize_t(first_idx);
Eric Smithf6db4092007-08-27 23:52:26 +00001201 else
1202 /* convert "first" into a string object */
1203 first_obj = SubString_new_object(&first);
1204 if (first_obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001205 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001206
1207 /* return a tuple of values */
1208 result = PyTuple_Pack(2, first_obj, it);
1209
Eric Smith625cbf22007-08-29 03:22:59 +00001210done:
Eric Smithf6db4092007-08-27 23:52:26 +00001211 Py_XDECREF(it);
1212 Py_XDECREF(first_obj);
1213 return result;
1214}