blob: be580c685897d6760d3f664f3e99dd8c11f0b9fe [file] [log] [blame]
/*
    unicode_format.h -- implementation of str.format().
*/
4
/* Tuning constants for reallocating the string buffer more efficiently.
   Judging by their names they describe the first size increment, the
   growth factor and the largest increment; none of them is referenced in
   the visible part of this file. */
#define INITIAL_SIZE_INCREMENT 100
#define SIZE_MULTIPLIER 2
#define MAX_SIZE_INCREMENT 3200
9
10
/************************************************************************/
/*********** Global data structures and forward declarations *********/
/************************************************************************/
14
15/*
16 A SubString consists of the characters between two string or
17 unicode pointers.
18*/
19typedef struct {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020020 PyObject *str; /* borrowed reference */
21 Py_ssize_t start, end;
Eric Smith8c663262007-08-25 02:26:07 +000022} SubString;
23
24
/* Keep track if we're auto-numbering fields: ANS_INIT until the first
   numerically indexed field is seen, then either ANS_AUTO or
   ANS_MANUAL. */
typedef enum {
    ANS_INIT,
    ANS_AUTO,
    ANS_MANUAL
} AutoNumberState;
30
31/* Keeps track of our auto-numbering state, and which number field we're on */
32typedef struct {
33 AutoNumberState an_state;
34 int an_field_number;
35} AutoNumber;
36
37
Eric Smith8c663262007-08-25 02:26:07 +000038/* forward declaration for recursion */
39static PyObject *
40build_string(SubString *input, PyObject *args, PyObject *kwargs,
Eric Smith8ec90442009-03-14 12:29:34 +000041 int recursion_depth, AutoNumber *auto_number);
Eric Smith8c663262007-08-25 02:26:07 +000042
43
44
/************************************************************************/
/************************** Utility functions ************************/
/************************************************************************/
48
Eric Smith8ec90442009-03-14 12:29:34 +000049static void
50AutoNumber_Init(AutoNumber *auto_number)
51{
52 auto_number->an_state = ANS_INIT;
53 auto_number->an_field_number = 0;
54}
55
Eric Smith8c663262007-08-25 02:26:07 +000056/* fill in a SubString from a pointer and length */
57Py_LOCAL_INLINE(void)
Antoine Pitroudbf697a2011-10-06 15:34:41 +020058SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end)
Eric Smith8c663262007-08-25 02:26:07 +000059{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020060 str->str = s;
61 str->start = start;
62 str->end = end;
Eric Smith8c663262007-08-25 02:26:07 +000063}
64
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020065/* return a new string. if str->str is NULL, return None */
Eric Smith8c663262007-08-25 02:26:07 +000066Py_LOCAL_INLINE(PyObject *)
67SubString_new_object(SubString *str)
68{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020069 if (str->str == NULL) {
Eric Smith625cbf22007-08-29 03:22:59 +000070 Py_INCREF(Py_None);
71 return Py_None;
72 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020073 return PyUnicode_Substring(str->str, str->start, str->end);
Eric Smith625cbf22007-08-29 03:22:59 +000074}
75
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020076/* return a new string. if str->str is NULL, return None */
Eric Smith625cbf22007-08-29 03:22:59 +000077Py_LOCAL_INLINE(PyObject *)
78SubString_new_object_or_empty(SubString *str)
79{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020080 if (str->str == NULL) {
Victor Stinnerb37b1742011-12-01 03:18:59 +010081 return PyUnicode_New(0, 0);
Eric Smith625cbf22007-08-29 03:22:59 +000082 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020083 return SubString_new_object(str);
Eric Smith8c663262007-08-25 02:26:07 +000084}
85
Eric Smith8ec90442009-03-14 12:29:34 +000086/* Return 1 if an error has been detected switching between automatic
87 field numbering and manual field specification, else return 0. Set
88 ValueError on error. */
89static int
90autonumber_state_error(AutoNumberState state, int field_name_is_empty)
91{
92 if (state == ANS_MANUAL) {
93 if (field_name_is_empty) {
94 PyErr_SetString(PyExc_ValueError, "cannot switch from "
95 "manual field specification to "
96 "automatic field numbering");
97 return 1;
98 }
99 }
100 else {
101 if (!field_name_is_empty) {
102 PyErr_SetString(PyExc_ValueError, "cannot switch from "
103 "automatic field numbering to "
104 "manual field specification");
105 return 1;
106 }
107 }
108 return 0;
109}
110
111
/************************************************************************/
/*********** Format string parsing -- integers and identifiers *********/
/************************************************************************/
115
Eric Smith7ade6482007-08-26 22:27:13 +0000116static Py_ssize_t
117get_integer(const SubString *str)
Eric Smith8c663262007-08-25 02:26:07 +0000118{
Eric Smith7ade6482007-08-26 22:27:13 +0000119 Py_ssize_t accumulator = 0;
120 Py_ssize_t digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200121 Py_ssize_t i;
Eric Smith8c663262007-08-25 02:26:07 +0000122
Eric Smith7ade6482007-08-26 22:27:13 +0000123 /* empty string is an error */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200124 if (str->start >= str->end)
Eric Smith7ade6482007-08-26 22:27:13 +0000125 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000126
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200127 for (i = str->start; i < str->end; i++) {
128 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i));
Eric Smith8c663262007-08-25 02:26:07 +0000129 if (digitval < 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000130 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000131 /*
Mark Dickinsonc7d93b72011-09-25 15:34:32 +0100132 Detect possible overflow before it happens:
133
134 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
135 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
Eric Smith8c663262007-08-25 02:26:07 +0000136 */
Mark Dickinsonc7d93b72011-09-25 15:34:32 +0100137 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
Eric Smith8c663262007-08-25 02:26:07 +0000138 PyErr_Format(PyExc_ValueError,
139 "Too many decimal digits in format string");
140 return -1;
141 }
Mark Dickinsonc7d93b72011-09-25 15:34:32 +0100142 accumulator = accumulator * 10 + digitval;
Eric Smith8c663262007-08-25 02:26:07 +0000143 }
Eric Smith7ade6482007-08-26 22:27:13 +0000144 return accumulator;
Eric Smith8c663262007-08-25 02:26:07 +0000145}
146
/************************************************************************/
/******** Functions to get field objects and specification strings ******/
/************************************************************************/
150
Eric Smith7ade6482007-08-26 22:27:13 +0000151/* do the equivalent of obj.name */
Eric Smith8c663262007-08-25 02:26:07 +0000152static PyObject *
Eric Smith7ade6482007-08-26 22:27:13 +0000153getattr(PyObject *obj, SubString *name)
Eric Smith8c663262007-08-25 02:26:07 +0000154{
Eric Smith7ade6482007-08-26 22:27:13 +0000155 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000156 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000157 if (str == NULL)
158 return NULL;
159 newobj = PyObject_GetAttr(obj, str);
160 Py_DECREF(str);
161 return newobj;
Eric Smith8c663262007-08-25 02:26:07 +0000162}
163
Eric Smith7ade6482007-08-26 22:27:13 +0000164/* do the equivalent of obj[idx], where obj is a sequence */
165static PyObject *
166getitem_sequence(PyObject *obj, Py_ssize_t idx)
167{
168 return PySequence_GetItem(obj, idx);
169}
170
171/* do the equivalent of obj[idx], where obj is not a sequence */
172static PyObject *
173getitem_idx(PyObject *obj, Py_ssize_t idx)
174{
175 PyObject *newobj;
Christian Heimes217cfd12007-12-02 14:31:20 +0000176 PyObject *idx_obj = PyLong_FromSsize_t(idx);
Eric Smith7ade6482007-08-26 22:27:13 +0000177 if (idx_obj == NULL)
178 return NULL;
179 newobj = PyObject_GetItem(obj, idx_obj);
180 Py_DECREF(idx_obj);
181 return newobj;
182}
183
184/* do the equivalent of obj[name] */
185static PyObject *
186getitem_str(PyObject *obj, SubString *name)
187{
188 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000189 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000190 if (str == NULL)
191 return NULL;
192 newobj = PyObject_GetItem(obj, str);
193 Py_DECREF(str);
194 return newobj;
195}
196
197typedef struct {
198 /* the entire string we're parsing. we assume that someone else
199 is managing its lifetime, and that it will exist for the
200 lifetime of the iterator. can be empty */
201 SubString str;
202
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200203 /* index to where we are inside field_name */
204 Py_ssize_t index;
Eric Smith7ade6482007-08-26 22:27:13 +0000205} FieldNameIterator;
206
207
208static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200209FieldNameIterator_init(FieldNameIterator *self, PyObject *s,
210 Py_ssize_t start, Py_ssize_t end)
Eric Smith7ade6482007-08-26 22:27:13 +0000211{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200212 SubString_init(&self->str, s, start, end);
213 self->index = start;
Eric Smith7ade6482007-08-26 22:27:13 +0000214 return 1;
215}
216
217static int
218_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
219{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200220 Py_UCS4 c;
Eric Smith7ade6482007-08-26 22:27:13 +0000221
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200222 name->str = self->str.str;
223 name->start = self->index;
Eric Smith7ade6482007-08-26 22:27:13 +0000224
225 /* return everything until '.' or '[' */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200226 while (self->index < self->str.end) {
227 c = PyUnicode_READ_CHAR(self->str.str, self->index++);
228 switch (c) {
Eric Smith7ade6482007-08-26 22:27:13 +0000229 case '[':
230 case '.':
231 /* backup so that we this character will be seen next time */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200232 self->index--;
Eric Smith7ade6482007-08-26 22:27:13 +0000233 break;
234 default:
235 continue;
236 }
237 break;
238 }
239 /* end of string is okay */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200240 name->end = self->index;
Eric Smith7ade6482007-08-26 22:27:13 +0000241 return 1;
242}
243
244static int
245_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
246{
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000247 int bracket_seen = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200248 Py_UCS4 c;
Eric Smith7ade6482007-08-26 22:27:13 +0000249
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200250 name->str = self->str.str;
251 name->start = self->index;
Eric Smith7ade6482007-08-26 22:27:13 +0000252
253 /* return everything until ']' */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200254 while (self->index < self->str.end) {
255 c = PyUnicode_READ_CHAR(self->str.str, self->index++);
256 switch (c) {
Eric Smith7ade6482007-08-26 22:27:13 +0000257 case ']':
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000258 bracket_seen = 1;
Eric Smith7ade6482007-08-26 22:27:13 +0000259 break;
260 default:
261 continue;
262 }
263 break;
264 }
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000265 /* make sure we ended with a ']' */
266 if (!bracket_seen) {
267 PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
268 return 0;
269 }
270
Eric Smith7ade6482007-08-26 22:27:13 +0000271 /* end of string is okay */
272 /* don't include the ']' */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200273 name->end = self->index-1;
Eric Smith7ade6482007-08-26 22:27:13 +0000274 return 1;
275}
276
277/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
278static int
279FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
280 Py_ssize_t *name_idx, SubString *name)
281{
282 /* check at end of input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200283 if (self->index >= self->str.end)
Eric Smith7ade6482007-08-26 22:27:13 +0000284 return 1;
285
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200286 switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) {
Eric Smith7ade6482007-08-26 22:27:13 +0000287 case '.':
288 *is_attribute = 1;
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000289 if (_FieldNameIterator_attr(self, name) == 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000290 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000291 *name_idx = -1;
292 break;
293 case '[':
294 *is_attribute = 0;
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000295 if (_FieldNameIterator_item(self, name) == 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000296 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000297 *name_idx = get_integer(name);
Benjamin Peterson59a1b2f2010-06-07 22:31:26 +0000298 if (*name_idx == -1 && PyErr_Occurred())
299 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000300 break;
301 default:
Eric Smith41669ca2009-05-23 14:23:22 +0000302 /* Invalid character follows ']' */
303 PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
304 "follow ']' in format field specifier");
Eric Smith7ade6482007-08-26 22:27:13 +0000305 return 0;
306 }
307
308 /* empty string is an error */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200309 if (name->start == name->end) {
Eric Smith7ade6482007-08-26 22:27:13 +0000310 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
311 return 0;
312 }
313
314 return 2;
315}
316
317
318/* input: field_name
319 output: 'first' points to the part before the first '[' or '.'
320 'first_idx' is -1 if 'first' is not an integer, otherwise
321 it's the value of first converted to an integer
322 'rest' is an iterator to return the rest
323*/
324static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200325field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first,
Eric Smith8ec90442009-03-14 12:29:34 +0000326 Py_ssize_t *first_idx, FieldNameIterator *rest,
327 AutoNumber *auto_number)
Eric Smith7ade6482007-08-26 22:27:13 +0000328{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200329 Py_UCS4 c;
330 Py_ssize_t i = start;
Eric Smith8ec90442009-03-14 12:29:34 +0000331 int field_name_is_empty;
332 int using_numeric_index;
Eric Smith7ade6482007-08-26 22:27:13 +0000333
334 /* find the part up until the first '.' or '[' */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200335 while (i < end) {
336 switch (c = PyUnicode_READ_CHAR(str, i++)) {
Eric Smith7ade6482007-08-26 22:27:13 +0000337 case '[':
338 case '.':
339 /* backup so that we this character is available to the
340 "rest" iterator */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200341 i--;
Eric Smith7ade6482007-08-26 22:27:13 +0000342 break;
343 default:
344 continue;
345 }
346 break;
347 }
348
349 /* set up the return values */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200350 SubString_init(first, str, start, i);
351 FieldNameIterator_init(rest, str, i, end);
Eric Smith7ade6482007-08-26 22:27:13 +0000352
353 /* see if "first" is an integer, in which case it's used as an index */
354 *first_idx = get_integer(first);
Benjamin Peterson59a1b2f2010-06-07 22:31:26 +0000355 if (*first_idx == -1 && PyErr_Occurred())
356 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000357
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200358 field_name_is_empty = first->start >= first->end;
Eric Smith8ec90442009-03-14 12:29:34 +0000359
360 /* If the field name is omitted or if we have a numeric index
361 specified, then we're doing numeric indexing into args. */
362 using_numeric_index = field_name_is_empty || *first_idx != -1;
363
364 /* We always get here exactly one time for each field we're
365 processing. And we get here in field order (counting by left
366 braces). So this is the perfect place to handle automatic field
367 numbering if the field name is omitted. */
368
369 /* Check if we need to do the auto-numbering. It's not needed if
370 we're called from string.Format routines, because it's handled
371 in that class by itself. */
372 if (auto_number) {
373 /* Initialize our auto numbering state if this is the first
374 time we're either auto-numbering or manually numbering. */
375 if (auto_number->an_state == ANS_INIT && using_numeric_index)
376 auto_number->an_state = field_name_is_empty ?
377 ANS_AUTO : ANS_MANUAL;
378
379 /* Make sure our state is consistent with what we're doing
380 this time through. Only check if we're using a numeric
381 index. */
382 if (using_numeric_index)
383 if (autonumber_state_error(auto_number->an_state,
384 field_name_is_empty))
385 return 0;
386 /* Zero length field means we want to do auto-numbering of the
387 fields. */
388 if (field_name_is_empty)
389 *first_idx = (auto_number->an_field_number)++;
Eric Smith7ade6482007-08-26 22:27:13 +0000390 }
391
392 return 1;
Eric Smith7ade6482007-08-26 22:27:13 +0000393}
394
395
Eric Smith8c663262007-08-25 02:26:07 +0000396/*
397 get_field_object returns the object inside {}, before the
398 format_spec. It handles getindex and getattr lookups and consumes
399 the entire input string.
400*/
401static PyObject *
Eric Smith8ec90442009-03-14 12:29:34 +0000402get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
403 AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000404{
Eric Smith7ade6482007-08-26 22:27:13 +0000405 PyObject *obj = NULL;
406 int ok;
407 int is_attribute;
408 SubString name;
409 SubString first;
Eric Smith8c663262007-08-25 02:26:07 +0000410 Py_ssize_t index;
Eric Smith7ade6482007-08-26 22:27:13 +0000411 FieldNameIterator rest;
Eric Smith8c663262007-08-25 02:26:07 +0000412
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200413 if (!field_name_split(input->str, input->start, input->end, &first,
Eric Smith8ec90442009-03-14 12:29:34 +0000414 &index, &rest, auto_number)) {
Eric Smith7ade6482007-08-26 22:27:13 +0000415 goto error;
416 }
Eric Smith8c663262007-08-25 02:26:07 +0000417
Eric Smith7ade6482007-08-26 22:27:13 +0000418 if (index == -1) {
419 /* look up in kwargs */
Eric Smith7a6dd292007-08-27 23:30:47 +0000420 PyObject *key = SubString_new_object(&first);
Eric Smith7ade6482007-08-26 22:27:13 +0000421 if (key == NULL)
422 goto error;
Eric Smith27bbca62010-11-04 17:06:58 +0000423
424 /* Use PyObject_GetItem instead of PyDict_GetItem because this
425 code is no longer just used with kwargs. It might be passed
426 a non-dict when called through format_map. */
427 if ((kwargs == NULL) || (obj = PyObject_GetItem(kwargs, key)) == NULL) {
Eric Smith11529192007-09-04 23:04:22 +0000428 PyErr_SetObject(PyExc_KeyError, key);
Eric Smith7ade6482007-08-26 22:27:13 +0000429 Py_DECREF(key);
430 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000431 }
Neal Norwitz8a4eb292007-08-27 07:24:17 +0000432 Py_DECREF(key);
Eric Smith0cb431c2007-08-28 01:07:27 +0000433 }
434 else {
Eric V. Smith12ebefc2011-07-18 14:03:41 -0400435 /* If args is NULL, we have a format string with a positional field
436 with only kwargs to retrieve it from. This can only happen when
437 used with format_map(), where positional arguments are not
438 allowed. */
439 if (args == NULL) {
440 PyErr_SetString(PyExc_ValueError, "Format string contains "
441 "positional fields");
442 goto error;
443 }
444
Eric Smith7ade6482007-08-26 22:27:13 +0000445 /* look up in args */
446 obj = PySequence_GetItem(args, index);
Eric Smith11529192007-09-04 23:04:22 +0000447 if (obj == NULL)
Eric Smith7ade6482007-08-26 22:27:13 +0000448 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000449 }
Eric Smith7ade6482007-08-26 22:27:13 +0000450
451 /* iterate over the rest of the field_name */
452 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
453 &name)) == 2) {
454 PyObject *tmp;
455
456 if (is_attribute)
457 /* getattr lookup "." */
458 tmp = getattr(obj, &name);
459 else
460 /* getitem lookup "[]" */
461 if (index == -1)
462 tmp = getitem_str(obj, &name);
463 else
464 if (PySequence_Check(obj))
465 tmp = getitem_sequence(obj, index);
466 else
467 /* not a sequence */
468 tmp = getitem_idx(obj, index);
469 if (tmp == NULL)
470 goto error;
471
472 /* assign to obj */
473 Py_DECREF(obj);
474 obj = tmp;
Eric Smith8c663262007-08-25 02:26:07 +0000475 }
Eric Smith7ade6482007-08-26 22:27:13 +0000476 /* end of iterator, this is the non-error case */
477 if (ok == 1)
478 return obj;
479error:
480 Py_XDECREF(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000481 return NULL;
482}
483
/************************************************************************/
/***************** Field rendering functions **************************/
/************************************************************************/
487
488/*
489 render_field() is the main function in this section. It takes the
490 field object and field specification string generated by
491 get_field_and_spec, and renders the field into the output string.
492
Eric Smith8c663262007-08-25 02:26:07 +0000493 render_field calls fieldobj.__format__(format_spec) method, and
494 appends to the output.
495*/
496static int
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200497render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer)
Eric Smith8c663262007-08-25 02:26:07 +0000498{
499 int ok = 0;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000500 PyObject *result = NULL;
Eric Smith1d138f12008-05-31 01:40:08 +0000501 PyObject *format_spec_object = NULL;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200502 int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
503 int err;
Victor Stinner7931d9a2011-11-04 00:22:48 +0100504
Eric Smith1d138f12008-05-31 01:40:08 +0000505 /* If we know the type exactly, skip the lookup of __format__ and just
506 call the formatter directly. */
507 if (PyUnicode_CheckExact(fieldobj))
Victor Stinnerd3f08822012-05-29 12:57:52 +0200508 formatter = _PyUnicode_FormatAdvancedWriter;
Eric Smith1d138f12008-05-31 01:40:08 +0000509 else if (PyLong_CheckExact(fieldobj))
Victor Stinnerd3f08822012-05-29 12:57:52 +0200510 formatter = _PyLong_FormatAdvancedWriter;
Eric Smith1d138f12008-05-31 01:40:08 +0000511 else if (PyFloat_CheckExact(fieldobj))
Victor Stinnerd3f08822012-05-29 12:57:52 +0200512 formatter = _PyFloat_FormatAdvancedWriter;
513 else if (PyComplex_CheckExact(fieldobj))
514 formatter = _PyComplex_FormatAdvancedWriter;
Eric Smithba8c0282008-06-02 14:57:32 +0000515
516 if (formatter) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000517 /* we know exactly which formatter will be called when __format__ is
518 looked up, so call it directly, instead. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200519 err = formatter(writer, fieldobj, format_spec->str,
520 format_spec->start, format_spec->end);
521 return (err == 0);
Eric Smithba8c0282008-06-02 14:57:32 +0000522 }
Eric Smith1d138f12008-05-31 01:40:08 +0000523 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000524 /* We need to create an object out of the pointers we have, because
525 __format__ takes a string/unicode object for format_spec. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200526 if (format_spec->str)
527 format_spec_object = PyUnicode_Substring(format_spec->str,
528 format_spec->start,
529 format_spec->end);
530 else
531 format_spec_object = PyUnicode_New(0, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000532 if (format_spec_object == NULL)
533 goto done;
Eric Smith1d138f12008-05-31 01:40:08 +0000534
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000535 result = PyObject_Format(fieldobj, format_spec_object);
Eric Smith1d138f12008-05-31 01:40:08 +0000536 }
Victor Stinneree4544c2012-05-09 22:24:08 +0200537 if (result == NULL)
538 goto done;
Eric Smith8c663262007-08-25 02:26:07 +0000539
Victor Stinnerd3f08822012-05-29 12:57:52 +0200540 if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
Victor Stinneree4544c2012-05-09 22:24:08 +0200541 goto done;
Victor Stinneree4544c2012-05-09 22:24:08 +0200542 ok = 1;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200543
Eric Smith8c663262007-08-25 02:26:07 +0000544done:
Eric Smith1d138f12008-05-31 01:40:08 +0000545 Py_XDECREF(format_spec_object);
Eric Smith8c663262007-08-25 02:26:07 +0000546 Py_XDECREF(result);
547 return ok;
548}
549
550static int
551parse_field(SubString *str, SubString *field_name, SubString *format_spec,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200552 Py_UCS4 *conversion)
Eric Smith8c663262007-08-25 02:26:07 +0000553{
Eric Smith8ec90442009-03-14 12:29:34 +0000554 /* Note this function works if the field name is zero length,
555 which is good. Zero length field names are handled later, in
556 field_name_split. */
557
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200558 Py_UCS4 c = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000559
560 /* initialize these, as they may be empty */
561 *conversion = '\0';
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200562 SubString_init(format_spec, NULL, 0, 0);
Eric Smith8c663262007-08-25 02:26:07 +0000563
Eric Smith8ec90442009-03-14 12:29:34 +0000564 /* Search for the field name. it's terminated by the end of
565 the string, or a ':' or '!' */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200566 field_name->str = str->str;
567 field_name->start = str->start;
568 while (str->start < str->end) {
569 switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
Eric Smith8c663262007-08-25 02:26:07 +0000570 case ':':
571 case '!':
572 break;
573 default:
574 continue;
575 }
576 break;
577 }
578
579 if (c == '!' || c == ':') {
580 /* we have a format specifier and/or a conversion */
581 /* don't include the last character */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200582 field_name->end = str->start-1;
Eric Smith8c663262007-08-25 02:26:07 +0000583
584 /* the format specifier is the rest of the string */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200585 format_spec->str = str->str;
586 format_spec->start = str->start;
Eric Smith8c663262007-08-25 02:26:07 +0000587 format_spec->end = str->end;
588
589 /* see if there's a conversion specifier */
590 if (c == '!') {
591 /* there must be another character present */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200592 if (format_spec->start >= format_spec->end) {
Eric Smith8c663262007-08-25 02:26:07 +0000593 PyErr_SetString(PyExc_ValueError,
594 "end of format while looking for conversion "
595 "specifier");
596 return 0;
597 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200598 *conversion = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++);
Eric Smith8c663262007-08-25 02:26:07 +0000599
600 /* if there is another character, it must be a colon */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200601 if (format_spec->start < format_spec->end) {
602 c = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++);
Eric Smith8c663262007-08-25 02:26:07 +0000603 if (c != ':') {
604 PyErr_SetString(PyExc_ValueError,
605 "expected ':' after format specifier");
606 return 0;
607 }
608 }
609 }
Eric Smith0cb431c2007-08-28 01:07:27 +0000610 }
Eric Smith8ec90442009-03-14 12:29:34 +0000611 else
Eric Smith8c663262007-08-25 02:26:07 +0000612 /* end of string, there's no format_spec or conversion */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200613 field_name->end = str->start;
Eric Smith8ec90442009-03-14 12:29:34 +0000614
615 return 1;
Eric Smith8c663262007-08-25 02:26:07 +0000616}
617
/************************************************************************/
/******* Output string allocation and escape-to-markup processing ******/
/************************************************************************/
621
622/* MarkupIterator breaks the string into pieces of either literal
623 text, or things inside {} that need to be marked up. it is
624 designed to make it easy to wrap a Python iterator around it, for
625 use with the Formatter class */
626
627typedef struct {
628 SubString str;
Eric Smith8c663262007-08-25 02:26:07 +0000629} MarkupIterator;
630
631static int
Victor Stinner7931d9a2011-11-04 00:22:48 +0100632MarkupIterator_init(MarkupIterator *self, PyObject *str,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200633 Py_ssize_t start, Py_ssize_t end)
Eric Smith8c663262007-08-25 02:26:07 +0000634{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200635 SubString_init(&self->str, str, start, end);
Eric Smith8c663262007-08-25 02:26:07 +0000636 return 1;
637}
638
639/* returns 0 on error, 1 on non-error termination, and 2 if it got a
640 string (or something to be expanded) */
641static int
Eric Smith625cbf22007-08-29 03:22:59 +0000642MarkupIterator_next(MarkupIterator *self, SubString *literal,
Eric Smith8ec90442009-03-14 12:29:34 +0000643 int *field_present, SubString *field_name,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200644 SubString *format_spec, Py_UCS4 *conversion,
Eric Smith8c663262007-08-25 02:26:07 +0000645 int *format_spec_needs_expanding)
646{
647 int at_end;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200648 Py_UCS4 c = 0;
649 Py_ssize_t start;
Eric Smith8c663262007-08-25 02:26:07 +0000650 int count;
651 Py_ssize_t len;
Eric Smith625cbf22007-08-29 03:22:59 +0000652 int markup_follows = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000653
Eric Smith625cbf22007-08-29 03:22:59 +0000654 /* initialize all of the output variables */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200655 SubString_init(literal, NULL, 0, 0);
656 SubString_init(field_name, NULL, 0, 0);
657 SubString_init(format_spec, NULL, 0, 0);
Eric Smith625cbf22007-08-29 03:22:59 +0000658 *conversion = '\0';
Eric Smith8c663262007-08-25 02:26:07 +0000659 *format_spec_needs_expanding = 0;
Eric Smith8ec90442009-03-14 12:29:34 +0000660 *field_present = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000661
Eric Smith625cbf22007-08-29 03:22:59 +0000662 /* No more input, end of iterator. This is the normal exit
663 path. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200664 if (self->str.start >= self->str.end)
Eric Smith8c663262007-08-25 02:26:07 +0000665 return 1;
666
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200667 start = self->str.start;
Eric Smith8c663262007-08-25 02:26:07 +0000668
Eric Smith625cbf22007-08-29 03:22:59 +0000669 /* First read any literal text. Read until the end of string, an
670 escaped '{' or '}', or an unescaped '{'. In order to never
671 allocate memory and so I can just pass pointers around, if
672 there's an escaped '{' or '}' then we'll return the literal
673 including the brace, but no format object. The next time
674 through, we'll return the rest of the literal, skipping past
675 the second consecutive brace. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200676 while (self->str.start < self->str.end) {
677 switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
Eric Smith625cbf22007-08-29 03:22:59 +0000678 case '{':
679 case '}':
680 markup_follows = 1;
681 break;
682 default:
683 continue;
Eric Smith8c663262007-08-25 02:26:07 +0000684 }
Eric Smith625cbf22007-08-29 03:22:59 +0000685 break;
Eric Smith0cb431c2007-08-28 01:07:27 +0000686 }
Eric Smith625cbf22007-08-29 03:22:59 +0000687
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200688 at_end = self->str.start >= self->str.end;
689 len = self->str.start - start;
Eric Smith625cbf22007-08-29 03:22:59 +0000690
Victor Stinner7931d9a2011-11-04 00:22:48 +0100691 if ((c == '}') && (at_end ||
692 (c != PyUnicode_READ_CHAR(self->str.str,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200693 self->str.start)))) {
Eric Smith625cbf22007-08-29 03:22:59 +0000694 PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
695 "in format string");
696 return 0;
697 }
698 if (at_end && c == '{') {
699 PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
700 "in format string");
701 return 0;
702 }
703 if (!at_end) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200704 if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) {
Eric Smith625cbf22007-08-29 03:22:59 +0000705 /* escaped } or {, skip it in the input. there is no
706 markup object following us, just this literal text */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200707 self->str.start++;
Eric Smith625cbf22007-08-29 03:22:59 +0000708 markup_follows = 0;
709 }
710 else
711 len--;
712 }
713
714 /* record the literal text */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200715 literal->str = self->str.str;
716 literal->start = start;
Eric Smith625cbf22007-08-29 03:22:59 +0000717 literal->end = start + len;
718
719 if (!markup_follows)
720 return 2;
721
722 /* this is markup, find the end of the string by counting nested
723 braces. note that this prohibits escaped braces, so that
724 format_specs cannot have braces in them. */
Eric Smith8ec90442009-03-14 12:29:34 +0000725 *field_present = 1;
Eric Smith625cbf22007-08-29 03:22:59 +0000726 count = 1;
727
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200728 start = self->str.start;
Eric Smith625cbf22007-08-29 03:22:59 +0000729
730 /* we know we can't have a zero length string, so don't worry
731 about that case */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200732 while (self->str.start < self->str.end) {
733 switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
Eric Smith625cbf22007-08-29 03:22:59 +0000734 case '{':
735 /* the format spec needs to be recursively expanded.
736 this is an optimization, and not strictly needed */
737 *format_spec_needs_expanding = 1;
738 count++;
739 break;
740 case '}':
741 count--;
742 if (count <= 0) {
743 /* we're done. parse and get out */
744 SubString s;
745
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200746 SubString_init(&s, self->str.str, start, self->str.start - 1);
Eric Smith625cbf22007-08-29 03:22:59 +0000747 if (parse_field(&s, field_name, format_spec, conversion) == 0)
748 return 0;
749
Eric Smith625cbf22007-08-29 03:22:59 +0000750 /* success */
751 return 2;
Eric Smith8c663262007-08-25 02:26:07 +0000752 }
753 break;
754 }
Eric Smith8c663262007-08-25 02:26:07 +0000755 }
Eric Smith625cbf22007-08-29 03:22:59 +0000756
757 /* end of string while searching for matching '}' */
758 PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
759 return 0;
Eric Smith8c663262007-08-25 02:26:07 +0000760}
761
762
763/* do the !r or !s conversion on obj */
764static PyObject *
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200765do_conversion(PyObject *obj, Py_UCS4 conversion)
Eric Smith8c663262007-08-25 02:26:07 +0000766{
767 /* XXX in pre-3.0, do we need to convert this to unicode, since it
768 might have returned a string? */
769 switch (conversion) {
770 case 'r':
771 return PyObject_Repr(obj);
772 case 's':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200773 return PyObject_Str(obj);
Georg Brandl559e5d72008-06-11 18:37:52 +0000774 case 'a':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200775 return PyObject_ASCII(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000776 default:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000777 if (conversion > 32 && conversion < 127) {
778 /* It's the ASCII subrange; casting to char is safe
779 (assuming the execution character set is an ASCII
780 superset). */
781 PyErr_Format(PyExc_ValueError,
Martin v. Löwis5a6f4582008-04-07 03:22:07 +0000782 "Unknown conversion specifier %c",
783 (char)conversion);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000784 } else
785 PyErr_Format(PyExc_ValueError,
786 "Unknown conversion specifier \\x%x",
787 (unsigned int)conversion);
Eric Smith8c663262007-08-25 02:26:07 +0000788 return NULL;
789 }
790}
791
792/* given:
793
794 {field_name!conversion:format_spec}
795
796 compute the result and write it to output.
797 format_spec_needs_expanding is an optimization. if it's false,
798 just output the string directly, otherwise recursively expand the
Eric Smith8ec90442009-03-14 12:29:34 +0000799 format_spec string.
800
801 field_name is allowed to be zero length, in which case we
802 are doing auto field numbering.
803*/
Eric Smith8c663262007-08-25 02:26:07 +0000804
805static int
806output_markup(SubString *field_name, SubString *format_spec,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200807 int format_spec_needs_expanding, Py_UCS4 conversion,
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200808 _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs,
Eric Smith8ec90442009-03-14 12:29:34 +0000809 int recursion_depth, AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000810{
811 PyObject *tmp = NULL;
812 PyObject *fieldobj = NULL;
813 SubString expanded_format_spec;
814 SubString *actual_format_spec;
815 int result = 0;
816
817 /* convert field_name to an object */
Eric Smith8ec90442009-03-14 12:29:34 +0000818 fieldobj = get_field_object(field_name, args, kwargs, auto_number);
Eric Smith8c663262007-08-25 02:26:07 +0000819 if (fieldobj == NULL)
820 goto done;
821
822 if (conversion != '\0') {
823 tmp = do_conversion(fieldobj, conversion);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200824 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
Eric Smith8c663262007-08-25 02:26:07 +0000825 goto done;
826
827 /* do the assignment, transferring ownership: fieldobj = tmp */
828 Py_DECREF(fieldobj);
829 fieldobj = tmp;
830 tmp = NULL;
831 }
832
833 /* if needed, recurively compute the format_spec */
834 if (format_spec_needs_expanding) {
Eric Smith8ec90442009-03-14 12:29:34 +0000835 tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
836 auto_number);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200837 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
Eric Smith8c663262007-08-25 02:26:07 +0000838 goto done;
839
840 /* note that in the case we're expanding the format string,
841 tmp must be kept around until after the call to
842 render_field. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200843 SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp));
Eric Smith8c663262007-08-25 02:26:07 +0000844 actual_format_spec = &expanded_format_spec;
Eric Smith0cb431c2007-08-28 01:07:27 +0000845 }
846 else
Eric Smith8c663262007-08-25 02:26:07 +0000847 actual_format_spec = format_spec;
848
Victor Stinner202fdca2012-05-07 12:47:02 +0200849 if (render_field(fieldobj, actual_format_spec, writer) == 0)
Eric Smith8c663262007-08-25 02:26:07 +0000850 goto done;
851
852 result = 1;
853
854done:
855 Py_XDECREF(fieldobj);
856 Py_XDECREF(tmp);
857
858 return result;
859}
860
861/*
Eric Smith8fd3eba2008-02-17 19:48:00 +0000862 do_markup is the top-level loop for the format() method. It
Eric Smith8c663262007-08-25 02:26:07 +0000863 searches through the format string for escapes to markup codes, and
864 calls other functions to move non-markup text to the output,
865 and to perform the markup to the output.
866*/
867static int
868do_markup(SubString *input, PyObject *args, PyObject *kwargs,
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200869 _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000870{
871 MarkupIterator iter;
Eric Smith8c663262007-08-25 02:26:07 +0000872 int format_spec_needs_expanding;
873 int result;
Eric Smith8ec90442009-03-14 12:29:34 +0000874 int field_present;
Eric Smith625cbf22007-08-29 03:22:59 +0000875 SubString literal;
Eric Smith8c663262007-08-25 02:26:07 +0000876 SubString field_name;
877 SubString format_spec;
Victor Stinneree4544c2012-05-09 22:24:08 +0200878 Py_UCS4 conversion, maxchar;
879 Py_ssize_t sublen;
Victor Stinner202fdca2012-05-07 12:47:02 +0200880 int err;
Eric Smith8c663262007-08-25 02:26:07 +0000881
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200882 MarkupIterator_init(&iter, input->str, input->start, input->end);
Eric Smith8ec90442009-03-14 12:29:34 +0000883 while ((result = MarkupIterator_next(&iter, &literal, &field_present,
884 &field_name, &format_spec,
885 &conversion,
Eric Smith8c663262007-08-25 02:26:07 +0000886 &format_spec_needs_expanding)) == 2) {
Victor Stinneree4544c2012-05-09 22:24:08 +0200887 sublen = literal.end - literal.start;
888 if (sublen) {
889 maxchar = _PyUnicode_FindMaxChar(literal.str,
890 literal.start, literal.end);
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200891 err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar);
Victor Stinneree4544c2012-05-09 22:24:08 +0200892 if (err == -1)
893 return 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200894 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
895 literal.str, literal.start, sublen);
Victor Stinneree4544c2012-05-09 22:24:08 +0200896 writer->pos += sublen;
897 }
898
Victor Stinnerd3f08822012-05-29 12:57:52 +0200899 if (field_present) {
900 if (iter.str.start == iter.str.end)
Victor Stinnerd7b7c742012-06-04 22:52:12 +0200901 writer->overallocate = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000902 if (!output_markup(&field_name, &format_spec,
Victor Stinner202fdca2012-05-07 12:47:02 +0200903 format_spec_needs_expanding, conversion, writer,
Eric Smith8ec90442009-03-14 12:29:34 +0000904 args, kwargs, recursion_depth, auto_number))
Eric Smith8c663262007-08-25 02:26:07 +0000905 return 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200906 }
Eric Smith8c663262007-08-25 02:26:07 +0000907 }
908 return result;
909}
910
911
912/*
913 build_string allocates the output string and then
914 calls do_markup to do the heavy lifting.
915*/
916static PyObject *
917build_string(SubString *input, PyObject *args, PyObject *kwargs,
Eric Smith8ec90442009-03-14 12:29:34 +0000918 int recursion_depth, AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000919{
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200920 _PyUnicodeWriter writer;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200921 Py_ssize_t minlen;
Eric Smith8c663262007-08-25 02:26:07 +0000922
923 /* check the recursion level */
Eric Smith45c07872007-09-05 02:02:43 +0000924 if (recursion_depth <= 0) {
Eric Smith8c663262007-08-25 02:26:07 +0000925 PyErr_SetString(PyExc_ValueError,
926 "Max string recursion exceeded");
Antoine Pitrou4574e622011-10-07 02:26:47 +0200927 return NULL;
Eric Smith8c663262007-08-25 02:26:07 +0000928 }
929
Victor Stinnerd3f08822012-05-29 12:57:52 +0200930 minlen = PyUnicode_GET_LENGTH(input->str) + 100;
931 _PyUnicodeWriter_Init(&writer, minlen);
Eric Smith8c663262007-08-25 02:26:07 +0000932
Victor Stinner202fdca2012-05-07 12:47:02 +0200933 if (!do_markup(input, args, kwargs, &writer, recursion_depth,
Eric Smith8ec90442009-03-14 12:29:34 +0000934 auto_number)) {
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200935 _PyUnicodeWriter_Dealloc(&writer);
Antoine Pitrou4574e622011-10-07 02:26:47 +0200936 return NULL;
Eric Smith8c663262007-08-25 02:26:07 +0000937 }
938
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200939 return _PyUnicodeWriter_Finish(&writer);
Eric Smith8c663262007-08-25 02:26:07 +0000940}
941
942/************************************************************************/
943/*********** main routine ***********************************************/
944/************************************************************************/
945
946/* this is the main entry point */
947static PyObject *
948do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
949{
950 SubString input;
951
952 /* PEP 3101 says only 2 levels, so that
953 "{0:{1}}".format('abc', 's') # works
954 "{0:{1:{2}}}".format('abc', 's', '') # fails
955 */
Eric Smith45c07872007-09-05 02:02:43 +0000956 int recursion_depth = 2;
Eric Smith8c663262007-08-25 02:26:07 +0000957
Eric Smith8ec90442009-03-14 12:29:34 +0000958 AutoNumber auto_number;
959
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200960 if (PyUnicode_READY(self) == -1)
961 return NULL;
962
Eric Smith8ec90442009-03-14 12:29:34 +0000963 AutoNumber_Init(&auto_number);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200964 SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self));
Eric Smith8ec90442009-03-14 12:29:34 +0000965 return build_string(&input, args, kwargs, recursion_depth, &auto_number);
Eric Smith8c663262007-08-25 02:26:07 +0000966}
Eric Smithf6db4092007-08-27 23:52:26 +0000967
Eric Smith27bbca62010-11-04 17:06:58 +0000968static PyObject *
969do_string_format_map(PyObject *self, PyObject *obj)
970{
971 return do_string_format(self, NULL, obj);
972}
Eric Smithf6db4092007-08-27 23:52:26 +0000973
974
975/************************************************************************/
976/*********** formatteriterator ******************************************/
977/************************************************************************/
978
979/* This is used to implement string.Formatter.vparse(). It exists so
980 Formatter can share code with the built in unicode.format() method.
981 It's really just a wrapper around MarkupIterator that is callable
982 from Python. */
983
984typedef struct {
985 PyObject_HEAD
Victor Stinner7931d9a2011-11-04 00:22:48 +0100986 PyObject *str;
Eric Smithf6db4092007-08-27 23:52:26 +0000987 MarkupIterator it_markup;
988} formatteriterobject;
989
990static void
991formatteriter_dealloc(formatteriterobject *it)
992{
993 Py_XDECREF(it->str);
994 PyObject_FREE(it);
995}
996
997/* returns a tuple:
Eric Smith625cbf22007-08-29 03:22:59 +0000998 (literal, field_name, format_spec, conversion)
999
1000 literal is any literal text to output. might be zero length
1001 field_name is the string before the ':'. might be None
1002 format_spec is the string after the ':'. mibht be None
1003 conversion is either None, or the string after the '!'
Eric Smithf6db4092007-08-27 23:52:26 +00001004*/
1005static PyObject *
1006formatteriter_next(formatteriterobject *it)
1007{
1008 SubString literal;
1009 SubString field_name;
1010 SubString format_spec;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001011 Py_UCS4 conversion;
Eric Smithf6db4092007-08-27 23:52:26 +00001012 int format_spec_needs_expanding;
Eric Smith8ec90442009-03-14 12:29:34 +00001013 int field_present;
1014 int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1015 &field_name, &format_spec, &conversion,
Eric Smithf6db4092007-08-27 23:52:26 +00001016 &format_spec_needs_expanding);
1017
1018 /* all of the SubString objects point into it->str, so no
1019 memory management needs to be done on them */
1020 assert(0 <= result && result <= 2);
Eric Smith0cb431c2007-08-28 01:07:27 +00001021 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +00001022 /* if 0, error has already been set, if 1, iterator is empty */
1023 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +00001024 else {
Eric Smithf6db4092007-08-27 23:52:26 +00001025 PyObject *literal_str = NULL;
1026 PyObject *field_name_str = NULL;
1027 PyObject *format_spec_str = NULL;
1028 PyObject *conversion_str = NULL;
1029 PyObject *tuple = NULL;
1030
Eric Smith625cbf22007-08-29 03:22:59 +00001031 literal_str = SubString_new_object(&literal);
1032 if (literal_str == NULL)
1033 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001034
Eric Smith625cbf22007-08-29 03:22:59 +00001035 field_name_str = SubString_new_object(&field_name);
1036 if (field_name_str == NULL)
1037 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001038
Eric Smith625cbf22007-08-29 03:22:59 +00001039 /* if field_name is non-zero length, return a string for
1040 format_spec (even if zero length), else return None */
Eric Smith8ec90442009-03-14 12:29:34 +00001041 format_spec_str = (field_present ?
Eric Smith625cbf22007-08-29 03:22:59 +00001042 SubString_new_object_or_empty :
1043 SubString_new_object)(&format_spec);
1044 if (format_spec_str == NULL)
1045 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001046
Eric Smith625cbf22007-08-29 03:22:59 +00001047 /* if the conversion is not specified, return a None,
1048 otherwise create a one length string with the conversion
1049 character */
1050 if (conversion == '\0') {
Eric Smithf6db4092007-08-27 23:52:26 +00001051 conversion_str = Py_None;
Eric Smithf6db4092007-08-27 23:52:26 +00001052 Py_INCREF(conversion_str);
1053 }
Eric Smith625cbf22007-08-29 03:22:59 +00001054 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001055 conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1056 &conversion, 1);
Eric Smith625cbf22007-08-29 03:22:59 +00001057 if (conversion_str == NULL)
1058 goto done;
1059
Eric Smith9e7c8da2007-08-28 11:15:20 +00001060 tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
Eric Smithf6db4092007-08-27 23:52:26 +00001061 conversion_str);
Eric Smith625cbf22007-08-29 03:22:59 +00001062 done:
Eric Smithf6db4092007-08-27 23:52:26 +00001063 Py_XDECREF(literal_str);
1064 Py_XDECREF(field_name_str);
1065 Py_XDECREF(format_spec_str);
1066 Py_XDECREF(conversion_str);
1067 return tuple;
1068 }
1069}
1070
1071static PyMethodDef formatteriter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001072 {NULL, NULL} /* sentinel */
Eric Smithf6db4092007-08-27 23:52:26 +00001073};
1074
Eric Smith8fd3eba2008-02-17 19:48:00 +00001075static PyTypeObject PyFormatterIter_Type = {
Eric Smithf6db4092007-08-27 23:52:26 +00001076 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001077 "formatteriterator", /* tp_name */
1078 sizeof(formatteriterobject), /* tp_basicsize */
1079 0, /* tp_itemsize */
Eric Smithf6db4092007-08-27 23:52:26 +00001080 /* methods */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001081 (destructor)formatteriter_dealloc, /* tp_dealloc */
1082 0, /* tp_print */
1083 0, /* tp_getattr */
1084 0, /* tp_setattr */
1085 0, /* tp_reserved */
1086 0, /* tp_repr */
1087 0, /* tp_as_number */
1088 0, /* tp_as_sequence */
1089 0, /* tp_as_mapping */
1090 0, /* tp_hash */
1091 0, /* tp_call */
1092 0, /* tp_str */
1093 PyObject_GenericGetAttr, /* tp_getattro */
1094 0, /* tp_setattro */
1095 0, /* tp_as_buffer */
1096 Py_TPFLAGS_DEFAULT, /* tp_flags */
1097 0, /* tp_doc */
1098 0, /* tp_traverse */
1099 0, /* tp_clear */
1100 0, /* tp_richcompare */
1101 0, /* tp_weaklistoffset */
1102 PyObject_SelfIter, /* tp_iter */
1103 (iternextfunc)formatteriter_next, /* tp_iternext */
1104 formatteriter_methods, /* tp_methods */
Eric Smithf6db4092007-08-27 23:52:26 +00001105 0,
1106};
1107
1108/* unicode_formatter_parser is used to implement
1109 string.Formatter.vformat. it parses a string and returns tuples
1110 describing the parsed elements. It's a wrapper around
1111 stringlib/string_format.h's MarkupIterator */
1112static PyObject *
Victor Stinner7931d9a2011-11-04 00:22:48 +01001113formatter_parser(PyObject *ignored, PyObject *self)
Eric Smithf6db4092007-08-27 23:52:26 +00001114{
1115 formatteriterobject *it;
1116
Eric Smitha1eac722011-01-29 11:15:35 +00001117 if (!PyUnicode_Check(self)) {
1118 PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1119 return NULL;
1120 }
1121
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001122 if (PyUnicode_READY(self) == -1)
1123 return NULL;
1124
Eric Smithf6db4092007-08-27 23:52:26 +00001125 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1126 if (it == NULL)
1127 return NULL;
1128
1129 /* take ownership, give the object to the iterator */
1130 Py_INCREF(self);
1131 it->str = self;
1132
1133 /* initialize the contained MarkupIterator */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001134 MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self));
Eric Smithf6db4092007-08-27 23:52:26 +00001135 return (PyObject *)it;
1136}
1137
1138
1139/************************************************************************/
1140/*********** fieldnameiterator ******************************************/
1141/************************************************************************/
1142
1143
1144/* This is used to implement string.Formatter.vparse(). It parses the
1145 field name into attribute and item values. It's a Python-callable
1146 wrapper around FieldNameIterator */
1147
1148typedef struct {
1149 PyObject_HEAD
Victor Stinner7931d9a2011-11-04 00:22:48 +01001150 PyObject *str;
Eric Smithf6db4092007-08-27 23:52:26 +00001151 FieldNameIterator it_field;
1152} fieldnameiterobject;
1153
1154static void
1155fieldnameiter_dealloc(fieldnameiterobject *it)
1156{
1157 Py_XDECREF(it->str);
1158 PyObject_FREE(it);
1159}
1160
1161/* returns a tuple:
1162 (is_attr, value)
1163 is_attr is true if we used attribute syntax (e.g., '.foo')
1164 false if we used index syntax (e.g., '[foo]')
1165 value is an integer or string
1166*/
1167static PyObject *
1168fieldnameiter_next(fieldnameiterobject *it)
1169{
1170 int result;
1171 int is_attr;
1172 Py_ssize_t idx;
1173 SubString name;
1174
1175 result = FieldNameIterator_next(&it->it_field, &is_attr,
1176 &idx, &name);
Eric Smith0cb431c2007-08-28 01:07:27 +00001177 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +00001178 /* if 0, error has already been set, if 1, iterator is empty */
1179 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +00001180 else {
Eric Smithf6db4092007-08-27 23:52:26 +00001181 PyObject* result = NULL;
1182 PyObject* is_attr_obj = NULL;
1183 PyObject* obj = NULL;
1184
1185 is_attr_obj = PyBool_FromLong(is_attr);
1186 if (is_attr_obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001187 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001188
1189 /* either an integer or a string */
1190 if (idx != -1)
Christian Heimes217cfd12007-12-02 14:31:20 +00001191 obj = PyLong_FromSsize_t(idx);
Eric Smithf6db4092007-08-27 23:52:26 +00001192 else
1193 obj = SubString_new_object(&name);
1194 if (obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001195 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001196
1197 /* return a tuple of values */
1198 result = PyTuple_Pack(2, is_attr_obj, obj);
Eric Smithf6db4092007-08-27 23:52:26 +00001199
Eric Smith625cbf22007-08-29 03:22:59 +00001200 done:
Eric Smithf6db4092007-08-27 23:52:26 +00001201 Py_XDECREF(is_attr_obj);
1202 Py_XDECREF(obj);
Eric Smith625cbf22007-08-29 03:22:59 +00001203 return result;
Eric Smithf6db4092007-08-27 23:52:26 +00001204 }
Eric Smithf6db4092007-08-27 23:52:26 +00001205}
1206
1207static PyMethodDef fieldnameiter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001208 {NULL, NULL} /* sentinel */
Eric Smithf6db4092007-08-27 23:52:26 +00001209};
1210
1211static PyTypeObject PyFieldNameIter_Type = {
1212 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001213 "fieldnameiterator", /* tp_name */
1214 sizeof(fieldnameiterobject), /* tp_basicsize */
1215 0, /* tp_itemsize */
Eric Smithf6db4092007-08-27 23:52:26 +00001216 /* methods */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001217 (destructor)fieldnameiter_dealloc, /* tp_dealloc */
1218 0, /* tp_print */
1219 0, /* tp_getattr */
1220 0, /* tp_setattr */
1221 0, /* tp_reserved */
1222 0, /* tp_repr */
1223 0, /* tp_as_number */
1224 0, /* tp_as_sequence */
1225 0, /* tp_as_mapping */
1226 0, /* tp_hash */
1227 0, /* tp_call */
1228 0, /* tp_str */
1229 PyObject_GenericGetAttr, /* tp_getattro */
1230 0, /* tp_setattro */
1231 0, /* tp_as_buffer */
1232 Py_TPFLAGS_DEFAULT, /* tp_flags */
1233 0, /* tp_doc */
1234 0, /* tp_traverse */
1235 0, /* tp_clear */
1236 0, /* tp_richcompare */
1237 0, /* tp_weaklistoffset */
1238 PyObject_SelfIter, /* tp_iter */
1239 (iternextfunc)fieldnameiter_next, /* tp_iternext */
1240 fieldnameiter_methods, /* tp_methods */
Eric Smithf6db4092007-08-27 23:52:26 +00001241 0};
1242
1243/* unicode_formatter_field_name_split is used to implement
1244 string.Formatter.vformat. it takes an PEP 3101 "field name", and
1245 returns a tuple of (first, rest): "first", the part before the
1246 first '.' or '['; and "rest", an iterator for the rest of the field
1247 name. it's a wrapper around stringlib/string_format.h's
1248 field_name_split. The iterator it returns is a
1249 FieldNameIterator */
1250static PyObject *
Victor Stinner7931d9a2011-11-04 00:22:48 +01001251formatter_field_name_split(PyObject *ignored, PyObject *self)
Eric Smithf6db4092007-08-27 23:52:26 +00001252{
1253 SubString first;
1254 Py_ssize_t first_idx;
1255 fieldnameiterobject *it;
1256
1257 PyObject *first_obj = NULL;
1258 PyObject *result = NULL;
1259
Eric Smitha1eac722011-01-29 11:15:35 +00001260 if (!PyUnicode_Check(self)) {
1261 PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1262 return NULL;
1263 }
1264
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001265 if (PyUnicode_READY(self) == -1)
1266 return NULL;
1267
Eric Smithf6db4092007-08-27 23:52:26 +00001268 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1269 if (it == NULL)
1270 return NULL;
1271
1272 /* take ownership, give the object to the iterator. this is
1273 just to keep the field_name alive */
1274 Py_INCREF(self);
1275 it->str = self;
1276
Eric Smith8ec90442009-03-14 12:29:34 +00001277 /* Pass in auto_number = NULL. We'll return an empty string for
1278 first_obj in that case. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001279 if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self),
Eric Smith8ec90442009-03-14 12:29:34 +00001280 &first, &first_idx, &it->it_field, NULL))
Eric Smith625cbf22007-08-29 03:22:59 +00001281 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001282
Eric Smith0cb431c2007-08-28 01:07:27 +00001283 /* first becomes an integer, if possible; else a string */
Eric Smithf6db4092007-08-27 23:52:26 +00001284 if (first_idx != -1)
Christian Heimes217cfd12007-12-02 14:31:20 +00001285 first_obj = PyLong_FromSsize_t(first_idx);
Eric Smithf6db4092007-08-27 23:52:26 +00001286 else
1287 /* convert "first" into a string object */
1288 first_obj = SubString_new_object(&first);
1289 if (first_obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001290 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001291
1292 /* return a tuple of values */
1293 result = PyTuple_Pack(2, first_obj, it);
1294
Eric Smith625cbf22007-08-29 03:22:59 +00001295done:
Eric Smithf6db4092007-08-27 23:52:26 +00001296 Py_XDECREF(it);
1297 Py_XDECREF(first_obj);
1298 return result;
1299}