blob: e9be516318f22f71371409e7929c542a93237a09 [file] [log] [blame]
/*
    unicode_format.h -- implementation of str.format().
*/
4
/************************************************************************/
/***********   Global data structures and forward declarations  *********/
/************************************************************************/
8
9/*
10 A SubString consists of the characters between two string or
11 unicode pointers.
12*/
13typedef struct {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020014 PyObject *str; /* borrowed reference */
15 Py_ssize_t start, end;
Eric Smith8c663262007-08-25 02:26:07 +000016} SubString;
17
18
/* Keep track if we're auto-numbering fields */
typedef enum {
    ANS_INIT,       /* no numbering mode has been chosen yet */
    ANS_AUTO,       /* fields are numbered automatically */
    ANS_MANUAL      /* fields carry explicit numbers */
} AutoNumberState;
24
25/* Keeps track of our auto-numbering state, and which number field we're on */
26typedef struct {
27 AutoNumberState an_state;
28 int an_field_number;
29} AutoNumber;
30
31
Eric Smith8c663262007-08-25 02:26:07 +000032/* forward declaration for recursion */
33static PyObject *
34build_string(SubString *input, PyObject *args, PyObject *kwargs,
Eric Smith8ec90442009-03-14 12:29:34 +000035 int recursion_depth, AutoNumber *auto_number);
Eric Smith8c663262007-08-25 02:26:07 +000036
37
38
/************************************************************************/
/**************************  Utility  functions  ************************/
/************************************************************************/
42
Eric Smith8ec90442009-03-14 12:29:34 +000043static void
44AutoNumber_Init(AutoNumber *auto_number)
45{
46 auto_number->an_state = ANS_INIT;
47 auto_number->an_field_number = 0;
48}
49
Eric Smith8c663262007-08-25 02:26:07 +000050/* fill in a SubString from a pointer and length */
51Py_LOCAL_INLINE(void)
Antoine Pitroudbf697a2011-10-06 15:34:41 +020052SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end)
Eric Smith8c663262007-08-25 02:26:07 +000053{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020054 str->str = s;
55 str->start = start;
56 str->end = end;
Eric Smith8c663262007-08-25 02:26:07 +000057}
58
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020059/* return a new string. if str->str is NULL, return None */
Eric Smith8c663262007-08-25 02:26:07 +000060Py_LOCAL_INLINE(PyObject *)
61SubString_new_object(SubString *str)
62{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020063 if (str->str == NULL) {
Eric Smith625cbf22007-08-29 03:22:59 +000064 Py_INCREF(Py_None);
65 return Py_None;
66 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020067 return PyUnicode_Substring(str->str, str->start, str->end);
Eric Smith625cbf22007-08-29 03:22:59 +000068}
69
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020070/* return a new string. if str->str is NULL, return None */
Eric Smith625cbf22007-08-29 03:22:59 +000071Py_LOCAL_INLINE(PyObject *)
72SubString_new_object_or_empty(SubString *str)
73{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020074 if (str->str == NULL) {
Victor Stinnerb37b1742011-12-01 03:18:59 +010075 return PyUnicode_New(0, 0);
Eric Smith625cbf22007-08-29 03:22:59 +000076 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020077 return SubString_new_object(str);
Eric Smith8c663262007-08-25 02:26:07 +000078}
79
Eric Smith8ec90442009-03-14 12:29:34 +000080/* Return 1 if an error has been detected switching between automatic
81 field numbering and manual field specification, else return 0. Set
82 ValueError on error. */
83static int
84autonumber_state_error(AutoNumberState state, int field_name_is_empty)
85{
86 if (state == ANS_MANUAL) {
87 if (field_name_is_empty) {
88 PyErr_SetString(PyExc_ValueError, "cannot switch from "
89 "manual field specification to "
90 "automatic field numbering");
91 return 1;
92 }
93 }
94 else {
95 if (!field_name_is_empty) {
96 PyErr_SetString(PyExc_ValueError, "cannot switch from "
97 "automatic field numbering to "
98 "manual field specification");
99 return 1;
100 }
101 }
102 return 0;
103}
104
105
/************************************************************************/
/***********  Format string parsing -- integers and identifiers *********/
/************************************************************************/
109
Eric Smith7ade6482007-08-26 22:27:13 +0000110static Py_ssize_t
111get_integer(const SubString *str)
Eric Smith8c663262007-08-25 02:26:07 +0000112{
Eric Smith7ade6482007-08-26 22:27:13 +0000113 Py_ssize_t accumulator = 0;
114 Py_ssize_t digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200115 Py_ssize_t i;
Eric Smith8c663262007-08-25 02:26:07 +0000116
Eric Smith7ade6482007-08-26 22:27:13 +0000117 /* empty string is an error */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200118 if (str->start >= str->end)
Eric Smith7ade6482007-08-26 22:27:13 +0000119 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000120
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200121 for (i = str->start; i < str->end; i++) {
122 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i));
Eric Smith8c663262007-08-25 02:26:07 +0000123 if (digitval < 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000124 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000125 /*
Mark Dickinsonc7d93b72011-09-25 15:34:32 +0100126 Detect possible overflow before it happens:
127
128 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
129 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
Eric Smith8c663262007-08-25 02:26:07 +0000130 */
Mark Dickinsonc7d93b72011-09-25 15:34:32 +0100131 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
Eric Smith8c663262007-08-25 02:26:07 +0000132 PyErr_Format(PyExc_ValueError,
133 "Too many decimal digits in format string");
134 return -1;
135 }
Mark Dickinsonc7d93b72011-09-25 15:34:32 +0100136 accumulator = accumulator * 10 + digitval;
Eric Smith8c663262007-08-25 02:26:07 +0000137 }
Eric Smith7ade6482007-08-26 22:27:13 +0000138 return accumulator;
Eric Smith8c663262007-08-25 02:26:07 +0000139}
140
/************************************************************************/
/******** Functions to get field objects and specification strings ******/
/************************************************************************/
144
Eric Smith7ade6482007-08-26 22:27:13 +0000145/* do the equivalent of obj.name */
Eric Smith8c663262007-08-25 02:26:07 +0000146static PyObject *
Eric Smith7ade6482007-08-26 22:27:13 +0000147getattr(PyObject *obj, SubString *name)
Eric Smith8c663262007-08-25 02:26:07 +0000148{
Eric Smith7ade6482007-08-26 22:27:13 +0000149 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000150 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000151 if (str == NULL)
152 return NULL;
153 newobj = PyObject_GetAttr(obj, str);
154 Py_DECREF(str);
155 return newobj;
Eric Smith8c663262007-08-25 02:26:07 +0000156}
157
Eric Smith7ade6482007-08-26 22:27:13 +0000158/* do the equivalent of obj[idx], where obj is a sequence */
159static PyObject *
160getitem_sequence(PyObject *obj, Py_ssize_t idx)
161{
162 return PySequence_GetItem(obj, idx);
163}
164
165/* do the equivalent of obj[idx], where obj is not a sequence */
166static PyObject *
167getitem_idx(PyObject *obj, Py_ssize_t idx)
168{
169 PyObject *newobj;
Christian Heimes217cfd12007-12-02 14:31:20 +0000170 PyObject *idx_obj = PyLong_FromSsize_t(idx);
Eric Smith7ade6482007-08-26 22:27:13 +0000171 if (idx_obj == NULL)
172 return NULL;
173 newobj = PyObject_GetItem(obj, idx_obj);
174 Py_DECREF(idx_obj);
175 return newobj;
176}
177
178/* do the equivalent of obj[name] */
179static PyObject *
180getitem_str(PyObject *obj, SubString *name)
181{
182 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000183 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000184 if (str == NULL)
185 return NULL;
186 newobj = PyObject_GetItem(obj, str);
187 Py_DECREF(str);
188 return newobj;
189}
190
191typedef struct {
192 /* the entire string we're parsing. we assume that someone else
193 is managing its lifetime, and that it will exist for the
194 lifetime of the iterator. can be empty */
195 SubString str;
196
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200197 /* index to where we are inside field_name */
198 Py_ssize_t index;
Eric Smith7ade6482007-08-26 22:27:13 +0000199} FieldNameIterator;
200
201
202static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200203FieldNameIterator_init(FieldNameIterator *self, PyObject *s,
204 Py_ssize_t start, Py_ssize_t end)
Eric Smith7ade6482007-08-26 22:27:13 +0000205{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200206 SubString_init(&self->str, s, start, end);
207 self->index = start;
Eric Smith7ade6482007-08-26 22:27:13 +0000208 return 1;
209}
210
211static int
212_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
213{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200214 Py_UCS4 c;
Eric Smith7ade6482007-08-26 22:27:13 +0000215
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200216 name->str = self->str.str;
217 name->start = self->index;
Eric Smith7ade6482007-08-26 22:27:13 +0000218
219 /* return everything until '.' or '[' */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200220 while (self->index < self->str.end) {
221 c = PyUnicode_READ_CHAR(self->str.str, self->index++);
222 switch (c) {
Eric Smith7ade6482007-08-26 22:27:13 +0000223 case '[':
224 case '.':
225 /* backup so that we this character will be seen next time */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200226 self->index--;
Eric Smith7ade6482007-08-26 22:27:13 +0000227 break;
228 default:
229 continue;
230 }
231 break;
232 }
233 /* end of string is okay */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200234 name->end = self->index;
Eric Smith7ade6482007-08-26 22:27:13 +0000235 return 1;
236}
237
238static int
239_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
240{
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000241 int bracket_seen = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200242 Py_UCS4 c;
Eric Smith7ade6482007-08-26 22:27:13 +0000243
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200244 name->str = self->str.str;
245 name->start = self->index;
Eric Smith7ade6482007-08-26 22:27:13 +0000246
247 /* return everything until ']' */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200248 while (self->index < self->str.end) {
249 c = PyUnicode_READ_CHAR(self->str.str, self->index++);
250 switch (c) {
Eric Smith7ade6482007-08-26 22:27:13 +0000251 case ']':
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000252 bracket_seen = 1;
Eric Smith7ade6482007-08-26 22:27:13 +0000253 break;
254 default:
255 continue;
256 }
257 break;
258 }
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000259 /* make sure we ended with a ']' */
260 if (!bracket_seen) {
261 PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
262 return 0;
263 }
264
Eric Smith7ade6482007-08-26 22:27:13 +0000265 /* end of string is okay */
266 /* don't include the ']' */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200267 name->end = self->index-1;
Eric Smith7ade6482007-08-26 22:27:13 +0000268 return 1;
269}
270
271/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
272static int
273FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
274 Py_ssize_t *name_idx, SubString *name)
275{
276 /* check at end of input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200277 if (self->index >= self->str.end)
Eric Smith7ade6482007-08-26 22:27:13 +0000278 return 1;
279
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200280 switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) {
Eric Smith7ade6482007-08-26 22:27:13 +0000281 case '.':
282 *is_attribute = 1;
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000283 if (_FieldNameIterator_attr(self, name) == 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000284 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000285 *name_idx = -1;
286 break;
287 case '[':
288 *is_attribute = 0;
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000289 if (_FieldNameIterator_item(self, name) == 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000290 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000291 *name_idx = get_integer(name);
Benjamin Peterson59a1b2f2010-06-07 22:31:26 +0000292 if (*name_idx == -1 && PyErr_Occurred())
293 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000294 break;
295 default:
Eric Smith41669ca2009-05-23 14:23:22 +0000296 /* Invalid character follows ']' */
297 PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
298 "follow ']' in format field specifier");
Eric Smith7ade6482007-08-26 22:27:13 +0000299 return 0;
300 }
301
302 /* empty string is an error */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200303 if (name->start == name->end) {
Eric Smith7ade6482007-08-26 22:27:13 +0000304 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
305 return 0;
306 }
307
308 return 2;
309}
310
311
312/* input: field_name
313 output: 'first' points to the part before the first '[' or '.'
314 'first_idx' is -1 if 'first' is not an integer, otherwise
315 it's the value of first converted to an integer
316 'rest' is an iterator to return the rest
317*/
318static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200319field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first,
Eric Smith8ec90442009-03-14 12:29:34 +0000320 Py_ssize_t *first_idx, FieldNameIterator *rest,
321 AutoNumber *auto_number)
Eric Smith7ade6482007-08-26 22:27:13 +0000322{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200323 Py_UCS4 c;
324 Py_ssize_t i = start;
Eric Smith8ec90442009-03-14 12:29:34 +0000325 int field_name_is_empty;
326 int using_numeric_index;
Eric Smith7ade6482007-08-26 22:27:13 +0000327
328 /* find the part up until the first '.' or '[' */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200329 while (i < end) {
330 switch (c = PyUnicode_READ_CHAR(str, i++)) {
Eric Smith7ade6482007-08-26 22:27:13 +0000331 case '[':
332 case '.':
333 /* backup so that we this character is available to the
334 "rest" iterator */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200335 i--;
Eric Smith7ade6482007-08-26 22:27:13 +0000336 break;
337 default:
338 continue;
339 }
340 break;
341 }
342
343 /* set up the return values */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200344 SubString_init(first, str, start, i);
345 FieldNameIterator_init(rest, str, i, end);
Eric Smith7ade6482007-08-26 22:27:13 +0000346
347 /* see if "first" is an integer, in which case it's used as an index */
348 *first_idx = get_integer(first);
Benjamin Peterson59a1b2f2010-06-07 22:31:26 +0000349 if (*first_idx == -1 && PyErr_Occurred())
350 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000351
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200352 field_name_is_empty = first->start >= first->end;
Eric Smith8ec90442009-03-14 12:29:34 +0000353
354 /* If the field name is omitted or if we have a numeric index
355 specified, then we're doing numeric indexing into args. */
356 using_numeric_index = field_name_is_empty || *first_idx != -1;
357
358 /* We always get here exactly one time for each field we're
359 processing. And we get here in field order (counting by left
360 braces). So this is the perfect place to handle automatic field
361 numbering if the field name is omitted. */
362
363 /* Check if we need to do the auto-numbering. It's not needed if
364 we're called from string.Format routines, because it's handled
365 in that class by itself. */
366 if (auto_number) {
367 /* Initialize our auto numbering state if this is the first
368 time we're either auto-numbering or manually numbering. */
369 if (auto_number->an_state == ANS_INIT && using_numeric_index)
370 auto_number->an_state = field_name_is_empty ?
371 ANS_AUTO : ANS_MANUAL;
372
373 /* Make sure our state is consistent with what we're doing
374 this time through. Only check if we're using a numeric
375 index. */
376 if (using_numeric_index)
377 if (autonumber_state_error(auto_number->an_state,
378 field_name_is_empty))
379 return 0;
380 /* Zero length field means we want to do auto-numbering of the
381 fields. */
382 if (field_name_is_empty)
383 *first_idx = (auto_number->an_field_number)++;
Eric Smith7ade6482007-08-26 22:27:13 +0000384 }
385
386 return 1;
Eric Smith7ade6482007-08-26 22:27:13 +0000387}
388
389
Eric Smith8c663262007-08-25 02:26:07 +0000390/*
391 get_field_object returns the object inside {}, before the
392 format_spec. It handles getindex and getattr lookups and consumes
393 the entire input string.
394*/
395static PyObject *
Eric Smith8ec90442009-03-14 12:29:34 +0000396get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
397 AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000398{
Eric Smith7ade6482007-08-26 22:27:13 +0000399 PyObject *obj = NULL;
400 int ok;
401 int is_attribute;
402 SubString name;
403 SubString first;
Eric Smith8c663262007-08-25 02:26:07 +0000404 Py_ssize_t index;
Eric Smith7ade6482007-08-26 22:27:13 +0000405 FieldNameIterator rest;
Eric Smith8c663262007-08-25 02:26:07 +0000406
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200407 if (!field_name_split(input->str, input->start, input->end, &first,
Eric Smith8ec90442009-03-14 12:29:34 +0000408 &index, &rest, auto_number)) {
Eric Smith7ade6482007-08-26 22:27:13 +0000409 goto error;
410 }
Eric Smith8c663262007-08-25 02:26:07 +0000411
Eric Smith7ade6482007-08-26 22:27:13 +0000412 if (index == -1) {
413 /* look up in kwargs */
Eric Smith7a6dd292007-08-27 23:30:47 +0000414 PyObject *key = SubString_new_object(&first);
Eric Smith7ade6482007-08-26 22:27:13 +0000415 if (key == NULL)
416 goto error;
Eric Smith27bbca62010-11-04 17:06:58 +0000417
418 /* Use PyObject_GetItem instead of PyDict_GetItem because this
419 code is no longer just used with kwargs. It might be passed
420 a non-dict when called through format_map. */
421 if ((kwargs == NULL) || (obj = PyObject_GetItem(kwargs, key)) == NULL) {
Eric Smith11529192007-09-04 23:04:22 +0000422 PyErr_SetObject(PyExc_KeyError, key);
Eric Smith7ade6482007-08-26 22:27:13 +0000423 Py_DECREF(key);
424 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000425 }
Neal Norwitz8a4eb292007-08-27 07:24:17 +0000426 Py_DECREF(key);
Eric Smith0cb431c2007-08-28 01:07:27 +0000427 }
428 else {
Eric V. Smith12ebefc2011-07-18 14:03:41 -0400429 /* If args is NULL, we have a format string with a positional field
430 with only kwargs to retrieve it from. This can only happen when
431 used with format_map(), where positional arguments are not
432 allowed. */
433 if (args == NULL) {
434 PyErr_SetString(PyExc_ValueError, "Format string contains "
435 "positional fields");
436 goto error;
437 }
438
Eric Smith7ade6482007-08-26 22:27:13 +0000439 /* look up in args */
440 obj = PySequence_GetItem(args, index);
Eric Smith11529192007-09-04 23:04:22 +0000441 if (obj == NULL)
Eric Smith7ade6482007-08-26 22:27:13 +0000442 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000443 }
Eric Smith7ade6482007-08-26 22:27:13 +0000444
445 /* iterate over the rest of the field_name */
446 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
447 &name)) == 2) {
448 PyObject *tmp;
449
450 if (is_attribute)
451 /* getattr lookup "." */
452 tmp = getattr(obj, &name);
453 else
454 /* getitem lookup "[]" */
455 if (index == -1)
456 tmp = getitem_str(obj, &name);
457 else
458 if (PySequence_Check(obj))
459 tmp = getitem_sequence(obj, index);
460 else
461 /* not a sequence */
462 tmp = getitem_idx(obj, index);
463 if (tmp == NULL)
464 goto error;
465
466 /* assign to obj */
467 Py_DECREF(obj);
468 obj = tmp;
Eric Smith8c663262007-08-25 02:26:07 +0000469 }
Eric Smith7ade6482007-08-26 22:27:13 +0000470 /* end of iterator, this is the non-error case */
471 if (ok == 1)
472 return obj;
473error:
474 Py_XDECREF(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000475 return NULL;
476}
477
/************************************************************************/
/*****************  Field rendering functions  **************************/
/************************************************************************/
481
482/*
483 render_field() is the main function in this section. It takes the
484 field object and field specification string generated by
485 get_field_and_spec, and renders the field into the output string.
486
Eric Smith8c663262007-08-25 02:26:07 +0000487 render_field calls fieldobj.__format__(format_spec) method, and
488 appends to the output.
489*/
490static int
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200491render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer)
Eric Smith8c663262007-08-25 02:26:07 +0000492{
493 int ok = 0;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000494 PyObject *result = NULL;
Eric Smith1d138f12008-05-31 01:40:08 +0000495 PyObject *format_spec_object = NULL;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200496 int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
497 int err;
Victor Stinner7931d9a2011-11-04 00:22:48 +0100498
Eric Smith1d138f12008-05-31 01:40:08 +0000499 /* If we know the type exactly, skip the lookup of __format__ and just
500 call the formatter directly. */
501 if (PyUnicode_CheckExact(fieldobj))
Victor Stinnerd3f08822012-05-29 12:57:52 +0200502 formatter = _PyUnicode_FormatAdvancedWriter;
Eric Smith1d138f12008-05-31 01:40:08 +0000503 else if (PyLong_CheckExact(fieldobj))
Victor Stinnerd3f08822012-05-29 12:57:52 +0200504 formatter = _PyLong_FormatAdvancedWriter;
Eric Smith1d138f12008-05-31 01:40:08 +0000505 else if (PyFloat_CheckExact(fieldobj))
Victor Stinnerd3f08822012-05-29 12:57:52 +0200506 formatter = _PyFloat_FormatAdvancedWriter;
507 else if (PyComplex_CheckExact(fieldobj))
508 formatter = _PyComplex_FormatAdvancedWriter;
Eric Smithba8c0282008-06-02 14:57:32 +0000509
510 if (formatter) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000511 /* we know exactly which formatter will be called when __format__ is
512 looked up, so call it directly, instead. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200513 err = formatter(writer, fieldobj, format_spec->str,
514 format_spec->start, format_spec->end);
515 return (err == 0);
Eric Smithba8c0282008-06-02 14:57:32 +0000516 }
Eric Smith1d138f12008-05-31 01:40:08 +0000517 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000518 /* We need to create an object out of the pointers we have, because
519 __format__ takes a string/unicode object for format_spec. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200520 if (format_spec->str)
521 format_spec_object = PyUnicode_Substring(format_spec->str,
522 format_spec->start,
523 format_spec->end);
524 else
525 format_spec_object = PyUnicode_New(0, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000526 if (format_spec_object == NULL)
527 goto done;
Eric Smith1d138f12008-05-31 01:40:08 +0000528
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000529 result = PyObject_Format(fieldobj, format_spec_object);
Eric Smith1d138f12008-05-31 01:40:08 +0000530 }
Victor Stinneree4544c2012-05-09 22:24:08 +0200531 if (result == NULL)
532 goto done;
Eric Smith8c663262007-08-25 02:26:07 +0000533
Victor Stinnerd3f08822012-05-29 12:57:52 +0200534 if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
Victor Stinneree4544c2012-05-09 22:24:08 +0200535 goto done;
Victor Stinneree4544c2012-05-09 22:24:08 +0200536 ok = 1;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200537
Eric Smith8c663262007-08-25 02:26:07 +0000538done:
Eric Smith1d138f12008-05-31 01:40:08 +0000539 Py_XDECREF(format_spec_object);
Eric Smith8c663262007-08-25 02:26:07 +0000540 Py_XDECREF(result);
541 return ok;
542}
543
544static int
545parse_field(SubString *str, SubString *field_name, SubString *format_spec,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200546 Py_UCS4 *conversion)
Eric Smith8c663262007-08-25 02:26:07 +0000547{
Eric Smith8ec90442009-03-14 12:29:34 +0000548 /* Note this function works if the field name is zero length,
549 which is good. Zero length field names are handled later, in
550 field_name_split. */
551
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200552 Py_UCS4 c = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000553
554 /* initialize these, as they may be empty */
555 *conversion = '\0';
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200556 SubString_init(format_spec, NULL, 0, 0);
Eric Smith8c663262007-08-25 02:26:07 +0000557
Eric Smith8ec90442009-03-14 12:29:34 +0000558 /* Search for the field name. it's terminated by the end of
559 the string, or a ':' or '!' */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200560 field_name->str = str->str;
561 field_name->start = str->start;
562 while (str->start < str->end) {
563 switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
Eric Smith8c663262007-08-25 02:26:07 +0000564 case ':':
565 case '!':
566 break;
567 default:
568 continue;
569 }
570 break;
571 }
572
573 if (c == '!' || c == ':') {
574 /* we have a format specifier and/or a conversion */
575 /* don't include the last character */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200576 field_name->end = str->start-1;
Eric Smith8c663262007-08-25 02:26:07 +0000577
578 /* the format specifier is the rest of the string */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200579 format_spec->str = str->str;
580 format_spec->start = str->start;
Eric Smith8c663262007-08-25 02:26:07 +0000581 format_spec->end = str->end;
582
583 /* see if there's a conversion specifier */
584 if (c == '!') {
585 /* there must be another character present */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200586 if (format_spec->start >= format_spec->end) {
Eric Smith8c663262007-08-25 02:26:07 +0000587 PyErr_SetString(PyExc_ValueError,
588 "end of format while looking for conversion "
589 "specifier");
590 return 0;
591 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200592 *conversion = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++);
Eric Smith8c663262007-08-25 02:26:07 +0000593
594 /* if there is another character, it must be a colon */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200595 if (format_spec->start < format_spec->end) {
596 c = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++);
Eric Smith8c663262007-08-25 02:26:07 +0000597 if (c != ':') {
598 PyErr_SetString(PyExc_ValueError,
599 "expected ':' after format specifier");
600 return 0;
601 }
602 }
603 }
Eric Smith0cb431c2007-08-28 01:07:27 +0000604 }
Eric Smith8ec90442009-03-14 12:29:34 +0000605 else
Eric Smith8c663262007-08-25 02:26:07 +0000606 /* end of string, there's no format_spec or conversion */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200607 field_name->end = str->start;
Eric Smith8ec90442009-03-14 12:29:34 +0000608
609 return 1;
Eric Smith8c663262007-08-25 02:26:07 +0000610}
611
/************************************************************************/
/******* Output string allocation and escape-to-markup processing  ******/
/************************************************************************/
615
616/* MarkupIterator breaks the string into pieces of either literal
617 text, or things inside {} that need to be marked up. it is
618 designed to make it easy to wrap a Python iterator around it, for
619 use with the Formatter class */
620
621typedef struct {
622 SubString str;
Eric Smith8c663262007-08-25 02:26:07 +0000623} MarkupIterator;
624
625static int
Victor Stinner7931d9a2011-11-04 00:22:48 +0100626MarkupIterator_init(MarkupIterator *self, PyObject *str,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200627 Py_ssize_t start, Py_ssize_t end)
Eric Smith8c663262007-08-25 02:26:07 +0000628{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200629 SubString_init(&self->str, str, start, end);
Eric Smith8c663262007-08-25 02:26:07 +0000630 return 1;
631}
632
633/* returns 0 on error, 1 on non-error termination, and 2 if it got a
634 string (or something to be expanded) */
635static int
Eric Smith625cbf22007-08-29 03:22:59 +0000636MarkupIterator_next(MarkupIterator *self, SubString *literal,
Eric Smith8ec90442009-03-14 12:29:34 +0000637 int *field_present, SubString *field_name,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200638 SubString *format_spec, Py_UCS4 *conversion,
Eric Smith8c663262007-08-25 02:26:07 +0000639 int *format_spec_needs_expanding)
640{
641 int at_end;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200642 Py_UCS4 c = 0;
643 Py_ssize_t start;
Eric Smith8c663262007-08-25 02:26:07 +0000644 int count;
645 Py_ssize_t len;
Eric Smith625cbf22007-08-29 03:22:59 +0000646 int markup_follows = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000647
Eric Smith625cbf22007-08-29 03:22:59 +0000648 /* initialize all of the output variables */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200649 SubString_init(literal, NULL, 0, 0);
650 SubString_init(field_name, NULL, 0, 0);
651 SubString_init(format_spec, NULL, 0, 0);
Eric Smith625cbf22007-08-29 03:22:59 +0000652 *conversion = '\0';
Eric Smith8c663262007-08-25 02:26:07 +0000653 *format_spec_needs_expanding = 0;
Eric Smith8ec90442009-03-14 12:29:34 +0000654 *field_present = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000655
Eric Smith625cbf22007-08-29 03:22:59 +0000656 /* No more input, end of iterator. This is the normal exit
657 path. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200658 if (self->str.start >= self->str.end)
Eric Smith8c663262007-08-25 02:26:07 +0000659 return 1;
660
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200661 start = self->str.start;
Eric Smith8c663262007-08-25 02:26:07 +0000662
Eric Smith625cbf22007-08-29 03:22:59 +0000663 /* First read any literal text. Read until the end of string, an
664 escaped '{' or '}', or an unescaped '{'. In order to never
665 allocate memory and so I can just pass pointers around, if
666 there's an escaped '{' or '}' then we'll return the literal
667 including the brace, but no format object. The next time
668 through, we'll return the rest of the literal, skipping past
669 the second consecutive brace. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200670 while (self->str.start < self->str.end) {
671 switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
Eric Smith625cbf22007-08-29 03:22:59 +0000672 case '{':
673 case '}':
674 markup_follows = 1;
675 break;
676 default:
677 continue;
Eric Smith8c663262007-08-25 02:26:07 +0000678 }
Eric Smith625cbf22007-08-29 03:22:59 +0000679 break;
Eric Smith0cb431c2007-08-28 01:07:27 +0000680 }
Eric Smith625cbf22007-08-29 03:22:59 +0000681
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200682 at_end = self->str.start >= self->str.end;
683 len = self->str.start - start;
Eric Smith625cbf22007-08-29 03:22:59 +0000684
Victor Stinner7931d9a2011-11-04 00:22:48 +0100685 if ((c == '}') && (at_end ||
686 (c != PyUnicode_READ_CHAR(self->str.str,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200687 self->str.start)))) {
Eric Smith625cbf22007-08-29 03:22:59 +0000688 PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
689 "in format string");
690 return 0;
691 }
692 if (at_end && c == '{') {
693 PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
694 "in format string");
695 return 0;
696 }
697 if (!at_end) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200698 if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) {
Eric Smith625cbf22007-08-29 03:22:59 +0000699 /* escaped } or {, skip it in the input. there is no
700 markup object following us, just this literal text */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200701 self->str.start++;
Eric Smith625cbf22007-08-29 03:22:59 +0000702 markup_follows = 0;
703 }
704 else
705 len--;
706 }
707
708 /* record the literal text */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200709 literal->str = self->str.str;
710 literal->start = start;
Eric Smith625cbf22007-08-29 03:22:59 +0000711 literal->end = start + len;
712
713 if (!markup_follows)
714 return 2;
715
716 /* this is markup, find the end of the string by counting nested
717 braces. note that this prohibits escaped braces, so that
718 format_specs cannot have braces in them. */
Eric Smith8ec90442009-03-14 12:29:34 +0000719 *field_present = 1;
Eric Smith625cbf22007-08-29 03:22:59 +0000720 count = 1;
721
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200722 start = self->str.start;
Eric Smith625cbf22007-08-29 03:22:59 +0000723
724 /* we know we can't have a zero length string, so don't worry
725 about that case */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200726 while (self->str.start < self->str.end) {
727 switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
Eric Smith625cbf22007-08-29 03:22:59 +0000728 case '{':
729 /* the format spec needs to be recursively expanded.
730 this is an optimization, and not strictly needed */
731 *format_spec_needs_expanding = 1;
732 count++;
733 break;
734 case '}':
735 count--;
736 if (count <= 0) {
737 /* we're done. parse and get out */
738 SubString s;
739
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200740 SubString_init(&s, self->str.str, start, self->str.start - 1);
Eric Smith625cbf22007-08-29 03:22:59 +0000741 if (parse_field(&s, field_name, format_spec, conversion) == 0)
742 return 0;
743
Eric Smith625cbf22007-08-29 03:22:59 +0000744 /* success */
745 return 2;
Eric Smith8c663262007-08-25 02:26:07 +0000746 }
747 break;
748 }
Eric Smith8c663262007-08-25 02:26:07 +0000749 }
Eric Smith625cbf22007-08-29 03:22:59 +0000750
751 /* end of string while searching for matching '}' */
752 PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
753 return 0;
Eric Smith8c663262007-08-25 02:26:07 +0000754}
755
756
757/* do the !r or !s conversion on obj */
758static PyObject *
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200759do_conversion(PyObject *obj, Py_UCS4 conversion)
Eric Smith8c663262007-08-25 02:26:07 +0000760{
761 /* XXX in pre-3.0, do we need to convert this to unicode, since it
762 might have returned a string? */
763 switch (conversion) {
764 case 'r':
765 return PyObject_Repr(obj);
766 case 's':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200767 return PyObject_Str(obj);
Georg Brandl559e5d72008-06-11 18:37:52 +0000768 case 'a':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200769 return PyObject_ASCII(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000770 default:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000771 if (conversion > 32 && conversion < 127) {
772 /* It's the ASCII subrange; casting to char is safe
773 (assuming the execution character set is an ASCII
774 superset). */
775 PyErr_Format(PyExc_ValueError,
Martin v. Löwis5a6f4582008-04-07 03:22:07 +0000776 "Unknown conversion specifier %c",
777 (char)conversion);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000778 } else
779 PyErr_Format(PyExc_ValueError,
780 "Unknown conversion specifier \\x%x",
781 (unsigned int)conversion);
Eric Smith8c663262007-08-25 02:26:07 +0000782 return NULL;
783 }
784}
785
786/* given:
787
788 {field_name!conversion:format_spec}
789
790 compute the result and write it to output.
791 format_spec_needs_expanding is an optimization. if it's false,
792 just output the string directly, otherwise recursively expand the
Eric Smith8ec90442009-03-14 12:29:34 +0000793 format_spec string.
794
795 field_name is allowed to be zero length, in which case we
796 are doing auto field numbering.
797*/
Eric Smith8c663262007-08-25 02:26:07 +0000798
799static int
800output_markup(SubString *field_name, SubString *format_spec,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200801 int format_spec_needs_expanding, Py_UCS4 conversion,
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200802 _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs,
Eric Smith8ec90442009-03-14 12:29:34 +0000803 int recursion_depth, AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000804{
805 PyObject *tmp = NULL;
806 PyObject *fieldobj = NULL;
807 SubString expanded_format_spec;
808 SubString *actual_format_spec;
809 int result = 0;
810
811 /* convert field_name to an object */
Eric Smith8ec90442009-03-14 12:29:34 +0000812 fieldobj = get_field_object(field_name, args, kwargs, auto_number);
Eric Smith8c663262007-08-25 02:26:07 +0000813 if (fieldobj == NULL)
814 goto done;
815
816 if (conversion != '\0') {
817 tmp = do_conversion(fieldobj, conversion);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200818 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
Eric Smith8c663262007-08-25 02:26:07 +0000819 goto done;
820
821 /* do the assignment, transferring ownership: fieldobj = tmp */
822 Py_DECREF(fieldobj);
823 fieldobj = tmp;
824 tmp = NULL;
825 }
826
827 /* if needed, recurively compute the format_spec */
828 if (format_spec_needs_expanding) {
Eric Smith8ec90442009-03-14 12:29:34 +0000829 tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
830 auto_number);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200831 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
Eric Smith8c663262007-08-25 02:26:07 +0000832 goto done;
833
834 /* note that in the case we're expanding the format string,
835 tmp must be kept around until after the call to
836 render_field. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200837 SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp));
Eric Smith8c663262007-08-25 02:26:07 +0000838 actual_format_spec = &expanded_format_spec;
Eric Smith0cb431c2007-08-28 01:07:27 +0000839 }
840 else
Eric Smith8c663262007-08-25 02:26:07 +0000841 actual_format_spec = format_spec;
842
Victor Stinner202fdca2012-05-07 12:47:02 +0200843 if (render_field(fieldobj, actual_format_spec, writer) == 0)
Eric Smith8c663262007-08-25 02:26:07 +0000844 goto done;
845
846 result = 1;
847
848done:
849 Py_XDECREF(fieldobj);
850 Py_XDECREF(tmp);
851
852 return result;
853}
854
855/*
Eric Smith8fd3eba2008-02-17 19:48:00 +0000856 do_markup is the top-level loop for the format() method. It
Eric Smith8c663262007-08-25 02:26:07 +0000857 searches through the format string for escapes to markup codes, and
858 calls other functions to move non-markup text to the output,
859 and to perform the markup to the output.
860*/
861static int
862do_markup(SubString *input, PyObject *args, PyObject *kwargs,
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200863 _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000864{
865 MarkupIterator iter;
Eric Smith8c663262007-08-25 02:26:07 +0000866 int format_spec_needs_expanding;
867 int result;
Eric Smith8ec90442009-03-14 12:29:34 +0000868 int field_present;
Eric Smith625cbf22007-08-29 03:22:59 +0000869 SubString literal;
Eric Smith8c663262007-08-25 02:26:07 +0000870 SubString field_name;
871 SubString format_spec;
Victor Stinneree4544c2012-05-09 22:24:08 +0200872 Py_UCS4 conversion, maxchar;
873 Py_ssize_t sublen;
Victor Stinner202fdca2012-05-07 12:47:02 +0200874 int err;
Eric Smith8c663262007-08-25 02:26:07 +0000875
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200876 MarkupIterator_init(&iter, input->str, input->start, input->end);
Eric Smith8ec90442009-03-14 12:29:34 +0000877 while ((result = MarkupIterator_next(&iter, &literal, &field_present,
878 &field_name, &format_spec,
879 &conversion,
Eric Smith8c663262007-08-25 02:26:07 +0000880 &format_spec_needs_expanding)) == 2) {
Victor Stinneree4544c2012-05-09 22:24:08 +0200881 sublen = literal.end - literal.start;
882 if (sublen) {
883 maxchar = _PyUnicode_FindMaxChar(literal.str,
884 literal.start, literal.end);
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200885 err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar);
Victor Stinneree4544c2012-05-09 22:24:08 +0200886 if (err == -1)
887 return 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200888 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
889 literal.str, literal.start, sublen);
Victor Stinneree4544c2012-05-09 22:24:08 +0200890 writer->pos += sublen;
891 }
892
Victor Stinnerd3f08822012-05-29 12:57:52 +0200893 if (field_present) {
894 if (iter.str.start == iter.str.end)
Victor Stinnerd7b7c742012-06-04 22:52:12 +0200895 writer->overallocate = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000896 if (!output_markup(&field_name, &format_spec,
Victor Stinner202fdca2012-05-07 12:47:02 +0200897 format_spec_needs_expanding, conversion, writer,
Eric Smith8ec90442009-03-14 12:29:34 +0000898 args, kwargs, recursion_depth, auto_number))
Eric Smith8c663262007-08-25 02:26:07 +0000899 return 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200900 }
Eric Smith8c663262007-08-25 02:26:07 +0000901 }
902 return result;
903}
904
905
906/*
907 build_string allocates the output string and then
908 calls do_markup to do the heavy lifting.
909*/
910static PyObject *
911build_string(SubString *input, PyObject *args, PyObject *kwargs,
Eric Smith8ec90442009-03-14 12:29:34 +0000912 int recursion_depth, AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000913{
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200914 _PyUnicodeWriter writer;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200915 Py_ssize_t minlen;
Eric Smith8c663262007-08-25 02:26:07 +0000916
917 /* check the recursion level */
Eric Smith45c07872007-09-05 02:02:43 +0000918 if (recursion_depth <= 0) {
Eric Smith8c663262007-08-25 02:26:07 +0000919 PyErr_SetString(PyExc_ValueError,
920 "Max string recursion exceeded");
Antoine Pitrou4574e622011-10-07 02:26:47 +0200921 return NULL;
Eric Smith8c663262007-08-25 02:26:07 +0000922 }
923
Victor Stinnerd3f08822012-05-29 12:57:52 +0200924 minlen = PyUnicode_GET_LENGTH(input->str) + 100;
925 _PyUnicodeWriter_Init(&writer, minlen);
Eric Smith8c663262007-08-25 02:26:07 +0000926
Victor Stinner202fdca2012-05-07 12:47:02 +0200927 if (!do_markup(input, args, kwargs, &writer, recursion_depth,
Eric Smith8ec90442009-03-14 12:29:34 +0000928 auto_number)) {
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200929 _PyUnicodeWriter_Dealloc(&writer);
Antoine Pitrou4574e622011-10-07 02:26:47 +0200930 return NULL;
Eric Smith8c663262007-08-25 02:26:07 +0000931 }
932
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200933 return _PyUnicodeWriter_Finish(&writer);
Eric Smith8c663262007-08-25 02:26:07 +0000934}
935
936/************************************************************************/
937/*********** main routine ***********************************************/
938/************************************************************************/
939
940/* this is the main entry point */
941static PyObject *
942do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
943{
944 SubString input;
945
946 /* PEP 3101 says only 2 levels, so that
947 "{0:{1}}".format('abc', 's') # works
948 "{0:{1:{2}}}".format('abc', 's', '') # fails
949 */
Eric Smith45c07872007-09-05 02:02:43 +0000950 int recursion_depth = 2;
Eric Smith8c663262007-08-25 02:26:07 +0000951
Eric Smith8ec90442009-03-14 12:29:34 +0000952 AutoNumber auto_number;
953
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200954 if (PyUnicode_READY(self) == -1)
955 return NULL;
956
Eric Smith8ec90442009-03-14 12:29:34 +0000957 AutoNumber_Init(&auto_number);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200958 SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self));
Eric Smith8ec90442009-03-14 12:29:34 +0000959 return build_string(&input, args, kwargs, recursion_depth, &auto_number);
Eric Smith8c663262007-08-25 02:26:07 +0000960}
Eric Smithf6db4092007-08-27 23:52:26 +0000961
Eric Smith27bbca62010-11-04 17:06:58 +0000962static PyObject *
963do_string_format_map(PyObject *self, PyObject *obj)
964{
965 return do_string_format(self, NULL, obj);
966}
Eric Smithf6db4092007-08-27 23:52:26 +0000967
968
969/************************************************************************/
970/*********** formatteriterator ******************************************/
971/************************************************************************/
972
973/* This is used to implement string.Formatter.vparse(). It exists so
974 Formatter can share code with the built in unicode.format() method.
975 It's really just a wrapper around MarkupIterator that is callable
976 from Python. */
977
978typedef struct {
979 PyObject_HEAD
Victor Stinner7931d9a2011-11-04 00:22:48 +0100980 PyObject *str;
Eric Smithf6db4092007-08-27 23:52:26 +0000981 MarkupIterator it_markup;
982} formatteriterobject;
983
984static void
985formatteriter_dealloc(formatteriterobject *it)
986{
987 Py_XDECREF(it->str);
988 PyObject_FREE(it);
989}
990
991/* returns a tuple:
Eric Smith625cbf22007-08-29 03:22:59 +0000992 (literal, field_name, format_spec, conversion)
993
994 literal is any literal text to output. might be zero length
995 field_name is the string before the ':'. might be None
996 format_spec is the string after the ':'. mibht be None
997 conversion is either None, or the string after the '!'
Eric Smithf6db4092007-08-27 23:52:26 +0000998*/
999static PyObject *
1000formatteriter_next(formatteriterobject *it)
1001{
1002 SubString literal;
1003 SubString field_name;
1004 SubString format_spec;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001005 Py_UCS4 conversion;
Eric Smithf6db4092007-08-27 23:52:26 +00001006 int format_spec_needs_expanding;
Eric Smith8ec90442009-03-14 12:29:34 +00001007 int field_present;
1008 int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1009 &field_name, &format_spec, &conversion,
Eric Smithf6db4092007-08-27 23:52:26 +00001010 &format_spec_needs_expanding);
1011
1012 /* all of the SubString objects point into it->str, so no
1013 memory management needs to be done on them */
1014 assert(0 <= result && result <= 2);
Eric Smith0cb431c2007-08-28 01:07:27 +00001015 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +00001016 /* if 0, error has already been set, if 1, iterator is empty */
1017 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +00001018 else {
Eric Smithf6db4092007-08-27 23:52:26 +00001019 PyObject *literal_str = NULL;
1020 PyObject *field_name_str = NULL;
1021 PyObject *format_spec_str = NULL;
1022 PyObject *conversion_str = NULL;
1023 PyObject *tuple = NULL;
1024
Eric Smith625cbf22007-08-29 03:22:59 +00001025 literal_str = SubString_new_object(&literal);
1026 if (literal_str == NULL)
1027 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001028
Eric Smith625cbf22007-08-29 03:22:59 +00001029 field_name_str = SubString_new_object(&field_name);
1030 if (field_name_str == NULL)
1031 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001032
Eric Smith625cbf22007-08-29 03:22:59 +00001033 /* if field_name is non-zero length, return a string for
1034 format_spec (even if zero length), else return None */
Eric Smith8ec90442009-03-14 12:29:34 +00001035 format_spec_str = (field_present ?
Eric Smith625cbf22007-08-29 03:22:59 +00001036 SubString_new_object_or_empty :
1037 SubString_new_object)(&format_spec);
1038 if (format_spec_str == NULL)
1039 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001040
Eric Smith625cbf22007-08-29 03:22:59 +00001041 /* if the conversion is not specified, return a None,
1042 otherwise create a one length string with the conversion
1043 character */
1044 if (conversion == '\0') {
Eric Smithf6db4092007-08-27 23:52:26 +00001045 conversion_str = Py_None;
Eric Smithf6db4092007-08-27 23:52:26 +00001046 Py_INCREF(conversion_str);
1047 }
Eric Smith625cbf22007-08-29 03:22:59 +00001048 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001049 conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1050 &conversion, 1);
Eric Smith625cbf22007-08-29 03:22:59 +00001051 if (conversion_str == NULL)
1052 goto done;
1053
Eric Smith9e7c8da2007-08-28 11:15:20 +00001054 tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
Eric Smithf6db4092007-08-27 23:52:26 +00001055 conversion_str);
Eric Smith625cbf22007-08-29 03:22:59 +00001056 done:
Eric Smithf6db4092007-08-27 23:52:26 +00001057 Py_XDECREF(literal_str);
1058 Py_XDECREF(field_name_str);
1059 Py_XDECREF(format_spec_str);
1060 Py_XDECREF(conversion_str);
1061 return tuple;
1062 }
1063}
1064
1065static PyMethodDef formatteriter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001066 {NULL, NULL} /* sentinel */
Eric Smithf6db4092007-08-27 23:52:26 +00001067};
1068
Eric Smith8fd3eba2008-02-17 19:48:00 +00001069static PyTypeObject PyFormatterIter_Type = {
Eric Smithf6db4092007-08-27 23:52:26 +00001070 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001071 "formatteriterator", /* tp_name */
1072 sizeof(formatteriterobject), /* tp_basicsize */
1073 0, /* tp_itemsize */
Eric Smithf6db4092007-08-27 23:52:26 +00001074 /* methods */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001075 (destructor)formatteriter_dealloc, /* tp_dealloc */
1076 0, /* tp_print */
1077 0, /* tp_getattr */
1078 0, /* tp_setattr */
1079 0, /* tp_reserved */
1080 0, /* tp_repr */
1081 0, /* tp_as_number */
1082 0, /* tp_as_sequence */
1083 0, /* tp_as_mapping */
1084 0, /* tp_hash */
1085 0, /* tp_call */
1086 0, /* tp_str */
1087 PyObject_GenericGetAttr, /* tp_getattro */
1088 0, /* tp_setattro */
1089 0, /* tp_as_buffer */
1090 Py_TPFLAGS_DEFAULT, /* tp_flags */
1091 0, /* tp_doc */
1092 0, /* tp_traverse */
1093 0, /* tp_clear */
1094 0, /* tp_richcompare */
1095 0, /* tp_weaklistoffset */
1096 PyObject_SelfIter, /* tp_iter */
1097 (iternextfunc)formatteriter_next, /* tp_iternext */
1098 formatteriter_methods, /* tp_methods */
Eric Smithf6db4092007-08-27 23:52:26 +00001099 0,
1100};
1101
1102/* unicode_formatter_parser is used to implement
1103 string.Formatter.vformat. it parses a string and returns tuples
1104 describing the parsed elements. It's a wrapper around
1105 stringlib/string_format.h's MarkupIterator */
1106static PyObject *
Victor Stinner7931d9a2011-11-04 00:22:48 +01001107formatter_parser(PyObject *ignored, PyObject *self)
Eric Smithf6db4092007-08-27 23:52:26 +00001108{
1109 formatteriterobject *it;
1110
Eric Smitha1eac722011-01-29 11:15:35 +00001111 if (!PyUnicode_Check(self)) {
1112 PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1113 return NULL;
1114 }
1115
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001116 if (PyUnicode_READY(self) == -1)
1117 return NULL;
1118
Eric Smithf6db4092007-08-27 23:52:26 +00001119 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1120 if (it == NULL)
1121 return NULL;
1122
1123 /* take ownership, give the object to the iterator */
1124 Py_INCREF(self);
1125 it->str = self;
1126
1127 /* initialize the contained MarkupIterator */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001128 MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self));
Eric Smithf6db4092007-08-27 23:52:26 +00001129 return (PyObject *)it;
1130}
1131
1132
1133/************************************************************************/
1134/*********** fieldnameiterator ******************************************/
1135/************************************************************************/
1136
1137
1138/* This is used to implement string.Formatter.vparse(). It parses the
1139 field name into attribute and item values. It's a Python-callable
1140 wrapper around FieldNameIterator */
1141
1142typedef struct {
1143 PyObject_HEAD
Victor Stinner7931d9a2011-11-04 00:22:48 +01001144 PyObject *str;
Eric Smithf6db4092007-08-27 23:52:26 +00001145 FieldNameIterator it_field;
1146} fieldnameiterobject;
1147
1148static void
1149fieldnameiter_dealloc(fieldnameiterobject *it)
1150{
1151 Py_XDECREF(it->str);
1152 PyObject_FREE(it);
1153}
1154
1155/* returns a tuple:
1156 (is_attr, value)
1157 is_attr is true if we used attribute syntax (e.g., '.foo')
1158 false if we used index syntax (e.g., '[foo]')
1159 value is an integer or string
1160*/
1161static PyObject *
1162fieldnameiter_next(fieldnameiterobject *it)
1163{
1164 int result;
1165 int is_attr;
1166 Py_ssize_t idx;
1167 SubString name;
1168
1169 result = FieldNameIterator_next(&it->it_field, &is_attr,
1170 &idx, &name);
Eric Smith0cb431c2007-08-28 01:07:27 +00001171 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +00001172 /* if 0, error has already been set, if 1, iterator is empty */
1173 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +00001174 else {
Eric Smithf6db4092007-08-27 23:52:26 +00001175 PyObject* result = NULL;
1176 PyObject* is_attr_obj = NULL;
1177 PyObject* obj = NULL;
1178
1179 is_attr_obj = PyBool_FromLong(is_attr);
1180 if (is_attr_obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001181 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001182
1183 /* either an integer or a string */
1184 if (idx != -1)
Christian Heimes217cfd12007-12-02 14:31:20 +00001185 obj = PyLong_FromSsize_t(idx);
Eric Smithf6db4092007-08-27 23:52:26 +00001186 else
1187 obj = SubString_new_object(&name);
1188 if (obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001189 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001190
1191 /* return a tuple of values */
1192 result = PyTuple_Pack(2, is_attr_obj, obj);
Eric Smithf6db4092007-08-27 23:52:26 +00001193
Eric Smith625cbf22007-08-29 03:22:59 +00001194 done:
Eric Smithf6db4092007-08-27 23:52:26 +00001195 Py_XDECREF(is_attr_obj);
1196 Py_XDECREF(obj);
Eric Smith625cbf22007-08-29 03:22:59 +00001197 return result;
Eric Smithf6db4092007-08-27 23:52:26 +00001198 }
Eric Smithf6db4092007-08-27 23:52:26 +00001199}
1200
1201static PyMethodDef fieldnameiter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001202 {NULL, NULL} /* sentinel */
Eric Smithf6db4092007-08-27 23:52:26 +00001203};
1204
1205static PyTypeObject PyFieldNameIter_Type = {
1206 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001207 "fieldnameiterator", /* tp_name */
1208 sizeof(fieldnameiterobject), /* tp_basicsize */
1209 0, /* tp_itemsize */
Eric Smithf6db4092007-08-27 23:52:26 +00001210 /* methods */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001211 (destructor)fieldnameiter_dealloc, /* tp_dealloc */
1212 0, /* tp_print */
1213 0, /* tp_getattr */
1214 0, /* tp_setattr */
1215 0, /* tp_reserved */
1216 0, /* tp_repr */
1217 0, /* tp_as_number */
1218 0, /* tp_as_sequence */
1219 0, /* tp_as_mapping */
1220 0, /* tp_hash */
1221 0, /* tp_call */
1222 0, /* tp_str */
1223 PyObject_GenericGetAttr, /* tp_getattro */
1224 0, /* tp_setattro */
1225 0, /* tp_as_buffer */
1226 Py_TPFLAGS_DEFAULT, /* tp_flags */
1227 0, /* tp_doc */
1228 0, /* tp_traverse */
1229 0, /* tp_clear */
1230 0, /* tp_richcompare */
1231 0, /* tp_weaklistoffset */
1232 PyObject_SelfIter, /* tp_iter */
1233 (iternextfunc)fieldnameiter_next, /* tp_iternext */
1234 fieldnameiter_methods, /* tp_methods */
Eric Smithf6db4092007-08-27 23:52:26 +00001235 0};
1236
1237/* unicode_formatter_field_name_split is used to implement
1238 string.Formatter.vformat. it takes an PEP 3101 "field name", and
1239 returns a tuple of (first, rest): "first", the part before the
1240 first '.' or '['; and "rest", an iterator for the rest of the field
1241 name. it's a wrapper around stringlib/string_format.h's
1242 field_name_split. The iterator it returns is a
1243 FieldNameIterator */
1244static PyObject *
Victor Stinner7931d9a2011-11-04 00:22:48 +01001245formatter_field_name_split(PyObject *ignored, PyObject *self)
Eric Smithf6db4092007-08-27 23:52:26 +00001246{
1247 SubString first;
1248 Py_ssize_t first_idx;
1249 fieldnameiterobject *it;
1250
1251 PyObject *first_obj = NULL;
1252 PyObject *result = NULL;
1253
Eric Smitha1eac722011-01-29 11:15:35 +00001254 if (!PyUnicode_Check(self)) {
1255 PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1256 return NULL;
1257 }
1258
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001259 if (PyUnicode_READY(self) == -1)
1260 return NULL;
1261
Eric Smithf6db4092007-08-27 23:52:26 +00001262 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1263 if (it == NULL)
1264 return NULL;
1265
1266 /* take ownership, give the object to the iterator. this is
1267 just to keep the field_name alive */
1268 Py_INCREF(self);
1269 it->str = self;
1270
Eric Smith8ec90442009-03-14 12:29:34 +00001271 /* Pass in auto_number = NULL. We'll return an empty string for
1272 first_obj in that case. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001273 if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self),
Eric Smith8ec90442009-03-14 12:29:34 +00001274 &first, &first_idx, &it->it_field, NULL))
Eric Smith625cbf22007-08-29 03:22:59 +00001275 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001276
Eric Smith0cb431c2007-08-28 01:07:27 +00001277 /* first becomes an integer, if possible; else a string */
Eric Smithf6db4092007-08-27 23:52:26 +00001278 if (first_idx != -1)
Christian Heimes217cfd12007-12-02 14:31:20 +00001279 first_obj = PyLong_FromSsize_t(first_idx);
Eric Smithf6db4092007-08-27 23:52:26 +00001280 else
1281 /* convert "first" into a string object */
1282 first_obj = SubString_new_object(&first);
1283 if (first_obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001284 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001285
1286 /* return a tuple of values */
1287 result = PyTuple_Pack(2, first_obj, it);
1288
Eric Smith625cbf22007-08-29 03:22:59 +00001289done:
Eric Smithf6db4092007-08-27 23:52:26 +00001290 Py_XDECREF(it);
1291 Py_XDECREF(first_obj);
1292 return result;
1293}