/*
    unicode_format.h -- implementation of str.format().
*/
4
/************************************************************************/
/***********   Global data structures and forward declarations  *********/
/************************************************************************/
8
9/*
10 A SubString consists of the characters between two string or
11 unicode pointers.
12*/
13typedef struct {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020014 PyObject *str; /* borrowed reference */
15 Py_ssize_t start, end;
Eric Smith8c663262007-08-25 02:26:07 +000016} SubString;
17
18
/* Keep track if we're auto-numbering fields */
typedef enum {
    ANS_INIT,       /* no numerically indexed field has been seen yet */
    ANS_AUTO,       /* fields are being numbered automatically */
    ANS_MANUAL      /* fields carry explicit numeric indexes */
} AutoNumberState;
24
25/* Keeps track of our auto-numbering state, and which number field we're on */
26typedef struct {
27 AutoNumberState an_state;
28 int an_field_number;
29} AutoNumber;
30
31
Eric Smith8c663262007-08-25 02:26:07 +000032/* forward declaration for recursion */
33static PyObject *
34build_string(SubString *input, PyObject *args, PyObject *kwargs,
Eric Smith8ec90442009-03-14 12:29:34 +000035 int recursion_depth, AutoNumber *auto_number);
Eric Smith8c663262007-08-25 02:26:07 +000036
37
38
/************************************************************************/
/**************************    Utility  functions    ************************/
/************************************************************************/
42
Eric Smith8ec90442009-03-14 12:29:34 +000043static void
44AutoNumber_Init(AutoNumber *auto_number)
45{
46 auto_number->an_state = ANS_INIT;
47 auto_number->an_field_number = 0;
48}
49
Eric Smith8c663262007-08-25 02:26:07 +000050/* fill in a SubString from a pointer and length */
51Py_LOCAL_INLINE(void)
Antoine Pitroudbf697a2011-10-06 15:34:41 +020052SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end)
Eric Smith8c663262007-08-25 02:26:07 +000053{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020054 str->str = s;
55 str->start = start;
56 str->end = end;
Eric Smith8c663262007-08-25 02:26:07 +000057}
58
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020059/* return a new string. if str->str is NULL, return None */
Eric Smith8c663262007-08-25 02:26:07 +000060Py_LOCAL_INLINE(PyObject *)
61SubString_new_object(SubString *str)
62{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020063 if (str->str == NULL) {
Eric Smith625cbf22007-08-29 03:22:59 +000064 Py_INCREF(Py_None);
65 return Py_None;
66 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020067 return PyUnicode_Substring(str->str, str->start, str->end);
Eric Smith625cbf22007-08-29 03:22:59 +000068}
69
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020070/* return a new string. if str->str is NULL, return None */
Eric Smith625cbf22007-08-29 03:22:59 +000071Py_LOCAL_INLINE(PyObject *)
72SubString_new_object_or_empty(SubString *str)
73{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020074 if (str->str == NULL) {
Victor Stinnerb37b1742011-12-01 03:18:59 +010075 return PyUnicode_New(0, 0);
Eric Smith625cbf22007-08-29 03:22:59 +000076 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020077 return SubString_new_object(str);
Eric Smith8c663262007-08-25 02:26:07 +000078}
79
Eric Smith8ec90442009-03-14 12:29:34 +000080/* Return 1 if an error has been detected switching between automatic
81 field numbering and manual field specification, else return 0. Set
82 ValueError on error. */
83static int
84autonumber_state_error(AutoNumberState state, int field_name_is_empty)
85{
86 if (state == ANS_MANUAL) {
87 if (field_name_is_empty) {
88 PyErr_SetString(PyExc_ValueError, "cannot switch from "
89 "manual field specification to "
90 "automatic field numbering");
91 return 1;
92 }
93 }
94 else {
95 if (!field_name_is_empty) {
96 PyErr_SetString(PyExc_ValueError, "cannot switch from "
97 "automatic field numbering to "
98 "manual field specification");
99 return 1;
100 }
101 }
102 return 0;
103}
104
105
/************************************************************************/
/***********  Format string parsing -- integers and identifiers *********/
/************************************************************************/
109
Eric Smith7ade6482007-08-26 22:27:13 +0000110static Py_ssize_t
111get_integer(const SubString *str)
Eric Smith8c663262007-08-25 02:26:07 +0000112{
Eric Smith7ade6482007-08-26 22:27:13 +0000113 Py_ssize_t accumulator = 0;
114 Py_ssize_t digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200115 Py_ssize_t i;
Eric Smith8c663262007-08-25 02:26:07 +0000116
Eric Smith7ade6482007-08-26 22:27:13 +0000117 /* empty string is an error */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200118 if (str->start >= str->end)
Eric Smith7ade6482007-08-26 22:27:13 +0000119 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000120
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200121 for (i = str->start; i < str->end; i++) {
122 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i));
Eric Smith8c663262007-08-25 02:26:07 +0000123 if (digitval < 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000124 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000125 /*
Mark Dickinsonc7d93b72011-09-25 15:34:32 +0100126 Detect possible overflow before it happens:
127
128 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
129 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
Eric Smith8c663262007-08-25 02:26:07 +0000130 */
Mark Dickinsonc7d93b72011-09-25 15:34:32 +0100131 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
Eric Smith8c663262007-08-25 02:26:07 +0000132 PyErr_Format(PyExc_ValueError,
133 "Too many decimal digits in format string");
134 return -1;
135 }
Mark Dickinsonc7d93b72011-09-25 15:34:32 +0100136 accumulator = accumulator * 10 + digitval;
Eric Smith8c663262007-08-25 02:26:07 +0000137 }
Eric Smith7ade6482007-08-26 22:27:13 +0000138 return accumulator;
Eric Smith8c663262007-08-25 02:26:07 +0000139}
140
/************************************************************************/
/******** Functions to get field objects and specification strings ******/
/************************************************************************/
144
Eric Smith7ade6482007-08-26 22:27:13 +0000145/* do the equivalent of obj.name */
Eric Smith8c663262007-08-25 02:26:07 +0000146static PyObject *
Eric Smith7ade6482007-08-26 22:27:13 +0000147getattr(PyObject *obj, SubString *name)
Eric Smith8c663262007-08-25 02:26:07 +0000148{
Eric Smith7ade6482007-08-26 22:27:13 +0000149 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000150 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000151 if (str == NULL)
152 return NULL;
153 newobj = PyObject_GetAttr(obj, str);
154 Py_DECREF(str);
155 return newobj;
Eric Smith8c663262007-08-25 02:26:07 +0000156}
157
Eric Smith7ade6482007-08-26 22:27:13 +0000158/* do the equivalent of obj[idx], where obj is a sequence */
159static PyObject *
160getitem_sequence(PyObject *obj, Py_ssize_t idx)
161{
162 return PySequence_GetItem(obj, idx);
163}
164
165/* do the equivalent of obj[idx], where obj is not a sequence */
166static PyObject *
167getitem_idx(PyObject *obj, Py_ssize_t idx)
168{
169 PyObject *newobj;
Christian Heimes217cfd12007-12-02 14:31:20 +0000170 PyObject *idx_obj = PyLong_FromSsize_t(idx);
Eric Smith7ade6482007-08-26 22:27:13 +0000171 if (idx_obj == NULL)
172 return NULL;
173 newobj = PyObject_GetItem(obj, idx_obj);
174 Py_DECREF(idx_obj);
175 return newobj;
176}
177
178/* do the equivalent of obj[name] */
179static PyObject *
180getitem_str(PyObject *obj, SubString *name)
181{
182 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000183 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000184 if (str == NULL)
185 return NULL;
186 newobj = PyObject_GetItem(obj, str);
187 Py_DECREF(str);
188 return newobj;
189}
190
191typedef struct {
192 /* the entire string we're parsing. we assume that someone else
193 is managing its lifetime, and that it will exist for the
194 lifetime of the iterator. can be empty */
195 SubString str;
196
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200197 /* index to where we are inside field_name */
198 Py_ssize_t index;
Eric Smith7ade6482007-08-26 22:27:13 +0000199} FieldNameIterator;
200
201
202static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200203FieldNameIterator_init(FieldNameIterator *self, PyObject *s,
204 Py_ssize_t start, Py_ssize_t end)
Eric Smith7ade6482007-08-26 22:27:13 +0000205{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200206 SubString_init(&self->str, s, start, end);
207 self->index = start;
Eric Smith7ade6482007-08-26 22:27:13 +0000208 return 1;
209}
210
211static int
212_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
213{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200214 Py_UCS4 c;
Eric Smith7ade6482007-08-26 22:27:13 +0000215
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200216 name->str = self->str.str;
217 name->start = self->index;
Eric Smith7ade6482007-08-26 22:27:13 +0000218
219 /* return everything until '.' or '[' */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200220 while (self->index < self->str.end) {
221 c = PyUnicode_READ_CHAR(self->str.str, self->index++);
222 switch (c) {
Eric Smith7ade6482007-08-26 22:27:13 +0000223 case '[':
224 case '.':
225 /* backup so that we this character will be seen next time */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200226 self->index--;
Eric Smith7ade6482007-08-26 22:27:13 +0000227 break;
228 default:
229 continue;
230 }
231 break;
232 }
233 /* end of string is okay */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200234 name->end = self->index;
Eric Smith7ade6482007-08-26 22:27:13 +0000235 return 1;
236}
237
238static int
239_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
240{
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000241 int bracket_seen = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200242 Py_UCS4 c;
Eric Smith7ade6482007-08-26 22:27:13 +0000243
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200244 name->str = self->str.str;
245 name->start = self->index;
Eric Smith7ade6482007-08-26 22:27:13 +0000246
247 /* return everything until ']' */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200248 while (self->index < self->str.end) {
249 c = PyUnicode_READ_CHAR(self->str.str, self->index++);
250 switch (c) {
Eric Smith7ade6482007-08-26 22:27:13 +0000251 case ']':
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000252 bracket_seen = 1;
Eric Smith7ade6482007-08-26 22:27:13 +0000253 break;
254 default:
255 continue;
256 }
257 break;
258 }
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000259 /* make sure we ended with a ']' */
260 if (!bracket_seen) {
261 PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
262 return 0;
263 }
264
Eric Smith7ade6482007-08-26 22:27:13 +0000265 /* end of string is okay */
266 /* don't include the ']' */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200267 name->end = self->index-1;
Eric Smith7ade6482007-08-26 22:27:13 +0000268 return 1;
269}
270
271/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
272static int
273FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
274 Py_ssize_t *name_idx, SubString *name)
275{
276 /* check at end of input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200277 if (self->index >= self->str.end)
Eric Smith7ade6482007-08-26 22:27:13 +0000278 return 1;
279
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200280 switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) {
Eric Smith7ade6482007-08-26 22:27:13 +0000281 case '.':
282 *is_attribute = 1;
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000283 if (_FieldNameIterator_attr(self, name) == 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000284 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000285 *name_idx = -1;
286 break;
287 case '[':
288 *is_attribute = 0;
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000289 if (_FieldNameIterator_item(self, name) == 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000290 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000291 *name_idx = get_integer(name);
Benjamin Peterson59a1b2f2010-06-07 22:31:26 +0000292 if (*name_idx == -1 && PyErr_Occurred())
293 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000294 break;
295 default:
Eric Smith41669ca2009-05-23 14:23:22 +0000296 /* Invalid character follows ']' */
297 PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
298 "follow ']' in format field specifier");
Eric Smith7ade6482007-08-26 22:27:13 +0000299 return 0;
300 }
301
302 /* empty string is an error */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200303 if (name->start == name->end) {
Eric Smith7ade6482007-08-26 22:27:13 +0000304 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
305 return 0;
306 }
307
308 return 2;
309}
310
311
312/* input: field_name
313 output: 'first' points to the part before the first '[' or '.'
314 'first_idx' is -1 if 'first' is not an integer, otherwise
315 it's the value of first converted to an integer
316 'rest' is an iterator to return the rest
317*/
318static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200319field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first,
Eric Smith8ec90442009-03-14 12:29:34 +0000320 Py_ssize_t *first_idx, FieldNameIterator *rest,
321 AutoNumber *auto_number)
Eric Smith7ade6482007-08-26 22:27:13 +0000322{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200323 Py_UCS4 c;
324 Py_ssize_t i = start;
Eric Smith8ec90442009-03-14 12:29:34 +0000325 int field_name_is_empty;
326 int using_numeric_index;
Eric Smith7ade6482007-08-26 22:27:13 +0000327
328 /* find the part up until the first '.' or '[' */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200329 while (i < end) {
330 switch (c = PyUnicode_READ_CHAR(str, i++)) {
Eric Smith7ade6482007-08-26 22:27:13 +0000331 case '[':
332 case '.':
333 /* backup so that we this character is available to the
334 "rest" iterator */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200335 i--;
Eric Smith7ade6482007-08-26 22:27:13 +0000336 break;
337 default:
338 continue;
339 }
340 break;
341 }
342
343 /* set up the return values */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200344 SubString_init(first, str, start, i);
345 FieldNameIterator_init(rest, str, i, end);
Eric Smith7ade6482007-08-26 22:27:13 +0000346
347 /* see if "first" is an integer, in which case it's used as an index */
348 *first_idx = get_integer(first);
Benjamin Peterson59a1b2f2010-06-07 22:31:26 +0000349 if (*first_idx == -1 && PyErr_Occurred())
350 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000351
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200352 field_name_is_empty = first->start >= first->end;
Eric Smith8ec90442009-03-14 12:29:34 +0000353
354 /* If the field name is omitted or if we have a numeric index
355 specified, then we're doing numeric indexing into args. */
356 using_numeric_index = field_name_is_empty || *first_idx != -1;
357
358 /* We always get here exactly one time for each field we're
359 processing. And we get here in field order (counting by left
360 braces). So this is the perfect place to handle automatic field
361 numbering if the field name is omitted. */
362
363 /* Check if we need to do the auto-numbering. It's not needed if
364 we're called from string.Format routines, because it's handled
365 in that class by itself. */
366 if (auto_number) {
367 /* Initialize our auto numbering state if this is the first
368 time we're either auto-numbering or manually numbering. */
369 if (auto_number->an_state == ANS_INIT && using_numeric_index)
370 auto_number->an_state = field_name_is_empty ?
371 ANS_AUTO : ANS_MANUAL;
372
373 /* Make sure our state is consistent with what we're doing
374 this time through. Only check if we're using a numeric
375 index. */
376 if (using_numeric_index)
377 if (autonumber_state_error(auto_number->an_state,
378 field_name_is_empty))
379 return 0;
380 /* Zero length field means we want to do auto-numbering of the
381 fields. */
382 if (field_name_is_empty)
383 *first_idx = (auto_number->an_field_number)++;
Eric Smith7ade6482007-08-26 22:27:13 +0000384 }
385
386 return 1;
Eric Smith7ade6482007-08-26 22:27:13 +0000387}
388
389
Eric Smith8c663262007-08-25 02:26:07 +0000390/*
391 get_field_object returns the object inside {}, before the
392 format_spec. It handles getindex and getattr lookups and consumes
393 the entire input string.
394*/
395static PyObject *
Eric Smith8ec90442009-03-14 12:29:34 +0000396get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
397 AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000398{
Eric Smith7ade6482007-08-26 22:27:13 +0000399 PyObject *obj = NULL;
400 int ok;
401 int is_attribute;
402 SubString name;
403 SubString first;
Eric Smith8c663262007-08-25 02:26:07 +0000404 Py_ssize_t index;
Eric Smith7ade6482007-08-26 22:27:13 +0000405 FieldNameIterator rest;
Eric Smith8c663262007-08-25 02:26:07 +0000406
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200407 if (!field_name_split(input->str, input->start, input->end, &first,
Eric Smith8ec90442009-03-14 12:29:34 +0000408 &index, &rest, auto_number)) {
Eric Smith7ade6482007-08-26 22:27:13 +0000409 goto error;
410 }
Eric Smith8c663262007-08-25 02:26:07 +0000411
Eric Smith7ade6482007-08-26 22:27:13 +0000412 if (index == -1) {
413 /* look up in kwargs */
Eric Smith7a6dd292007-08-27 23:30:47 +0000414 PyObject *key = SubString_new_object(&first);
Eric Smith7ade6482007-08-26 22:27:13 +0000415 if (key == NULL)
416 goto error;
Eric Smith27bbca62010-11-04 17:06:58 +0000417
418 /* Use PyObject_GetItem instead of PyDict_GetItem because this
419 code is no longer just used with kwargs. It might be passed
420 a non-dict when called through format_map. */
421 if ((kwargs == NULL) || (obj = PyObject_GetItem(kwargs, key)) == NULL) {
Eric Smith11529192007-09-04 23:04:22 +0000422 PyErr_SetObject(PyExc_KeyError, key);
Eric Smith7ade6482007-08-26 22:27:13 +0000423 Py_DECREF(key);
424 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000425 }
Neal Norwitz8a4eb292007-08-27 07:24:17 +0000426 Py_DECREF(key);
Eric Smith0cb431c2007-08-28 01:07:27 +0000427 }
428 else {
Eric V. Smith12ebefc2011-07-18 14:03:41 -0400429 /* If args is NULL, we have a format string with a positional field
430 with only kwargs to retrieve it from. This can only happen when
431 used with format_map(), where positional arguments are not
432 allowed. */
433 if (args == NULL) {
434 PyErr_SetString(PyExc_ValueError, "Format string contains "
435 "positional fields");
436 goto error;
437 }
438
Eric Smith7ade6482007-08-26 22:27:13 +0000439 /* look up in args */
440 obj = PySequence_GetItem(args, index);
Eric Smith11529192007-09-04 23:04:22 +0000441 if (obj == NULL)
Eric Smith7ade6482007-08-26 22:27:13 +0000442 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000443 }
Eric Smith7ade6482007-08-26 22:27:13 +0000444
445 /* iterate over the rest of the field_name */
446 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
447 &name)) == 2) {
448 PyObject *tmp;
449
450 if (is_attribute)
451 /* getattr lookup "." */
452 tmp = getattr(obj, &name);
453 else
454 /* getitem lookup "[]" */
455 if (index == -1)
456 tmp = getitem_str(obj, &name);
457 else
458 if (PySequence_Check(obj))
459 tmp = getitem_sequence(obj, index);
460 else
461 /* not a sequence */
462 tmp = getitem_idx(obj, index);
463 if (tmp == NULL)
464 goto error;
465
466 /* assign to obj */
467 Py_DECREF(obj);
468 obj = tmp;
Eric Smith8c663262007-08-25 02:26:07 +0000469 }
Eric Smith7ade6482007-08-26 22:27:13 +0000470 /* end of iterator, this is the non-error case */
471 if (ok == 1)
472 return obj;
473error:
474 Py_XDECREF(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000475 return NULL;
476}
477
/************************************************************************/
/*****************  Field rendering functions  **************************/
/************************************************************************/
481
482/*
483 render_field() is the main function in this section. It takes the
484 field object and field specification string generated by
485 get_field_and_spec, and renders the field into the output string.
486
Eric Smith8c663262007-08-25 02:26:07 +0000487 render_field calls fieldobj.__format__(format_spec) method, and
488 appends to the output.
489*/
490static int
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200491render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer)
Eric Smith8c663262007-08-25 02:26:07 +0000492{
493 int ok = 0;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000494 PyObject *result = NULL;
Eric Smith1d138f12008-05-31 01:40:08 +0000495 PyObject *format_spec_object = NULL;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200496 int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
497 int err;
Victor Stinner7931d9a2011-11-04 00:22:48 +0100498
Eric Smith1d138f12008-05-31 01:40:08 +0000499 /* If we know the type exactly, skip the lookup of __format__ and just
500 call the formatter directly. */
501 if (PyUnicode_CheckExact(fieldobj))
Victor Stinnerd3f08822012-05-29 12:57:52 +0200502 formatter = _PyUnicode_FormatAdvancedWriter;
Eric Smith1d138f12008-05-31 01:40:08 +0000503 else if (PyLong_CheckExact(fieldobj))
Victor Stinnerd3f08822012-05-29 12:57:52 +0200504 formatter = _PyLong_FormatAdvancedWriter;
Eric Smith1d138f12008-05-31 01:40:08 +0000505 else if (PyFloat_CheckExact(fieldobj))
Victor Stinnerd3f08822012-05-29 12:57:52 +0200506 formatter = _PyFloat_FormatAdvancedWriter;
507 else if (PyComplex_CheckExact(fieldobj))
508 formatter = _PyComplex_FormatAdvancedWriter;
Eric Smithba8c0282008-06-02 14:57:32 +0000509
510 if (formatter) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000511 /* we know exactly which formatter will be called when __format__ is
512 looked up, so call it directly, instead. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200513 err = formatter(writer, fieldobj, format_spec->str,
514 format_spec->start, format_spec->end);
515 return (err == 0);
Eric Smithba8c0282008-06-02 14:57:32 +0000516 }
Eric Smith1d138f12008-05-31 01:40:08 +0000517 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000518 /* We need to create an object out of the pointers we have, because
519 __format__ takes a string/unicode object for format_spec. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200520 if (format_spec->str)
521 format_spec_object = PyUnicode_Substring(format_spec->str,
522 format_spec->start,
523 format_spec->end);
524 else
525 format_spec_object = PyUnicode_New(0, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000526 if (format_spec_object == NULL)
527 goto done;
Eric Smith1d138f12008-05-31 01:40:08 +0000528
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000529 result = PyObject_Format(fieldobj, format_spec_object);
Eric Smith1d138f12008-05-31 01:40:08 +0000530 }
Victor Stinneree4544c2012-05-09 22:24:08 +0200531 if (result == NULL)
532 goto done;
Eric Smith8c663262007-08-25 02:26:07 +0000533
Victor Stinnerd3f08822012-05-29 12:57:52 +0200534 if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
Victor Stinneree4544c2012-05-09 22:24:08 +0200535 goto done;
Victor Stinneree4544c2012-05-09 22:24:08 +0200536 ok = 1;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200537
Eric Smith8c663262007-08-25 02:26:07 +0000538done:
Eric Smith1d138f12008-05-31 01:40:08 +0000539 Py_XDECREF(format_spec_object);
Eric Smith8c663262007-08-25 02:26:07 +0000540 Py_XDECREF(result);
541 return ok;
542}
543
544static int
545parse_field(SubString *str, SubString *field_name, SubString *format_spec,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200546 Py_UCS4 *conversion)
Eric Smith8c663262007-08-25 02:26:07 +0000547{
Eric Smith8ec90442009-03-14 12:29:34 +0000548 /* Note this function works if the field name is zero length,
549 which is good. Zero length field names are handled later, in
550 field_name_split. */
551
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200552 Py_UCS4 c = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000553
554 /* initialize these, as they may be empty */
555 *conversion = '\0';
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200556 SubString_init(format_spec, NULL, 0, 0);
Eric Smith8c663262007-08-25 02:26:07 +0000557
Eric Smith8ec90442009-03-14 12:29:34 +0000558 /* Search for the field name. it's terminated by the end of
559 the string, or a ':' or '!' */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200560 field_name->str = str->str;
561 field_name->start = str->start;
562 while (str->start < str->end) {
563 switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
Eric Smith8c663262007-08-25 02:26:07 +0000564 case ':':
565 case '!':
566 break;
567 default:
568 continue;
569 }
570 break;
571 }
572
573 if (c == '!' || c == ':') {
574 /* we have a format specifier and/or a conversion */
575 /* don't include the last character */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200576 field_name->end = str->start-1;
Eric Smith8c663262007-08-25 02:26:07 +0000577
578 /* the format specifier is the rest of the string */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200579 format_spec->str = str->str;
580 format_spec->start = str->start;
Eric Smith8c663262007-08-25 02:26:07 +0000581 format_spec->end = str->end;
582
583 /* see if there's a conversion specifier */
584 if (c == '!') {
585 /* there must be another character present */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200586 if (format_spec->start >= format_spec->end) {
Eric Smith8c663262007-08-25 02:26:07 +0000587 PyErr_SetString(PyExc_ValueError,
588 "end of format while looking for conversion "
589 "specifier");
590 return 0;
591 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200592 *conversion = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++);
Eric Smith8c663262007-08-25 02:26:07 +0000593
594 /* if there is another character, it must be a colon */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200595 if (format_spec->start < format_spec->end) {
596 c = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++);
Eric Smith8c663262007-08-25 02:26:07 +0000597 if (c != ':') {
598 PyErr_SetString(PyExc_ValueError,
599 "expected ':' after format specifier");
600 return 0;
601 }
602 }
603 }
Eric Smith0cb431c2007-08-28 01:07:27 +0000604 }
Eric Smith8ec90442009-03-14 12:29:34 +0000605 else
Eric Smith8c663262007-08-25 02:26:07 +0000606 /* end of string, there's no format_spec or conversion */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200607 field_name->end = str->start;
Eric Smith8ec90442009-03-14 12:29:34 +0000608
609 return 1;
Eric Smith8c663262007-08-25 02:26:07 +0000610}
611
/************************************************************************/
/******* Output string allocation and escape-to-markup processing  ******/
/************************************************************************/
615
616/* MarkupIterator breaks the string into pieces of either literal
617 text, or things inside {} that need to be marked up. it is
618 designed to make it easy to wrap a Python iterator around it, for
619 use with the Formatter class */
620
621typedef struct {
622 SubString str;
Eric Smith8c663262007-08-25 02:26:07 +0000623} MarkupIterator;
624
625static int
Victor Stinner7931d9a2011-11-04 00:22:48 +0100626MarkupIterator_init(MarkupIterator *self, PyObject *str,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200627 Py_ssize_t start, Py_ssize_t end)
Eric Smith8c663262007-08-25 02:26:07 +0000628{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200629 SubString_init(&self->str, str, start, end);
Eric Smith8c663262007-08-25 02:26:07 +0000630 return 1;
631}
632
633/* returns 0 on error, 1 on non-error termination, and 2 if it got a
634 string (or something to be expanded) */
635static int
Eric Smith625cbf22007-08-29 03:22:59 +0000636MarkupIterator_next(MarkupIterator *self, SubString *literal,
Eric Smith8ec90442009-03-14 12:29:34 +0000637 int *field_present, SubString *field_name,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200638 SubString *format_spec, Py_UCS4 *conversion,
Eric Smith8c663262007-08-25 02:26:07 +0000639 int *format_spec_needs_expanding)
640{
641 int at_end;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200642 Py_UCS4 c = 0;
643 Py_ssize_t start;
Eric Smith8c663262007-08-25 02:26:07 +0000644 int count;
645 Py_ssize_t len;
Eric Smith625cbf22007-08-29 03:22:59 +0000646 int markup_follows = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000647
Eric Smith625cbf22007-08-29 03:22:59 +0000648 /* initialize all of the output variables */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200649 SubString_init(literal, NULL, 0, 0);
650 SubString_init(field_name, NULL, 0, 0);
651 SubString_init(format_spec, NULL, 0, 0);
Eric Smith625cbf22007-08-29 03:22:59 +0000652 *conversion = '\0';
Eric Smith8c663262007-08-25 02:26:07 +0000653 *format_spec_needs_expanding = 0;
Eric Smith8ec90442009-03-14 12:29:34 +0000654 *field_present = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000655
Eric Smith625cbf22007-08-29 03:22:59 +0000656 /* No more input, end of iterator. This is the normal exit
657 path. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200658 if (self->str.start >= self->str.end)
Eric Smith8c663262007-08-25 02:26:07 +0000659 return 1;
660
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200661 start = self->str.start;
Eric Smith8c663262007-08-25 02:26:07 +0000662
Eric Smith625cbf22007-08-29 03:22:59 +0000663 /* First read any literal text. Read until the end of string, an
664 escaped '{' or '}', or an unescaped '{'. In order to never
665 allocate memory and so I can just pass pointers around, if
666 there's an escaped '{' or '}' then we'll return the literal
667 including the brace, but no format object. The next time
668 through, we'll return the rest of the literal, skipping past
669 the second consecutive brace. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200670 while (self->str.start < self->str.end) {
671 switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
Eric Smith625cbf22007-08-29 03:22:59 +0000672 case '{':
673 case '}':
674 markup_follows = 1;
675 break;
676 default:
677 continue;
Eric Smith8c663262007-08-25 02:26:07 +0000678 }
Eric Smith625cbf22007-08-29 03:22:59 +0000679 break;
Eric Smith0cb431c2007-08-28 01:07:27 +0000680 }
Eric Smith625cbf22007-08-29 03:22:59 +0000681
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200682 at_end = self->str.start >= self->str.end;
683 len = self->str.start - start;
Eric Smith625cbf22007-08-29 03:22:59 +0000684
Victor Stinner7931d9a2011-11-04 00:22:48 +0100685 if ((c == '}') && (at_end ||
686 (c != PyUnicode_READ_CHAR(self->str.str,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200687 self->str.start)))) {
Eric Smith625cbf22007-08-29 03:22:59 +0000688 PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
689 "in format string");
690 return 0;
691 }
692 if (at_end && c == '{') {
693 PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
694 "in format string");
695 return 0;
696 }
697 if (!at_end) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200698 if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) {
Eric Smith625cbf22007-08-29 03:22:59 +0000699 /* escaped } or {, skip it in the input. there is no
700 markup object following us, just this literal text */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200701 self->str.start++;
Eric Smith625cbf22007-08-29 03:22:59 +0000702 markup_follows = 0;
703 }
704 else
705 len--;
706 }
707
708 /* record the literal text */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200709 literal->str = self->str.str;
710 literal->start = start;
Eric Smith625cbf22007-08-29 03:22:59 +0000711 literal->end = start + len;
712
713 if (!markup_follows)
714 return 2;
715
716 /* this is markup, find the end of the string by counting nested
717 braces. note that this prohibits escaped braces, so that
718 format_specs cannot have braces in them. */
Eric Smith8ec90442009-03-14 12:29:34 +0000719 *field_present = 1;
Eric Smith625cbf22007-08-29 03:22:59 +0000720 count = 1;
721
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200722 start = self->str.start;
Eric Smith625cbf22007-08-29 03:22:59 +0000723
724 /* we know we can't have a zero length string, so don't worry
725 about that case */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200726 while (self->str.start < self->str.end) {
727 switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
Eric Smith625cbf22007-08-29 03:22:59 +0000728 case '{':
729 /* the format spec needs to be recursively expanded.
730 this is an optimization, and not strictly needed */
731 *format_spec_needs_expanding = 1;
732 count++;
733 break;
734 case '}':
735 count--;
736 if (count <= 0) {
737 /* we're done. parse and get out */
738 SubString s;
739
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200740 SubString_init(&s, self->str.str, start, self->str.start - 1);
Eric Smith625cbf22007-08-29 03:22:59 +0000741 if (parse_field(&s, field_name, format_spec, conversion) == 0)
742 return 0;
743
Eric Smith625cbf22007-08-29 03:22:59 +0000744 /* success */
745 return 2;
Eric Smith8c663262007-08-25 02:26:07 +0000746 }
747 break;
748 }
Eric Smith8c663262007-08-25 02:26:07 +0000749 }
Eric Smith625cbf22007-08-29 03:22:59 +0000750
751 /* end of string while searching for matching '}' */
752 PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
753 return 0;
Eric Smith8c663262007-08-25 02:26:07 +0000754}
755
756
757/* do the !r or !s conversion on obj */
758static PyObject *
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200759do_conversion(PyObject *obj, Py_UCS4 conversion)
Eric Smith8c663262007-08-25 02:26:07 +0000760{
761 /* XXX in pre-3.0, do we need to convert this to unicode, since it
762 might have returned a string? */
763 switch (conversion) {
764 case 'r':
765 return PyObject_Repr(obj);
766 case 's':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200767 return PyObject_Str(obj);
Georg Brandl559e5d72008-06-11 18:37:52 +0000768 case 'a':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200769 return PyObject_ASCII(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000770 default:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000771 if (conversion > 32 && conversion < 127) {
772 /* It's the ASCII subrange; casting to char is safe
773 (assuming the execution character set is an ASCII
774 superset). */
775 PyErr_Format(PyExc_ValueError,
Martin v. Löwis5a6f4582008-04-07 03:22:07 +0000776 "Unknown conversion specifier %c",
777 (char)conversion);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000778 } else
779 PyErr_Format(PyExc_ValueError,
780 "Unknown conversion specifier \\x%x",
781 (unsigned int)conversion);
Eric Smith8c663262007-08-25 02:26:07 +0000782 return NULL;
783 }
784}
785
786/* given:
787
788 {field_name!conversion:format_spec}
789
790 compute the result and write it to output.
791 format_spec_needs_expanding is an optimization. if it's false,
792 just output the string directly, otherwise recursively expand the
Eric Smith8ec90442009-03-14 12:29:34 +0000793 format_spec string.
794
795 field_name is allowed to be zero length, in which case we
796 are doing auto field numbering.
797*/
Eric Smith8c663262007-08-25 02:26:07 +0000798
799static int
800output_markup(SubString *field_name, SubString *format_spec,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200801 int format_spec_needs_expanding, Py_UCS4 conversion,
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200802 _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs,
Eric Smith8ec90442009-03-14 12:29:34 +0000803 int recursion_depth, AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000804{
805 PyObject *tmp = NULL;
806 PyObject *fieldobj = NULL;
807 SubString expanded_format_spec;
808 SubString *actual_format_spec;
809 int result = 0;
810
811 /* convert field_name to an object */
Eric Smith8ec90442009-03-14 12:29:34 +0000812 fieldobj = get_field_object(field_name, args, kwargs, auto_number);
Eric Smith8c663262007-08-25 02:26:07 +0000813 if (fieldobj == NULL)
814 goto done;
815
816 if (conversion != '\0') {
817 tmp = do_conversion(fieldobj, conversion);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200818 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
Eric Smith8c663262007-08-25 02:26:07 +0000819 goto done;
820
821 /* do the assignment, transferring ownership: fieldobj = tmp */
822 Py_DECREF(fieldobj);
823 fieldobj = tmp;
824 tmp = NULL;
825 }
826
827 /* if needed, recurively compute the format_spec */
828 if (format_spec_needs_expanding) {
Eric Smith8ec90442009-03-14 12:29:34 +0000829 tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
830 auto_number);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200831 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
Eric Smith8c663262007-08-25 02:26:07 +0000832 goto done;
833
834 /* note that in the case we're expanding the format string,
835 tmp must be kept around until after the call to
836 render_field. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200837 SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp));
Eric Smith8c663262007-08-25 02:26:07 +0000838 actual_format_spec = &expanded_format_spec;
Eric Smith0cb431c2007-08-28 01:07:27 +0000839 }
840 else
Eric Smith8c663262007-08-25 02:26:07 +0000841 actual_format_spec = format_spec;
842
Victor Stinner202fdca2012-05-07 12:47:02 +0200843 if (render_field(fieldobj, actual_format_spec, writer) == 0)
Eric Smith8c663262007-08-25 02:26:07 +0000844 goto done;
845
846 result = 1;
847
848done:
849 Py_XDECREF(fieldobj);
850 Py_XDECREF(tmp);
851
852 return result;
853}
854
855/*
Eric Smith8fd3eba2008-02-17 19:48:00 +0000856 do_markup is the top-level loop for the format() method. It
Eric Smith8c663262007-08-25 02:26:07 +0000857 searches through the format string for escapes to markup codes, and
858 calls other functions to move non-markup text to the output,
859 and to perform the markup to the output.
860*/
861static int
862do_markup(SubString *input, PyObject *args, PyObject *kwargs,
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200863 _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000864{
865 MarkupIterator iter;
Eric Smith8c663262007-08-25 02:26:07 +0000866 int format_spec_needs_expanding;
867 int result;
Eric Smith8ec90442009-03-14 12:29:34 +0000868 int field_present;
Eric Smith625cbf22007-08-29 03:22:59 +0000869 SubString literal;
Eric Smith8c663262007-08-25 02:26:07 +0000870 SubString field_name;
871 SubString format_spec;
Victor Stinnercfc4c132013-04-03 01:48:39 +0200872 Py_UCS4 conversion;
Eric Smith8c663262007-08-25 02:26:07 +0000873
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200874 MarkupIterator_init(&iter, input->str, input->start, input->end);
Eric Smith8ec90442009-03-14 12:29:34 +0000875 while ((result = MarkupIterator_next(&iter, &literal, &field_present,
876 &field_name, &format_spec,
877 &conversion,
Eric Smith8c663262007-08-25 02:26:07 +0000878 &format_spec_needs_expanding)) == 2) {
Victor Stinnercfc4c132013-04-03 01:48:39 +0200879 if (literal.end != literal.start) {
880 if (!field_present && iter.str.start == iter.str.end)
881 writer->overallocate = 0;
882 if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
883 literal.start, literal.end) < 0)
Victor Stinneree4544c2012-05-09 22:24:08 +0200884 return 0;
Victor Stinneree4544c2012-05-09 22:24:08 +0200885 }
886
Victor Stinnerd3f08822012-05-29 12:57:52 +0200887 if (field_present) {
888 if (iter.str.start == iter.str.end)
Victor Stinnerd7b7c742012-06-04 22:52:12 +0200889 writer->overallocate = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000890 if (!output_markup(&field_name, &format_spec,
Victor Stinner202fdca2012-05-07 12:47:02 +0200891 format_spec_needs_expanding, conversion, writer,
Eric Smith8ec90442009-03-14 12:29:34 +0000892 args, kwargs, recursion_depth, auto_number))
Eric Smith8c663262007-08-25 02:26:07 +0000893 return 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200894 }
Eric Smith8c663262007-08-25 02:26:07 +0000895 }
896 return result;
897}
898
899
900/*
901 build_string allocates the output string and then
902 calls do_markup to do the heavy lifting.
903*/
904static PyObject *
905build_string(SubString *input, PyObject *args, PyObject *kwargs,
Eric Smith8ec90442009-03-14 12:29:34 +0000906 int recursion_depth, AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000907{
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200908 _PyUnicodeWriter writer;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200909 Py_ssize_t minlen;
Eric Smith8c663262007-08-25 02:26:07 +0000910
911 /* check the recursion level */
Eric Smith45c07872007-09-05 02:02:43 +0000912 if (recursion_depth <= 0) {
Eric Smith8c663262007-08-25 02:26:07 +0000913 PyErr_SetString(PyExc_ValueError,
914 "Max string recursion exceeded");
Antoine Pitrou4574e622011-10-07 02:26:47 +0200915 return NULL;
Eric Smith8c663262007-08-25 02:26:07 +0000916 }
917
Victor Stinnerd3f08822012-05-29 12:57:52 +0200918 minlen = PyUnicode_GET_LENGTH(input->str) + 100;
919 _PyUnicodeWriter_Init(&writer, minlen);
Eric Smith8c663262007-08-25 02:26:07 +0000920
Victor Stinner202fdca2012-05-07 12:47:02 +0200921 if (!do_markup(input, args, kwargs, &writer, recursion_depth,
Eric Smith8ec90442009-03-14 12:29:34 +0000922 auto_number)) {
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200923 _PyUnicodeWriter_Dealloc(&writer);
Antoine Pitrou4574e622011-10-07 02:26:47 +0200924 return NULL;
Eric Smith8c663262007-08-25 02:26:07 +0000925 }
926
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200927 return _PyUnicodeWriter_Finish(&writer);
Eric Smith8c663262007-08-25 02:26:07 +0000928}
929
930/************************************************************************/
931/*********** main routine ***********************************************/
932/************************************************************************/
933
934/* this is the main entry point */
935static PyObject *
936do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
937{
938 SubString input;
939
940 /* PEP 3101 says only 2 levels, so that
941 "{0:{1}}".format('abc', 's') # works
942 "{0:{1:{2}}}".format('abc', 's', '') # fails
943 */
Eric Smith45c07872007-09-05 02:02:43 +0000944 int recursion_depth = 2;
Eric Smith8c663262007-08-25 02:26:07 +0000945
Eric Smith8ec90442009-03-14 12:29:34 +0000946 AutoNumber auto_number;
947
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200948 if (PyUnicode_READY(self) == -1)
949 return NULL;
950
Eric Smith8ec90442009-03-14 12:29:34 +0000951 AutoNumber_Init(&auto_number);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200952 SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self));
Eric Smith8ec90442009-03-14 12:29:34 +0000953 return build_string(&input, args, kwargs, recursion_depth, &auto_number);
Eric Smith8c663262007-08-25 02:26:07 +0000954}
Eric Smithf6db4092007-08-27 23:52:26 +0000955
Eric Smith27bbca62010-11-04 17:06:58 +0000956static PyObject *
957do_string_format_map(PyObject *self, PyObject *obj)
958{
959 return do_string_format(self, NULL, obj);
960}
Eric Smithf6db4092007-08-27 23:52:26 +0000961
962
963/************************************************************************/
964/*********** formatteriterator ******************************************/
965/************************************************************************/
966
967/* This is used to implement string.Formatter.vparse(). It exists so
968 Formatter can share code with the built in unicode.format() method.
969 It's really just a wrapper around MarkupIterator that is callable
970 from Python. */
971
972typedef struct {
973 PyObject_HEAD
Victor Stinner7931d9a2011-11-04 00:22:48 +0100974 PyObject *str;
Eric Smithf6db4092007-08-27 23:52:26 +0000975 MarkupIterator it_markup;
976} formatteriterobject;
977
978static void
979formatteriter_dealloc(formatteriterobject *it)
980{
981 Py_XDECREF(it->str);
982 PyObject_FREE(it);
983}
984
985/* returns a tuple:
Eric Smith625cbf22007-08-29 03:22:59 +0000986 (literal, field_name, format_spec, conversion)
987
988 literal is any literal text to output. might be zero length
989 field_name is the string before the ':'. might be None
990 format_spec is the string after the ':'. mibht be None
991 conversion is either None, or the string after the '!'
Eric Smithf6db4092007-08-27 23:52:26 +0000992*/
993static PyObject *
994formatteriter_next(formatteriterobject *it)
995{
996 SubString literal;
997 SubString field_name;
998 SubString format_spec;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200999 Py_UCS4 conversion;
Eric Smithf6db4092007-08-27 23:52:26 +00001000 int format_spec_needs_expanding;
Eric Smith8ec90442009-03-14 12:29:34 +00001001 int field_present;
1002 int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1003 &field_name, &format_spec, &conversion,
Eric Smithf6db4092007-08-27 23:52:26 +00001004 &format_spec_needs_expanding);
1005
1006 /* all of the SubString objects point into it->str, so no
1007 memory management needs to be done on them */
1008 assert(0 <= result && result <= 2);
Eric Smith0cb431c2007-08-28 01:07:27 +00001009 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +00001010 /* if 0, error has already been set, if 1, iterator is empty */
1011 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +00001012 else {
Eric Smithf6db4092007-08-27 23:52:26 +00001013 PyObject *literal_str = NULL;
1014 PyObject *field_name_str = NULL;
1015 PyObject *format_spec_str = NULL;
1016 PyObject *conversion_str = NULL;
1017 PyObject *tuple = NULL;
1018
Eric Smith625cbf22007-08-29 03:22:59 +00001019 literal_str = SubString_new_object(&literal);
1020 if (literal_str == NULL)
1021 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001022
Eric Smith625cbf22007-08-29 03:22:59 +00001023 field_name_str = SubString_new_object(&field_name);
1024 if (field_name_str == NULL)
1025 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001026
Eric Smith625cbf22007-08-29 03:22:59 +00001027 /* if field_name is non-zero length, return a string for
1028 format_spec (even if zero length), else return None */
Eric Smith8ec90442009-03-14 12:29:34 +00001029 format_spec_str = (field_present ?
Eric Smith625cbf22007-08-29 03:22:59 +00001030 SubString_new_object_or_empty :
1031 SubString_new_object)(&format_spec);
1032 if (format_spec_str == NULL)
1033 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001034
Eric Smith625cbf22007-08-29 03:22:59 +00001035 /* if the conversion is not specified, return a None,
1036 otherwise create a one length string with the conversion
1037 character */
1038 if (conversion == '\0') {
Eric Smithf6db4092007-08-27 23:52:26 +00001039 conversion_str = Py_None;
Eric Smithf6db4092007-08-27 23:52:26 +00001040 Py_INCREF(conversion_str);
1041 }
Eric Smith625cbf22007-08-29 03:22:59 +00001042 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001043 conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1044 &conversion, 1);
Eric Smith625cbf22007-08-29 03:22:59 +00001045 if (conversion_str == NULL)
1046 goto done;
1047
Eric Smith9e7c8da2007-08-28 11:15:20 +00001048 tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
Eric Smithf6db4092007-08-27 23:52:26 +00001049 conversion_str);
Eric Smith625cbf22007-08-29 03:22:59 +00001050 done:
Eric Smithf6db4092007-08-27 23:52:26 +00001051 Py_XDECREF(literal_str);
1052 Py_XDECREF(field_name_str);
1053 Py_XDECREF(format_spec_str);
1054 Py_XDECREF(conversion_str);
1055 return tuple;
1056 }
1057}
1058
1059static PyMethodDef formatteriter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001060 {NULL, NULL} /* sentinel */
Eric Smithf6db4092007-08-27 23:52:26 +00001061};
1062
Eric Smith8fd3eba2008-02-17 19:48:00 +00001063static PyTypeObject PyFormatterIter_Type = {
Eric Smithf6db4092007-08-27 23:52:26 +00001064 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001065 "formatteriterator", /* tp_name */
1066 sizeof(formatteriterobject), /* tp_basicsize */
1067 0, /* tp_itemsize */
Eric Smithf6db4092007-08-27 23:52:26 +00001068 /* methods */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001069 (destructor)formatteriter_dealloc, /* tp_dealloc */
1070 0, /* tp_print */
1071 0, /* tp_getattr */
1072 0, /* tp_setattr */
1073 0, /* tp_reserved */
1074 0, /* tp_repr */
1075 0, /* tp_as_number */
1076 0, /* tp_as_sequence */
1077 0, /* tp_as_mapping */
1078 0, /* tp_hash */
1079 0, /* tp_call */
1080 0, /* tp_str */
1081 PyObject_GenericGetAttr, /* tp_getattro */
1082 0, /* tp_setattro */
1083 0, /* tp_as_buffer */
1084 Py_TPFLAGS_DEFAULT, /* tp_flags */
1085 0, /* tp_doc */
1086 0, /* tp_traverse */
1087 0, /* tp_clear */
1088 0, /* tp_richcompare */
1089 0, /* tp_weaklistoffset */
1090 PyObject_SelfIter, /* tp_iter */
1091 (iternextfunc)formatteriter_next, /* tp_iternext */
1092 formatteriter_methods, /* tp_methods */
Eric Smithf6db4092007-08-27 23:52:26 +00001093 0,
1094};
1095
1096/* unicode_formatter_parser is used to implement
1097 string.Formatter.vformat. it parses a string and returns tuples
1098 describing the parsed elements. It's a wrapper around
1099 stringlib/string_format.h's MarkupIterator */
1100static PyObject *
Victor Stinner7931d9a2011-11-04 00:22:48 +01001101formatter_parser(PyObject *ignored, PyObject *self)
Eric Smithf6db4092007-08-27 23:52:26 +00001102{
1103 formatteriterobject *it;
1104
Eric Smitha1eac722011-01-29 11:15:35 +00001105 if (!PyUnicode_Check(self)) {
1106 PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1107 return NULL;
1108 }
1109
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001110 if (PyUnicode_READY(self) == -1)
1111 return NULL;
1112
Eric Smithf6db4092007-08-27 23:52:26 +00001113 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1114 if (it == NULL)
1115 return NULL;
1116
1117 /* take ownership, give the object to the iterator */
1118 Py_INCREF(self);
1119 it->str = self;
1120
1121 /* initialize the contained MarkupIterator */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001122 MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self));
Eric Smithf6db4092007-08-27 23:52:26 +00001123 return (PyObject *)it;
1124}
1125
1126
1127/************************************************************************/
1128/*********** fieldnameiterator ******************************************/
1129/************************************************************************/
1130
1131
1132/* This is used to implement string.Formatter.vparse(). It parses the
1133 field name into attribute and item values. It's a Python-callable
1134 wrapper around FieldNameIterator */
1135
1136typedef struct {
1137 PyObject_HEAD
Victor Stinner7931d9a2011-11-04 00:22:48 +01001138 PyObject *str;
Eric Smithf6db4092007-08-27 23:52:26 +00001139 FieldNameIterator it_field;
1140} fieldnameiterobject;
1141
1142static void
1143fieldnameiter_dealloc(fieldnameiterobject *it)
1144{
1145 Py_XDECREF(it->str);
1146 PyObject_FREE(it);
1147}
1148
1149/* returns a tuple:
1150 (is_attr, value)
1151 is_attr is true if we used attribute syntax (e.g., '.foo')
1152 false if we used index syntax (e.g., '[foo]')
1153 value is an integer or string
1154*/
1155static PyObject *
1156fieldnameiter_next(fieldnameiterobject *it)
1157{
1158 int result;
1159 int is_attr;
1160 Py_ssize_t idx;
1161 SubString name;
1162
1163 result = FieldNameIterator_next(&it->it_field, &is_attr,
1164 &idx, &name);
Eric Smith0cb431c2007-08-28 01:07:27 +00001165 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +00001166 /* if 0, error has already been set, if 1, iterator is empty */
1167 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +00001168 else {
Eric Smithf6db4092007-08-27 23:52:26 +00001169 PyObject* result = NULL;
1170 PyObject* is_attr_obj = NULL;
1171 PyObject* obj = NULL;
1172
1173 is_attr_obj = PyBool_FromLong(is_attr);
1174 if (is_attr_obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001175 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001176
1177 /* either an integer or a string */
1178 if (idx != -1)
Christian Heimes217cfd12007-12-02 14:31:20 +00001179 obj = PyLong_FromSsize_t(idx);
Eric Smithf6db4092007-08-27 23:52:26 +00001180 else
1181 obj = SubString_new_object(&name);
1182 if (obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001183 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001184
1185 /* return a tuple of values */
1186 result = PyTuple_Pack(2, is_attr_obj, obj);
Eric Smithf6db4092007-08-27 23:52:26 +00001187
Eric Smith625cbf22007-08-29 03:22:59 +00001188 done:
Eric Smithf6db4092007-08-27 23:52:26 +00001189 Py_XDECREF(is_attr_obj);
1190 Py_XDECREF(obj);
Eric Smith625cbf22007-08-29 03:22:59 +00001191 return result;
Eric Smithf6db4092007-08-27 23:52:26 +00001192 }
Eric Smithf6db4092007-08-27 23:52:26 +00001193}
1194
1195static PyMethodDef fieldnameiter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001196 {NULL, NULL} /* sentinel */
Eric Smithf6db4092007-08-27 23:52:26 +00001197};
1198
1199static PyTypeObject PyFieldNameIter_Type = {
1200 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001201 "fieldnameiterator", /* tp_name */
1202 sizeof(fieldnameiterobject), /* tp_basicsize */
1203 0, /* tp_itemsize */
Eric Smithf6db4092007-08-27 23:52:26 +00001204 /* methods */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001205 (destructor)fieldnameiter_dealloc, /* tp_dealloc */
1206 0, /* tp_print */
1207 0, /* tp_getattr */
1208 0, /* tp_setattr */
1209 0, /* tp_reserved */
1210 0, /* tp_repr */
1211 0, /* tp_as_number */
1212 0, /* tp_as_sequence */
1213 0, /* tp_as_mapping */
1214 0, /* tp_hash */
1215 0, /* tp_call */
1216 0, /* tp_str */
1217 PyObject_GenericGetAttr, /* tp_getattro */
1218 0, /* tp_setattro */
1219 0, /* tp_as_buffer */
1220 Py_TPFLAGS_DEFAULT, /* tp_flags */
1221 0, /* tp_doc */
1222 0, /* tp_traverse */
1223 0, /* tp_clear */
1224 0, /* tp_richcompare */
1225 0, /* tp_weaklistoffset */
1226 PyObject_SelfIter, /* tp_iter */
1227 (iternextfunc)fieldnameiter_next, /* tp_iternext */
1228 fieldnameiter_methods, /* tp_methods */
Eric Smithf6db4092007-08-27 23:52:26 +00001229 0};
1230
1231/* unicode_formatter_field_name_split is used to implement
1232 string.Formatter.vformat. it takes an PEP 3101 "field name", and
1233 returns a tuple of (first, rest): "first", the part before the
1234 first '.' or '['; and "rest", an iterator for the rest of the field
1235 name. it's a wrapper around stringlib/string_format.h's
1236 field_name_split. The iterator it returns is a
1237 FieldNameIterator */
1238static PyObject *
Victor Stinner7931d9a2011-11-04 00:22:48 +01001239formatter_field_name_split(PyObject *ignored, PyObject *self)
Eric Smithf6db4092007-08-27 23:52:26 +00001240{
1241 SubString first;
1242 Py_ssize_t first_idx;
1243 fieldnameiterobject *it;
1244
1245 PyObject *first_obj = NULL;
1246 PyObject *result = NULL;
1247
Eric Smitha1eac722011-01-29 11:15:35 +00001248 if (!PyUnicode_Check(self)) {
1249 PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1250 return NULL;
1251 }
1252
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001253 if (PyUnicode_READY(self) == -1)
1254 return NULL;
1255
Eric Smithf6db4092007-08-27 23:52:26 +00001256 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1257 if (it == NULL)
1258 return NULL;
1259
1260 /* take ownership, give the object to the iterator. this is
1261 just to keep the field_name alive */
1262 Py_INCREF(self);
1263 it->str = self;
1264
Eric Smith8ec90442009-03-14 12:29:34 +00001265 /* Pass in auto_number = NULL. We'll return an empty string for
1266 first_obj in that case. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001267 if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self),
Eric Smith8ec90442009-03-14 12:29:34 +00001268 &first, &first_idx, &it->it_field, NULL))
Eric Smith625cbf22007-08-29 03:22:59 +00001269 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001270
Eric Smith0cb431c2007-08-28 01:07:27 +00001271 /* first becomes an integer, if possible; else a string */
Eric Smithf6db4092007-08-27 23:52:26 +00001272 if (first_idx != -1)
Christian Heimes217cfd12007-12-02 14:31:20 +00001273 first_obj = PyLong_FromSsize_t(first_idx);
Eric Smithf6db4092007-08-27 23:52:26 +00001274 else
1275 /* convert "first" into a string object */
1276 first_obj = SubString_new_object(&first);
1277 if (first_obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001278 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001279
1280 /* return a tuple of values */
1281 result = PyTuple_Pack(2, first_obj, it);
1282
Eric Smith625cbf22007-08-29 03:22:59 +00001283done:
Eric Smithf6db4092007-08-27 23:52:26 +00001284 Py_XDECREF(it);
1285 Py_XDECREF(first_obj);
1286 return result;
1287}