blob: d72e47d348aa81a6738d461de7723d0f5e803ddd [file] [log] [blame]
Eric Smith8c663262007-08-25 02:26:07 +00001/*
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002 unicode_format.h -- implementation of str.format().
Eric Smith8c663262007-08-25 02:26:07 +00003*/
4
Eric Smith8c663262007-08-25 02:26:07 +00005/************************************************************************/
6/*********** Global data structures and forward declarations *********/
7/************************************************************************/
8
9/*
10 A SubString consists of the characters between two string or
11 unicode pointers.
12*/
13typedef struct {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020014 PyObject *str; /* borrowed reference */
15 Py_ssize_t start, end;
Eric Smith8c663262007-08-25 02:26:07 +000016} SubString;
17
18
/* Keep track if we're auto-numbering fields */
typedef enum {
    ANS_INIT = 0,   /* no numeric field seen yet */
    ANS_AUTO = 1,   /* fields are numbered automatically ("{}") */
    ANS_MANUAL = 2  /* fields carry explicit numbers ("{0}") */
} AutoNumberState;
24
25/* Keeps track of our auto-numbering state, and which number field we're on */
26typedef struct {
27 AutoNumberState an_state;
28 int an_field_number;
29} AutoNumber;
30
31
Eric Smith8c663262007-08-25 02:26:07 +000032/* forward declaration for recursion */
33static PyObject *
34build_string(SubString *input, PyObject *args, PyObject *kwargs,
Eric Smith8ec90442009-03-14 12:29:34 +000035 int recursion_depth, AutoNumber *auto_number);
Eric Smith8c663262007-08-25 02:26:07 +000036
37
38
39/************************************************************************/
40/************************** Utility functions ************************/
41/************************************************************************/
42
Eric Smith8ec90442009-03-14 12:29:34 +000043static void
44AutoNumber_Init(AutoNumber *auto_number)
45{
46 auto_number->an_state = ANS_INIT;
47 auto_number->an_field_number = 0;
48}
49
Eric Smith8c663262007-08-25 02:26:07 +000050/* fill in a SubString from a pointer and length */
51Py_LOCAL_INLINE(void)
Antoine Pitroudbf697a2011-10-06 15:34:41 +020052SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end)
Eric Smith8c663262007-08-25 02:26:07 +000053{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020054 str->str = s;
55 str->start = start;
56 str->end = end;
Eric Smith8c663262007-08-25 02:26:07 +000057}
58
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020059/* return a new string. if str->str is NULL, return None */
Eric Smith8c663262007-08-25 02:26:07 +000060Py_LOCAL_INLINE(PyObject *)
61SubString_new_object(SubString *str)
62{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020063 if (str->str == NULL) {
Eric Smith625cbf22007-08-29 03:22:59 +000064 Py_INCREF(Py_None);
65 return Py_None;
66 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020067 return PyUnicode_Substring(str->str, str->start, str->end);
Eric Smith625cbf22007-08-29 03:22:59 +000068}
69
Eric V. Smithab2aa6d2015-08-26 14:10:32 -040070/* return a new string. if str->str is NULL, return a new empty string */
Eric Smith625cbf22007-08-29 03:22:59 +000071Py_LOCAL_INLINE(PyObject *)
72SubString_new_object_or_empty(SubString *str)
73{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020074 if (str->str == NULL) {
Victor Stinnerb37b1742011-12-01 03:18:59 +010075 return PyUnicode_New(0, 0);
Eric Smith625cbf22007-08-29 03:22:59 +000076 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020077 return SubString_new_object(str);
Eric Smith8c663262007-08-25 02:26:07 +000078}
79
Eric Smith8ec90442009-03-14 12:29:34 +000080/* Return 1 if an error has been detected switching between automatic
81 field numbering and manual field specification, else return 0. Set
82 ValueError on error. */
83static int
84autonumber_state_error(AutoNumberState state, int field_name_is_empty)
85{
86 if (state == ANS_MANUAL) {
87 if (field_name_is_empty) {
88 PyErr_SetString(PyExc_ValueError, "cannot switch from "
89 "manual field specification to "
90 "automatic field numbering");
91 return 1;
92 }
93 }
94 else {
95 if (!field_name_is_empty) {
96 PyErr_SetString(PyExc_ValueError, "cannot switch from "
97 "automatic field numbering to "
98 "manual field specification");
99 return 1;
100 }
101 }
102 return 0;
103}
104
105
Eric Smith8c663262007-08-25 02:26:07 +0000106/************************************************************************/
Eric Smith8c663262007-08-25 02:26:07 +0000107/*********** Format string parsing -- integers and identifiers *********/
108/************************************************************************/
109
Eric Smith7ade6482007-08-26 22:27:13 +0000110static Py_ssize_t
111get_integer(const SubString *str)
Eric Smith8c663262007-08-25 02:26:07 +0000112{
Eric Smith7ade6482007-08-26 22:27:13 +0000113 Py_ssize_t accumulator = 0;
114 Py_ssize_t digitval;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200115 Py_ssize_t i;
Eric Smith8c663262007-08-25 02:26:07 +0000116
Eric Smith7ade6482007-08-26 22:27:13 +0000117 /* empty string is an error */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200118 if (str->start >= str->end)
Eric Smith7ade6482007-08-26 22:27:13 +0000119 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000120
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200121 for (i = str->start; i < str->end; i++) {
122 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i));
Eric Smith8c663262007-08-25 02:26:07 +0000123 if (digitval < 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000124 return -1;
Eric Smith8c663262007-08-25 02:26:07 +0000125 /*
Mark Dickinsonc7d93b72011-09-25 15:34:32 +0100126 Detect possible overflow before it happens:
127
128 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
129 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
Eric Smith8c663262007-08-25 02:26:07 +0000130 */
Mark Dickinsonc7d93b72011-09-25 15:34:32 +0100131 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
Eric Smith8c663262007-08-25 02:26:07 +0000132 PyErr_Format(PyExc_ValueError,
133 "Too many decimal digits in format string");
134 return -1;
135 }
Mark Dickinsonc7d93b72011-09-25 15:34:32 +0100136 accumulator = accumulator * 10 + digitval;
Eric Smith8c663262007-08-25 02:26:07 +0000137 }
Eric Smith7ade6482007-08-26 22:27:13 +0000138 return accumulator;
Eric Smith8c663262007-08-25 02:26:07 +0000139}
140
141/************************************************************************/
142/******** Functions to get field objects and specification strings ******/
143/************************************************************************/
144
Eric Smith7ade6482007-08-26 22:27:13 +0000145/* do the equivalent of obj.name */
Eric Smith8c663262007-08-25 02:26:07 +0000146static PyObject *
Eric Smith7ade6482007-08-26 22:27:13 +0000147getattr(PyObject *obj, SubString *name)
Eric Smith8c663262007-08-25 02:26:07 +0000148{
Eric Smith7ade6482007-08-26 22:27:13 +0000149 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000150 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000151 if (str == NULL)
152 return NULL;
153 newobj = PyObject_GetAttr(obj, str);
154 Py_DECREF(str);
155 return newobj;
Eric Smith8c663262007-08-25 02:26:07 +0000156}
157
Eric Smith7ade6482007-08-26 22:27:13 +0000158/* do the equivalent of obj[idx], where obj is a sequence */
159static PyObject *
160getitem_sequence(PyObject *obj, Py_ssize_t idx)
161{
162 return PySequence_GetItem(obj, idx);
163}
164
165/* do the equivalent of obj[idx], where obj is not a sequence */
166static PyObject *
167getitem_idx(PyObject *obj, Py_ssize_t idx)
168{
169 PyObject *newobj;
Christian Heimes217cfd12007-12-02 14:31:20 +0000170 PyObject *idx_obj = PyLong_FromSsize_t(idx);
Eric Smith7ade6482007-08-26 22:27:13 +0000171 if (idx_obj == NULL)
172 return NULL;
173 newobj = PyObject_GetItem(obj, idx_obj);
174 Py_DECREF(idx_obj);
175 return newobj;
176}
177
178/* do the equivalent of obj[name] */
179static PyObject *
180getitem_str(PyObject *obj, SubString *name)
181{
182 PyObject *newobj;
Eric Smith7a6dd292007-08-27 23:30:47 +0000183 PyObject *str = SubString_new_object(name);
Eric Smith7ade6482007-08-26 22:27:13 +0000184 if (str == NULL)
185 return NULL;
186 newobj = PyObject_GetItem(obj, str);
187 Py_DECREF(str);
188 return newobj;
189}
190
191typedef struct {
192 /* the entire string we're parsing. we assume that someone else
193 is managing its lifetime, and that it will exist for the
194 lifetime of the iterator. can be empty */
195 SubString str;
196
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200197 /* index to where we are inside field_name */
198 Py_ssize_t index;
Eric Smith7ade6482007-08-26 22:27:13 +0000199} FieldNameIterator;
200
201
202static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200203FieldNameIterator_init(FieldNameIterator *self, PyObject *s,
204 Py_ssize_t start, Py_ssize_t end)
Eric Smith7ade6482007-08-26 22:27:13 +0000205{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200206 SubString_init(&self->str, s, start, end);
207 self->index = start;
Eric Smith7ade6482007-08-26 22:27:13 +0000208 return 1;
209}
210
211static int
212_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
213{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200214 Py_UCS4 c;
Eric Smith7ade6482007-08-26 22:27:13 +0000215
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200216 name->str = self->str.str;
217 name->start = self->index;
Eric Smith7ade6482007-08-26 22:27:13 +0000218
219 /* return everything until '.' or '[' */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200220 while (self->index < self->str.end) {
221 c = PyUnicode_READ_CHAR(self->str.str, self->index++);
222 switch (c) {
Eric Smith7ade6482007-08-26 22:27:13 +0000223 case '[':
224 case '.':
225 /* backup so that we this character will be seen next time */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200226 self->index--;
Eric Smith7ade6482007-08-26 22:27:13 +0000227 break;
228 default:
229 continue;
230 }
231 break;
232 }
233 /* end of string is okay */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200234 name->end = self->index;
Eric Smith7ade6482007-08-26 22:27:13 +0000235 return 1;
236}
237
238static int
239_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
240{
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000241 int bracket_seen = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200242 Py_UCS4 c;
Eric Smith7ade6482007-08-26 22:27:13 +0000243
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200244 name->str = self->str.str;
245 name->start = self->index;
Eric Smith7ade6482007-08-26 22:27:13 +0000246
247 /* return everything until ']' */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200248 while (self->index < self->str.end) {
249 c = PyUnicode_READ_CHAR(self->str.str, self->index++);
250 switch (c) {
Eric Smith7ade6482007-08-26 22:27:13 +0000251 case ']':
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000252 bracket_seen = 1;
Eric Smith7ade6482007-08-26 22:27:13 +0000253 break;
254 default:
255 continue;
256 }
257 break;
258 }
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000259 /* make sure we ended with a ']' */
260 if (!bracket_seen) {
261 PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
262 return 0;
263 }
264
Eric Smith7ade6482007-08-26 22:27:13 +0000265 /* end of string is okay */
266 /* don't include the ']' */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200267 name->end = self->index-1;
Eric Smith7ade6482007-08-26 22:27:13 +0000268 return 1;
269}
270
271/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
272static int
273FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
274 Py_ssize_t *name_idx, SubString *name)
275{
276 /* check at end of input */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200277 if (self->index >= self->str.end)
Eric Smith7ade6482007-08-26 22:27:13 +0000278 return 1;
279
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200280 switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) {
Eric Smith7ade6482007-08-26 22:27:13 +0000281 case '.':
282 *is_attribute = 1;
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000283 if (_FieldNameIterator_attr(self, name) == 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000284 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000285 *name_idx = -1;
286 break;
287 case '[':
288 *is_attribute = 0;
Eric Smith4cb4e4e2007-09-03 08:40:29 +0000289 if (_FieldNameIterator_item(self, name) == 0)
Eric Smith7ade6482007-08-26 22:27:13 +0000290 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000291 *name_idx = get_integer(name);
Benjamin Peterson59a1b2f2010-06-07 22:31:26 +0000292 if (*name_idx == -1 && PyErr_Occurred())
293 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000294 break;
295 default:
Eric Smith41669ca2009-05-23 14:23:22 +0000296 /* Invalid character follows ']' */
297 PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
298 "follow ']' in format field specifier");
Eric Smith7ade6482007-08-26 22:27:13 +0000299 return 0;
300 }
301
302 /* empty string is an error */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200303 if (name->start == name->end) {
Eric Smith7ade6482007-08-26 22:27:13 +0000304 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
305 return 0;
306 }
307
308 return 2;
309}
310
311
312/* input: field_name
313 output: 'first' points to the part before the first '[' or '.'
314 'first_idx' is -1 if 'first' is not an integer, otherwise
315 it's the value of first converted to an integer
316 'rest' is an iterator to return the rest
317*/
318static int
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200319field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first,
Eric Smith8ec90442009-03-14 12:29:34 +0000320 Py_ssize_t *first_idx, FieldNameIterator *rest,
321 AutoNumber *auto_number)
Eric Smith7ade6482007-08-26 22:27:13 +0000322{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200323 Py_UCS4 c;
324 Py_ssize_t i = start;
Eric Smith8ec90442009-03-14 12:29:34 +0000325 int field_name_is_empty;
326 int using_numeric_index;
Eric Smith7ade6482007-08-26 22:27:13 +0000327
328 /* find the part up until the first '.' or '[' */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200329 while (i < end) {
330 switch (c = PyUnicode_READ_CHAR(str, i++)) {
Eric Smith7ade6482007-08-26 22:27:13 +0000331 case '[':
332 case '.':
333 /* backup so that we this character is available to the
334 "rest" iterator */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200335 i--;
Eric Smith7ade6482007-08-26 22:27:13 +0000336 break;
337 default:
338 continue;
339 }
340 break;
341 }
342
343 /* set up the return values */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200344 SubString_init(first, str, start, i);
345 FieldNameIterator_init(rest, str, i, end);
Eric Smith7ade6482007-08-26 22:27:13 +0000346
347 /* see if "first" is an integer, in which case it's used as an index */
348 *first_idx = get_integer(first);
Benjamin Peterson59a1b2f2010-06-07 22:31:26 +0000349 if (*first_idx == -1 && PyErr_Occurred())
350 return 0;
Eric Smith7ade6482007-08-26 22:27:13 +0000351
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200352 field_name_is_empty = first->start >= first->end;
Eric Smith8ec90442009-03-14 12:29:34 +0000353
354 /* If the field name is omitted or if we have a numeric index
355 specified, then we're doing numeric indexing into args. */
356 using_numeric_index = field_name_is_empty || *first_idx != -1;
357
358 /* We always get here exactly one time for each field we're
359 processing. And we get here in field order (counting by left
360 braces). So this is the perfect place to handle automatic field
361 numbering if the field name is omitted. */
362
363 /* Check if we need to do the auto-numbering. It's not needed if
364 we're called from string.Format routines, because it's handled
365 in that class by itself. */
366 if (auto_number) {
367 /* Initialize our auto numbering state if this is the first
368 time we're either auto-numbering or manually numbering. */
369 if (auto_number->an_state == ANS_INIT && using_numeric_index)
370 auto_number->an_state = field_name_is_empty ?
371 ANS_AUTO : ANS_MANUAL;
372
373 /* Make sure our state is consistent with what we're doing
374 this time through. Only check if we're using a numeric
375 index. */
376 if (using_numeric_index)
377 if (autonumber_state_error(auto_number->an_state,
378 field_name_is_empty))
379 return 0;
380 /* Zero length field means we want to do auto-numbering of the
381 fields. */
382 if (field_name_is_empty)
383 *first_idx = (auto_number->an_field_number)++;
Eric Smith7ade6482007-08-26 22:27:13 +0000384 }
385
386 return 1;
Eric Smith7ade6482007-08-26 22:27:13 +0000387}
388
389
Eric Smith8c663262007-08-25 02:26:07 +0000390/*
391 get_field_object returns the object inside {}, before the
392 format_spec. It handles getindex and getattr lookups and consumes
393 the entire input string.
394*/
395static PyObject *
Eric Smith8ec90442009-03-14 12:29:34 +0000396get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
397 AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000398{
Eric Smith7ade6482007-08-26 22:27:13 +0000399 PyObject *obj = NULL;
400 int ok;
401 int is_attribute;
402 SubString name;
403 SubString first;
Eric Smith8c663262007-08-25 02:26:07 +0000404 Py_ssize_t index;
Eric Smith7ade6482007-08-26 22:27:13 +0000405 FieldNameIterator rest;
Eric Smith8c663262007-08-25 02:26:07 +0000406
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200407 if (!field_name_split(input->str, input->start, input->end, &first,
Eric Smith8ec90442009-03-14 12:29:34 +0000408 &index, &rest, auto_number)) {
Eric Smith7ade6482007-08-26 22:27:13 +0000409 goto error;
410 }
Eric Smith8c663262007-08-25 02:26:07 +0000411
Eric Smith7ade6482007-08-26 22:27:13 +0000412 if (index == -1) {
413 /* look up in kwargs */
Eric Smith7a6dd292007-08-27 23:30:47 +0000414 PyObject *key = SubString_new_object(&first);
Eric Smith7ade6482007-08-26 22:27:13 +0000415 if (key == NULL)
416 goto error;
Eric Smith27bbca62010-11-04 17:06:58 +0000417
418 /* Use PyObject_GetItem instead of PyDict_GetItem because this
419 code is no longer just used with kwargs. It might be passed
420 a non-dict when called through format_map. */
421 if ((kwargs == NULL) || (obj = PyObject_GetItem(kwargs, key)) == NULL) {
Eric Smith11529192007-09-04 23:04:22 +0000422 PyErr_SetObject(PyExc_KeyError, key);
Eric Smith7ade6482007-08-26 22:27:13 +0000423 Py_DECREF(key);
424 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000425 }
Neal Norwitz8a4eb292007-08-27 07:24:17 +0000426 Py_DECREF(key);
Eric Smith0cb431c2007-08-28 01:07:27 +0000427 }
428 else {
Eric V. Smith12ebefc2011-07-18 14:03:41 -0400429 /* If args is NULL, we have a format string with a positional field
430 with only kwargs to retrieve it from. This can only happen when
431 used with format_map(), where positional arguments are not
432 allowed. */
433 if (args == NULL) {
434 PyErr_SetString(PyExc_ValueError, "Format string contains "
435 "positional fields");
436 goto error;
437 }
438
Eric Smith7ade6482007-08-26 22:27:13 +0000439 /* look up in args */
440 obj = PySequence_GetItem(args, index);
Eric Smith11529192007-09-04 23:04:22 +0000441 if (obj == NULL)
Eric Smith7ade6482007-08-26 22:27:13 +0000442 goto error;
Eric Smith8c663262007-08-25 02:26:07 +0000443 }
Eric Smith7ade6482007-08-26 22:27:13 +0000444
445 /* iterate over the rest of the field_name */
446 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
447 &name)) == 2) {
448 PyObject *tmp;
449
450 if (is_attribute)
451 /* getattr lookup "." */
452 tmp = getattr(obj, &name);
453 else
454 /* getitem lookup "[]" */
455 if (index == -1)
456 tmp = getitem_str(obj, &name);
457 else
458 if (PySequence_Check(obj))
459 tmp = getitem_sequence(obj, index);
460 else
461 /* not a sequence */
462 tmp = getitem_idx(obj, index);
463 if (tmp == NULL)
464 goto error;
465
466 /* assign to obj */
467 Py_DECREF(obj);
468 obj = tmp;
Eric Smith8c663262007-08-25 02:26:07 +0000469 }
Eric Smith7ade6482007-08-26 22:27:13 +0000470 /* end of iterator, this is the non-error case */
471 if (ok == 1)
472 return obj;
473error:
474 Py_XDECREF(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000475 return NULL;
476}
477
478/************************************************************************/
479/***************** Field rendering functions **************************/
480/************************************************************************/
481
482/*
483 render_field() is the main function in this section. It takes the
484 field object and field specification string generated by
485 get_field_and_spec, and renders the field into the output string.
486
Eric Smith8c663262007-08-25 02:26:07 +0000487 render_field calls fieldobj.__format__(format_spec) method, and
488 appends to the output.
489*/
490static int
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200491render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer)
Eric Smith8c663262007-08-25 02:26:07 +0000492{
493 int ok = 0;
Eric Smith8fd3eba2008-02-17 19:48:00 +0000494 PyObject *result = NULL;
Eric Smith1d138f12008-05-31 01:40:08 +0000495 PyObject *format_spec_object = NULL;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200496 int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
497 int err;
Victor Stinner7931d9a2011-11-04 00:22:48 +0100498
Eric Smith1d138f12008-05-31 01:40:08 +0000499 /* If we know the type exactly, skip the lookup of __format__ and just
500 call the formatter directly. */
501 if (PyUnicode_CheckExact(fieldobj))
Victor Stinnerd3f08822012-05-29 12:57:52 +0200502 formatter = _PyUnicode_FormatAdvancedWriter;
Eric Smith1d138f12008-05-31 01:40:08 +0000503 else if (PyLong_CheckExact(fieldobj))
Victor Stinnerd3f08822012-05-29 12:57:52 +0200504 formatter = _PyLong_FormatAdvancedWriter;
Eric Smith1d138f12008-05-31 01:40:08 +0000505 else if (PyFloat_CheckExact(fieldobj))
Victor Stinnerd3f08822012-05-29 12:57:52 +0200506 formatter = _PyFloat_FormatAdvancedWriter;
507 else if (PyComplex_CheckExact(fieldobj))
508 formatter = _PyComplex_FormatAdvancedWriter;
Eric Smithba8c0282008-06-02 14:57:32 +0000509
510 if (formatter) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000511 /* we know exactly which formatter will be called when __format__ is
512 looked up, so call it directly, instead. */
Victor Stinnerd3f08822012-05-29 12:57:52 +0200513 err = formatter(writer, fieldobj, format_spec->str,
514 format_spec->start, format_spec->end);
515 return (err == 0);
Eric Smithba8c0282008-06-02 14:57:32 +0000516 }
Eric Smith1d138f12008-05-31 01:40:08 +0000517 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000518 /* We need to create an object out of the pointers we have, because
519 __format__ takes a string/unicode object for format_spec. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200520 if (format_spec->str)
521 format_spec_object = PyUnicode_Substring(format_spec->str,
522 format_spec->start,
523 format_spec->end);
524 else
525 format_spec_object = PyUnicode_New(0, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000526 if (format_spec_object == NULL)
527 goto done;
Eric Smith1d138f12008-05-31 01:40:08 +0000528
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000529 result = PyObject_Format(fieldobj, format_spec_object);
Eric Smith1d138f12008-05-31 01:40:08 +0000530 }
Victor Stinneree4544c2012-05-09 22:24:08 +0200531 if (result == NULL)
532 goto done;
Eric Smith8c663262007-08-25 02:26:07 +0000533
Victor Stinnerd3f08822012-05-29 12:57:52 +0200534 if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
Victor Stinneree4544c2012-05-09 22:24:08 +0200535 goto done;
Victor Stinneree4544c2012-05-09 22:24:08 +0200536 ok = 1;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200537
Eric Smith8c663262007-08-25 02:26:07 +0000538done:
Eric Smith1d138f12008-05-31 01:40:08 +0000539 Py_XDECREF(format_spec_object);
Eric Smith8c663262007-08-25 02:26:07 +0000540 Py_XDECREF(result);
541 return ok;
542}
543
544static int
545parse_field(SubString *str, SubString *field_name, SubString *format_spec,
Benjamin Peterson4d944742013-05-17 18:22:31 -0500546 int *format_spec_needs_expanding, Py_UCS4 *conversion)
Eric Smith8c663262007-08-25 02:26:07 +0000547{
Eric Smith8ec90442009-03-14 12:29:34 +0000548 /* Note this function works if the field name is zero length,
549 which is good. Zero length field names are handled later, in
550 field_name_split. */
551
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200552 Py_UCS4 c = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000553
554 /* initialize these, as they may be empty */
555 *conversion = '\0';
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200556 SubString_init(format_spec, NULL, 0, 0);
Eric Smith8c663262007-08-25 02:26:07 +0000557
Eric Smith8ec90442009-03-14 12:29:34 +0000558 /* Search for the field name. it's terminated by the end of
559 the string, or a ':' or '!' */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200560 field_name->str = str->str;
561 field_name->start = str->start;
562 while (str->start < str->end) {
563 switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
Benjamin Peterson4d944742013-05-17 18:22:31 -0500564 case '{':
565 PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name");
566 return 0;
567 case '[':
568 for (; str->start < str->end; str->start++)
569 if (PyUnicode_READ_CHAR(str->str, str->start) == ']')
570 break;
571 continue;
572 case '}':
Eric Smith8c663262007-08-25 02:26:07 +0000573 case ':':
574 case '!':
575 break;
576 default:
577 continue;
578 }
579 break;
580 }
581
Benjamin Peterson4d944742013-05-17 18:22:31 -0500582 field_name->end = str->start - 1;
Eric Smith8c663262007-08-25 02:26:07 +0000583 if (c == '!' || c == ':') {
Benjamin Peterson4d944742013-05-17 18:22:31 -0500584 Py_ssize_t count;
Eric Smith8c663262007-08-25 02:26:07 +0000585 /* we have a format specifier and/or a conversion */
586 /* don't include the last character */
Eric Smith8c663262007-08-25 02:26:07 +0000587
588 /* see if there's a conversion specifier */
589 if (c == '!') {
590 /* there must be another character present */
Benjamin Peterson4d944742013-05-17 18:22:31 -0500591 if (str->start >= str->end) {
Eric Smith8c663262007-08-25 02:26:07 +0000592 PyErr_SetString(PyExc_ValueError,
Benjamin Peterson4d944742013-05-17 18:22:31 -0500593 "end of string while looking for conversion "
Eric Smith8c663262007-08-25 02:26:07 +0000594 "specifier");
595 return 0;
596 }
Benjamin Peterson4d944742013-05-17 18:22:31 -0500597 *conversion = PyUnicode_READ_CHAR(str->str, str->start++);
Eric Smith8c663262007-08-25 02:26:07 +0000598
Benjamin Peterson4d944742013-05-17 18:22:31 -0500599 if (str->start < str->end) {
600 c = PyUnicode_READ_CHAR(str->str, str->start++);
601 if (c == '}')
602 return 1;
Eric Smith8c663262007-08-25 02:26:07 +0000603 if (c != ':') {
604 PyErr_SetString(PyExc_ValueError,
Benjamin Peterson4d944742013-05-17 18:22:31 -0500605 "expected ':' after conversion specifier");
Eric Smith8c663262007-08-25 02:26:07 +0000606 return 0;
607 }
608 }
609 }
Benjamin Peterson4d944742013-05-17 18:22:31 -0500610 format_spec->str = str->str;
611 format_spec->start = str->start;
612 count = 1;
613 while (str->start < str->end) {
614 switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
615 case '{':
616 *format_spec_needs_expanding = 1;
617 count++;
618 break;
619 case '}':
620 count--;
621 if (count == 0) {
622 format_spec->end = str->start - 1;
623 return 1;
624 }
625 break;
626 default:
627 break;
628 }
629 }
630
631 PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec");
632 return 0;
Eric Smith0cb431c2007-08-28 01:07:27 +0000633 }
Benjamin Peterson4d944742013-05-17 18:22:31 -0500634 else if (c != '}') {
635 PyErr_SetString(PyExc_ValueError, "expected '}' before end of string");
636 return 0;
637 }
Eric Smith8ec90442009-03-14 12:29:34 +0000638
639 return 1;
Eric Smith8c663262007-08-25 02:26:07 +0000640}
641
642/************************************************************************/
643/******* Output string allocation and escape-to-markup processing ******/
644/************************************************************************/
645
646/* MarkupIterator breaks the string into pieces of either literal
647 text, or things inside {} that need to be marked up. it is
648 designed to make it easy to wrap a Python iterator around it, for
649 use with the Formatter class */
650
651typedef struct {
652 SubString str;
Eric Smith8c663262007-08-25 02:26:07 +0000653} MarkupIterator;
654
655static int
Victor Stinner7931d9a2011-11-04 00:22:48 +0100656MarkupIterator_init(MarkupIterator *self, PyObject *str,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200657 Py_ssize_t start, Py_ssize_t end)
Eric Smith8c663262007-08-25 02:26:07 +0000658{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200659 SubString_init(&self->str, str, start, end);
Eric Smith8c663262007-08-25 02:26:07 +0000660 return 1;
661}
662
663/* returns 0 on error, 1 on non-error termination, and 2 if it got a
664 string (or something to be expanded) */
665static int
Eric Smith625cbf22007-08-29 03:22:59 +0000666MarkupIterator_next(MarkupIterator *self, SubString *literal,
Eric Smith8ec90442009-03-14 12:29:34 +0000667 int *field_present, SubString *field_name,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200668 SubString *format_spec, Py_UCS4 *conversion,
Eric Smith8c663262007-08-25 02:26:07 +0000669 int *format_spec_needs_expanding)
670{
Benjamin Peterson4d944742013-05-17 18:22:31 -0500671 int at_end;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200672 Py_UCS4 c = 0;
673 Py_ssize_t start;
Eric Smith8c663262007-08-25 02:26:07 +0000674 Py_ssize_t len;
Eric Smith625cbf22007-08-29 03:22:59 +0000675 int markup_follows = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000676
Eric Smith625cbf22007-08-29 03:22:59 +0000677 /* initialize all of the output variables */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200678 SubString_init(literal, NULL, 0, 0);
679 SubString_init(field_name, NULL, 0, 0);
680 SubString_init(format_spec, NULL, 0, 0);
Eric Smith625cbf22007-08-29 03:22:59 +0000681 *conversion = '\0';
Eric Smith8c663262007-08-25 02:26:07 +0000682 *format_spec_needs_expanding = 0;
Eric Smith8ec90442009-03-14 12:29:34 +0000683 *field_present = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000684
Eric Smith625cbf22007-08-29 03:22:59 +0000685 /* No more input, end of iterator. This is the normal exit
686 path. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200687 if (self->str.start >= self->str.end)
Eric Smith8c663262007-08-25 02:26:07 +0000688 return 1;
689
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200690 start = self->str.start;
Eric Smith8c663262007-08-25 02:26:07 +0000691
Eric Smith625cbf22007-08-29 03:22:59 +0000692 /* First read any literal text. Read until the end of string, an
693 escaped '{' or '}', or an unescaped '{'. In order to never
694 allocate memory and so I can just pass pointers around, if
695 there's an escaped '{' or '}' then we'll return the literal
696 including the brace, but no format object. The next time
697 through, we'll return the rest of the literal, skipping past
698 the second consecutive brace. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200699 while (self->str.start < self->str.end) {
700 switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
Eric Smith625cbf22007-08-29 03:22:59 +0000701 case '{':
702 case '}':
703 markup_follows = 1;
704 break;
705 default:
706 continue;
Eric Smith8c663262007-08-25 02:26:07 +0000707 }
Eric Smith625cbf22007-08-29 03:22:59 +0000708 break;
Eric Smith0cb431c2007-08-28 01:07:27 +0000709 }
Eric Smith625cbf22007-08-29 03:22:59 +0000710
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200711 at_end = self->str.start >= self->str.end;
712 len = self->str.start - start;
Eric Smith625cbf22007-08-29 03:22:59 +0000713
Victor Stinner7931d9a2011-11-04 00:22:48 +0100714 if ((c == '}') && (at_end ||
715 (c != PyUnicode_READ_CHAR(self->str.str,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200716 self->str.start)))) {
Eric Smith625cbf22007-08-29 03:22:59 +0000717 PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
718 "in format string");
719 return 0;
720 }
721 if (at_end && c == '{') {
722 PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
723 "in format string");
724 return 0;
725 }
726 if (!at_end) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200727 if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) {
Eric Smith625cbf22007-08-29 03:22:59 +0000728 /* escaped } or {, skip it in the input. there is no
729 markup object following us, just this literal text */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200730 self->str.start++;
Eric Smith625cbf22007-08-29 03:22:59 +0000731 markup_follows = 0;
732 }
733 else
734 len--;
735 }
736
737 /* record the literal text */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200738 literal->str = self->str.str;
739 literal->start = start;
Eric Smith625cbf22007-08-29 03:22:59 +0000740 literal->end = start + len;
741
742 if (!markup_follows)
743 return 2;
744
Benjamin Peterson4d944742013-05-17 18:22:31 -0500745 /* this is markup; parse the field */
Eric Smith8ec90442009-03-14 12:29:34 +0000746 *field_present = 1;
Benjamin Peterson4d944742013-05-17 18:22:31 -0500747 if (!parse_field(&self->str, field_name, format_spec,
748 format_spec_needs_expanding, conversion))
749 return 0;
750 return 2;
Eric Smith8c663262007-08-25 02:26:07 +0000751}
752
753
754/* do the !r or !s conversion on obj */
755static PyObject *
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200756do_conversion(PyObject *obj, Py_UCS4 conversion)
Eric Smith8c663262007-08-25 02:26:07 +0000757{
758 /* XXX in pre-3.0, do we need to convert this to unicode, since it
759 might have returned a string? */
760 switch (conversion) {
761 case 'r':
762 return PyObject_Repr(obj);
763 case 's':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200764 return PyObject_Str(obj);
Georg Brandl559e5d72008-06-11 18:37:52 +0000765 case 'a':
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200766 return PyObject_ASCII(obj);
Eric Smith8c663262007-08-25 02:26:07 +0000767 default:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000768 if (conversion > 32 && conversion < 127) {
769 /* It's the ASCII subrange; casting to char is safe
770 (assuming the execution character set is an ASCII
771 superset). */
772 PyErr_Format(PyExc_ValueError,
Martin v. Löwis5a6f4582008-04-07 03:22:07 +0000773 "Unknown conversion specifier %c",
774 (char)conversion);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000775 } else
776 PyErr_Format(PyExc_ValueError,
777 "Unknown conversion specifier \\x%x",
778 (unsigned int)conversion);
Eric Smith8c663262007-08-25 02:26:07 +0000779 return NULL;
780 }
781}
782
783/* given:
784
785 {field_name!conversion:format_spec}
786
787 compute the result and write it to output.
788 format_spec_needs_expanding is an optimization. if it's false,
789 just output the string directly, otherwise recursively expand the
Eric Smith8ec90442009-03-14 12:29:34 +0000790 format_spec string.
791
792 field_name is allowed to be zero length, in which case we
793 are doing auto field numbering.
794*/
Eric Smith8c663262007-08-25 02:26:07 +0000795
796static int
797output_markup(SubString *field_name, SubString *format_spec,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200798 int format_spec_needs_expanding, Py_UCS4 conversion,
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200799 _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs,
Eric Smith8ec90442009-03-14 12:29:34 +0000800 int recursion_depth, AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000801{
802 PyObject *tmp = NULL;
803 PyObject *fieldobj = NULL;
804 SubString expanded_format_spec;
805 SubString *actual_format_spec;
806 int result = 0;
807
808 /* convert field_name to an object */
Eric Smith8ec90442009-03-14 12:29:34 +0000809 fieldobj = get_field_object(field_name, args, kwargs, auto_number);
Eric Smith8c663262007-08-25 02:26:07 +0000810 if (fieldobj == NULL)
811 goto done;
812
813 if (conversion != '\0') {
814 tmp = do_conversion(fieldobj, conversion);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200815 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
Eric Smith8c663262007-08-25 02:26:07 +0000816 goto done;
817
818 /* do the assignment, transferring ownership: fieldobj = tmp */
819 Py_DECREF(fieldobj);
820 fieldobj = tmp;
821 tmp = NULL;
822 }
823
824 /* if needed, recurively compute the format_spec */
825 if (format_spec_needs_expanding) {
Eric Smith8ec90442009-03-14 12:29:34 +0000826 tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
827 auto_number);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200828 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
Eric Smith8c663262007-08-25 02:26:07 +0000829 goto done;
830
831 /* note that in the case we're expanding the format string,
832 tmp must be kept around until after the call to
833 render_field. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200834 SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp));
Eric Smith8c663262007-08-25 02:26:07 +0000835 actual_format_spec = &expanded_format_spec;
Eric Smith0cb431c2007-08-28 01:07:27 +0000836 }
837 else
Eric Smith8c663262007-08-25 02:26:07 +0000838 actual_format_spec = format_spec;
839
Victor Stinner202fdca2012-05-07 12:47:02 +0200840 if (render_field(fieldobj, actual_format_spec, writer) == 0)
Eric Smith8c663262007-08-25 02:26:07 +0000841 goto done;
842
843 result = 1;
844
845done:
846 Py_XDECREF(fieldobj);
847 Py_XDECREF(tmp);
848
849 return result;
850}
851
852/*
Eric Smith8fd3eba2008-02-17 19:48:00 +0000853 do_markup is the top-level loop for the format() method. It
Eric Smith8c663262007-08-25 02:26:07 +0000854 searches through the format string for escapes to markup codes, and
855 calls other functions to move non-markup text to the output,
856 and to perform the markup to the output.
857*/
858static int
859do_markup(SubString *input, PyObject *args, PyObject *kwargs,
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200860 _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000861{
862 MarkupIterator iter;
Eric Smith8c663262007-08-25 02:26:07 +0000863 int format_spec_needs_expanding;
864 int result;
Eric Smith8ec90442009-03-14 12:29:34 +0000865 int field_present;
Eric Smith625cbf22007-08-29 03:22:59 +0000866 SubString literal;
Eric Smith8c663262007-08-25 02:26:07 +0000867 SubString field_name;
868 SubString format_spec;
Victor Stinnercfc4c132013-04-03 01:48:39 +0200869 Py_UCS4 conversion;
Eric Smith8c663262007-08-25 02:26:07 +0000870
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200871 MarkupIterator_init(&iter, input->str, input->start, input->end);
Eric Smith8ec90442009-03-14 12:29:34 +0000872 while ((result = MarkupIterator_next(&iter, &literal, &field_present,
873 &field_name, &format_spec,
874 &conversion,
Eric Smith8c663262007-08-25 02:26:07 +0000875 &format_spec_needs_expanding)) == 2) {
Victor Stinnercfc4c132013-04-03 01:48:39 +0200876 if (literal.end != literal.start) {
877 if (!field_present && iter.str.start == iter.str.end)
878 writer->overallocate = 0;
879 if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
880 literal.start, literal.end) < 0)
Victor Stinneree4544c2012-05-09 22:24:08 +0200881 return 0;
Victor Stinneree4544c2012-05-09 22:24:08 +0200882 }
883
Victor Stinnerd3f08822012-05-29 12:57:52 +0200884 if (field_present) {
885 if (iter.str.start == iter.str.end)
Victor Stinnerd7b7c742012-06-04 22:52:12 +0200886 writer->overallocate = 0;
Eric Smith8c663262007-08-25 02:26:07 +0000887 if (!output_markup(&field_name, &format_spec,
Victor Stinner202fdca2012-05-07 12:47:02 +0200888 format_spec_needs_expanding, conversion, writer,
Eric Smith8ec90442009-03-14 12:29:34 +0000889 args, kwargs, recursion_depth, auto_number))
Eric Smith8c663262007-08-25 02:26:07 +0000890 return 0;
Victor Stinnerd3f08822012-05-29 12:57:52 +0200891 }
Eric Smith8c663262007-08-25 02:26:07 +0000892 }
893 return result;
894}
895
896
897/*
898 build_string allocates the output string and then
899 calls do_markup to do the heavy lifting.
900*/
901static PyObject *
902build_string(SubString *input, PyObject *args, PyObject *kwargs,
Eric Smith8ec90442009-03-14 12:29:34 +0000903 int recursion_depth, AutoNumber *auto_number)
Eric Smith8c663262007-08-25 02:26:07 +0000904{
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200905 _PyUnicodeWriter writer;
Eric Smith8c663262007-08-25 02:26:07 +0000906
907 /* check the recursion level */
Eric Smith45c07872007-09-05 02:02:43 +0000908 if (recursion_depth <= 0) {
Eric Smith8c663262007-08-25 02:26:07 +0000909 PyErr_SetString(PyExc_ValueError,
910 "Max string recursion exceeded");
Antoine Pitrou4574e622011-10-07 02:26:47 +0200911 return NULL;
Eric Smith8c663262007-08-25 02:26:07 +0000912 }
913
Victor Stinner8f674cc2013-04-17 23:02:17 +0200914 _PyUnicodeWriter_Init(&writer);
915 writer.overallocate = 1;
916 writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100;
Eric Smith8c663262007-08-25 02:26:07 +0000917
Victor Stinner202fdca2012-05-07 12:47:02 +0200918 if (!do_markup(input, args, kwargs, &writer, recursion_depth,
Eric Smith8ec90442009-03-14 12:29:34 +0000919 auto_number)) {
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200920 _PyUnicodeWriter_Dealloc(&writer);
Antoine Pitrou4574e622011-10-07 02:26:47 +0200921 return NULL;
Eric Smith8c663262007-08-25 02:26:07 +0000922 }
923
Victor Stinner3b1a74a2012-05-09 22:25:00 +0200924 return _PyUnicodeWriter_Finish(&writer);
Eric Smith8c663262007-08-25 02:26:07 +0000925}
926
927/************************************************************************/
928/*********** main routine ***********************************************/
929/************************************************************************/
930
931/* this is the main entry point */
932static PyObject *
933do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
934{
935 SubString input;
936
937 /* PEP 3101 says only 2 levels, so that
938 "{0:{1}}".format('abc', 's') # works
939 "{0:{1:{2}}}".format('abc', 's', '') # fails
940 */
Eric Smith45c07872007-09-05 02:02:43 +0000941 int recursion_depth = 2;
Eric Smith8c663262007-08-25 02:26:07 +0000942
Eric Smith8ec90442009-03-14 12:29:34 +0000943 AutoNumber auto_number;
944
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200945 if (PyUnicode_READY(self) == -1)
946 return NULL;
947
Eric Smith8ec90442009-03-14 12:29:34 +0000948 AutoNumber_Init(&auto_number);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200949 SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self));
Eric Smith8ec90442009-03-14 12:29:34 +0000950 return build_string(&input, args, kwargs, recursion_depth, &auto_number);
Eric Smith8c663262007-08-25 02:26:07 +0000951}
Eric Smithf6db4092007-08-27 23:52:26 +0000952
Eric Smith27bbca62010-11-04 17:06:58 +0000953static PyObject *
954do_string_format_map(PyObject *self, PyObject *obj)
955{
956 return do_string_format(self, NULL, obj);
957}
Eric Smithf6db4092007-08-27 23:52:26 +0000958
959
960/************************************************************************/
961/*********** formatteriterator ******************************************/
962/************************************************************************/
963
964/* This is used to implement string.Formatter.vparse(). It exists so
965 Formatter can share code with the built in unicode.format() method.
966 It's really just a wrapper around MarkupIterator that is callable
967 from Python. */
968
969typedef struct {
970 PyObject_HEAD
Victor Stinner7931d9a2011-11-04 00:22:48 +0100971 PyObject *str;
Eric Smithf6db4092007-08-27 23:52:26 +0000972 MarkupIterator it_markup;
973} formatteriterobject;
974
975static void
976formatteriter_dealloc(formatteriterobject *it)
977{
978 Py_XDECREF(it->str);
979 PyObject_FREE(it);
980}
981
982/* returns a tuple:
Eric Smith625cbf22007-08-29 03:22:59 +0000983 (literal, field_name, format_spec, conversion)
984
985 literal is any literal text to output. might be zero length
986 field_name is the string before the ':'. might be None
987 format_spec is the string after the ':'. mibht be None
988 conversion is either None, or the string after the '!'
Eric Smithf6db4092007-08-27 23:52:26 +0000989*/
990static PyObject *
991formatteriter_next(formatteriterobject *it)
992{
993 SubString literal;
994 SubString field_name;
995 SubString format_spec;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200996 Py_UCS4 conversion;
Eric Smithf6db4092007-08-27 23:52:26 +0000997 int format_spec_needs_expanding;
Eric Smith8ec90442009-03-14 12:29:34 +0000998 int field_present;
999 int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1000 &field_name, &format_spec, &conversion,
Eric Smithf6db4092007-08-27 23:52:26 +00001001 &format_spec_needs_expanding);
1002
1003 /* all of the SubString objects point into it->str, so no
1004 memory management needs to be done on them */
1005 assert(0 <= result && result <= 2);
Eric Smith0cb431c2007-08-28 01:07:27 +00001006 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +00001007 /* if 0, error has already been set, if 1, iterator is empty */
1008 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +00001009 else {
Eric Smithf6db4092007-08-27 23:52:26 +00001010 PyObject *literal_str = NULL;
1011 PyObject *field_name_str = NULL;
1012 PyObject *format_spec_str = NULL;
1013 PyObject *conversion_str = NULL;
1014 PyObject *tuple = NULL;
1015
Eric Smith625cbf22007-08-29 03:22:59 +00001016 literal_str = SubString_new_object(&literal);
1017 if (literal_str == NULL)
1018 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001019
Eric Smith625cbf22007-08-29 03:22:59 +00001020 field_name_str = SubString_new_object(&field_name);
1021 if (field_name_str == NULL)
1022 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001023
Eric Smith625cbf22007-08-29 03:22:59 +00001024 /* if field_name is non-zero length, return a string for
1025 format_spec (even if zero length), else return None */
Eric Smith8ec90442009-03-14 12:29:34 +00001026 format_spec_str = (field_present ?
Eric Smith625cbf22007-08-29 03:22:59 +00001027 SubString_new_object_or_empty :
1028 SubString_new_object)(&format_spec);
1029 if (format_spec_str == NULL)
1030 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001031
Eric Smith625cbf22007-08-29 03:22:59 +00001032 /* if the conversion is not specified, return a None,
1033 otherwise create a one length string with the conversion
1034 character */
1035 if (conversion == '\0') {
Eric Smithf6db4092007-08-27 23:52:26 +00001036 conversion_str = Py_None;
Eric Smithf6db4092007-08-27 23:52:26 +00001037 Py_INCREF(conversion_str);
1038 }
Eric Smith625cbf22007-08-29 03:22:59 +00001039 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001040 conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1041 &conversion, 1);
Eric Smith625cbf22007-08-29 03:22:59 +00001042 if (conversion_str == NULL)
1043 goto done;
1044
Eric Smith9e7c8da2007-08-28 11:15:20 +00001045 tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
Eric Smithf6db4092007-08-27 23:52:26 +00001046 conversion_str);
Eric Smith625cbf22007-08-29 03:22:59 +00001047 done:
Eric Smithf6db4092007-08-27 23:52:26 +00001048 Py_XDECREF(literal_str);
1049 Py_XDECREF(field_name_str);
1050 Py_XDECREF(format_spec_str);
1051 Py_XDECREF(conversion_str);
1052 return tuple;
1053 }
1054}
1055
1056static PyMethodDef formatteriter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001057 {NULL, NULL} /* sentinel */
Eric Smithf6db4092007-08-27 23:52:26 +00001058};
1059
Eric Smith8fd3eba2008-02-17 19:48:00 +00001060static PyTypeObject PyFormatterIter_Type = {
Eric Smithf6db4092007-08-27 23:52:26 +00001061 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001062 "formatteriterator", /* tp_name */
1063 sizeof(formatteriterobject), /* tp_basicsize */
1064 0, /* tp_itemsize */
Eric Smithf6db4092007-08-27 23:52:26 +00001065 /* methods */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001066 (destructor)formatteriter_dealloc, /* tp_dealloc */
1067 0, /* tp_print */
1068 0, /* tp_getattr */
1069 0, /* tp_setattr */
1070 0, /* tp_reserved */
1071 0, /* tp_repr */
1072 0, /* tp_as_number */
1073 0, /* tp_as_sequence */
1074 0, /* tp_as_mapping */
1075 0, /* tp_hash */
1076 0, /* tp_call */
1077 0, /* tp_str */
1078 PyObject_GenericGetAttr, /* tp_getattro */
1079 0, /* tp_setattro */
1080 0, /* tp_as_buffer */
1081 Py_TPFLAGS_DEFAULT, /* tp_flags */
1082 0, /* tp_doc */
1083 0, /* tp_traverse */
1084 0, /* tp_clear */
1085 0, /* tp_richcompare */
1086 0, /* tp_weaklistoffset */
1087 PyObject_SelfIter, /* tp_iter */
1088 (iternextfunc)formatteriter_next, /* tp_iternext */
1089 formatteriter_methods, /* tp_methods */
Eric Smithf6db4092007-08-27 23:52:26 +00001090 0,
1091};
1092
1093/* unicode_formatter_parser is used to implement
1094 string.Formatter.vformat. it parses a string and returns tuples
1095 describing the parsed elements. It's a wrapper around
1096 stringlib/string_format.h's MarkupIterator */
1097static PyObject *
Victor Stinner7931d9a2011-11-04 00:22:48 +01001098formatter_parser(PyObject *ignored, PyObject *self)
Eric Smithf6db4092007-08-27 23:52:26 +00001099{
1100 formatteriterobject *it;
1101
Eric Smitha1eac722011-01-29 11:15:35 +00001102 if (!PyUnicode_Check(self)) {
1103 PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1104 return NULL;
1105 }
1106
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001107 if (PyUnicode_READY(self) == -1)
1108 return NULL;
1109
Eric Smithf6db4092007-08-27 23:52:26 +00001110 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1111 if (it == NULL)
1112 return NULL;
1113
1114 /* take ownership, give the object to the iterator */
1115 Py_INCREF(self);
1116 it->str = self;
1117
1118 /* initialize the contained MarkupIterator */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001119 MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self));
Eric Smithf6db4092007-08-27 23:52:26 +00001120 return (PyObject *)it;
1121}
1122
1123
1124/************************************************************************/
1125/*********** fieldnameiterator ******************************************/
1126/************************************************************************/
1127
1128
1129/* This is used to implement string.Formatter.vparse(). It parses the
1130 field name into attribute and item values. It's a Python-callable
1131 wrapper around FieldNameIterator */
1132
1133typedef struct {
1134 PyObject_HEAD
Victor Stinner7931d9a2011-11-04 00:22:48 +01001135 PyObject *str;
Eric Smithf6db4092007-08-27 23:52:26 +00001136 FieldNameIterator it_field;
1137} fieldnameiterobject;
1138
1139static void
1140fieldnameiter_dealloc(fieldnameiterobject *it)
1141{
1142 Py_XDECREF(it->str);
1143 PyObject_FREE(it);
1144}
1145
1146/* returns a tuple:
1147 (is_attr, value)
1148 is_attr is true if we used attribute syntax (e.g., '.foo')
1149 false if we used index syntax (e.g., '[foo]')
1150 value is an integer or string
1151*/
1152static PyObject *
1153fieldnameiter_next(fieldnameiterobject *it)
1154{
1155 int result;
1156 int is_attr;
1157 Py_ssize_t idx;
1158 SubString name;
1159
1160 result = FieldNameIterator_next(&it->it_field, &is_attr,
1161 &idx, &name);
Eric Smith0cb431c2007-08-28 01:07:27 +00001162 if (result == 0 || result == 1)
Eric Smithf6db4092007-08-27 23:52:26 +00001163 /* if 0, error has already been set, if 1, iterator is empty */
1164 return NULL;
Eric Smith0cb431c2007-08-28 01:07:27 +00001165 else {
Eric Smithf6db4092007-08-27 23:52:26 +00001166 PyObject* result = NULL;
1167 PyObject* is_attr_obj = NULL;
1168 PyObject* obj = NULL;
1169
1170 is_attr_obj = PyBool_FromLong(is_attr);
1171 if (is_attr_obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001172 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001173
1174 /* either an integer or a string */
1175 if (idx != -1)
Christian Heimes217cfd12007-12-02 14:31:20 +00001176 obj = PyLong_FromSsize_t(idx);
Eric Smithf6db4092007-08-27 23:52:26 +00001177 else
1178 obj = SubString_new_object(&name);
1179 if (obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001180 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001181
1182 /* return a tuple of values */
1183 result = PyTuple_Pack(2, is_attr_obj, obj);
Eric Smithf6db4092007-08-27 23:52:26 +00001184
Eric Smith625cbf22007-08-29 03:22:59 +00001185 done:
Eric Smithf6db4092007-08-27 23:52:26 +00001186 Py_XDECREF(is_attr_obj);
1187 Py_XDECREF(obj);
Eric Smith625cbf22007-08-29 03:22:59 +00001188 return result;
Eric Smithf6db4092007-08-27 23:52:26 +00001189 }
Eric Smithf6db4092007-08-27 23:52:26 +00001190}
1191
1192static PyMethodDef fieldnameiter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001193 {NULL, NULL} /* sentinel */
Eric Smithf6db4092007-08-27 23:52:26 +00001194};
1195
1196static PyTypeObject PyFieldNameIter_Type = {
1197 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001198 "fieldnameiterator", /* tp_name */
1199 sizeof(fieldnameiterobject), /* tp_basicsize */
1200 0, /* tp_itemsize */
Eric Smithf6db4092007-08-27 23:52:26 +00001201 /* methods */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001202 (destructor)fieldnameiter_dealloc, /* tp_dealloc */
1203 0, /* tp_print */
1204 0, /* tp_getattr */
1205 0, /* tp_setattr */
1206 0, /* tp_reserved */
1207 0, /* tp_repr */
1208 0, /* tp_as_number */
1209 0, /* tp_as_sequence */
1210 0, /* tp_as_mapping */
1211 0, /* tp_hash */
1212 0, /* tp_call */
1213 0, /* tp_str */
1214 PyObject_GenericGetAttr, /* tp_getattro */
1215 0, /* tp_setattro */
1216 0, /* tp_as_buffer */
1217 Py_TPFLAGS_DEFAULT, /* tp_flags */
1218 0, /* tp_doc */
1219 0, /* tp_traverse */
1220 0, /* tp_clear */
1221 0, /* tp_richcompare */
1222 0, /* tp_weaklistoffset */
1223 PyObject_SelfIter, /* tp_iter */
1224 (iternextfunc)fieldnameiter_next, /* tp_iternext */
1225 fieldnameiter_methods, /* tp_methods */
Eric Smithf6db4092007-08-27 23:52:26 +00001226 0};
1227
1228/* unicode_formatter_field_name_split is used to implement
1229 string.Formatter.vformat. it takes an PEP 3101 "field name", and
1230 returns a tuple of (first, rest): "first", the part before the
1231 first '.' or '['; and "rest", an iterator for the rest of the field
1232 name. it's a wrapper around stringlib/string_format.h's
1233 field_name_split. The iterator it returns is a
1234 FieldNameIterator */
1235static PyObject *
Victor Stinner7931d9a2011-11-04 00:22:48 +01001236formatter_field_name_split(PyObject *ignored, PyObject *self)
Eric Smithf6db4092007-08-27 23:52:26 +00001237{
1238 SubString first;
1239 Py_ssize_t first_idx;
1240 fieldnameiterobject *it;
1241
1242 PyObject *first_obj = NULL;
1243 PyObject *result = NULL;
1244
Eric Smitha1eac722011-01-29 11:15:35 +00001245 if (!PyUnicode_Check(self)) {
1246 PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1247 return NULL;
1248 }
1249
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001250 if (PyUnicode_READY(self) == -1)
1251 return NULL;
1252
Eric Smithf6db4092007-08-27 23:52:26 +00001253 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1254 if (it == NULL)
1255 return NULL;
1256
1257 /* take ownership, give the object to the iterator. this is
1258 just to keep the field_name alive */
1259 Py_INCREF(self);
1260 it->str = self;
1261
Eric Smith8ec90442009-03-14 12:29:34 +00001262 /* Pass in auto_number = NULL. We'll return an empty string for
1263 first_obj in that case. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001264 if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self),
Eric Smith8ec90442009-03-14 12:29:34 +00001265 &first, &first_idx, &it->it_field, NULL))
Eric Smith625cbf22007-08-29 03:22:59 +00001266 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001267
Eric Smith0cb431c2007-08-28 01:07:27 +00001268 /* first becomes an integer, if possible; else a string */
Eric Smithf6db4092007-08-27 23:52:26 +00001269 if (first_idx != -1)
Christian Heimes217cfd12007-12-02 14:31:20 +00001270 first_obj = PyLong_FromSsize_t(first_idx);
Eric Smithf6db4092007-08-27 23:52:26 +00001271 else
1272 /* convert "first" into a string object */
1273 first_obj = SubString_new_object(&first);
1274 if (first_obj == NULL)
Eric Smith625cbf22007-08-29 03:22:59 +00001275 goto done;
Eric Smithf6db4092007-08-27 23:52:26 +00001276
1277 /* return a tuple of values */
1278 result = PyTuple_Pack(2, first_obj, it);
1279
Eric Smith625cbf22007-08-29 03:22:59 +00001280done:
Eric Smithf6db4092007-08-27 23:52:26 +00001281 Py_XDECREF(it);
1282 Py_XDECREF(first_obj);
1283 return result;
1284}