| /* |
| unicode_format.h -- implementation of str.format(). |
| */ |
| |
| /************************************************************************/ |
| /*********** Global data structures and forward declarations *********/ |
| /************************************************************************/ |
| |
| /* |
| A SubString consists of the characters between two string or |
| unicode pointers. |
| */ |
| typedef struct { |
| PyObject *str; /* borrowed reference */ |
| Py_ssize_t start, end; |
| } SubString; |
| |
| |
/* Keep track if we're auto-numbering fields */
typedef enum {
    ANS_INIT,       /* no numeric or empty field name seen yet */
    ANS_AUTO,       /* first such field had an empty name */
    ANS_MANUAL      /* first such field had an explicit number */
} AutoNumberState;
| |
| /* Keeps track of our auto-numbering state, and which number field we're on */ |
| typedef struct { |
| AutoNumberState an_state; |
| int an_field_number; |
| } AutoNumber; |
| |
| |
| /* forward declaration for recursion */ |
| static PyObject * |
| build_string(SubString *input, PyObject *args, PyObject *kwargs, |
| int recursion_depth, AutoNumber *auto_number); |
| |
| |
| |
| /************************************************************************/ |
| /************************** Utility functions ************************/ |
| /************************************************************************/ |
| |
| static void |
| AutoNumber_Init(AutoNumber *auto_number) |
| { |
| auto_number->an_state = ANS_INIT; |
| auto_number->an_field_number = 0; |
| } |
| |
| /* fill in a SubString from a pointer and length */ |
| Py_LOCAL_INLINE(void) |
| SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end) |
| { |
| str->str = s; |
| str->start = start; |
| str->end = end; |
| } |
| |
| /* return a new string. if str->str is NULL, return None */ |
| Py_LOCAL_INLINE(PyObject *) |
| SubString_new_object(SubString *str) |
| { |
| if (str->str == NULL) { |
| Py_INCREF(Py_None); |
| return Py_None; |
| } |
| return PyUnicode_Substring(str->str, str->start, str->end); |
| } |
| |
| /* return a new string. if str->str is NULL, return a new empty string */ |
| Py_LOCAL_INLINE(PyObject *) |
| SubString_new_object_or_empty(SubString *str) |
| { |
| if (str->str == NULL) { |
| return PyUnicode_New(0, 0); |
| } |
| return SubString_new_object(str); |
| } |
| |
| /* Return 1 if an error has been detected switching between automatic |
| field numbering and manual field specification, else return 0. Set |
| ValueError on error. */ |
| static int |
| autonumber_state_error(AutoNumberState state, int field_name_is_empty) |
| { |
| if (state == ANS_MANUAL) { |
| if (field_name_is_empty) { |
| PyErr_SetString(PyExc_ValueError, "cannot switch from " |
| "manual field specification to " |
| "automatic field numbering"); |
| return 1; |
| } |
| } |
| else { |
| if (!field_name_is_empty) { |
| PyErr_SetString(PyExc_ValueError, "cannot switch from " |
| "automatic field numbering to " |
| "manual field specification"); |
| return 1; |
| } |
| } |
| return 0; |
| } |
| |
| |
| /************************************************************************/ |
| /*********** Format string parsing -- integers and identifiers *********/ |
| /************************************************************************/ |
| |
| static Py_ssize_t |
| get_integer(const SubString *str) |
| { |
| Py_ssize_t accumulator = 0; |
| Py_ssize_t digitval; |
| Py_ssize_t i; |
| |
| /* empty string is an error */ |
| if (str->start >= str->end) |
| return -1; |
| |
| for (i = str->start; i < str->end; i++) { |
| digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i)); |
| if (digitval < 0) |
| return -1; |
| /* |
| Detect possible overflow before it happens: |
| |
| accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if |
| accumulator > (PY_SSIZE_T_MAX - digitval) / 10. |
| */ |
| if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) { |
| PyErr_Format(PyExc_ValueError, |
| "Too many decimal digits in format string"); |
| return -1; |
| } |
| accumulator = accumulator * 10 + digitval; |
| } |
| return accumulator; |
| } |
| |
| /************************************************************************/ |
| /******** Functions to get field objects and specification strings ******/ |
| /************************************************************************/ |
| |
| /* do the equivalent of obj.name */ |
| static PyObject * |
| getattr(PyObject *obj, SubString *name) |
| { |
| PyObject *newobj; |
| PyObject *str = SubString_new_object(name); |
| if (str == NULL) |
| return NULL; |
| newobj = PyObject_GetAttr(obj, str); |
| Py_DECREF(str); |
| return newobj; |
| } |
| |
| /* do the equivalent of obj[idx], where obj is a sequence */ |
| static PyObject * |
| getitem_sequence(PyObject *obj, Py_ssize_t idx) |
| { |
| return PySequence_GetItem(obj, idx); |
| } |
| |
| /* do the equivalent of obj[idx], where obj is not a sequence */ |
| static PyObject * |
| getitem_idx(PyObject *obj, Py_ssize_t idx) |
| { |
| PyObject *newobj; |
| PyObject *idx_obj = PyLong_FromSsize_t(idx); |
| if (idx_obj == NULL) |
| return NULL; |
| newobj = PyObject_GetItem(obj, idx_obj); |
| Py_DECREF(idx_obj); |
| return newobj; |
| } |
| |
| /* do the equivalent of obj[name] */ |
| static PyObject * |
| getitem_str(PyObject *obj, SubString *name) |
| { |
| PyObject *newobj; |
| PyObject *str = SubString_new_object(name); |
| if (str == NULL) |
| return NULL; |
| newobj = PyObject_GetItem(obj, str); |
| Py_DECREF(str); |
| return newobj; |
| } |
| |
| typedef struct { |
| /* the entire string we're parsing. we assume that someone else |
| is managing its lifetime, and that it will exist for the |
| lifetime of the iterator. can be empty */ |
| SubString str; |
| |
| /* index to where we are inside field_name */ |
| Py_ssize_t index; |
| } FieldNameIterator; |
| |
| |
| static int |
| FieldNameIterator_init(FieldNameIterator *self, PyObject *s, |
| Py_ssize_t start, Py_ssize_t end) |
| { |
| SubString_init(&self->str, s, start, end); |
| self->index = start; |
| return 1; |
| } |
| |
| static int |
| _FieldNameIterator_attr(FieldNameIterator *self, SubString *name) |
| { |
| Py_UCS4 c; |
| |
| name->str = self->str.str; |
| name->start = self->index; |
| |
| /* return everything until '.' or '[' */ |
| while (self->index < self->str.end) { |
| c = PyUnicode_READ_CHAR(self->str.str, self->index++); |
| switch (c) { |
| case '[': |
| case '.': |
| /* backup so that we this character will be seen next time */ |
| self->index--; |
| break; |
| default: |
| continue; |
| } |
| break; |
| } |
| /* end of string is okay */ |
| name->end = self->index; |
| return 1; |
| } |
| |
| static int |
| _FieldNameIterator_item(FieldNameIterator *self, SubString *name) |
| { |
| int bracket_seen = 0; |
| Py_UCS4 c; |
| |
| name->str = self->str.str; |
| name->start = self->index; |
| |
| /* return everything until ']' */ |
| while (self->index < self->str.end) { |
| c = PyUnicode_READ_CHAR(self->str.str, self->index++); |
| switch (c) { |
| case ']': |
| bracket_seen = 1; |
| break; |
| default: |
| continue; |
| } |
| break; |
| } |
| /* make sure we ended with a ']' */ |
| if (!bracket_seen) { |
| PyErr_SetString(PyExc_ValueError, "Missing ']' in format string"); |
| return 0; |
| } |
| |
| /* end of string is okay */ |
| /* don't include the ']' */ |
| name->end = self->index-1; |
| return 1; |
| } |
| |
| /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */ |
| static int |
| FieldNameIterator_next(FieldNameIterator *self, int *is_attribute, |
| Py_ssize_t *name_idx, SubString *name) |
| { |
| /* check at end of input */ |
| if (self->index >= self->str.end) |
| return 1; |
| |
| switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) { |
| case '.': |
| *is_attribute = 1; |
| if (_FieldNameIterator_attr(self, name) == 0) |
| return 0; |
| *name_idx = -1; |
| break; |
| case '[': |
| *is_attribute = 0; |
| if (_FieldNameIterator_item(self, name) == 0) |
| return 0; |
| *name_idx = get_integer(name); |
| if (*name_idx == -1 && PyErr_Occurred()) |
| return 0; |
| break; |
| default: |
| /* Invalid character follows ']' */ |
| PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may " |
| "follow ']' in format field specifier"); |
| return 0; |
| } |
| |
| /* empty string is an error */ |
| if (name->start == name->end) { |
| PyErr_SetString(PyExc_ValueError, "Empty attribute in format string"); |
| return 0; |
| } |
| |
| return 2; |
| } |
| |
| |
| /* input: field_name |
| output: 'first' points to the part before the first '[' or '.' |
| 'first_idx' is -1 if 'first' is not an integer, otherwise |
| it's the value of first converted to an integer |
| 'rest' is an iterator to return the rest |
| */ |
| static int |
| field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first, |
| Py_ssize_t *first_idx, FieldNameIterator *rest, |
| AutoNumber *auto_number) |
| { |
| Py_UCS4 c; |
| Py_ssize_t i = start; |
| int field_name_is_empty; |
| int using_numeric_index; |
| |
| /* find the part up until the first '.' or '[' */ |
| while (i < end) { |
| switch (c = PyUnicode_READ_CHAR(str, i++)) { |
| case '[': |
| case '.': |
| /* backup so that we this character is available to the |
| "rest" iterator */ |
| i--; |
| break; |
| default: |
| continue; |
| } |
| break; |
| } |
| |
| /* set up the return values */ |
| SubString_init(first, str, start, i); |
| FieldNameIterator_init(rest, str, i, end); |
| |
| /* see if "first" is an integer, in which case it's used as an index */ |
| *first_idx = get_integer(first); |
| if (*first_idx == -1 && PyErr_Occurred()) |
| return 0; |
| |
| field_name_is_empty = first->start >= first->end; |
| |
| /* If the field name is omitted or if we have a numeric index |
| specified, then we're doing numeric indexing into args. */ |
| using_numeric_index = field_name_is_empty || *first_idx != -1; |
| |
| /* We always get here exactly one time for each field we're |
| processing. And we get here in field order (counting by left |
| braces). So this is the perfect place to handle automatic field |
| numbering if the field name is omitted. */ |
| |
| /* Check if we need to do the auto-numbering. It's not needed if |
| we're called from string.Format routines, because it's handled |
| in that class by itself. */ |
| if (auto_number) { |
| /* Initialize our auto numbering state if this is the first |
| time we're either auto-numbering or manually numbering. */ |
| if (auto_number->an_state == ANS_INIT && using_numeric_index) |
| auto_number->an_state = field_name_is_empty ? |
| ANS_AUTO : ANS_MANUAL; |
| |
| /* Make sure our state is consistent with what we're doing |
| this time through. Only check if we're using a numeric |
| index. */ |
| if (using_numeric_index) |
| if (autonumber_state_error(auto_number->an_state, |
| field_name_is_empty)) |
| return 0; |
| /* Zero length field means we want to do auto-numbering of the |
| fields. */ |
| if (field_name_is_empty) |
| *first_idx = (auto_number->an_field_number)++; |
| } |
| |
| return 1; |
| } |
| |
| |
| /* |
| get_field_object returns the object inside {}, before the |
| format_spec. It handles getindex and getattr lookups and consumes |
| the entire input string. |
| */ |
| static PyObject * |
| get_field_object(SubString *input, PyObject *args, PyObject *kwargs, |
| AutoNumber *auto_number) |
| { |
| PyObject *obj = NULL; |
| int ok; |
| int is_attribute; |
| SubString name; |
| SubString first; |
| Py_ssize_t index; |
| FieldNameIterator rest; |
| |
| if (!field_name_split(input->str, input->start, input->end, &first, |
| &index, &rest, auto_number)) { |
| goto error; |
| } |
| |
| if (index == -1) { |
| /* look up in kwargs */ |
| PyObject *key = SubString_new_object(&first); |
| if (key == NULL) |
| goto error; |
| |
| /* Use PyObject_GetItem instead of PyDict_GetItem because this |
| code is no longer just used with kwargs. It might be passed |
| a non-dict when called through format_map. */ |
| if ((kwargs == NULL) || (obj = PyObject_GetItem(kwargs, key)) == NULL) { |
| PyErr_SetObject(PyExc_KeyError, key); |
| Py_DECREF(key); |
| goto error; |
| } |
| Py_DECREF(key); |
| } |
| else { |
| /* If args is NULL, we have a format string with a positional field |
| with only kwargs to retrieve it from. This can only happen when |
| used with format_map(), where positional arguments are not |
| allowed. */ |
| if (args == NULL) { |
| PyErr_SetString(PyExc_ValueError, "Format string contains " |
| "positional fields"); |
| goto error; |
| } |
| |
| /* look up in args */ |
| obj = PySequence_GetItem(args, index); |
| if (obj == NULL) |
| goto error; |
| } |
| |
| /* iterate over the rest of the field_name */ |
| while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index, |
| &name)) == 2) { |
| PyObject *tmp; |
| |
| if (is_attribute) |
| /* getattr lookup "." */ |
| tmp = getattr(obj, &name); |
| else |
| /* getitem lookup "[]" */ |
| if (index == -1) |
| tmp = getitem_str(obj, &name); |
| else |
| if (PySequence_Check(obj)) |
| tmp = getitem_sequence(obj, index); |
| else |
| /* not a sequence */ |
| tmp = getitem_idx(obj, index); |
| if (tmp == NULL) |
| goto error; |
| |
| /* assign to obj */ |
| Py_DECREF(obj); |
| obj = tmp; |
| } |
| /* end of iterator, this is the non-error case */ |
| if (ok == 1) |
| return obj; |
| error: |
| Py_XDECREF(obj); |
| return NULL; |
| } |
| |
| /************************************************************************/ |
| /***************** Field rendering functions **************************/ |
| /************************************************************************/ |
| |
| /* |
| render_field() is the main function in this section. It takes the |
| field object and field specification string generated by |
| get_field_and_spec, and renders the field into the output string. |
| |
| render_field calls fieldobj.__format__(format_spec) method, and |
| appends to the output. |
| */ |
| static int |
| render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer) |
| { |
| int ok = 0; |
| PyObject *result = NULL; |
| PyObject *format_spec_object = NULL; |
| int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL; |
| int err; |
| |
| /* If we know the type exactly, skip the lookup of __format__ and just |
| call the formatter directly. */ |
| if (PyUnicode_CheckExact(fieldobj)) |
| formatter = _PyUnicode_FormatAdvancedWriter; |
| else if (PyLong_CheckExact(fieldobj)) |
| formatter = _PyLong_FormatAdvancedWriter; |
| else if (PyFloat_CheckExact(fieldobj)) |
| formatter = _PyFloat_FormatAdvancedWriter; |
| else if (PyComplex_CheckExact(fieldobj)) |
| formatter = _PyComplex_FormatAdvancedWriter; |
| |
| if (formatter) { |
| /* we know exactly which formatter will be called when __format__ is |
| looked up, so call it directly, instead. */ |
| err = formatter(writer, fieldobj, format_spec->str, |
| format_spec->start, format_spec->end); |
| return (err == 0); |
| } |
| else { |
| /* We need to create an object out of the pointers we have, because |
| __format__ takes a string/unicode object for format_spec. */ |
| if (format_spec->str) |
| format_spec_object = PyUnicode_Substring(format_spec->str, |
| format_spec->start, |
| format_spec->end); |
| else |
| format_spec_object = PyUnicode_New(0, 0); |
| if (format_spec_object == NULL) |
| goto done; |
| |
| result = PyObject_Format(fieldobj, format_spec_object); |
| } |
| if (result == NULL) |
| goto done; |
| |
| if (_PyUnicodeWriter_WriteStr(writer, result) == -1) |
| goto done; |
| ok = 1; |
| |
| done: |
| Py_XDECREF(format_spec_object); |
| Py_XDECREF(result); |
| return ok; |
| } |
| |
| static int |
| parse_field(SubString *str, SubString *field_name, SubString *format_spec, |
| int *format_spec_needs_expanding, Py_UCS4 *conversion) |
| { |
| /* Note this function works if the field name is zero length, |
| which is good. Zero length field names are handled later, in |
| field_name_split. */ |
| |
| Py_UCS4 c = 0; |
| |
| /* initialize these, as they may be empty */ |
| *conversion = '\0'; |
| SubString_init(format_spec, NULL, 0, 0); |
| |
| /* Search for the field name. it's terminated by the end of |
| the string, or a ':' or '!' */ |
| field_name->str = str->str; |
| field_name->start = str->start; |
| while (str->start < str->end) { |
| switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) { |
| case '{': |
| PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name"); |
| return 0; |
| case '[': |
| for (; str->start < str->end; str->start++) |
| if (PyUnicode_READ_CHAR(str->str, str->start) == ']') |
| break; |
| continue; |
| case '}': |
| case ':': |
| case '!': |
| break; |
| default: |
| continue; |
| } |
| break; |
| } |
| |
| field_name->end = str->start - 1; |
| if (c == '!' || c == ':') { |
| Py_ssize_t count; |
| /* we have a format specifier and/or a conversion */ |
| /* don't include the last character */ |
| |
| /* see if there's a conversion specifier */ |
| if (c == '!') { |
| /* there must be another character present */ |
| if (str->start >= str->end) { |
| PyErr_SetString(PyExc_ValueError, |
| "end of string while looking for conversion " |
| "specifier"); |
| return 0; |
| } |
| *conversion = PyUnicode_READ_CHAR(str->str, str->start++); |
| |
| if (str->start < str->end) { |
| c = PyUnicode_READ_CHAR(str->str, str->start++); |
| if (c == '}') |
| return 1; |
| if (c != ':') { |
| PyErr_SetString(PyExc_ValueError, |
| "expected ':' after conversion specifier"); |
| return 0; |
| } |
| } |
| } |
| format_spec->str = str->str; |
| format_spec->start = str->start; |
| count = 1; |
| while (str->start < str->end) { |
| switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) { |
| case '{': |
| *format_spec_needs_expanding = 1; |
| count++; |
| break; |
| case '}': |
| count--; |
| if (count == 0) { |
| format_spec->end = str->start - 1; |
| return 1; |
| } |
| break; |
| default: |
| break; |
| } |
| } |
| |
| PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec"); |
| return 0; |
| } |
| else if (c != '}') { |
| PyErr_SetString(PyExc_ValueError, "expected '}' before end of string"); |
| return 0; |
| } |
| |
| return 1; |
| } |
| |
| /************************************************************************/ |
| /******* Output string allocation and escape-to-markup processing ******/ |
| /************************************************************************/ |
| |
| /* MarkupIterator breaks the string into pieces of either literal |
| text, or things inside {} that need to be marked up. it is |
| designed to make it easy to wrap a Python iterator around it, for |
| use with the Formatter class */ |
| |
| typedef struct { |
| SubString str; |
| } MarkupIterator; |
| |
| static int |
| MarkupIterator_init(MarkupIterator *self, PyObject *str, |
| Py_ssize_t start, Py_ssize_t end) |
| { |
| SubString_init(&self->str, str, start, end); |
| return 1; |
| } |
| |
| /* returns 0 on error, 1 on non-error termination, and 2 if it got a |
| string (or something to be expanded) */ |
| static int |
| MarkupIterator_next(MarkupIterator *self, SubString *literal, |
| int *field_present, SubString *field_name, |
| SubString *format_spec, Py_UCS4 *conversion, |
| int *format_spec_needs_expanding) |
| { |
| int at_end; |
| Py_UCS4 c = 0; |
| Py_ssize_t start; |
| Py_ssize_t len; |
| int markup_follows = 0; |
| |
| /* initialize all of the output variables */ |
| SubString_init(literal, NULL, 0, 0); |
| SubString_init(field_name, NULL, 0, 0); |
| SubString_init(format_spec, NULL, 0, 0); |
| *conversion = '\0'; |
| *format_spec_needs_expanding = 0; |
| *field_present = 0; |
| |
| /* No more input, end of iterator. This is the normal exit |
| path. */ |
| if (self->str.start >= self->str.end) |
| return 1; |
| |
| start = self->str.start; |
| |
| /* First read any literal text. Read until the end of string, an |
| escaped '{' or '}', or an unescaped '{'. In order to never |
| allocate memory and so I can just pass pointers around, if |
| there's an escaped '{' or '}' then we'll return the literal |
| including the brace, but no format object. The next time |
| through, we'll return the rest of the literal, skipping past |
| the second consecutive brace. */ |
| while (self->str.start < self->str.end) { |
| switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) { |
| case '{': |
| case '}': |
| markup_follows = 1; |
| break; |
| default: |
| continue; |
| } |
| break; |
| } |
| |
| at_end = self->str.start >= self->str.end; |
| len = self->str.start - start; |
| |
| if ((c == '}') && (at_end || |
| (c != PyUnicode_READ_CHAR(self->str.str, |
| self->str.start)))) { |
| PyErr_SetString(PyExc_ValueError, "Single '}' encountered " |
| "in format string"); |
| return 0; |
| } |
| if (at_end && c == '{') { |
| PyErr_SetString(PyExc_ValueError, "Single '{' encountered " |
| "in format string"); |
| return 0; |
| } |
| if (!at_end) { |
| if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) { |
| /* escaped } or {, skip it in the input. there is no |
| markup object following us, just this literal text */ |
| self->str.start++; |
| markup_follows = 0; |
| } |
| else |
| len--; |
| } |
| |
| /* record the literal text */ |
| literal->str = self->str.str; |
| literal->start = start; |
| literal->end = start + len; |
| |
| if (!markup_follows) |
| return 2; |
| |
| /* this is markup; parse the field */ |
| *field_present = 1; |
| if (!parse_field(&self->str, field_name, format_spec, |
| format_spec_needs_expanding, conversion)) |
| return 0; |
| return 2; |
| } |
| |
| |
| /* do the !r or !s conversion on obj */ |
| static PyObject * |
| do_conversion(PyObject *obj, Py_UCS4 conversion) |
| { |
| /* XXX in pre-3.0, do we need to convert this to unicode, since it |
| might have returned a string? */ |
| switch (conversion) { |
| case 'r': |
| return PyObject_Repr(obj); |
| case 's': |
| return PyObject_Str(obj); |
| case 'a': |
| return PyObject_ASCII(obj); |
| default: |
| if (conversion > 32 && conversion < 127) { |
| /* It's the ASCII subrange; casting to char is safe |
| (assuming the execution character set is an ASCII |
| superset). */ |
| PyErr_Format(PyExc_ValueError, |
| "Unknown conversion specifier %c", |
| (char)conversion); |
| } else |
| PyErr_Format(PyExc_ValueError, |
| "Unknown conversion specifier \\x%x", |
| (unsigned int)conversion); |
| return NULL; |
| } |
| } |
| |
| /* given: |
| |
| {field_name!conversion:format_spec} |
| |
| compute the result and write it to output. |
| format_spec_needs_expanding is an optimization. if it's false, |
| just output the string directly, otherwise recursively expand the |
| format_spec string. |
| |
| field_name is allowed to be zero length, in which case we |
| are doing auto field numbering. |
| */ |
| |
| static int |
| output_markup(SubString *field_name, SubString *format_spec, |
| int format_spec_needs_expanding, Py_UCS4 conversion, |
| _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs, |
| int recursion_depth, AutoNumber *auto_number) |
| { |
| PyObject *tmp = NULL; |
| PyObject *fieldobj = NULL; |
| SubString expanded_format_spec; |
| SubString *actual_format_spec; |
| int result = 0; |
| |
| /* convert field_name to an object */ |
| fieldobj = get_field_object(field_name, args, kwargs, auto_number); |
| if (fieldobj == NULL) |
| goto done; |
| |
| if (conversion != '\0') { |
| tmp = do_conversion(fieldobj, conversion); |
| if (tmp == NULL || PyUnicode_READY(tmp) == -1) |
| goto done; |
| |
| /* do the assignment, transferring ownership: fieldobj = tmp */ |
| Py_DECREF(fieldobj); |
| fieldobj = tmp; |
| tmp = NULL; |
| } |
| |
| /* if needed, recurively compute the format_spec */ |
| if (format_spec_needs_expanding) { |
| tmp = build_string(format_spec, args, kwargs, recursion_depth-1, |
| auto_number); |
| if (tmp == NULL || PyUnicode_READY(tmp) == -1) |
| goto done; |
| |
| /* note that in the case we're expanding the format string, |
| tmp must be kept around until after the call to |
| render_field. */ |
| SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp)); |
| actual_format_spec = &expanded_format_spec; |
| } |
| else |
| actual_format_spec = format_spec; |
| |
| if (render_field(fieldobj, actual_format_spec, writer) == 0) |
| goto done; |
| |
| result = 1; |
| |
| done: |
| Py_XDECREF(fieldobj); |
| Py_XDECREF(tmp); |
| |
| return result; |
| } |
| |
| /* |
| do_markup is the top-level loop for the format() method. It |
| searches through the format string for escapes to markup codes, and |
| calls other functions to move non-markup text to the output, |
| and to perform the markup to the output. |
| */ |
| static int |
| do_markup(SubString *input, PyObject *args, PyObject *kwargs, |
| _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number) |
| { |
| MarkupIterator iter; |
| int format_spec_needs_expanding; |
| int result; |
| int field_present; |
| SubString literal; |
| SubString field_name; |
| SubString format_spec; |
| Py_UCS4 conversion; |
| |
| MarkupIterator_init(&iter, input->str, input->start, input->end); |
| while ((result = MarkupIterator_next(&iter, &literal, &field_present, |
| &field_name, &format_spec, |
| &conversion, |
| &format_spec_needs_expanding)) == 2) { |
| if (literal.end != literal.start) { |
| if (!field_present && iter.str.start == iter.str.end) |
| writer->overallocate = 0; |
| if (_PyUnicodeWriter_WriteSubstring(writer, literal.str, |
| literal.start, literal.end) < 0) |
| return 0; |
| } |
| |
| if (field_present) { |
| if (iter.str.start == iter.str.end) |
| writer->overallocate = 0; |
| if (!output_markup(&field_name, &format_spec, |
| format_spec_needs_expanding, conversion, writer, |
| args, kwargs, recursion_depth, auto_number)) |
| return 0; |
| } |
| } |
| return result; |
| } |
| |
| |
| /* |
| build_string allocates the output string and then |
| calls do_markup to do the heavy lifting. |
| */ |
| static PyObject * |
| build_string(SubString *input, PyObject *args, PyObject *kwargs, |
| int recursion_depth, AutoNumber *auto_number) |
| { |
| _PyUnicodeWriter writer; |
| |
| /* check the recursion level */ |
| if (recursion_depth <= 0) { |
| PyErr_SetString(PyExc_ValueError, |
| "Max string recursion exceeded"); |
| return NULL; |
| } |
| |
| _PyUnicodeWriter_Init(&writer); |
| writer.overallocate = 1; |
| writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100; |
| |
| if (!do_markup(input, args, kwargs, &writer, recursion_depth, |
| auto_number)) { |
| _PyUnicodeWriter_Dealloc(&writer); |
| return NULL; |
| } |
| |
| return _PyUnicodeWriter_Finish(&writer); |
| } |
| |
| /************************************************************************/ |
| /*********** main routine ***********************************************/ |
| /************************************************************************/ |
| |
| /* this is the main entry point */ |
| static PyObject * |
| do_string_format(PyObject *self, PyObject *args, PyObject *kwargs) |
| { |
| SubString input; |
| |
| /* PEP 3101 says only 2 levels, so that |
| "{0:{1}}".format('abc', 's') # works |
| "{0:{1:{2}}}".format('abc', 's', '') # fails |
| */ |
| int recursion_depth = 2; |
| |
| AutoNumber auto_number; |
| |
| if (PyUnicode_READY(self) == -1) |
| return NULL; |
| |
| AutoNumber_Init(&auto_number); |
| SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self)); |
| return build_string(&input, args, kwargs, recursion_depth, &auto_number); |
| } |
| |
| static PyObject * |
| do_string_format_map(PyObject *self, PyObject *obj) |
| { |
| return do_string_format(self, NULL, obj); |
| } |
| |
| |
| /************************************************************************/ |
| /*********** formatteriterator ******************************************/ |
| /************************************************************************/ |
| |
| /* This is used to implement string.Formatter.vparse(). It exists so |
| Formatter can share code with the built in unicode.format() method. |
| It's really just a wrapper around MarkupIterator that is callable |
| from Python. */ |
| |
| typedef struct { |
| PyObject_HEAD |
| PyObject *str; |
| MarkupIterator it_markup; |
| } formatteriterobject; |
| |
| static void |
| formatteriter_dealloc(formatteriterobject *it) |
| { |
| Py_XDECREF(it->str); |
| PyObject_FREE(it); |
| } |
| |
| /* returns a tuple: |
| (literal, field_name, format_spec, conversion) |
| |
| literal is any literal text to output. might be zero length |
| field_name is the string before the ':'. might be None |
| format_spec is the string after the ':'. mibht be None |
| conversion is either None, or the string after the '!' |
| */ |
| static PyObject * |
| formatteriter_next(formatteriterobject *it) |
| { |
| SubString literal; |
| SubString field_name; |
| SubString format_spec; |
| Py_UCS4 conversion; |
| int format_spec_needs_expanding; |
| int field_present; |
| int result = MarkupIterator_next(&it->it_markup, &literal, &field_present, |
| &field_name, &format_spec, &conversion, |
| &format_spec_needs_expanding); |
| |
| /* all of the SubString objects point into it->str, so no |
| memory management needs to be done on them */ |
| assert(0 <= result && result <= 2); |
| if (result == 0 || result == 1) |
| /* if 0, error has already been set, if 1, iterator is empty */ |
| return NULL; |
| else { |
| PyObject *literal_str = NULL; |
| PyObject *field_name_str = NULL; |
| PyObject *format_spec_str = NULL; |
| PyObject *conversion_str = NULL; |
| PyObject *tuple = NULL; |
| |
| literal_str = SubString_new_object(&literal); |
| if (literal_str == NULL) |
| goto done; |
| |
| field_name_str = SubString_new_object(&field_name); |
| if (field_name_str == NULL) |
| goto done; |
| |
| /* if field_name is non-zero length, return a string for |
| format_spec (even if zero length), else return None */ |
| format_spec_str = (field_present ? |
| SubString_new_object_or_empty : |
| SubString_new_object)(&format_spec); |
| if (format_spec_str == NULL) |
| goto done; |
| |
| /* if the conversion is not specified, return a None, |
| otherwise create a one length string with the conversion |
| character */ |
| if (conversion == '\0') { |
| conversion_str = Py_None; |
| Py_INCREF(conversion_str); |
| } |
| else |
| conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, |
| &conversion, 1); |
| if (conversion_str == NULL) |
| goto done; |
| |
| tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str, |
| conversion_str); |
| done: |
| Py_XDECREF(literal_str); |
| Py_XDECREF(field_name_str); |
| Py_XDECREF(format_spec_str); |
| Py_XDECREF(conversion_str); |
| return tuple; |
| } |
| } |
| |
| static PyMethodDef formatteriter_methods[] = { |
| {NULL, NULL} /* sentinel */ |
| }; |
| |
| static PyTypeObject PyFormatterIter_Type = { |
| PyVarObject_HEAD_INIT(&PyType_Type, 0) |
| "formatteriterator", /* tp_name */ |
| sizeof(formatteriterobject), /* tp_basicsize */ |
| 0, /* tp_itemsize */ |
| /* methods */ |
| (destructor)formatteriter_dealloc, /* tp_dealloc */ |
| 0, /* tp_print */ |
| 0, /* tp_getattr */ |
| 0, /* tp_setattr */ |
| 0, /* tp_reserved */ |
| 0, /* tp_repr */ |
| 0, /* tp_as_number */ |
| 0, /* tp_as_sequence */ |
| 0, /* tp_as_mapping */ |
| 0, /* tp_hash */ |
| 0, /* tp_call */ |
| 0, /* tp_str */ |
| PyObject_GenericGetAttr, /* tp_getattro */ |
| 0, /* tp_setattro */ |
| 0, /* tp_as_buffer */ |
| Py_TPFLAGS_DEFAULT, /* tp_flags */ |
| 0, /* tp_doc */ |
| 0, /* tp_traverse */ |
| 0, /* tp_clear */ |
| 0, /* tp_richcompare */ |
| 0, /* tp_weaklistoffset */ |
| PyObject_SelfIter, /* tp_iter */ |
| (iternextfunc)formatteriter_next, /* tp_iternext */ |
| formatteriter_methods, /* tp_methods */ |
| 0, |
| }; |
| |
| /* formatter_parser backs string.Formatter.vformat: given a format
|    string it hands back an iterator whose items are tuples describing
|    the parsed pieces.  The iterator is a thin Python-visible shell
|    around the MarkupIterator from stringlib/string_format.h.
|
|    ignored -- not referenced by this function.
|    self    -- the format string; must be a str.
|
|    Returns a new formatteriterator object.  Returns NULL with TypeError
|    set when self is not a str, and NULL when PyUnicode_READY() or the
|    allocation of the iterator fails. */
| static PyObject *
| formatter_parser(PyObject *ignored, PyObject *self)
| {
|     formatteriterobject *parser_iter;
|     Py_ssize_t length;
|
|     /* reject anything that is not a str before touching it */
|     if (!PyUnicode_Check(self)) {
|         PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
|         return NULL;
|     }
|     if (PyUnicode_READY(self) == -1) {
|         return NULL;
|     }
|
|     parser_iter = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
|     if (parser_iter == NULL) {
|         return NULL;
|     }
|
|     /* the iterator holds its own reference to the string it walks;
|        formatteriter_dealloc is expected to drop it */
|     parser_iter->str = self;
|     Py_INCREF(self);
|
|     /* point the embedded MarkupIterator at the whole string */
|     length = PyUnicode_GET_LENGTH(self);
|     MarkupIterator_init(&parser_iter->it_markup, self, 0, length);
|
|     return (PyObject *)parser_iter;
| }
| |
| |
| /************************************************************************/ |
| /*********** fieldnameiterator ******************************************/ |
| /************************************************************************/ |
| |
| |
| /* This is used to implement string.Formatter.vparse(). It parses the
| field name into attribute and item values. It's a Python-callable
| wrapper around FieldNameIterator.
|
| NOTE(review): "vparse()" may be a stale name; the comment on
| formatter_field_name_split() in this file says the machinery serves
| string.Formatter.vformat -- confirm which method is meant. */
| 
| typedef struct {
| PyObject_HEAD
| /* The field-name string being iterated over.  formatter_field_name_split()
|    stores a new reference here to keep the string alive, and
|    fieldnameiter_dealloc() releases it. */
| PyObject *str;
| /* Iteration state: filled in by field_name_split() and advanced by
|    FieldNameIterator_next() on every fieldnameiter_next() call. */
| FieldNameIterator it_field;
| } fieldnameiterobject;
| |
| /* Destructor for fieldnameiterobject: give up the reference to the
|    field-name string (if one was stored), then release the object's own
|    memory. */
| static void
| fieldnameiter_dealloc(fieldnameiterobject *it)
| {
|     PyObject *held_str = it->str;
|
|     Py_XDECREF(held_str);
|     PyObject_FREE(it);
| }
| |
| /* tp_iternext for fieldnameiterobject.
|
|    Each successful step yields a new 2-tuple (is_attr, value):
|      is_attr -- True when the element used attribute syntax ('.foo'),
|                 False when it used index syntax ('[foo]')
|      value   -- an int when the element parsed as an index, otherwise
|                 a str
|
|    Returns NULL when FieldNameIterator_next() reports 0 (an error has
|    already been set) or 1 (the iterator is exhausted), and also when
|    building any of the result objects fails. */
| static PyObject *
| fieldnameiter_next(fieldnameiterobject *it)
| {
|     int status;
|     int is_attr;
|     Py_ssize_t idx;
|     SubString name;
|     PyObject *attr_flag;
|     PyObject *value;
|     PyObject *pair;
|
|     status = FieldNameIterator_next(&it->it_field, &is_attr, &idx, &name);
|     if (status == 0 || status == 1) {
|         /* 0: error already set; 1: nothing left to iterate */
|         return NULL;
|     }
|
|     attr_flag = PyBool_FromLong(is_attr);
|     if (attr_flag == NULL) {
|         return NULL;
|     }
|
|     /* idx == -1 means the element is not numeric, so hand back its text */
|     value = (idx != -1) ? PyLong_FromSsize_t(idx)
|                         : SubString_new_object(&name);
|     if (value == NULL) {
|         Py_DECREF(attr_flag);
|         return NULL;
|     }
|
|     /* PyTuple_Pack takes its own references; ours are dropped whether
|        or not it succeeded */
|     pair = PyTuple_Pack(2, attr_flag, value);
|     Py_DECREF(attr_flag);
|     Py_DECREF(value);
|     return pair;
| }
| |
| /* Method table for the fieldnameiterator type.  The type exposes no
|    methods of its own, so the table holds nothing but the terminating
|    entry. */
| static PyMethodDef fieldnameiter_methods[] = {
|     {NULL, NULL, 0, NULL}               /* end-of-table marker */
| };
| |
| /* Type object for fieldnameiterobject, the iterator that
|    formatter_field_name_split() allocates with PyObject_New().
|
|    Only a handful of slots are populated: deallocation
|    (fieldnameiter_dealloc), generic attribute lookup, and the iterator
|    protocol (PyObject_SelfIter for tp_iter, fieldnameiter_next for
|    tp_iternext).  Every other slot listed here is 0.
|
|    The initializer is positional, so the order of the entries must match
|    the field order of PyTypeObject; the trailing comments name the slot
|    each value lands in. */
| static PyTypeObject PyFieldNameIter_Type = {
| PyVarObject_HEAD_INIT(&PyType_Type, 0)
| "fieldnameiterator", /* tp_name */
| sizeof(fieldnameiterobject), /* tp_basicsize */
| 0, /* tp_itemsize */
| /* methods */
| (destructor)fieldnameiter_dealloc, /* tp_dealloc */
| 0, /* tp_print */
| 0, /* tp_getattr */
| 0, /* tp_setattr */
| 0, /* tp_reserved */
| 0, /* tp_repr */
| 0, /* tp_as_number */
| 0, /* tp_as_sequence */
| 0, /* tp_as_mapping */
| 0, /* tp_hash */
| 0, /* tp_call */
| 0, /* tp_str */
| PyObject_GenericGetAttr, /* tp_getattro */
| 0, /* tp_setattro */
| 0, /* tp_as_buffer */
| Py_TPFLAGS_DEFAULT, /* tp_flags */
| 0, /* tp_doc */
| 0, /* tp_traverse */
| 0, /* tp_clear */
| 0, /* tp_richcompare */
| 0, /* tp_weaklistoffset */
| PyObject_SelfIter, /* tp_iter */
| (iternextfunc)fieldnameiter_next, /* tp_iternext */
| fieldnameiter_methods, /* tp_methods */
| /* slot following tp_methods; presumably tp_members -- confirm against PyTypeObject */
| 0};
| |
| /* formatter_field_name_split backs string.Formatter.vformat.  It takes
|    a PEP 3101 "field name" and returns the 2-tuple (first, rest):
|      first -- the part before the first '.' or '[', as an int when it
|               parses as an index and as a str otherwise
|      rest  -- a fieldnameiterator over the remainder of the field name
|    It is a wrapper around field_name_split() from
|    stringlib/string_format.h.
|
|    ignored -- not referenced by this function.
|    self    -- the field name; must be a str.
|
|    Returns NULL with TypeError set when self is not a str, and NULL
|    when PyUnicode_READY(), the iterator allocation, field_name_split()
|    or the construction of a result object fails. */
| static PyObject *
| formatter_field_name_split(PyObject *ignored, PyObject *self)
| {
|     SubString head;
|     Py_ssize_t head_index;
|     fieldnameiterobject *rest_iter;
|     PyObject *head_obj = NULL;
|     PyObject *pair = NULL;
|
|     if (!PyUnicode_Check(self)) {
|         PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
|         return NULL;
|     }
|     if (PyUnicode_READY(self) == -1) {
|         return NULL;
|     }
|
|     rest_iter = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
|     if (rest_iter == NULL) {
|         return NULL;
|     }
|
|     /* the iterator keeps a reference of its own, purely so that the
|        field name outlives it */
|     rest_iter->str = self;
|     Py_INCREF(self);
|
|     /* auto_number is passed as NULL; in that case an empty "first"
|        comes back as an empty string */
|     if (field_name_split(self, 0, PyUnicode_GET_LENGTH(self),
|                          &head, &head_index, &rest_iter->it_field, NULL)) {
|         /* prefer an integer for "first"; fall back to its text */
|         head_obj = (head_index != -1) ? PyLong_FromSsize_t(head_index)
|                                       : SubString_new_object(&head);
|         if (head_obj != NULL) {
|             pair = PyTuple_Pack(2, head_obj, (PyObject *)rest_iter);
|         }
|     }
|
|     /* on success the tuple holds its own references, so the local ones
|        are released on every path */
|     Py_DECREF(rest_iter);
|     Py_XDECREF(head_obj);
|     return pair;
| }