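More on bug 460020: many string optimizations that return the original
object unchanged are now inhibited for instances of string subclasses, so
that the result is always an exact str. This covers all "the usual" ones
(concatenation, repetition, slicing, etc.), plus replace, translate,
ljust, rjust, center and strip. I don't know how to be sure they've all
been caught.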
Open question: should we complain if someone tries to intern an instance
of a string subclass? I hate to slow down any code on those paths.
diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py
index 11a3a5d..a29eb23 100644
--- a/Lib/test/test_descr.py
+++ b/Lib/test/test_descr.py
@@ -1481,9 +1481,32 @@
verify(str(s) == "12345")
verify(str(s).__class__ is str)
- s = madstring("\x00" * 5)
- verify(str(s) == "\x00" * 5)
+ base = "\x00" * 5
+ s = madstring(base)
+ verify(str(s) == base)
verify(str(s).__class__ is str)
+ verify((s + "").__class__ is str)
+ verify(("" + s).__class__ is str)
+ verify((s * 0).__class__ is str)
+ verify((s * 1).__class__ is str)
+ verify((s * 2).__class__ is str)
+ verify(s[:].__class__ is str)
+ verify(s[0:0].__class__ is str)
+ verify(s.strip().__class__ is str)
+ identitytab = ''.join([chr(i) for i in range(256)])
+ verify(s.translate(identitytab).__class__ is str)
+ verify(s.translate(identitytab) == base)
+ verify(s.translate(identitytab, "x").__class__ is str)
+ verify(s.translate(identitytab, "x") == base)
+ verify(s.translate(identitytab, "\x00") == "")
+ verify(s.replace("x", "x").__class__ is str)
+ verify(s.replace("x", "x") == base)
+ verify(s.ljust(len(s)).__class__ is str)
+ verify(s.ljust(len(s)) == base)
+ verify(s.rjust(len(s)).__class__ is str)
+ verify(s.rjust(len(s)) == base)
+ verify(s.center(len(s)).__class__ is str)
+ verify(s.center(len(s)) == base)
class madunicode(unicode):
_rev = None
diff --git a/Objects/stringobject.c b/Objects/stringobject.c
index 9c873ec..b220859 100644
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -673,11 +673,12 @@
}
#define b ((PyStringObject *)bb)
/* Optimize cases with empty left or right operand */
- if (a->ob_size == 0) {
- Py_INCREF(bb);
- return bb;
- }
- if (b->ob_size == 0) {
+ if ((a->ob_size == 0 || b->ob_size == 0) &&
+ PyString_CheckExact(a) && PyString_CheckExact(b)) {
+ if (a->ob_size == 0) {
+ Py_INCREF(bb);
+ return bb;
+ }
Py_INCREF(a);
return (PyObject *)a;
}
@@ -719,7 +720,7 @@
"repeated string is too long");
return NULL;
}
- if (size == a->ob_size) {
+ if (size == a->ob_size && PyString_CheckExact(a)) {
Py_INCREF(a);
return (PyObject *)a;
}
@@ -759,7 +760,8 @@
j = 0; /* Avoid signed/unsigned bug in next line */
if (j > a->ob_size)
j = a->ob_size;
- if (i == 0 && j == a->ob_size) { /* It's the same as a */
+ if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
+ /* It's the same as a */
Py_INCREF(a);
return (PyObject *)a;
}
@@ -1378,7 +1380,7 @@
j++;
}
- if (i == 0 && j == len) {
+ if (i == 0 && j == len && PyString_CheckExact(self)) {
Py_INCREF(self);
return (PyObject*)self;
}
@@ -1735,7 +1737,7 @@
if (Py_CHARMASK((*output++ = table[c])) != c)
changed = 1;
}
- if (changed)
+ if (changed || !PyString_CheckExact(input_obj))
return result;
Py_DECREF(result);
Py_INCREF(input_obj);
@@ -1755,7 +1757,7 @@
continue;
changed = 1;
}
- if (!changed) {
+ if (!changed && PyString_CheckExact(input_obj)) {
Py_DECREF(result);
Py_INCREF(input_obj);
return input_obj;
@@ -1917,7 +1919,8 @@
{
const char *str = PyString_AS_STRING(self), *sub, *repl;
char *new_s;
- int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
+ const int len = PyString_GET_SIZE(self);
+ int sub_len, repl_len, out_len;
int count = -1;
PyObject *new;
PyObject *subobj, *replobj;
@@ -1960,9 +1963,16 @@
return NULL;
}
if (out_len == -1) {
- /* we're returning another reference to self */
- new = (PyObject*)self;
- Py_INCREF(new);
+ if (PyString_CheckExact(self)) {
+ /* we're returning another reference to self */
+ new = (PyObject*)self;
+ Py_INCREF(new);
+ }
+ else {
+ new = PyString_FromStringAndSize(str, len);
+ if (new == NULL)
+ return NULL;
+ }
}
else {
new = PyString_FromStringAndSize(new_s, out_len);
@@ -2182,11 +2192,8 @@
return u;
}
-static
-PyObject *pad(PyStringObject *self,
- int left,
- int right,
- char fill)
+static PyObject *
+pad(PyStringObject *self, int left, int right, char fill)
{
PyObject *u;
@@ -2195,7 +2202,7 @@
if (right < 0)
right = 0;
- if (left == 0 && right == 0) {
+ if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Py_INCREF(self);
return (PyObject *)self;
}
@@ -2217,10 +2224,10 @@
}
static char ljust__doc__[] =
-"S.ljust(width) -> string\n\
-\n\
-Return S left justified in a string of length width. Padding is\n\
-done using spaces.";
+"S.ljust(width) -> string\n"
+"\n"
+"Return S left justified in a string of length width. Padding is\n"
+"done using spaces.";
static PyObject *
string_ljust(PyStringObject *self, PyObject *args)
@@ -2229,7 +2236,7 @@
if (!PyArg_ParseTuple(args, "i:ljust", &width))
return NULL;
- if (PyString_GET_SIZE(self) >= width) {
+ if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Py_INCREF(self);
return (PyObject*) self;
}
@@ -2239,10 +2246,10 @@
static char rjust__doc__[] =
-"S.rjust(width) -> string\n\
-\n\
-Return S right justified in a string of length width. Padding is\n\
-done using spaces.";
+"S.rjust(width) -> string\n"
+"\n"
+"Return S right justified in a string of length width. Padding is\n"
+"done using spaces.";
static PyObject *
string_rjust(PyStringObject *self, PyObject *args)
@@ -2251,7 +2258,7 @@
if (!PyArg_ParseTuple(args, "i:rjust", &width))
return NULL;
- if (PyString_GET_SIZE(self) >= width) {
+ if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Py_INCREF(self);
return (PyObject*) self;
}
@@ -2261,10 +2268,10 @@
static char center__doc__[] =
-"S.center(width) -> string\n\
-\n\
-Return S centered in a string of length width. Padding is done\n\
-using spaces.";
+"S.center(width) -> string\n"
+"\n"
+"Return S centered in a string of length width. Padding is done\n"
+"using spaces.";
static PyObject *
string_center(PyStringObject *self, PyObject *args)
@@ -2275,7 +2282,7 @@
if (!PyArg_ParseTuple(args, "i:center", &width))
return NULL;
- if (PyString_GET_SIZE(self) >= width) {
+ if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Py_INCREF(self);
return (PyObject*) self;
}
@@ -2286,51 +2293,11 @@
return pad(self, left, marg - left, ' ');
}
-#if 0
-static char zfill__doc__[] =
-"S.zfill(width) -> string\n\
-\n\
-Pad a numeric string x with zeros on the left, to fill a field\n\
-of the specified width. The string x is never truncated.";
-
-static PyObject *
-string_zfill(PyStringObject *self, PyObject *args)
-{
- int fill;
- PyObject *u;
- char *str;
-
- int width;
- if (!PyArg_ParseTuple(args, "i:zfill", &width))
- return NULL;
-
- if (PyString_GET_SIZE(self) >= width) {
- Py_INCREF(self);
- return (PyObject*) self;
- }
-
- fill = width - PyString_GET_SIZE(self);
-
- u = pad(self, fill, 0, '0');
- if (u == NULL)
- return NULL;
-
- str = PyString_AS_STRING(u);
- if (str[fill] == '+' || str[fill] == '-') {
- /* move sign to beginning of string */
- str[0] = str[fill];
- str[fill] = '0';
- }
-
- return u;
-}
-#endif
-
static char isspace__doc__[] =
-"S.isspace() -> int\n\
-\n\
-Return 1 if there are only whitespace characters in S,\n\
-0 otherwise.";
+"S.isspace() -> int\n"
+"\n"
+"Return 1 if there are only whitespace characters in S,\n"
+"0 otherwise.";
static PyObject*
string_isspace(PyStringObject *self)