Merged revisions 61750,61752,61754,61756,61760,61763,61768,61772,61775,61805,61809,61812,61819,61917,61920,61930,61933-61934 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/trunk-bytearray ........ r61750 | christian.heimes | 2008-03-22 20:47:44 +0100 (Sat, 22 Mar 2008) | 1 line Copied files from py3k w/o modifications ........ r61752 | christian.heimes | 2008-03-22 20:53:20 +0100 (Sat, 22 Mar 2008) | 7 lines Take One * Added initialization code, warnings, flags etc. to the appropriate places * Added new buffer interface to string type * Modified tests * Modified Makefile.pre.in to compile the new files * Added bytesobject.c to Python.h ........ r61754 | christian.heimes | 2008-03-22 21:22:19 +0100 (Sat, 22 Mar 2008) | 2 lines Disabled bytearray.extend for now since it causes an infinite recursion Fixed serveral unit tests ........ r61756 | christian.heimes | 2008-03-22 21:43:38 +0100 (Sat, 22 Mar 2008) | 5 lines Added PyBytes support to several places: str + bytearray ord(bytearray) bytearray(str, encoding) ........ r61760 | christian.heimes | 2008-03-22 21:56:32 +0100 (Sat, 22 Mar 2008) | 1 line Fixed more unit tests related to type('') is not unicode ........ r61763 | christian.heimes | 2008-03-22 22:20:28 +0100 (Sat, 22 Mar 2008) | 2 lines Fixed more unit tests Fixed bytearray.extend ........ r61768 | christian.heimes | 2008-03-22 22:40:50 +0100 (Sat, 22 Mar 2008) | 1 line Implemented old buffer interface for bytearray ........ r61772 | christian.heimes | 2008-03-22 23:24:52 +0100 (Sat, 22 Mar 2008) | 1 line Added backport of the io module ........ r61775 | christian.heimes | 2008-03-23 03:50:49 +0100 (Sun, 23 Mar 2008) | 1 line Fix str assignement to bytearray. Assignment of a str of size 1 is interpreted as a single byte ........ r61805 | christian.heimes | 2008-03-23 19:33:48 +0100 (Sun, 23 Mar 2008) | 3 lines Fixed more tests Fixed bytearray() comparsion with unicode() Fixed iterator assignment of bytearray ........ r61809 | christian.heimes | 2008-03-23 21:02:21 +0100 (Sun, 23 Mar 2008) | 2 lines str(bytesarray()) now returns the bytes and not the representation of the bytearray object Enabled and fixed more unit tests ........ r61812 | christian.heimes | 2008-03-23 21:53:08 +0100 (Sun, 23 Mar 2008) | 3 lines Clear error PyNumber_AsSsize_t() fails Use CHARMASK for ob_svall access disabled a test with memoryview again ........ r61819 | christian.heimes | 2008-03-23 23:05:57 +0100 (Sun, 23 Mar 2008) | 1 line Untested updates to the PCBuild directory ........ r61917 | christian.heimes | 2008-03-26 00:57:06 +0100 (Wed, 26 Mar 2008) | 1 line The type system of Python 2.6 has subtle differences to 3.0's. I've removed the Py_TPFLAGS_BASETYPE flags from bytearray for now. bytearray can't be subclasses until the issues with bytearray subclasses are fixed. ........ r61920 | christian.heimes | 2008-03-26 01:44:08 +0100 (Wed, 26 Mar 2008) | 2 lines Disabled last failing test I don't understand what the test is testing and how it suppose to work. Ka-Ping, please check it out. ........ r61930 | christian.heimes | 2008-03-26 12:46:18 +0100 (Wed, 26 Mar 2008) | 1 line Re-enabled bytes warning code ........ r61933 | christian.heimes | 2008-03-26 13:20:46 +0100 (Wed, 26 Mar 2008) | 1 line Fixed a bug in the new buffer protocol. The buffer slots weren't copied into a subclass. ........ r61934 | christian.heimes | 2008-03-26 13:25:09 +0100 (Wed, 26 Mar 2008) | 1 line Re-enabled bytearray subclassing - all tests are passing. ........

commit: 1a6387e68300b6f554f4f4f044491b7034733442 [log] [tgz]
author: Christian Heimes <christian@cheimes.de> Wed Mar 26 12:49:49 2008 +0000
committer: Christian Heimes <christian@cheimes.de> Wed Mar 26 12:49:49 2008 +0000
tree: 315d25fb2954657cb9ecdac96c90be822c8047d9
parent: 630b57a0a17aac91e9e411143fa4c7d8b9387c1c [diff] [blame]
diff --git a/Objects/stringlib/transmogrify.h b/Objects/stringlib/transmogrify.h
new file mode 100644
index 0000000..fe478c3
--- /dev/null
+++ b/Objects/stringlib/transmogrify.h

@@ -0,0 +1,362 @@
+/* NOTE: this API is -ONLY- for use with single byte character strings. */
+/* Do not use it with Unicode. */
+
+#include "bytes_methods.h"
+
+#ifndef STRINGLIB_MUTABLE
+#warning "STRINGLIB_MUTABLE not defined before #include, assuming 0"
+#define STRINGLIB_MUTABLE 0
+#endif
+
+/* the more complicated methods.  parts of these should be pulled out into the
+   shared code in bytes_methods.c to cut down on duplicate code bloat.  */
+
+PyDoc_STRVAR(expandtabs__doc__,
+"B.expandtabs([tabsize]) -> copy of B\n\
+\n\
+Return a copy of B where all tab characters are expanded using spaces.\n\
+If tabsize is not given, a tab size of 8 characters is assumed.");
+
+static PyObject*
+stringlib_expandtabs(PyObject *self, PyObject *args)
+{
+    const char *e, *p;
+    char *q;
+    Py_ssize_t i, j, old_j;
+    PyObject *u;
+    int tabsize = 8;
+
+    if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
+	return NULL;
+
+    /* First pass: determine size of output string */
+    i = j = old_j = 0;
+    e = STRINGLIB_STR(self) + STRINGLIB_LEN(self);
+    for (p = STRINGLIB_STR(self); p < e; p++)
+        if (*p == '\t') {
+	    if (tabsize > 0) {
+		j += tabsize - (j % tabsize);
+                /* XXX: this depends on a signed integer overflow to < 0 */
+                /* C compilers, including gcc, do -NOT- guarantee this. */
+		if (old_j > j) {
+		    PyErr_SetString(PyExc_OverflowError,
+				    "result is too long");
+		    return NULL;
+		}
+		old_j = j;
+            }
+	}
+        else {
+            j++;
+            if (*p == '\n' || *p == '\r') {
+                i += j;
+                old_j = j = 0;
+                /* XXX: this depends on a signed integer overflow to < 0 */
+                /* C compilers, including gcc, do -NOT- guarantee this. */
+                if (i < 0) {
+                    PyErr_SetString(PyExc_OverflowError,
+                                    "result is too long");
+                    return NULL;
+                }
+            }
+        }
+
+    if ((i + j) < 0) {
+        /* XXX: this depends on a signed integer overflow to < 0 */
+        /* C compilers, including gcc, do -NOT- guarantee this. */
+        PyErr_SetString(PyExc_OverflowError, "result is too long");
+        return NULL;
+    }
+
+    /* Second pass: create output string and fill it */
+    u = STRINGLIB_NEW(NULL, i + j);
+    if (!u)
+        return NULL;
+
+    j = 0;
+    q = STRINGLIB_STR(u);
+
+    for (p = STRINGLIB_STR(self); p < e; p++)
+        if (*p == '\t') {
+	    if (tabsize > 0) {
+		i = tabsize - (j % tabsize);
+		j += i;
+		while (i--)
+		    *q++ = ' ';
+	    }
+	}
+	else {
+            j++;
+	    *q++ = *p;
+            if (*p == '\n' || *p == '\r')
+                j = 0;
+        }
+
+    return u;
+}
+
+Py_LOCAL_INLINE(PyObject *)
+pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
+{
+    PyObject *u;
+
+    if (left < 0)
+        left = 0;
+    if (right < 0)
+        right = 0;
+
+    if (left == 0 && right == 0 && STRINGLIB_CHECK_EXACT(self)) {
+#if STRINGLIB_MUTABLE
+        /* We're defined as returning a copy;  If the object is mutable
+         * that means we must make an identical copy. */
+        return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+#else
+        Py_INCREF(self);
+        return (PyObject *)self;
+#endif /* STRINGLIB_MUTABLE */
+    }
+
+    u = STRINGLIB_NEW(NULL,
+				   left + STRINGLIB_LEN(self) + right);
+    if (u) {
+        if (left)
+            memset(STRINGLIB_STR(u), fill, left);
+        Py_MEMCPY(STRINGLIB_STR(u) + left,
+	       STRINGLIB_STR(self),
+	       STRINGLIB_LEN(self));
+        if (right)
+            memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self),
+		   fill, right);
+    }
+
+    return u;
+}
+
+PyDoc_STRVAR(ljust__doc__,
+"B.ljust(width[, fillchar]) -> copy of B\n"
+"\n"
+"Return B left justified in a string of length width. Padding is\n"
+"done using the specified fill character (default is a space).");
+
+static PyObject *
+stringlib_ljust(PyObject *self, PyObject *args)
+{
+    Py_ssize_t width;
+    char fillchar = ' ';
+
+    if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
+        return NULL;
+
+    if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) {
+#if STRINGLIB_MUTABLE
+        /* We're defined as returning a copy;  If the object is mutable
+         * that means we must make an identical copy. */
+        return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+#else
+        Py_INCREF(self);
+        return (PyObject*) self;
+#endif
+    }
+
+    return pad(self, 0, width - STRINGLIB_LEN(self), fillchar);
+}
+
+
+PyDoc_STRVAR(rjust__doc__,
+"B.rjust(width[, fillchar]) -> copy of B\n"
+"\n"
+"Return B right justified in a string of length width. Padding is\n"
+"done using the specified fill character (default is a space)");
+
+static PyObject *
+stringlib_rjust(PyObject *self, PyObject *args)
+{
+    Py_ssize_t width;
+    char fillchar = ' ';
+
+    if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
+        return NULL;
+
+    if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) {
+#if STRINGLIB_MUTABLE
+        /* We're defined as returning a copy;  If the object is mutable
+         * that means we must make an identical copy. */
+        return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+#else
+        Py_INCREF(self);
+        return (PyObject*) self;
+#endif
+    }
+
+    return pad(self, width - STRINGLIB_LEN(self), 0, fillchar);
+}
+
+
+PyDoc_STRVAR(center__doc__,
+"B.center(width[, fillchar]) -> copy of B\n"
+"\n"
+"Return B centered in a string of length width.  Padding is\n"
+"done using the specified fill character (default is a space).");
+
+static PyObject *
+stringlib_center(PyObject *self, PyObject *args)
+{
+    Py_ssize_t marg, left;
+    Py_ssize_t width;
+    char fillchar = ' ';
+
+    if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
+        return NULL;
+
+    if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) {
+#if STRINGLIB_MUTABLE
+        /* We're defined as returning a copy;  If the object is mutable
+         * that means we must make an identical copy. */
+        return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+#else
+        Py_INCREF(self);
+        return (PyObject*) self;
+#endif
+    }
+
+    marg = width - STRINGLIB_LEN(self);
+    left = marg / 2 + (marg & width & 1);
+
+    return pad(self, left, marg - left, fillchar);
+}
+
+PyDoc_STRVAR(zfill__doc__,
+"B.zfill(width) -> copy of B\n"
+"\n"
+"Pad a numeric string B with zeros on the left, to fill a field\n"
+"of the specified width.  B is never truncated.");
+
+static PyObject *
+stringlib_zfill(PyObject *self, PyObject *args)
+{
+    Py_ssize_t fill;
+    PyObject *s;
+    char *p;
+    Py_ssize_t width;
+
+    if (!PyArg_ParseTuple(args, "n:zfill", &width))
+        return NULL;
+
+    if (STRINGLIB_LEN(self) >= width) {
+        if (STRINGLIB_CHECK_EXACT(self)) {
+#if STRINGLIB_MUTABLE
+            /* We're defined as returning a copy;  If the object is mutable
+             * that means we must make an identical copy. */
+            return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+#else
+            Py_INCREF(self);
+            return (PyObject*) self;
+#endif
+        }
+        else
+            return STRINGLIB_NEW(
+                STRINGLIB_STR(self),
+                STRINGLIB_LEN(self)
+            );
+    }
+
+    fill = width - STRINGLIB_LEN(self);
+
+    s = pad(self, fill, 0, '0');
+
+    if (s == NULL)
+        return NULL;
+
+    p = STRINGLIB_STR(s);
+    if (p[fill] == '+' || p[fill] == '-') {
+        /* move sign to beginning of string */
+        p[0] = p[fill];
+        p[fill] = '0';
+    }
+
+    return (PyObject*) s;
+}
+
+
+#define _STRINGLIB_SPLIT_APPEND(data, left, right)		\
+	str = STRINGLIB_NEW((data) + (left),	                \
+					 (right) - (left));	\
+	if (str == NULL)					\
+		goto onError;					\
+	if (PyList_Append(list, str)) {				\
+		Py_DECREF(str);					\
+		goto onError;					\
+	}							\
+	else							\
+		Py_DECREF(str);
+
+PyDoc_STRVAR(splitlines__doc__,
+"B.splitlines([keepends]) -> list of lines\n\
+\n\
+Return a list of the lines in B, breaking at line boundaries.\n\
+Line breaks are not included in the resulting list unless keepends\n\
+is given and true.");
+
+static PyObject*
+stringlib_splitlines(PyObject *self, PyObject *args)
+{
+    register Py_ssize_t i;
+    register Py_ssize_t j;
+    Py_ssize_t len;
+    int keepends = 0;
+    PyObject *list;
+    PyObject *str;
+    char *data;
+
+    if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
+        return NULL;
+
+    data = STRINGLIB_STR(self);
+    len = STRINGLIB_LEN(self);
+
+    /* This does not use the preallocated list because splitlines is
+       usually run with hundreds of newlines.  The overhead of
+       switching between PyList_SET_ITEM and append causes about a
+       2-3% slowdown for that common case.  A smarter implementation
+       could move the if check out, so the SET_ITEMs are done first
+       and the appends only done when the prealloc buffer is full.
+       That's too much work for little gain.*/
+
+    list = PyList_New(0);
+    if (!list)
+        goto onError;
+
+    for (i = j = 0; i < len; ) {
+	Py_ssize_t eol;
+
+	/* Find a line and append it */
+	while (i < len && data[i] != '\n' && data[i] != '\r')
+	    i++;
+
+	/* Skip the line break reading CRLF as one line break */
+	eol = i;
+	if (i < len) {
+	    if (data[i] == '\r' && i + 1 < len &&
+		data[i+1] == '\n')
+		i += 2;
+	    else
+		i++;
+	    if (keepends)
+		eol = i;
+	}
+	_STRINGLIB_SPLIT_APPEND(data, j, eol);
+	j = i;
+    }
+    if (j < len) {
+	_STRINGLIB_SPLIT_APPEND(data, j, len);
+    }
+
+    return list;
+
+ onError:
+    Py_XDECREF(list);
+    return NULL;
+}
+
+#undef _STRINGLIB_SPLIT_APPEND
+
commit	1a6387e68300b6f554f4f4f044491b7034733442	[log] [tgz]
author	Christian Heimes <christian@cheimes.de>	Wed Mar 26 12:49:49 2008 +0000
committer	Christian Heimes <christian@cheimes.de>	Wed Mar 26 12:49:49 2008 +0000
tree	315d25fb2954657cb9ecdac96c90be822c8047d9
parent	630b57a0a17aac91e9e411143fa4c7d8b9387c1c [diff] [blame]