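Optimize slicing of bytes and bytearray by avoiding useless copying: the
result object is now allocated up front and the selected bytes are written
directly into its buffer, instead of being gathered in a temporary
PyMem_Malloc() buffer that was then copied into a new object and freed.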

This restores the behavior that was present in Python 2.x.
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c
index 428ee57..fc12452 100644
--- a/Objects/bytearrayobject.c
+++ b/Objects/bytearrayobject.c
@@ -411,18 +411,18 @@
         }
         else {
             char *source_buf = PyByteArray_AS_STRING(self);
-            char *result_buf = (char *)PyMem_Malloc(slicelength);
+            char *result_buf;
             PyObject *result;
 
-            if (result_buf == NULL)
-                return PyErr_NoMemory();
+            result = PyByteArray_FromStringAndSize(NULL, slicelength);
+            if (result == NULL)
+                return NULL;
 
+            result_buf = PyByteArray_AS_STRING(result);
             for (cur = start, i = 0; i < slicelength;
                  cur += step, i++) {
                      result_buf[i] = source_buf[cur];
             }
-            result = PyByteArray_FromStringAndSize(result_buf, slicelength);
-            PyMem_Free(result_buf);
             return result;
         }
     }
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index a4a2e65..d3b598e 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -951,19 +951,17 @@
 				slicelength);
 		}
 		else {
-			source_buf = PyBytes_AsString((PyObject*)self);
-			result_buf = (char *)PyMem_Malloc(slicelength);
-			if (result_buf == NULL)
-				return PyErr_NoMemory();
+			source_buf = PyBytes_AS_STRING(self);
+			result = PyBytes_FromStringAndSize(NULL, slicelength);
+			if (result == NULL)
+				return NULL;
 
+			result_buf = PyBytes_AS_STRING(result);
 			for (cur = start, i = 0; i < slicelength;
 			     cur += step, i++) {
 				result_buf[i] = source_buf[cur];
 			}
 
-			result = PyBytes_FromStringAndSize(result_buf,
-							    slicelength);
-			PyMem_Free(result_buf);
 			return result;
 		}
 	}