Optimize slicing of bytes and bytearray by filling the result object directly instead of building the slice in a temporary buffer and copying it.

This restores the behavior that was present in Python 2.x.
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index a4a2e65..d3b598e 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -951,19 +951,17 @@
 				slicelength);
 		}
 		else {
-			source_buf = PyBytes_AsString((PyObject*)self);
-			result_buf = (char *)PyMem_Malloc(slicelength);
-			if (result_buf == NULL)
-				return PyErr_NoMemory();
+			source_buf = PyBytes_AS_STRING(self);
+			result = PyBytes_FromStringAndSize(NULL, slicelength);
+			if (result == NULL)
+				return NULL;
 
+			result_buf = PyBytes_AS_STRING(result);
 			for (cur = start, i = 0; i < slicelength;
 			     cur += step, i++) {
 				result_buf[i] = source_buf[cur];
 			}
 
-			result = PyBytes_FromStringAndSize(result_buf,
-							    slicelength);
-			PyMem_Free(result_buf);
 			return result;
 		}
 	}