blob: b0e1b53d6a7951d057852f39b70e5d671bcabd87 [file] [log] [blame]
Guido van Rossum254348e2007-11-21 19:29:53 +00001/* PyBytes (bytearray) implementation */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003#define PY_SSIZE_T_CLEAN
4#include "Python.h"
Guido van Rossuma0867f72006-05-05 04:34:18 +00005#include "structmember.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00006#include "bytes_methods.h"
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00007
Neal Norwitz6968b052007-02-27 19:02:19 +00008static PyBytesObject *nullbytes = NULL;
9
10void
11PyBytes_Fini(void)
12{
13 Py_CLEAR(nullbytes);
14}
15
16int
17PyBytes_Init(void)
18{
19 nullbytes = PyObject_New(PyBytesObject, &PyBytes_Type);
20 if (nullbytes == NULL)
21 return 0;
22 nullbytes->ob_bytes = NULL;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +000023 Py_Size(nullbytes) = nullbytes->ob_alloc = 0;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000024 nullbytes->ob_exports = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +000025 return 1;
26}
27
28/* end nullbytes support */
29
Guido van Rossumad7d8d12007-04-13 01:39:34 +000030/* Helpers */
31
32static int
33_getbytevalue(PyObject* arg, int *value)
Neal Norwitz6968b052007-02-27 19:02:19 +000034{
Gregory P. Smith60d241f2007-10-16 06:31:30 +000035 long face_value;
36
Christian Heimes217cfd12007-12-02 14:31:20 +000037 if (PyLong_Check(arg)) {
38 face_value = PyLong_AsLong(arg);
Gregory P. Smith60d241f2007-10-16 06:31:30 +000039 if (face_value < 0 || face_value >= 256) {
40 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
41 return 0;
42 }
43 } else {
44 PyErr_Format(PyExc_TypeError, "an integer is required");
Neal Norwitz6968b052007-02-27 19:02:19 +000045 return 0;
46 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +000047
48 *value = face_value;
Neal Norwitz6968b052007-02-27 19:02:19 +000049 return 1;
50}
51
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000052static int
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000053bytes_getbuffer(PyBytesObject *obj, Py_buffer *view, int flags)
Guido van Rossum75d38e92007-08-24 17:33:11 +000054{
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000055 int ret;
56 void *ptr;
57 if (view == NULL) {
58 obj->ob_exports++;
59 return 0;
60 }
Guido van Rossum75d38e92007-08-24 17:33:11 +000061 if (obj->ob_bytes == NULL)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000062 ptr = "";
63 else
64 ptr = obj->ob_bytes;
65 ret = PyBuffer_FillInfo(view, ptr, Py_Size(obj), 0, flags);
66 if (ret >= 0) {
67 obj->ob_exports++;
68 }
69 return ret;
70}
71
72static void
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000073bytes_releasebuffer(PyBytesObject *obj, Py_buffer *view)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000074{
75 obj->ob_exports--;
76}
77
Neal Norwitz2bad9702007-08-27 06:19:22 +000078static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000079_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000080{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +000081 PyBufferProcs *buffer = Py_Type(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000082
Gregory P. Smith60d241f2007-10-16 06:31:30 +000083 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000084 {
85 PyErr_Format(PyExc_TypeError,
86 "Type %.100s doesn't support the buffer API",
87 Py_Type(obj)->tp_name);
88 return -1;
89 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000090
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000091 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
92 return -1;
93 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000094}
95
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000096/* Direct API functions */
97
98PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +000099PyBytes_FromObject(PyObject *input)
100{
101 return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
102 input, NULL);
103}
104
105PyObject *
106PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000107{
108 PyBytesObject *new;
Neal Norwitz61ec0d32007-10-26 06:44:10 +0000109 Py_ssize_t alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000110
Guido van Rossumd624f182006-04-24 13:47:05 +0000111 assert(size >= 0);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000112
113 new = PyObject_New(PyBytesObject, &PyBytes_Type);
114 if (new == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000115 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000116
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000117 if (size == 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000118 new->ob_bytes = NULL;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000119 alloc = 0;
120 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000121 else {
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000122 alloc = size + 1;
123 new->ob_bytes = PyMem_Malloc(alloc);
Guido van Rossumd624f182006-04-24 13:47:05 +0000124 if (new->ob_bytes == NULL) {
125 Py_DECREF(new);
Neal Norwitz16596dd2007-08-30 05:44:54 +0000126 return PyErr_NoMemory();
Guido van Rossumd624f182006-04-24 13:47:05 +0000127 }
128 if (bytes != NULL)
129 memcpy(new->ob_bytes, bytes, size);
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000130 new->ob_bytes[size] = '\0'; /* Trailing null byte */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000131 }
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000132 Py_Size(new) = size;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000133 new->ob_alloc = alloc;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000134 new->ob_exports = 0;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000135
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000136 return (PyObject *)new;
137}
138
139Py_ssize_t
140PyBytes_Size(PyObject *self)
141{
142 assert(self != NULL);
143 assert(PyBytes_Check(self));
144
Guido van Rossum20188312006-05-05 15:15:40 +0000145 return PyBytes_GET_SIZE(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000146}
147
148char *
149PyBytes_AsString(PyObject *self)
150{
151 assert(self != NULL);
152 assert(PyBytes_Check(self));
153
Guido van Rossum20188312006-05-05 15:15:40 +0000154 return PyBytes_AS_STRING(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000155}
156
157int
158PyBytes_Resize(PyObject *self, Py_ssize_t size)
159{
160 void *sval;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000161 Py_ssize_t alloc = ((PyBytesObject *)self)->ob_alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000162
163 assert(self != NULL);
164 assert(PyBytes_Check(self));
165 assert(size >= 0);
166
Guido van Rossuma0867f72006-05-05 04:34:18 +0000167 if (size < alloc / 2) {
168 /* Major downsize; resize down to exact size */
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000169 alloc = size + 1;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000170 }
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000171 else if (size < alloc) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000172 /* Within allocated size; quick exit */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000173 Py_Size(self) = size;
Guido van Rossuma74184e2007-08-29 04:05:57 +0000174 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
Guido van Rossuma0867f72006-05-05 04:34:18 +0000175 return 0;
176 }
177 else if (size <= alloc * 1.125) {
178 /* Moderate upsize; overallocate similar to list_resize() */
179 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
180 }
181 else {
182 /* Major upsize; resize up to exact size */
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000183 alloc = size + 1;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000184 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000185
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000186 if (((PyBytesObject *)self)->ob_exports > 0) {
187 /*
Guido van Rossuma74184e2007-08-29 04:05:57 +0000188 fprintf(stderr, "%d: %s", ((PyBytesObject *)self)->ob_exports,
189 ((PyBytesObject *)self)->ob_bytes);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000190 */
191 PyErr_SetString(PyExc_BufferError,
Guido van Rossuma74184e2007-08-29 04:05:57 +0000192 "Existing exports of data: object cannot be re-sized");
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000193 return -1;
194 }
195
Guido van Rossuma0867f72006-05-05 04:34:18 +0000196 sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000197 if (sval == NULL) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000198 PyErr_NoMemory();
199 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000200 }
201
Guido van Rossumd624f182006-04-24 13:47:05 +0000202 ((PyBytesObject *)self)->ob_bytes = sval;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000203 Py_Size(self) = size;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000204 ((PyBytesObject *)self)->ob_alloc = alloc;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000205 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
206
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000207 return 0;
208}
209
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000210PyObject *
211PyBytes_Concat(PyObject *a, PyObject *b)
212{
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000213 Py_ssize_t size;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000214 Py_buffer va, vb;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000215 PyBytesObject *result = NULL;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000216
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000217 va.len = -1;
218 vb.len = -1;
219 if (_getbuffer(a, &va) < 0 ||
220 _getbuffer(b, &vb) < 0) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000221 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
222 Py_Type(a)->tp_name, Py_Type(b)->tp_name);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000223 goto done;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000224 }
225
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000226 size = va.len + vb.len;
227 if (size < 0) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000228 return PyErr_NoMemory();
Guido van Rossum98297ee2007-11-06 21:34:58 +0000229 goto done;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000230 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000231
232 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
233 if (result != NULL) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000234 memcpy(result->ob_bytes, va.buf, va.len);
235 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000236 }
Guido van Rossum75d38e92007-08-24 17:33:11 +0000237
Guido van Rossum98297ee2007-11-06 21:34:58 +0000238 done:
239 if (va.len != -1)
240 PyObject_ReleaseBuffer(a, &va);
241 if (vb.len != -1)
242 PyObject_ReleaseBuffer(b, &vb);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000243 return (PyObject *)result;
244}
245
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000246/* Functions stuffed into the type object */
247
248static Py_ssize_t
249bytes_length(PyBytesObject *self)
250{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000251 return Py_Size(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000252}
253
254static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000255bytes_iconcat(PyBytesObject *self, PyObject *other)
256{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000257 Py_ssize_t mysize;
Guido van Rossum13e57212006-04-27 22:54:26 +0000258 Py_ssize_t size;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000259 Py_buffer vo;
Guido van Rossum13e57212006-04-27 22:54:26 +0000260
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000261 if (_getbuffer(other, &vo) < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000262 PyErr_Format(PyExc_TypeError, "can't concat bytes to %.100s",
263 Py_Type(self)->tp_name);
264 return NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000265 }
266
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000267 mysize = Py_Size(self);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000268 size = mysize + vo.len;
269 if (size < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000270 PyObject_ReleaseBuffer(other, &vo);
271 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000272 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000273 if (size < self->ob_alloc) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000274 Py_Size(self) = size;
275 self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000276 }
277 else if (PyBytes_Resize((PyObject *)self, size) < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000278 PyObject_ReleaseBuffer(other, &vo);
279 return NULL;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000280 }
281 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
282 PyObject_ReleaseBuffer(other, &vo);
Guido van Rossum13e57212006-04-27 22:54:26 +0000283 Py_INCREF(self);
284 return (PyObject *)self;
285}
286
287static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000288bytes_repeat(PyBytesObject *self, Py_ssize_t count)
289{
290 PyBytesObject *result;
291 Py_ssize_t mysize;
292 Py_ssize_t size;
293
294 if (count < 0)
295 count = 0;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000296 mysize = Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000297 size = mysize * count;
298 if (count != 0 && size / count != mysize)
299 return PyErr_NoMemory();
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000300 result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
Guido van Rossumd624f182006-04-24 13:47:05 +0000301 if (result != NULL && size != 0) {
302 if (mysize == 1)
303 memset(result->ob_bytes, self->ob_bytes[0], size);
304 else {
Guido van Rossum13e57212006-04-27 22:54:26 +0000305 Py_ssize_t i;
Guido van Rossumd624f182006-04-24 13:47:05 +0000306 for (i = 0; i < count; i++)
307 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
308 }
309 }
310 return (PyObject *)result;
311}
312
313static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000314bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
315{
316 Py_ssize_t mysize;
317 Py_ssize_t size;
318
319 if (count < 0)
320 count = 0;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000321 mysize = Py_Size(self);
Guido van Rossum13e57212006-04-27 22:54:26 +0000322 size = mysize * count;
323 if (count != 0 && size / count != mysize)
324 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000325 if (size < self->ob_alloc) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000326 Py_Size(self) = size;
Guido van Rossuma74184e2007-08-29 04:05:57 +0000327 self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000328 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000329 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000330 return NULL;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000331
Guido van Rossum13e57212006-04-27 22:54:26 +0000332 if (mysize == 1)
333 memset(self->ob_bytes, self->ob_bytes[0], size);
334 else {
335 Py_ssize_t i;
336 for (i = 1; i < count; i++)
337 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
338 }
339
340 Py_INCREF(self);
341 return (PyObject *)self;
342}
343
Guido van Rossum13e57212006-04-27 22:54:26 +0000344static PyObject *
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000345bytes_getitem(PyBytesObject *self, Py_ssize_t i)
346{
347 if (i < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000348 i += Py_Size(self);
349 if (i < 0 || i >= Py_Size(self)) {
Guido van Rossum254348e2007-11-21 19:29:53 +0000350 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
Guido van Rossumd624f182006-04-24 13:47:05 +0000351 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000352 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000353 return PyLong_FromLong((unsigned char)(self->ob_bytes[i]));
Guido van Rossumd624f182006-04-24 13:47:05 +0000354}
355
356static PyObject *
Thomas Wouters376446d2006-12-19 08:30:14 +0000357bytes_subscript(PyBytesObject *self, PyObject *item)
Guido van Rossumd624f182006-04-24 13:47:05 +0000358{
Thomas Wouters376446d2006-12-19 08:30:14 +0000359 if (PyIndex_Check(item)) {
360 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000361
Thomas Wouters376446d2006-12-19 08:30:14 +0000362 if (i == -1 && PyErr_Occurred())
363 return NULL;
364
365 if (i < 0)
366 i += PyBytes_GET_SIZE(self);
367
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000368 if (i < 0 || i >= Py_Size(self)) {
Guido van Rossum254348e2007-11-21 19:29:53 +0000369 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
Thomas Wouters376446d2006-12-19 08:30:14 +0000370 return NULL;
371 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000372 return PyLong_FromLong((unsigned char)(self->ob_bytes[i]));
Thomas Wouters376446d2006-12-19 08:30:14 +0000373 }
374 else if (PySlice_Check(item)) {
375 Py_ssize_t start, stop, step, slicelength, cur, i;
376 if (PySlice_GetIndicesEx((PySliceObject *)item,
377 PyBytes_GET_SIZE(self),
378 &start, &stop, &step, &slicelength) < 0) {
379 return NULL;
380 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000381
Thomas Wouters376446d2006-12-19 08:30:14 +0000382 if (slicelength <= 0)
383 return PyBytes_FromStringAndSize("", 0);
384 else if (step == 1) {
385 return PyBytes_FromStringAndSize(self->ob_bytes + start,
386 slicelength);
387 }
388 else {
389 char *source_buf = PyBytes_AS_STRING(self);
390 char *result_buf = (char *)PyMem_Malloc(slicelength);
391 PyObject *result;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000392
Thomas Wouters376446d2006-12-19 08:30:14 +0000393 if (result_buf == NULL)
394 return PyErr_NoMemory();
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000395
Thomas Wouters376446d2006-12-19 08:30:14 +0000396 for (cur = start, i = 0; i < slicelength;
397 cur += step, i++) {
398 result_buf[i] = source_buf[cur];
399 }
400 result = PyBytes_FromStringAndSize(result_buf, slicelength);
401 PyMem_Free(result_buf);
402 return result;
403 }
404 }
405 else {
Guido van Rossum254348e2007-11-21 19:29:53 +0000406 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integers");
Thomas Wouters376446d2006-12-19 08:30:14 +0000407 return NULL;
408 }
409}
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000410
Guido van Rossumd624f182006-04-24 13:47:05 +0000411static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000412bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
Guido van Rossumd624f182006-04-24 13:47:05 +0000413 PyObject *values)
414{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000415 Py_ssize_t avail, needed;
416 void *bytes;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000417 Py_buffer vbytes;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000418 int res = 0;
Guido van Rossumd624f182006-04-24 13:47:05 +0000419
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000420 vbytes.len = -1;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000421 if (values == (PyObject *)self) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000422 /* Make a copy and call this function recursively */
Guido van Rossumd624f182006-04-24 13:47:05 +0000423 int err;
424 values = PyBytes_FromObject(values);
425 if (values == NULL)
426 return -1;
427 err = bytes_setslice(self, lo, hi, values);
428 Py_DECREF(values);
429 return err;
430 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000431 if (values == NULL) {
432 /* del b[lo:hi] */
433 bytes = NULL;
434 needed = 0;
435 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000436 else {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000437 if (_getbuffer(values, &vbytes) < 0) {
438 PyErr_Format(PyExc_TypeError,
439 "can't set bytes slice from %.100s",
440 Py_Type(values)->tp_name);
441 return -1;
442 }
443 needed = vbytes.len;
444 bytes = vbytes.buf;
Guido van Rossumd624f182006-04-24 13:47:05 +0000445 }
446
447 if (lo < 0)
448 lo = 0;
Thomas Wouters9a6e62b2006-08-23 23:20:29 +0000449 if (hi < lo)
450 hi = lo;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000451 if (hi > Py_Size(self))
452 hi = Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000453
454 avail = hi - lo;
455 if (avail < 0)
456 lo = hi = avail = 0;
457
458 if (avail != needed) {
459 if (avail > needed) {
460 /*
461 0 lo hi old_size
462 | |<----avail----->|<-----tomove------>|
463 | |<-needed->|<-----tomove------>|
464 0 lo new_hi new_size
465 */
466 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000467 Py_Size(self) - hi);
Guido van Rossumd624f182006-04-24 13:47:05 +0000468 }
Guido van Rossuma74184e2007-08-29 04:05:57 +0000469 /* XXX(nnorwitz): need to verify this can't overflow! */
Thomas Wouters376446d2006-12-19 08:30:14 +0000470 if (PyBytes_Resize((PyObject *)self,
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000471 Py_Size(self) + needed - avail) < 0) {
472 res = -1;
473 goto finish;
474 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000475 if (avail < needed) {
476 /*
477 0 lo hi old_size
478 | |<-avail->|<-----tomove------>|
479 | |<----needed---->|<-----tomove------>|
480 0 lo new_hi new_size
481 */
482 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000483 Py_Size(self) - lo - needed);
Guido van Rossumd624f182006-04-24 13:47:05 +0000484 }
485 }
486
487 if (needed > 0)
488 memcpy(self->ob_bytes + lo, bytes, needed);
489
Guido van Rossum75d38e92007-08-24 17:33:11 +0000490
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000491 finish:
Guido van Rossum75d38e92007-08-24 17:33:11 +0000492 if (vbytes.len != -1)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000493 PyObject_ReleaseBuffer(values, &vbytes);
494 return res;
Guido van Rossumd624f182006-04-24 13:47:05 +0000495}
496
497static int
498bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
499{
500 Py_ssize_t ival;
501
502 if (i < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000503 i += Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000504
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000505 if (i < 0 || i >= Py_Size(self)) {
Guido van Rossum254348e2007-11-21 19:29:53 +0000506 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
Guido van Rossumd624f182006-04-24 13:47:05 +0000507 return -1;
508 }
509
510 if (value == NULL)
511 return bytes_setslice(self, i, i+1, NULL);
512
Thomas Woutersd204a712006-08-22 13:41:17 +0000513 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000514 if (ival == -1 && PyErr_Occurred())
515 return -1;
516
517 if (ival < 0 || ival >= 256) {
518 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
519 return -1;
520 }
521
522 self->ob_bytes[i] = ival;
523 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000524}
525
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000526static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000527bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
528{
529 Py_ssize_t start, stop, step, slicelen, needed;
530 char *bytes;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000531
Thomas Wouters376446d2006-12-19 08:30:14 +0000532 if (PyIndex_Check(item)) {
533 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
534
535 if (i == -1 && PyErr_Occurred())
536 return -1;
537
538 if (i < 0)
539 i += PyBytes_GET_SIZE(self);
540
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000541 if (i < 0 || i >= Py_Size(self)) {
Guido van Rossum254348e2007-11-21 19:29:53 +0000542 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
Thomas Wouters376446d2006-12-19 08:30:14 +0000543 return -1;
544 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000545
Thomas Wouters376446d2006-12-19 08:30:14 +0000546 if (values == NULL) {
547 /* Fall through to slice assignment */
548 start = i;
549 stop = i + 1;
550 step = 1;
551 slicelen = 1;
552 }
553 else {
554 Py_ssize_t ival = PyNumber_AsSsize_t(values, PyExc_ValueError);
555 if (ival == -1 && PyErr_Occurred())
556 return -1;
557 if (ival < 0 || ival >= 256) {
558 PyErr_SetString(PyExc_ValueError,
559 "byte must be in range(0, 256)");
560 return -1;
561 }
562 self->ob_bytes[i] = (char)ival;
563 return 0;
564 }
565 }
566 else if (PySlice_Check(item)) {
567 if (PySlice_GetIndicesEx((PySliceObject *)item,
568 PyBytes_GET_SIZE(self),
569 &start, &stop, &step, &slicelen) < 0) {
570 return -1;
571 }
572 }
573 else {
Guido van Rossum254348e2007-11-21 19:29:53 +0000574 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integer");
Thomas Wouters376446d2006-12-19 08:30:14 +0000575 return -1;
576 }
577
578 if (values == NULL) {
579 bytes = NULL;
580 needed = 0;
581 }
582 else if (values == (PyObject *)self || !PyBytes_Check(values)) {
583 /* Make a copy an call this function recursively */
584 int err;
585 values = PyBytes_FromObject(values);
586 if (values == NULL)
587 return -1;
588 err = bytes_ass_subscript(self, item, values);
589 Py_DECREF(values);
590 return err;
591 }
592 else {
593 assert(PyBytes_Check(values));
594 bytes = ((PyBytesObject *)values)->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000595 needed = Py_Size(values);
Thomas Wouters376446d2006-12-19 08:30:14 +0000596 }
597 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
598 if ((step < 0 && start < stop) ||
599 (step > 0 && start > stop))
600 stop = start;
601 if (step == 1) {
602 if (slicelen != needed) {
603 if (slicelen > needed) {
604 /*
605 0 start stop old_size
606 | |<---slicelen--->|<-----tomove------>|
607 | |<-needed->|<-----tomove------>|
608 0 lo new_hi new_size
609 */
610 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000611 Py_Size(self) - stop);
Thomas Wouters376446d2006-12-19 08:30:14 +0000612 }
613 if (PyBytes_Resize((PyObject *)self,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000614 Py_Size(self) + needed - slicelen) < 0)
Thomas Wouters376446d2006-12-19 08:30:14 +0000615 return -1;
616 if (slicelen < needed) {
617 /*
618 0 lo hi old_size
619 | |<-avail->|<-----tomove------>|
620 | |<----needed---->|<-----tomove------>|
621 0 lo new_hi new_size
622 */
623 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000624 Py_Size(self) - start - needed);
Thomas Wouters376446d2006-12-19 08:30:14 +0000625 }
626 }
627
628 if (needed > 0)
629 memcpy(self->ob_bytes + start, bytes, needed);
630
631 return 0;
632 }
633 else {
634 if (needed == 0) {
635 /* Delete slice */
636 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000637
Thomas Wouters376446d2006-12-19 08:30:14 +0000638 if (step < 0) {
639 stop = start + 1;
640 start = stop + step * (slicelen - 1) - 1;
641 step = -step;
642 }
643 for (cur = start, i = 0;
644 i < slicelen; cur += step, i++) {
645 Py_ssize_t lim = step - 1;
646
647 if (cur + step >= PyBytes_GET_SIZE(self))
648 lim = PyBytes_GET_SIZE(self) - cur - 1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000649
Thomas Wouters376446d2006-12-19 08:30:14 +0000650 memmove(self->ob_bytes + cur - i,
651 self->ob_bytes + cur + 1, lim);
652 }
653 /* Move the tail of the bytes, in one chunk */
654 cur = start + slicelen*step;
655 if (cur < PyBytes_GET_SIZE(self)) {
656 memmove(self->ob_bytes + cur - slicelen,
657 self->ob_bytes + cur,
658 PyBytes_GET_SIZE(self) - cur);
659 }
660 if (PyBytes_Resize((PyObject *)self,
661 PyBytes_GET_SIZE(self) - slicelen) < 0)
662 return -1;
663
664 return 0;
665 }
666 else {
667 /* Assign slice */
668 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000669
Thomas Wouters376446d2006-12-19 08:30:14 +0000670 if (needed != slicelen) {
671 PyErr_Format(PyExc_ValueError,
672 "attempt to assign bytes of size %zd "
673 "to extended slice of size %zd",
674 needed, slicelen);
675 return -1;
676 }
677 for (cur = start, i = 0; i < slicelen; cur += step, i++)
678 self->ob_bytes[cur] = bytes[i];
679 return 0;
680 }
681 }
682}
683
684static int
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000685bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
686{
Guido van Rossumd624f182006-04-24 13:47:05 +0000687 static char *kwlist[] = {"source", "encoding", "errors", 0};
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000688 PyObject *arg = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +0000689 const char *encoding = NULL;
690 const char *errors = NULL;
691 Py_ssize_t count;
692 PyObject *it;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000693 PyObject *(*iternext)(PyObject *);
694
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000695 if (Py_Size(self) != 0) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000696 /* Empty previous contents (yes, do this first of all!) */
697 if (PyBytes_Resize((PyObject *)self, 0) < 0)
698 return -1;
699 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000700
Guido van Rossumd624f182006-04-24 13:47:05 +0000701 /* Parse arguments */
702 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
703 &arg, &encoding, &errors))
704 return -1;
705
706 /* Make a quick exit if no first argument */
707 if (arg == NULL) {
708 if (encoding != NULL || errors != NULL) {
709 PyErr_SetString(PyExc_TypeError,
710 "encoding or errors without sequence argument");
711 return -1;
712 }
713 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000714 }
715
Guido van Rossumd624f182006-04-24 13:47:05 +0000716 if (PyUnicode_Check(arg)) {
717 /* Encode via the codec registry */
Guido van Rossum4355a472007-05-04 05:00:04 +0000718 PyObject *encoded, *new;
Guido van Rossuma74184e2007-08-29 04:05:57 +0000719 if (encoding == NULL) {
720 PyErr_SetString(PyExc_TypeError,
721 "string argument without an encoding");
722 return -1;
723 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000724 encoded = PyCodec_Encode(arg, encoding, errors);
725 if (encoded == NULL)
726 return -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000727 assert(PyString_Check(encoded));
Guido van Rossuma74184e2007-08-29 04:05:57 +0000728 new = bytes_iconcat(self, encoded);
729 Py_DECREF(encoded);
730 if (new == NULL)
731 return -1;
732 Py_DECREF(new);
733 return 0;
Guido van Rossumd624f182006-04-24 13:47:05 +0000734 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000735
Guido van Rossumd624f182006-04-24 13:47:05 +0000736 /* If it's not unicode, there can't be encoding or errors */
737 if (encoding != NULL || errors != NULL) {
738 PyErr_SetString(PyExc_TypeError,
739 "encoding or errors without a string argument");
740 return -1;
741 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000742
Guido van Rossumd624f182006-04-24 13:47:05 +0000743 /* Is it an int? */
Thomas Woutersd204a712006-08-22 13:41:17 +0000744 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000745 if (count == -1 && PyErr_Occurred())
746 PyErr_Clear();
747 else {
748 if (count < 0) {
749 PyErr_SetString(PyExc_ValueError, "negative count");
750 return -1;
751 }
752 if (count > 0) {
753 if (PyBytes_Resize((PyObject *)self, count))
754 return -1;
755 memset(self->ob_bytes, 0, count);
756 }
757 return 0;
758 }
Guido van Rossum75d38e92007-08-24 17:33:11 +0000759
Guido van Rossum254348e2007-11-21 19:29:53 +0000760 /* Use the buffer API */
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000761 if (PyObject_CheckBuffer(arg)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000762 Py_ssize_t size;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000763 Py_buffer view;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000764 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000765 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000766 size = view.len;
767 if (PyBytes_Resize((PyObject *)self, size) < 0) goto fail;
768 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
769 goto fail;
770 PyObject_ReleaseBuffer(arg, &view);
Guido van Rossumd624f182006-04-24 13:47:05 +0000771 return 0;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000772 fail:
773 PyObject_ReleaseBuffer(arg, &view);
774 return -1;
Guido van Rossumd624f182006-04-24 13:47:05 +0000775 }
776
777 /* XXX Optimize this if the arguments is a list, tuple */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000778
779 /* Get the iterator */
780 it = PyObject_GetIter(arg);
781 if (it == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000782 return -1;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000783 iternext = *Py_Type(it)->tp_iternext;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000784
785 /* Run the iterator to exhaustion */
786 for (;;) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000787 PyObject *item;
788 Py_ssize_t value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000789
Guido van Rossumd624f182006-04-24 13:47:05 +0000790 /* Get the next item */
791 item = iternext(it);
792 if (item == NULL) {
793 if (PyErr_Occurred()) {
794 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
795 goto error;
796 PyErr_Clear();
797 }
798 break;
799 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000800
Guido van Rossumd624f182006-04-24 13:47:05 +0000801 /* Interpret it as an int (__index__) */
Thomas Woutersd204a712006-08-22 13:41:17 +0000802 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000803 Py_DECREF(item);
804 if (value == -1 && PyErr_Occurred())
805 goto error;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000806
Guido van Rossumd624f182006-04-24 13:47:05 +0000807 /* Range check */
808 if (value < 0 || value >= 256) {
809 PyErr_SetString(PyExc_ValueError,
810 "bytes must be in range(0, 256)");
811 goto error;
812 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000813
Guido van Rossumd624f182006-04-24 13:47:05 +0000814 /* Append the byte */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000815 if (Py_Size(self) < self->ob_alloc)
816 Py_Size(self)++;
817 else if (PyBytes_Resize((PyObject *)self, Py_Size(self)+1) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000818 goto error;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000819 self->ob_bytes[Py_Size(self)-1] = value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000820 }
821
822 /* Clean up and return success */
823 Py_DECREF(it);
824 return 0;
825
826 error:
827 /* Error handling when it != NULL */
828 Py_DECREF(it);
829 return -1;
830}
831
Georg Brandlee91be42007-02-24 19:41:35 +0000832/* Mostly copied from string_repr, but without the
833 "smart quote" functionality. */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000834static PyObject *
835bytes_repr(PyBytesObject *self)
836{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000837 static const char *hexdigits = "0123456789abcdef";
Guido van Rossum254348e2007-11-21 19:29:53 +0000838 const char *quote_prefix = "bytearray(b";
Guido van Rossum98297ee2007-11-06 21:34:58 +0000839 const char *quote_postfix = ")";
840 Py_ssize_t length = Py_Size(self);
Guido van Rossum254348e2007-11-21 19:29:53 +0000841 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
842 size_t newsize = 14 + 4 * length;
Georg Brandlee91be42007-02-24 19:41:35 +0000843 PyObject *v;
Guido van Rossum254348e2007-11-21 19:29:53 +0000844 if (newsize > PY_SSIZE_T_MAX || newsize / 4 - 3 != length) {
Georg Brandlee91be42007-02-24 19:41:35 +0000845 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum254348e2007-11-21 19:29:53 +0000846 "bytearray object is too large to make repr");
Guido van Rossumd624f182006-04-24 13:47:05 +0000847 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000848 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000849 v = PyUnicode_FromUnicode(NULL, newsize);
Georg Brandlee91be42007-02-24 19:41:35 +0000850 if (v == NULL) {
851 return NULL;
852 }
853 else {
854 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000855 register Py_UNICODE c;
856 register Py_UNICODE *p;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000857 int quote;
858
859 /* Figure out which quote to use; single is preferred */
860 quote = '\'';
861 {
862 char *test, *start;
863 start = PyBytes_AS_STRING(self);
864 for (test = start; test < start+length; ++test) {
865 if (*test == '"') {
866 quote = '\''; /* back to single */
867 goto decided;
868 }
869 else if (*test == '\'')
870 quote = '"';
871 }
872 decided:
873 ;
874 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000875
Walter Dörwald1ab83302007-05-18 17:15:44 +0000876 p = PyUnicode_AS_UNICODE(v);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000877 while (*quote_prefix)
878 *p++ = *quote_prefix++;
Georg Brandlee91be42007-02-24 19:41:35 +0000879 *p++ = quote;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000880
881 for (i = 0; i < length; i++) {
Georg Brandlee91be42007-02-24 19:41:35 +0000882 /* There's at least enough room for a hex escape
883 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000884 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Georg Brandlee91be42007-02-24 19:41:35 +0000885 c = self->ob_bytes[i];
Guido van Rossum98297ee2007-11-06 21:34:58 +0000886 if (c == '\'' || c == '\\')
Georg Brandlee91be42007-02-24 19:41:35 +0000887 *p++ = '\\', *p++ = c;
888 else if (c == '\t')
889 *p++ = '\\', *p++ = 't';
890 else if (c == '\n')
891 *p++ = '\\', *p++ = 'n';
892 else if (c == '\r')
893 *p++ = '\\', *p++ = 'r';
894 else if (c == 0)
Guido van Rossum57b93ad2007-05-08 19:09:34 +0000895 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
Georg Brandlee91be42007-02-24 19:41:35 +0000896 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000897 *p++ = '\\';
898 *p++ = 'x';
899 *p++ = hexdigits[(c & 0xf0) >> 4];
900 *p++ = hexdigits[c & 0xf];
Georg Brandlee91be42007-02-24 19:41:35 +0000901 }
902 else
903 *p++ = c;
904 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000905 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Georg Brandlee91be42007-02-24 19:41:35 +0000906 *p++ = quote;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000907 while (*quote_postfix) {
908 *p++ = *quote_postfix++;
909 }
Georg Brandlee91be42007-02-24 19:41:35 +0000910 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000911 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
912 Py_DECREF(v);
913 return NULL;
914 }
Georg Brandlee91be42007-02-24 19:41:35 +0000915 return v;
916 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000917}
918
919static PyObject *
Guido van Rossum98297ee2007-11-06 21:34:58 +0000920bytes_str(PyObject *op)
Guido van Rossumd624f182006-04-24 13:47:05 +0000921{
Guido van Rossum98297ee2007-11-06 21:34:58 +0000922 if (Py_BytesWarningFlag) {
923 if (PyErr_WarnEx(PyExc_BytesWarning,
Guido van Rossum254348e2007-11-21 19:29:53 +0000924 "str() on a bytearray instance", 1))
Guido van Rossum98297ee2007-11-06 21:34:58 +0000925 return NULL;
926 }
927 return bytes_repr((PyBytesObject*)op);
Guido van Rossumd624f182006-04-24 13:47:05 +0000928}
929
930static PyObject *
Guido van Rossum343e97f2007-04-09 00:43:24 +0000931bytes_richcompare(PyObject *self, PyObject *other, int op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000932{
Guido van Rossum343e97f2007-04-09 00:43:24 +0000933 Py_ssize_t self_size, other_size;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000934 Py_buffer self_bytes, other_bytes;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000935 PyObject *res;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000936 Py_ssize_t minsize;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000937 int cmp;
938
Jeremy Hylton18c3ff82007-08-29 18:47:16 +0000939 /* Bytes can be compared to anything that supports the (binary)
940 buffer API. Except that a comparison with Unicode is always an
941 error, even if the comparison is for equality. */
942 if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
943 PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
Guido van Rossum98297ee2007-11-06 21:34:58 +0000944 if (Py_BytesWarningFlag && op == Py_EQ) {
945 if (PyErr_WarnEx(PyExc_BytesWarning,
Guido van Rossum254348e2007-11-21 19:29:53 +0000946 "Comparsion between bytearray and string", 1))
Guido van Rossum98297ee2007-11-06 21:34:58 +0000947 return NULL;
948 }
949
Guido van Rossum1e35e762007-10-09 17:21:10 +0000950 Py_INCREF(Py_NotImplemented);
951 return Py_NotImplemented;
Jeremy Hylton18c3ff82007-08-29 18:47:16 +0000952 }
Guido van Rossumebea9be2007-04-09 00:49:13 +0000953
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000954 self_size = _getbuffer(self, &self_bytes);
955 if (self_size < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000956 PyErr_Clear();
Guido van Rossumebea9be2007-04-09 00:49:13 +0000957 Py_INCREF(Py_NotImplemented);
958 return Py_NotImplemented;
959 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000960
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000961 other_size = _getbuffer(other, &other_bytes);
962 if (other_size < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000963 PyErr_Clear();
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000964 PyObject_ReleaseBuffer(self, &self_bytes);
Guido van Rossumd624f182006-04-24 13:47:05 +0000965 Py_INCREF(Py_NotImplemented);
966 return Py_NotImplemented;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000967 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000968
969 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000970 /* Shortcut: if the lengths differ, the objects differ */
971 cmp = (op == Py_NE);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000972 }
973 else {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000974 minsize = self_size;
975 if (other_size < minsize)
976 minsize = other_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000977
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000978 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
Guido van Rossumd624f182006-04-24 13:47:05 +0000979 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000980
Guido van Rossumd624f182006-04-24 13:47:05 +0000981 if (cmp == 0) {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000982 if (self_size < other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +0000983 cmp = -1;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000984 else if (self_size > other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +0000985 cmp = 1;
986 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000987
Guido van Rossumd624f182006-04-24 13:47:05 +0000988 switch (op) {
989 case Py_LT: cmp = cmp < 0; break;
990 case Py_LE: cmp = cmp <= 0; break;
991 case Py_EQ: cmp = cmp == 0; break;
992 case Py_NE: cmp = cmp != 0; break;
993 case Py_GT: cmp = cmp > 0; break;
994 case Py_GE: cmp = cmp >= 0; break;
995 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000996 }
997
998 res = cmp ? Py_True : Py_False;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000999 PyObject_ReleaseBuffer(self, &self_bytes);
Guido van Rossum75d38e92007-08-24 17:33:11 +00001000 PyObject_ReleaseBuffer(other, &other_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001001 Py_INCREF(res);
1002 return res;
1003}
1004
1005static void
1006bytes_dealloc(PyBytesObject *self)
1007{
Guido van Rossumd624f182006-04-24 13:47:05 +00001008 if (self->ob_bytes != 0) {
1009 PyMem_Free(self->ob_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001010 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001011 Py_Type(self)->tp_free((PyObject *)self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001012}
1013
Neal Norwitz6968b052007-02-27 19:02:19 +00001014
1015/* -------------------------------------------------------------------- */
1016/* Methods */
1017
1018#define STRINGLIB_CHAR char
1019#define STRINGLIB_CMP memcmp
1020#define STRINGLIB_LEN PyBytes_GET_SIZE
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001021#define STRINGLIB_STR PyBytes_AS_STRING
Neal Norwitz6968b052007-02-27 19:02:19 +00001022#define STRINGLIB_NEW PyBytes_FromStringAndSize
1023#define STRINGLIB_EMPTY nullbytes
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001024#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
1025#define STRINGLIB_MUTABLE 1
Neal Norwitz6968b052007-02-27 19:02:19 +00001026
1027#include "stringlib/fastsearch.h"
1028#include "stringlib/count.h"
1029#include "stringlib/find.h"
1030#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001031#include "stringlib/ctype.h"
1032#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001033
1034
1035/* The following Py_LOCAL_INLINE and Py_LOCAL functions
1036were copied from the old char* style string object. */
1037
1038Py_LOCAL_INLINE(void)
1039_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1040{
1041 if (*end > len)
1042 *end = len;
1043 else if (*end < 0)
1044 *end += len;
1045 if (*end < 0)
1046 *end = 0;
1047 if (*start < 0)
1048 *start += len;
1049 if (*start < 0)
1050 *start = 0;
1051}
1052
1053
1054Py_LOCAL_INLINE(Py_ssize_t)
1055bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
1056{
1057 PyObject *subobj;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00001058 Py_buffer subbuf;
Neal Norwitz6968b052007-02-27 19:02:19 +00001059 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Guido van Rossum06b8b022007-08-31 13:48:41 +00001060 Py_ssize_t res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001061
1062 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1063 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1064 return -2;
Guido van Rossum06b8b022007-08-31 13:48:41 +00001065 if (_getbuffer(subobj, &subbuf) < 0)
Neal Norwitz6968b052007-02-27 19:02:19 +00001066 return -2;
Neal Norwitz6968b052007-02-27 19:02:19 +00001067 if (dir > 0)
Guido van Rossum06b8b022007-08-31 13:48:41 +00001068 res = stringlib_find_slice(
Neal Norwitz6968b052007-02-27 19:02:19 +00001069 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossum06b8b022007-08-31 13:48:41 +00001070 subbuf.buf, subbuf.len, start, end);
Neal Norwitz6968b052007-02-27 19:02:19 +00001071 else
Guido van Rossum06b8b022007-08-31 13:48:41 +00001072 res = stringlib_rfind_slice(
Neal Norwitz6968b052007-02-27 19:02:19 +00001073 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossum06b8b022007-08-31 13:48:41 +00001074 subbuf.buf, subbuf.len, start, end);
1075 PyObject_ReleaseBuffer(subobj, &subbuf);
1076 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001077}
1078
Neal Norwitz6968b052007-02-27 19:02:19 +00001079PyDoc_STRVAR(find__doc__,
1080"B.find(sub [,start [,end]]) -> int\n\
1081\n\
1082Return the lowest index in B where subsection sub is found,\n\
1083such that sub is contained within s[start,end]. Optional\n\
1084arguments start and end are interpreted as in slice notation.\n\
1085\n\
1086Return -1 on failure.");
1087
1088static PyObject *
1089bytes_find(PyBytesObject *self, PyObject *args)
1090{
1091 Py_ssize_t result = bytes_find_internal(self, args, +1);
1092 if (result == -2)
1093 return NULL;
Christian Heimes217cfd12007-12-02 14:31:20 +00001094 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001095}
1096
1097PyDoc_STRVAR(count__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001098"B.count(sub [,start [,end]]) -> int\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001099\n\
1100Return the number of non-overlapping occurrences of subsection sub in\n\
1101bytes B[start:end]. Optional arguments start and end are interpreted\n\
1102as in slice notation.");
1103
1104static PyObject *
1105bytes_count(PyBytesObject *self, PyObject *args)
1106{
1107 PyObject *sub_obj;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001108 const char *str = PyBytes_AS_STRING(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00001109 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001110 Py_buffer vsub;
1111 PyObject *count_obj;
Neal Norwitz6968b052007-02-27 19:02:19 +00001112
1113 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1114 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1115 return NULL;
1116
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001117 if (_getbuffer(sub_obj, &vsub) < 0)
Neal Norwitz6968b052007-02-27 19:02:19 +00001118 return NULL;
1119
Martin v. Löwis5b222132007-06-10 09:51:05 +00001120 _adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
Neal Norwitz6968b052007-02-27 19:02:19 +00001121
Christian Heimes217cfd12007-12-02 14:31:20 +00001122 count_obj = PyLong_FromSsize_t(
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001123 stringlib_count(str + start, end - start, vsub.buf, vsub.len)
Neal Norwitz6968b052007-02-27 19:02:19 +00001124 );
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001125 PyObject_ReleaseBuffer(sub_obj, &vsub);
1126 return count_obj;
Neal Norwitz6968b052007-02-27 19:02:19 +00001127}
1128
1129
1130PyDoc_STRVAR(index__doc__,
1131"B.index(sub [,start [,end]]) -> int\n\
1132\n\
1133Like B.find() but raise ValueError when the subsection is not found.");
1134
1135static PyObject *
1136bytes_index(PyBytesObject *self, PyObject *args)
1137{
1138 Py_ssize_t result = bytes_find_internal(self, args, +1);
1139 if (result == -2)
1140 return NULL;
1141 if (result == -1) {
1142 PyErr_SetString(PyExc_ValueError,
1143 "subsection not found");
1144 return NULL;
1145 }
Christian Heimes217cfd12007-12-02 14:31:20 +00001146 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001147}
1148
1149
1150PyDoc_STRVAR(rfind__doc__,
1151"B.rfind(sub [,start [,end]]) -> int\n\
1152\n\
1153Return the highest index in B where subsection sub is found,\n\
1154such that sub is contained within s[start,end]. Optional\n\
1155arguments start and end are interpreted as in slice notation.\n\
1156\n\
1157Return -1 on failure.");
1158
1159static PyObject *
1160bytes_rfind(PyBytesObject *self, PyObject *args)
1161{
1162 Py_ssize_t result = bytes_find_internal(self, args, -1);
1163 if (result == -2)
1164 return NULL;
Christian Heimes217cfd12007-12-02 14:31:20 +00001165 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001166}
1167
1168
1169PyDoc_STRVAR(rindex__doc__,
1170"B.rindex(sub [,start [,end]]) -> int\n\
1171\n\
1172Like B.rfind() but raise ValueError when the subsection is not found.");
1173
1174static PyObject *
1175bytes_rindex(PyBytesObject *self, PyObject *args)
1176{
1177 Py_ssize_t result = bytes_find_internal(self, args, -1);
1178 if (result == -2)
1179 return NULL;
1180 if (result == -1) {
1181 PyErr_SetString(PyExc_ValueError,
1182 "subsection not found");
1183 return NULL;
1184 }
Christian Heimes217cfd12007-12-02 14:31:20 +00001185 return PyLong_FromSsize_t(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00001186}
1187
1188
Guido van Rossum98297ee2007-11-06 21:34:58 +00001189static int
1190bytes_contains(PyObject *self, PyObject *arg)
1191{
1192 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1193 if (ival == -1 && PyErr_Occurred()) {
1194 Py_buffer varg;
1195 int pos;
1196 PyErr_Clear();
1197 if (_getbuffer(arg, &varg) < 0)
1198 return -1;
1199 pos = stringlib_find(PyBytes_AS_STRING(self), Py_Size(self),
1200 varg.buf, varg.len, 0);
1201 PyObject_ReleaseBuffer(arg, &varg);
1202 return pos >= 0;
1203 }
1204 if (ival < 0 || ival >= 256) {
1205 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1206 return -1;
1207 }
1208
1209 return memchr(PyBytes_AS_STRING(self), ival, Py_Size(self)) != NULL;
1210}
1211
1212
Neal Norwitz6968b052007-02-27 19:02:19 +00001213/* Matches the end (direction >= 0) or start (direction < 0) of self
1214 * against substr, using the start and end arguments. Returns
1215 * -1 on error, 0 if not found and 1 if found.
1216 */
1217Py_LOCAL(int)
1218_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
1219 Py_ssize_t end, int direction)
1220{
1221 Py_ssize_t len = PyBytes_GET_SIZE(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00001222 const char* str;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001223 Py_buffer vsubstr;
Guido van Rossum40d20bc2007-10-22 00:09:51 +00001224 int rv = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00001225
Neal Norwitz6968b052007-02-27 19:02:19 +00001226 str = PyBytes_AS_STRING(self);
1227
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001228 if (_getbuffer(substr, &vsubstr) < 0)
1229 return -1;
1230
Neal Norwitz6968b052007-02-27 19:02:19 +00001231 _adjust_indices(&start, &end, len);
1232
1233 if (direction < 0) {
1234 /* startswith */
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001235 if (start+vsubstr.len > len) {
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001236 goto done;
1237 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001238 } else {
1239 /* endswith */
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001240 if (end-start < vsubstr.len || start > len) {
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001241 goto done;
1242 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001243
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001244 if (end-vsubstr.len > start)
1245 start = end - vsubstr.len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001246 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001247 if (end-start >= vsubstr.len)
1248 rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
1249
1250done:
1251 PyObject_ReleaseBuffer(substr, &vsubstr);
1252 return rv;
Neal Norwitz6968b052007-02-27 19:02:19 +00001253}
1254
1255
1256PyDoc_STRVAR(startswith__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001257"B.startswith(prefix [,start [,end]]) -> bool\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001258\n\
1259Return True if B starts with the specified prefix, False otherwise.\n\
1260With optional start, test B beginning at that position.\n\
1261With optional end, stop comparing B at that position.\n\
1262prefix can also be a tuple of strings to try.");
1263
1264static PyObject *
1265bytes_startswith(PyBytesObject *self, PyObject *args)
1266{
1267 Py_ssize_t start = 0;
1268 Py_ssize_t end = PY_SSIZE_T_MAX;
1269 PyObject *subobj;
1270 int result;
1271
1272 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1273 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1274 return NULL;
1275 if (PyTuple_Check(subobj)) {
1276 Py_ssize_t i;
1277 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1278 result = _bytes_tailmatch(self,
1279 PyTuple_GET_ITEM(subobj, i),
1280 start, end, -1);
1281 if (result == -1)
1282 return NULL;
1283 else if (result) {
1284 Py_RETURN_TRUE;
1285 }
1286 }
1287 Py_RETURN_FALSE;
1288 }
1289 result = _bytes_tailmatch(self, subobj, start, end, -1);
1290 if (result == -1)
1291 return NULL;
1292 else
1293 return PyBool_FromLong(result);
1294}
1295
1296PyDoc_STRVAR(endswith__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00001297"B.endswith(suffix [,start [,end]]) -> bool\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001298\n\
1299Return True if B ends with the specified suffix, False otherwise.\n\
1300With optional start, test B beginning at that position.\n\
1301With optional end, stop comparing B at that position.\n\
1302suffix can also be a tuple of strings to try.");
1303
1304static PyObject *
1305bytes_endswith(PyBytesObject *self, PyObject *args)
1306{
1307 Py_ssize_t start = 0;
1308 Py_ssize_t end = PY_SSIZE_T_MAX;
1309 PyObject *subobj;
1310 int result;
1311
1312 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1313 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1314 return NULL;
1315 if (PyTuple_Check(subobj)) {
1316 Py_ssize_t i;
1317 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1318 result = _bytes_tailmatch(self,
1319 PyTuple_GET_ITEM(subobj, i),
1320 start, end, +1);
1321 if (result == -1)
1322 return NULL;
1323 else if (result) {
1324 Py_RETURN_TRUE;
1325 }
1326 }
1327 Py_RETURN_FALSE;
1328 }
1329 result = _bytes_tailmatch(self, subobj, start, end, +1);
1330 if (result == -1)
1331 return NULL;
1332 else
1333 return PyBool_FromLong(result);
1334}
1335
1336
Neal Norwitz6968b052007-02-27 19:02:19 +00001337PyDoc_STRVAR(translate__doc__,
Guido van Rossum254348e2007-11-21 19:29:53 +00001338"B.translate(table[, deletechars]) -> bytearray\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00001339\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00001340Return a copy of B, where all characters occurring in the\n\
1341optional argument deletechars are removed, and the remaining\n\
1342characters have been mapped through the given translation\n\
1343table, which must be a bytes object of length 256.");
Neal Norwitz6968b052007-02-27 19:02:19 +00001344
1345static PyObject *
1346bytes_translate(PyBytesObject *self, PyObject *args)
1347{
1348 register char *input, *output;
1349 register const char *table;
1350 register Py_ssize_t i, c, changed = 0;
1351 PyObject *input_obj = (PyObject*)self;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001352 const char *output_start;
1353 Py_ssize_t inlen;
Neal Norwitz6968b052007-02-27 19:02:19 +00001354 PyObject *result;
1355 int trans_table[256];
1356 PyObject *tableobj, *delobj = NULL;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001357 Py_buffer vtable, vdel;
Neal Norwitz6968b052007-02-27 19:02:19 +00001358
1359 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1360 &tableobj, &delobj))
1361 return NULL;
1362
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001363 if (_getbuffer(tableobj, &vtable) < 0)
Neal Norwitz6968b052007-02-27 19:02:19 +00001364 return NULL;
1365
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001366 if (vtable.len != 256) {
Neal Norwitz6968b052007-02-27 19:02:19 +00001367 PyErr_SetString(PyExc_ValueError,
1368 "translation table must be 256 characters long");
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001369 result = NULL;
1370 goto done;
Neal Norwitz6968b052007-02-27 19:02:19 +00001371 }
1372
1373 if (delobj != NULL) {
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001374 if (_getbuffer(delobj, &vdel) < 0) {
1375 result = NULL;
1376 goto done;
Neal Norwitz6968b052007-02-27 19:02:19 +00001377 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001378 }
1379 else {
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001380 vdel.buf = NULL;
1381 vdel.len = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00001382 }
1383
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001384 table = (const char *)vtable.buf;
Neal Norwitz6968b052007-02-27 19:02:19 +00001385 inlen = PyBytes_GET_SIZE(input_obj);
1386 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1387 if (result == NULL)
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001388 goto done;
Neal Norwitz6968b052007-02-27 19:02:19 +00001389 output_start = output = PyBytes_AsString(result);
1390 input = PyBytes_AS_STRING(input_obj);
1391
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001392 if (vdel.len == 0) {
Neal Norwitz6968b052007-02-27 19:02:19 +00001393 /* If no deletions are required, use faster code */
1394 for (i = inlen; --i >= 0; ) {
1395 c = Py_CHARMASK(*input++);
1396 if (Py_CHARMASK((*output++ = table[c])) != c)
1397 changed = 1;
1398 }
1399 if (changed || !PyBytes_CheckExact(input_obj))
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001400 goto done;
Neal Norwitz6968b052007-02-27 19:02:19 +00001401 Py_DECREF(result);
1402 Py_INCREF(input_obj);
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001403 result = input_obj;
1404 goto done;
Neal Norwitz6968b052007-02-27 19:02:19 +00001405 }
1406
1407 for (i = 0; i < 256; i++)
1408 trans_table[i] = Py_CHARMASK(table[i]);
1409
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001410 for (i = 0; i < vdel.len; i++)
1411 trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
Neal Norwitz6968b052007-02-27 19:02:19 +00001412
1413 for (i = inlen; --i >= 0; ) {
1414 c = Py_CHARMASK(*input++);
1415 if (trans_table[c] != -1)
1416 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1417 continue;
1418 changed = 1;
1419 }
1420 if (!changed && PyBytes_CheckExact(input_obj)) {
1421 Py_DECREF(result);
1422 Py_INCREF(input_obj);
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001423 result = input_obj;
1424 goto done;
Neal Norwitz6968b052007-02-27 19:02:19 +00001425 }
1426 /* Fix the size of the resulting string */
1427 if (inlen > 0)
1428 PyBytes_Resize(result, output - output_start);
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001429
1430done:
1431 PyObject_ReleaseBuffer(tableobj, &vtable);
1432 if (delobj != NULL)
1433 PyObject_ReleaseBuffer(delobj, &vdel);
Neal Norwitz6968b052007-02-27 19:02:19 +00001434 return result;
1435}
1436
1437
/* Search directions. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first occurrence of byte c in the first target_len bytes
   of target, or NULL if there is none. */
#define findchar(target, target_len, c)                                 \
    ((char *)memchr((const void *)(target), (c), (target_len)))

/* True when the length bytes of target starting at offset equal pattern.
   The first and last bytes are compared before falling back to memcmp on
   the interior.
   Don't call if length < 2: the interior length passed to memcmp is
   length-2, which would be negative. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1451
1452
1453/* Bytes ops must return a string. */
1454/* If the object is subclass of bytes, create a copy */
1455Py_LOCAL(PyBytesObject *)
1456return_self(PyBytesObject *self)
1457{
1458 if (PyBytes_CheckExact(self)) {
1459 Py_INCREF(self);
1460 return (PyBytesObject *)self;
1461 }
1462 return (PyBytesObject *)PyBytes_FromStringAndSize(
1463 PyBytes_AS_STRING(self),
1464 PyBytes_GET_SIZE(self));
1465}
1466
1467Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitz61ec0d32007-10-26 06:44:10 +00001468countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Neal Norwitz6968b052007-02-27 19:02:19 +00001469{
1470 Py_ssize_t count=0;
1471 const char *start=target;
1472 const char *end=target+target_len;
1473
1474 while ( (start=findchar(start, end-start, c)) != NULL ) {
1475 count++;
1476 if (count >= maxcount)
1477 break;
1478 start += 1;
1479 }
1480 return count;
1481}
1482
1483Py_LOCAL(Py_ssize_t)
1484findstring(const char *target, Py_ssize_t target_len,
1485 const char *pattern, Py_ssize_t pattern_len,
1486 Py_ssize_t start,
1487 Py_ssize_t end,
1488 int direction)
1489{
1490 if (start < 0) {
1491 start += target_len;
1492 if (start < 0)
1493 start = 0;
1494 }
1495 if (end > target_len) {
1496 end = target_len;
1497 } else if (end < 0) {
1498 end += target_len;
1499 if (end < 0)
1500 end = 0;
1501 }
1502
1503 /* zero-length substrings always match at the first attempt */
1504 if (pattern_len == 0)
1505 return (direction > 0) ? start : end;
1506
1507 end -= pattern_len;
1508
1509 if (direction < 0) {
1510 for (; end >= start; end--)
1511 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1512 return end;
1513 } else {
1514 for (; start <= end; start++)
1515 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1516 return start;
1517 }
1518 return -1;
1519}
1520
1521Py_LOCAL_INLINE(Py_ssize_t)
1522countstring(const char *target, Py_ssize_t target_len,
1523 const char *pattern, Py_ssize_t pattern_len,
1524 Py_ssize_t start,
1525 Py_ssize_t end,
1526 int direction, Py_ssize_t maxcount)
1527{
1528 Py_ssize_t count=0;
1529
1530 if (start < 0) {
1531 start += target_len;
1532 if (start < 0)
1533 start = 0;
1534 }
1535 if (end > target_len) {
1536 end = target_len;
1537 } else if (end < 0) {
1538 end += target_len;
1539 if (end < 0)
1540 end = 0;
1541 }
1542
1543 /* zero-length substrings match everywhere */
1544 if (pattern_len == 0 || maxcount == 0) {
1545 if (target_len+1 < maxcount)
1546 return target_len+1;
1547 return maxcount;
1548 }
1549
1550 end -= pattern_len;
1551 if (direction < 0) {
1552 for (; (end >= start); end--)
1553 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1554 count++;
1555 if (--maxcount <= 0) break;
1556 end -= pattern_len-1;
1557 }
1558 } else {
1559 for (; (start <= end); start++)
1560 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1561 count++;
1562 if (--maxcount <= 0)
1563 break;
1564 start += pattern_len-1;
1565 }
1566 }
1567 return count;
1568}
1569
1570
1571/* Algorithms for different cases of string replacement */
1572
1573/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1574Py_LOCAL(PyBytesObject *)
1575replace_interleave(PyBytesObject *self,
1576 const char *to_s, Py_ssize_t to_len,
1577 Py_ssize_t maxcount)
1578{
1579 char *self_s, *result_s;
1580 Py_ssize_t self_len, result_len;
1581 Py_ssize_t count, i, product;
1582 PyBytesObject *result;
1583
1584 self_len = PyBytes_GET_SIZE(self);
1585
1586 /* 1 at the end plus 1 after every character */
1587 count = self_len+1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001588 if (maxcount < count)
Neal Norwitz6968b052007-02-27 19:02:19 +00001589 count = maxcount;
1590
1591 /* Check for overflow */
1592 /* result_len = count * to_len + self_len; */
1593 product = count * to_len;
1594 if (product / to_len != count) {
1595 PyErr_SetString(PyExc_OverflowError,
1596 "replace string is too long");
1597 return NULL;
1598 }
1599 result_len = product + self_len;
1600 if (result_len < 0) {
1601 PyErr_SetString(PyExc_OverflowError,
1602 "replace string is too long");
1603 return NULL;
1604 }
1605
1606 if (! (result = (PyBytesObject *)
1607 PyBytes_FromStringAndSize(NULL, result_len)) )
1608 return NULL;
1609
1610 self_s = PyBytes_AS_STRING(self);
1611 result_s = PyBytes_AS_STRING(result);
1612
1613 /* TODO: special case single character, which doesn't need memcpy */
1614
1615 /* Lay the first one down (guaranteed this will occur) */
1616 Py_MEMCPY(result_s, to_s, to_len);
1617 result_s += to_len;
1618 count -= 1;
1619
1620 for (i=0; i<count; i++) {
1621 *result_s++ = *self_s++;
1622 Py_MEMCPY(result_s, to_s, to_len);
1623 result_s += to_len;
1624 }
1625
1626 /* Copy the rest of the original string */
1627 Py_MEMCPY(result_s, self_s, self_len-i);
1628
1629 return result;
1630}
1631
1632/* Special case for deleting a single character */
1633/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1634Py_LOCAL(PyBytesObject *)
1635replace_delete_single_character(PyBytesObject *self,
1636 char from_c, Py_ssize_t maxcount)
1637{
1638 char *self_s, *result_s;
1639 char *start, *next, *end;
1640 Py_ssize_t self_len, result_len;
1641 Py_ssize_t count;
1642 PyBytesObject *result;
1643
1644 self_len = PyBytes_GET_SIZE(self);
1645 self_s = PyBytes_AS_STRING(self);
1646
1647 count = countchar(self_s, self_len, from_c, maxcount);
1648 if (count == 0) {
1649 return return_self(self);
1650 }
1651
1652 result_len = self_len - count; /* from_len == 1 */
1653 assert(result_len>=0);
1654
1655 if ( (result = (PyBytesObject *)
1656 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1657 return NULL;
1658 result_s = PyBytes_AS_STRING(result);
1659
1660 start = self_s;
1661 end = self_s + self_len;
1662 while (count-- > 0) {
1663 next = findchar(start, end-start, from_c);
1664 if (next == NULL)
1665 break;
1666 Py_MEMCPY(result_s, start, next-start);
1667 result_s += (next-start);
1668 start = next+1;
1669 }
1670 Py_MEMCPY(result_s, start, end-start);
1671
1672 return result;
1673}
1674
1675/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1676
1677Py_LOCAL(PyBytesObject *)
1678replace_delete_substring(PyBytesObject *self,
1679 const char *from_s, Py_ssize_t from_len,
1680 Py_ssize_t maxcount)
1681{
1682 char *self_s, *result_s;
1683 char *start, *next, *end;
1684 Py_ssize_t self_len, result_len;
1685 Py_ssize_t count, offset;
1686 PyBytesObject *result;
1687
1688 self_len = PyBytes_GET_SIZE(self);
1689 self_s = PyBytes_AS_STRING(self);
1690
1691 count = countstring(self_s, self_len,
1692 from_s, from_len,
1693 0, self_len, 1,
1694 maxcount);
1695
1696 if (count == 0) {
1697 /* no matches */
1698 return return_self(self);
1699 }
1700
1701 result_len = self_len - (count * from_len);
1702 assert (result_len>=0);
1703
1704 if ( (result = (PyBytesObject *)
1705 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1706 return NULL;
1707
1708 result_s = PyBytes_AS_STRING(result);
1709
1710 start = self_s;
1711 end = self_s + self_len;
1712 while (count-- > 0) {
1713 offset = findstring(start, end-start,
1714 from_s, from_len,
1715 0, end-start, FORWARD);
1716 if (offset == -1)
1717 break;
1718 next = start + offset;
1719
1720 Py_MEMCPY(result_s, start, next-start);
1721
1722 result_s += (next-start);
1723 start = next+from_len;
1724 }
1725 Py_MEMCPY(result_s, start, end-start);
1726 return result;
1727}
1728
1729/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1730Py_LOCAL(PyBytesObject *)
1731replace_single_character_in_place(PyBytesObject *self,
1732 char from_c, char to_c,
1733 Py_ssize_t maxcount)
1734{
1735 char *self_s, *result_s, *start, *end, *next;
1736 Py_ssize_t self_len;
1737 PyBytesObject *result;
1738
1739 /* The result string will be the same size */
1740 self_s = PyBytes_AS_STRING(self);
1741 self_len = PyBytes_GET_SIZE(self);
1742
1743 next = findchar(self_s, self_len, from_c);
1744
1745 if (next == NULL) {
1746 /* No matches; return the original bytes */
1747 return return_self(self);
1748 }
1749
1750 /* Need to make a new bytes */
1751 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1752 if (result == NULL)
1753 return NULL;
1754 result_s = PyBytes_AS_STRING(result);
1755 Py_MEMCPY(result_s, self_s, self_len);
1756
1757 /* change everything in-place, starting with this one */
1758 start = result_s + (next-self_s);
1759 *start = to_c;
1760 start++;
1761 end = result_s + self_len;
1762
1763 while (--maxcount > 0) {
1764 next = findchar(start, end-start, from_c);
1765 if (next == NULL)
1766 break;
1767 *next = to_c;
1768 start = next+1;
1769 }
1770
1771 return result;
1772}
1773
1774/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1775Py_LOCAL(PyBytesObject *)
1776replace_substring_in_place(PyBytesObject *self,
1777 const char *from_s, Py_ssize_t from_len,
1778 const char *to_s, Py_ssize_t to_len,
1779 Py_ssize_t maxcount)
1780{
1781 char *result_s, *start, *end;
1782 char *self_s;
1783 Py_ssize_t self_len, offset;
1784 PyBytesObject *result;
1785
1786 /* The result bytes will be the same size */
1787
1788 self_s = PyBytes_AS_STRING(self);
1789 self_len = PyBytes_GET_SIZE(self);
1790
1791 offset = findstring(self_s, self_len,
1792 from_s, from_len,
1793 0, self_len, FORWARD);
1794 if (offset == -1) {
1795 /* No matches; return the original bytes */
1796 return return_self(self);
1797 }
1798
1799 /* Need to make a new bytes */
1800 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1801 if (result == NULL)
1802 return NULL;
1803 result_s = PyBytes_AS_STRING(result);
1804 Py_MEMCPY(result_s, self_s, self_len);
1805
1806 /* change everything in-place, starting with this one */
1807 start = result_s + offset;
1808 Py_MEMCPY(start, to_s, from_len);
1809 start += from_len;
1810 end = result_s + self_len;
1811
1812 while ( --maxcount > 0) {
1813 offset = findstring(start, end-start,
1814 from_s, from_len,
1815 0, end-start, FORWARD);
1816 if (offset==-1)
1817 break;
1818 Py_MEMCPY(start+offset, to_s, from_len);
1819 start += offset+from_len;
1820 }
1821
1822 return result;
1823}
1824
1825/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1826Py_LOCAL(PyBytesObject *)
1827replace_single_character(PyBytesObject *self,
1828 char from_c,
1829 const char *to_s, Py_ssize_t to_len,
1830 Py_ssize_t maxcount)
1831{
1832 char *self_s, *result_s;
1833 char *start, *next, *end;
1834 Py_ssize_t self_len, result_len;
1835 Py_ssize_t count, product;
1836 PyBytesObject *result;
1837
1838 self_s = PyBytes_AS_STRING(self);
1839 self_len = PyBytes_GET_SIZE(self);
1840
1841 count = countchar(self_s, self_len, from_c, maxcount);
1842 if (count == 0) {
1843 /* no matches, return unchanged */
1844 return return_self(self);
1845 }
1846
1847 /* use the difference between current and new, hence the "-1" */
1848 /* result_len = self_len + count * (to_len-1) */
1849 product = count * (to_len-1);
1850 if (product / (to_len-1) != count) {
1851 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1852 return NULL;
1853 }
1854 result_len = self_len + product;
1855 if (result_len < 0) {
1856 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1857 return NULL;
1858 }
1859
1860 if ( (result = (PyBytesObject *)
1861 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1862 return NULL;
1863 result_s = PyBytes_AS_STRING(result);
1864
1865 start = self_s;
1866 end = self_s + self_len;
1867 while (count-- > 0) {
1868 next = findchar(start, end-start, from_c);
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001869 if (next == NULL)
Neal Norwitz6968b052007-02-27 19:02:19 +00001870 break;
1871
1872 if (next == start) {
1873 /* replace with the 'to' */
1874 Py_MEMCPY(result_s, to_s, to_len);
1875 result_s += to_len;
1876 start += 1;
1877 } else {
1878 /* copy the unchanged old then the 'to' */
1879 Py_MEMCPY(result_s, start, next-start);
1880 result_s += (next-start);
1881 Py_MEMCPY(result_s, to_s, to_len);
1882 result_s += to_len;
1883 start = next+1;
1884 }
1885 }
1886 /* Copy the remainder of the remaining bytes */
1887 Py_MEMCPY(result_s, start, end-start);
1888
1889 return result;
1890}
1891
1892/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1893Py_LOCAL(PyBytesObject *)
1894replace_substring(PyBytesObject *self,
1895 const char *from_s, Py_ssize_t from_len,
1896 const char *to_s, Py_ssize_t to_len,
1897 Py_ssize_t maxcount)
1898{
1899 char *self_s, *result_s;
1900 char *start, *next, *end;
1901 Py_ssize_t self_len, result_len;
1902 Py_ssize_t count, offset, product;
1903 PyBytesObject *result;
1904
1905 self_s = PyBytes_AS_STRING(self);
1906 self_len = PyBytes_GET_SIZE(self);
1907
1908 count = countstring(self_s, self_len,
1909 from_s, from_len,
1910 0, self_len, FORWARD, maxcount);
1911 if (count == 0) {
1912 /* no matches, return unchanged */
1913 return return_self(self);
1914 }
1915
1916 /* Check for overflow */
1917 /* result_len = self_len + count * (to_len-from_len) */
1918 product = count * (to_len-from_len);
1919 if (product / (to_len-from_len) != count) {
1920 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1921 return NULL;
1922 }
1923 result_len = self_len + product;
1924 if (result_len < 0) {
1925 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1926 return NULL;
1927 }
1928
1929 if ( (result = (PyBytesObject *)
1930 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1931 return NULL;
1932 result_s = PyBytes_AS_STRING(result);
1933
1934 start = self_s;
1935 end = self_s + self_len;
1936 while (count-- > 0) {
1937 offset = findstring(start, end-start,
1938 from_s, from_len,
1939 0, end-start, FORWARD);
1940 if (offset == -1)
1941 break;
1942 next = start+offset;
1943 if (next == start) {
1944 /* replace with the 'to' */
1945 Py_MEMCPY(result_s, to_s, to_len);
1946 result_s += to_len;
1947 start += from_len;
1948 } else {
1949 /* copy the unchanged old then the 'to' */
1950 Py_MEMCPY(result_s, start, next-start);
1951 result_s += (next-start);
1952 Py_MEMCPY(result_s, to_s, to_len);
1953 result_s += to_len;
1954 start = next+from_len;
1955 }
1956 }
1957 /* Copy the remainder of the remaining bytes */
1958 Py_MEMCPY(result_s, start, end-start);
1959
1960 return result;
1961}
1962
1963
1964Py_LOCAL(PyBytesObject *)
1965replace(PyBytesObject *self,
1966 const char *from_s, Py_ssize_t from_len,
1967 const char *to_s, Py_ssize_t to_len,
1968 Py_ssize_t maxcount)
1969{
1970 if (maxcount < 0) {
1971 maxcount = PY_SSIZE_T_MAX;
1972 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
1973 /* nothing to do; return the original bytes */
1974 return return_self(self);
1975 }
1976
1977 if (maxcount == 0 ||
1978 (from_len == 0 && to_len == 0)) {
1979 /* nothing to do; return the original bytes */
1980 return return_self(self);
1981 }
1982
1983 /* Handle zero-length special cases */
1984
1985 if (from_len == 0) {
1986 /* insert the 'to' bytes everywhere. */
1987 /* >>> "Python".replace("", ".") */
1988 /* '.P.y.t.h.o.n.' */
1989 return replace_interleave(self, to_s, to_len, maxcount);
1990 }
1991
1992 /* Except for "".replace("", "A") == "A" there is no way beyond this */
1993 /* point for an empty self bytes to generate a non-empty bytes */
1994 /* Special case so the remaining code always gets a non-empty bytes */
1995 if (PyBytes_GET_SIZE(self) == 0) {
1996 return return_self(self);
1997 }
1998
1999 if (to_len == 0) {
2000 /* delete all occurances of 'from' bytes */
2001 if (from_len == 1) {
2002 return replace_delete_single_character(
2003 self, from_s[0], maxcount);
2004 } else {
2005 return replace_delete_substring(self, from_s, from_len, maxcount);
2006 }
2007 }
2008
2009 /* Handle special case where both bytes have the same length */
2010
2011 if (from_len == to_len) {
2012 if (from_len == 1) {
2013 return replace_single_character_in_place(
2014 self,
2015 from_s[0],
2016 to_s[0],
2017 maxcount);
2018 } else {
2019 return replace_substring_in_place(
2020 self, from_s, from_len, to_s, to_len, maxcount);
2021 }
2022 }
2023
2024 /* Otherwise use the more generic algorithms */
2025 if (from_len == 1) {
2026 return replace_single_character(self, from_s[0],
2027 to_s, to_len, maxcount);
2028 } else {
2029 /* len('from')>=2, len('to')>=1 */
2030 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2031 }
2032}
2033
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002034
Neal Norwitz6968b052007-02-27 19:02:19 +00002035PyDoc_STRVAR(replace__doc__,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002036"B.replace(old, new[, count]) -> bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002037\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002038Return a copy of B with all occurrences of subsection\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002039old replaced by new. If the optional argument count is\n\
2040given, only the first count occurrences are replaced.");
2041
2042static PyObject *
2043bytes_replace(PyBytesObject *self, PyObject *args)
2044{
2045 Py_ssize_t count = -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002046 PyObject *from, *to, *res;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00002047 Py_buffer vfrom, vto;
Neal Norwitz6968b052007-02-27 19:02:19 +00002048
2049 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2050 return NULL;
2051
Guido van Rossuma74184e2007-08-29 04:05:57 +00002052 if (_getbuffer(from, &vfrom) < 0)
2053 return NULL;
2054 if (_getbuffer(to, &vto) < 0) {
2055 PyObject_ReleaseBuffer(from, &vfrom);
2056 return NULL;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002057 }
Neal Norwitz6968b052007-02-27 19:02:19 +00002058
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002059 res = (PyObject *)replace((PyBytesObject *) self,
Guido van Rossuma74184e2007-08-29 04:05:57 +00002060 vfrom.buf, vfrom.len,
2061 vto.buf, vto.len, count);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002062
Guido van Rossuma74184e2007-08-29 04:05:57 +00002063 PyObject_ReleaseBuffer(from, &vfrom);
2064 PyObject_ReleaseBuffer(to, &vto);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002065 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00002066}
2067
2068
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
/* The argument is parenthesized so that compound expressions (e.g. a
   conditional) expand correctly; note that it is evaluated twice. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append data[left:right] as a new object to `list`.
   Expands to several statements and relies on names from the expansion
   site: variables `str` and `list` and a label `onError`. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Store data[left:right] as element number `count` of `list`: directly
   into a preallocated slot while count < MAX_PREALLOC, by appending
   afterwards; then bump `count`.
   Relies on names from the expansion site: variables `str`, `list` and
   `count` and a label `onError`. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count
Neal Norwitz6968b052007-02-27 19:02:19 +00002113
2114
2115Py_LOCAL_INLINE(PyObject *)
2116split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2117{
Guido van Rossum8f950672007-09-10 16:53:45 +00002118 register Py_ssize_t i, j, count = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00002119 PyObject *str;
2120 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2121
2122 if (list == NULL)
2123 return NULL;
2124
2125 i = j = 0;
2126 while ((j < len) && (maxcount-- > 0)) {
Guido van Rossum8f950672007-09-10 16:53:45 +00002127 for(; j < len; j++) {
Neal Norwitz6968b052007-02-27 19:02:19 +00002128 /* I found that using memchr makes no difference */
2129 if (s[j] == ch) {
2130 SPLIT_ADD(s, i, j);
2131 i = j = j + 1;
2132 break;
2133 }
2134 }
2135 }
2136 if (i <= len) {
2137 SPLIT_ADD(s, i, len);
2138 }
2139 FIX_PREALLOC_SIZE(list);
2140 return list;
2141
2142 onError:
2143 Py_DECREF(list);
2144 return NULL;
2145}
2146
Guido van Rossum8f950672007-09-10 16:53:45 +00002147
2148Py_LOCAL_INLINE(PyObject *)
2149split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2150{
2151 register Py_ssize_t i, j, count = 0;
2152 PyObject *str;
2153 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2154
2155 if (list == NULL)
2156 return NULL;
2157
2158 for (i = j = 0; i < len; ) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002159 /* find a token */
2160 while (i < len && ISSPACE(s[i]))
2161 i++;
2162 j = i;
2163 while (i < len && !ISSPACE(s[i]))
2164 i++;
2165 if (j < i) {
2166 if (maxcount-- <= 0)
2167 break;
2168 SPLIT_ADD(s, j, i);
2169 while (i < len && ISSPACE(s[i]))
2170 i++;
2171 j = i;
2172 }
Guido van Rossum8f950672007-09-10 16:53:45 +00002173 }
2174 if (j < len) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002175 SPLIT_ADD(s, j, len);
Guido van Rossum8f950672007-09-10 16:53:45 +00002176 }
2177 FIX_PREALLOC_SIZE(list);
2178 return list;
2179
2180 onError:
2181 Py_DECREF(list);
2182 return NULL;
2183}
2184
Neal Norwitz6968b052007-02-27 19:02:19 +00002185PyDoc_STRVAR(split__doc__,
Guido van Rossum254348e2007-11-21 19:29:53 +00002186"B.split([sep[, maxsplit]]) -> list of bytearray\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002187\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002188Return a list of the sections in B, using sep as the delimiter.\n\
2189If sep is not given, B is split on ASCII whitespace characters\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00002190(space, tab, return, newline, formfeed, vertical tab).\n\
2191If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00002192
2193static PyObject *
2194bytes_split(PyBytesObject *self, PyObject *args)
2195{
2196 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
Guido van Rossum8f950672007-09-10 16:53:45 +00002197 Py_ssize_t maxsplit = -1, count = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00002198 const char *s = PyBytes_AS_STRING(self), *sub;
Guido van Rossum8f950672007-09-10 16:53:45 +00002199 PyObject *list, *str, *subobj = Py_None;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00002200 Py_buffer vsub;
Neal Norwitz6968b052007-02-27 19:02:19 +00002201#ifdef USE_FAST
2202 Py_ssize_t pos;
2203#endif
2204
Guido van Rossum8f950672007-09-10 16:53:45 +00002205 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Neal Norwitz6968b052007-02-27 19:02:19 +00002206 return NULL;
2207 if (maxsplit < 0)
2208 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum8f950672007-09-10 16:53:45 +00002209
2210 if (subobj == Py_None)
2211 return split_whitespace(s, len, maxsplit);
2212
2213 if (_getbuffer(subobj, &vsub) < 0)
Neal Norwitz6968b052007-02-27 19:02:19 +00002214 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002215 sub = vsub.buf;
2216 n = vsub.len;
Neal Norwitz6968b052007-02-27 19:02:19 +00002217
2218 if (n == 0) {
2219 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum8f950672007-09-10 16:53:45 +00002220 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002221 return NULL;
2222 }
Guido van Rossum8f950672007-09-10 16:53:45 +00002223 if (n == 1)
Neal Norwitz6968b052007-02-27 19:02:19 +00002224 return split_char(s, len, sub[0], maxsplit);
2225
2226 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum8f950672007-09-10 16:53:45 +00002227 if (list == NULL) {
2228 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002229 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002230 }
Neal Norwitz6968b052007-02-27 19:02:19 +00002231
2232#ifdef USE_FAST
2233 i = j = 0;
2234 while (maxsplit-- > 0) {
2235 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2236 if (pos < 0)
2237 break;
2238 j = i+pos;
2239 SPLIT_ADD(s, i, j);
2240 i = j + n;
2241 }
2242#else
2243 i = j = 0;
2244 while ((j+n <= len) && (maxsplit-- > 0)) {
2245 for (; j+n <= len; j++) {
2246 if (Py_STRING_MATCH(s, j, sub, n)) {
2247 SPLIT_ADD(s, i, j);
2248 i = j = j + n;
2249 break;
2250 }
2251 }
2252 }
2253#endif
2254 SPLIT_ADD(s, i, len);
2255 FIX_PREALLOC_SIZE(list);
Guido van Rossum8f950672007-09-10 16:53:45 +00002256 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002257 return list;
2258
2259 onError:
2260 Py_DECREF(list);
Guido van Rossum8f950672007-09-10 16:53:45 +00002261 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002262 return NULL;
2263}
2264
Guido van Rossum98297ee2007-11-06 21:34:58 +00002265/* stringlib's partition shares nullbytes in some cases.
2266 undo this, we don't want the nullbytes to be shared. */
2267static PyObject *
2268make_nullbytes_unique(PyObject *result)
2269{
2270 if (result != NULL) {
2271 int i;
2272 assert(PyTuple_Check(result));
2273 assert(PyTuple_GET_SIZE(result) == 3);
2274 for (i = 0; i < 3; i++) {
2275 if (PyTuple_GET_ITEM(result, i) == (PyObject *)nullbytes) {
2276 PyObject *new = PyBytes_FromStringAndSize(NULL, 0);
2277 if (new == NULL) {
2278 Py_DECREF(result);
2279 result = NULL;
2280 break;
2281 }
2282 Py_DECREF(nullbytes);
2283 PyTuple_SET_ITEM(result, i, new);
2284 }
2285 }
2286 }
2287 return result;
2288}
2289
Neal Norwitz6968b052007-02-27 19:02:19 +00002290PyDoc_STRVAR(partition__doc__,
2291"B.partition(sep) -> (head, sep, tail)\n\
2292\n\
2293Searches for the separator sep in B, and returns the part before it,\n\
2294the separator itself, and the part after it. If the separator is not\n\
Guido van Rossum254348e2007-11-21 19:29:53 +00002295found, returns B and two empty bytearray objects.");
Neal Norwitz6968b052007-02-27 19:02:19 +00002296
2297static PyObject *
2298bytes_partition(PyBytesObject *self, PyObject *sep_obj)
2299{
2300 PyObject *bytesep, *result;
2301
2302 bytesep = PyBytes_FromObject(sep_obj);
2303 if (! bytesep)
2304 return NULL;
2305
2306 result = stringlib_partition(
2307 (PyObject*) self,
2308 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002309 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002310 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2311 );
2312
2313 Py_DECREF(bytesep);
Guido van Rossum98297ee2007-11-06 21:34:58 +00002314 return make_nullbytes_unique(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00002315}
2316
2317PyDoc_STRVAR(rpartition__doc__,
2318"B.rpartition(sep) -> (tail, sep, head)\n\
2319\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002320Searches for the separator sep in B, starting at the end of B,\n\
2321and returns the part before it, the separator itself, and the\n\
2322part after it. If the separator is not found, returns two empty\n\
Guido van Rossum254348e2007-11-21 19:29:53 +00002323bytearray objects and B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00002324
2325static PyObject *
2326bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
2327{
2328 PyObject *bytesep, *result;
2329
2330 bytesep = PyBytes_FromObject(sep_obj);
2331 if (! bytesep)
2332 return NULL;
2333
2334 result = stringlib_rpartition(
2335 (PyObject*) self,
2336 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002337 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002338 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2339 );
2340
2341 Py_DECREF(bytesep);
Guido van Rossum98297ee2007-11-06 21:34:58 +00002342 return make_nullbytes_unique(result);
Neal Norwitz6968b052007-02-27 19:02:19 +00002343}
2344
2345Py_LOCAL_INLINE(PyObject *)
2346rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2347{
2348 register Py_ssize_t i, j, count=0;
2349 PyObject *str;
2350 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2351
2352 if (list == NULL)
2353 return NULL;
2354
2355 i = j = len - 1;
2356 while ((i >= 0) && (maxcount-- > 0)) {
2357 for (; i >= 0; i--) {
2358 if (s[i] == ch) {
2359 SPLIT_ADD(s, i + 1, j + 1);
2360 j = i = i - 1;
2361 break;
2362 }
2363 }
2364 }
2365 if (j >= -1) {
2366 SPLIT_ADD(s, 0, j + 1);
2367 }
2368 FIX_PREALLOC_SIZE(list);
2369 if (PyList_Reverse(list) < 0)
2370 goto onError;
2371
2372 return list;
2373
2374 onError:
2375 Py_DECREF(list);
2376 return NULL;
2377}
2378
Guido van Rossum8f950672007-09-10 16:53:45 +00002379Py_LOCAL_INLINE(PyObject *)
2380rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2381{
2382 register Py_ssize_t i, j, count = 0;
2383 PyObject *str;
2384 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2385
2386 if (list == NULL)
2387 return NULL;
2388
2389 for (i = j = len - 1; i >= 0; ) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002390 /* find a token */
2391 while (i >= 0 && Py_UNICODE_ISSPACE(s[i]))
2392 i--;
2393 j = i;
2394 while (i >= 0 && !Py_UNICODE_ISSPACE(s[i]))
2395 i--;
2396 if (j > i) {
2397 if (maxcount-- <= 0)
2398 break;
2399 SPLIT_ADD(s, i + 1, j + 1);
2400 while (i >= 0 && Py_UNICODE_ISSPACE(s[i]))
2401 i--;
2402 j = i;
2403 }
Guido van Rossum8f950672007-09-10 16:53:45 +00002404 }
2405 if (j >= 0) {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002406 SPLIT_ADD(s, 0, j + 1);
Guido van Rossum8f950672007-09-10 16:53:45 +00002407 }
2408 FIX_PREALLOC_SIZE(list);
2409 if (PyList_Reverse(list) < 0)
2410 goto onError;
2411
2412 return list;
2413
2414 onError:
2415 Py_DECREF(list);
2416 return NULL;
2417}
2418
Neal Norwitz6968b052007-02-27 19:02:19 +00002419PyDoc_STRVAR(rsplit__doc__,
Guido van Rossum254348e2007-11-21 19:29:53 +00002420"B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002421\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002422Return a list of the sections in B, using sep as the delimiter,\n\
2423starting at the end of B and working to the front.\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00002424If sep is not given, B is split on ASCII whitespace characters\n\
2425(space, tab, return, newline, formfeed, vertical tab).\n\
2426If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00002427
2428static PyObject *
2429bytes_rsplit(PyBytesObject *self, PyObject *args)
2430{
2431 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
Guido van Rossum8f950672007-09-10 16:53:45 +00002432 Py_ssize_t maxsplit = -1, count = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00002433 const char *s = PyBytes_AS_STRING(self), *sub;
Guido van Rossum8f950672007-09-10 16:53:45 +00002434 PyObject *list, *str, *subobj = Py_None;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00002435 Py_buffer vsub;
Neal Norwitz6968b052007-02-27 19:02:19 +00002436
Guido van Rossum8f950672007-09-10 16:53:45 +00002437 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Neal Norwitz6968b052007-02-27 19:02:19 +00002438 return NULL;
2439 if (maxsplit < 0)
2440 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum8f950672007-09-10 16:53:45 +00002441
2442 if (subobj == Py_None)
2443 return rsplit_whitespace(s, len, maxsplit);
2444
2445 if (_getbuffer(subobj, &vsub) < 0)
Neal Norwitz6968b052007-02-27 19:02:19 +00002446 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002447 sub = vsub.buf;
2448 n = vsub.len;
Neal Norwitz6968b052007-02-27 19:02:19 +00002449
2450 if (n == 0) {
2451 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum8f950672007-09-10 16:53:45 +00002452 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002453 return NULL;
2454 }
2455 else if (n == 1)
2456 return rsplit_char(s, len, sub[0], maxsplit);
2457
2458 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum8f950672007-09-10 16:53:45 +00002459 if (list == NULL) {
2460 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002461 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002462 }
Neal Norwitz6968b052007-02-27 19:02:19 +00002463
2464 j = len;
2465 i = j - n;
2466
2467 while ( (i >= 0) && (maxsplit-- > 0) ) {
2468 for (; i>=0; i--) {
2469 if (Py_STRING_MATCH(s, i, sub, n)) {
2470 SPLIT_ADD(s, i + n, j);
2471 j = i;
2472 i -= n;
2473 break;
2474 }
2475 }
2476 }
2477 SPLIT_ADD(s, 0, j);
2478 FIX_PREALLOC_SIZE(list);
2479 if (PyList_Reverse(list) < 0)
2480 goto onError;
Guido van Rossum8f950672007-09-10 16:53:45 +00002481 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002482 return list;
2483
2484onError:
2485 Py_DECREF(list);
Guido van Rossum8f950672007-09-10 16:53:45 +00002486 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002487 return NULL;
2488}
2489
Neal Norwitz6968b052007-02-27 19:02:19 +00002490PyDoc_STRVAR(reverse__doc__,
2491"B.reverse() -> None\n\
2492\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002493Reverse the order of the values in B in place.");
Neal Norwitz6968b052007-02-27 19:02:19 +00002494static PyObject *
2495bytes_reverse(PyBytesObject *self, PyObject *unused)
2496{
2497 char swap, *head, *tail;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002498 Py_ssize_t i, j, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002499
2500 j = n / 2;
2501 head = self->ob_bytes;
2502 tail = head + n - 1;
2503 for (i = 0; i < j; i++) {
2504 swap = *head;
2505 *head++ = *tail;
2506 *tail-- = swap;
2507 }
2508
2509 Py_RETURN_NONE;
2510}
2511
2512PyDoc_STRVAR(insert__doc__,
2513"B.insert(index, int) -> None\n\
2514\n\
Guido van Rossum254348e2007-11-21 19:29:53 +00002515Insert a single item into the bytearray before the given index.");
Neal Norwitz6968b052007-02-27 19:02:19 +00002516static PyObject *
2517bytes_insert(PyBytesObject *self, PyObject *args)
2518{
2519 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002520 Py_ssize_t where, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002521
2522 if (!PyArg_ParseTuple(args, "ni:insert", &where, &value))
2523 return NULL;
2524
2525 if (n == PY_SSIZE_T_MAX) {
2526 PyErr_SetString(PyExc_OverflowError,
2527 "cannot add more objects to bytes");
2528 return NULL;
2529 }
2530 if (value < 0 || value >= 256) {
2531 PyErr_SetString(PyExc_ValueError,
2532 "byte must be in range(0, 256)");
2533 return NULL;
2534 }
2535 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2536 return NULL;
2537
2538 if (where < 0) {
2539 where += n;
2540 if (where < 0)
2541 where = 0;
2542 }
2543 if (where > n)
2544 where = n;
Guido van Rossum4fc8ae42007-02-27 20:57:45 +00002545 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
Neal Norwitz6968b052007-02-27 19:02:19 +00002546 self->ob_bytes[where] = value;
2547
2548 Py_RETURN_NONE;
2549}
2550
2551PyDoc_STRVAR(append__doc__,
2552"B.append(int) -> None\n\
2553\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002554Append a single item to the end of B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00002555static PyObject *
2556bytes_append(PyBytesObject *self, PyObject *arg)
2557{
2558 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002559 Py_ssize_t n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002560
2561 if (! _getbytevalue(arg, &value))
2562 return NULL;
2563 if (n == PY_SSIZE_T_MAX) {
2564 PyErr_SetString(PyExc_OverflowError,
2565 "cannot add more objects to bytes");
2566 return NULL;
2567 }
2568 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2569 return NULL;
2570
2571 self->ob_bytes[n] = value;
2572
2573 Py_RETURN_NONE;
2574}
2575
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00002576PyDoc_STRVAR(extend__doc__,
2577"B.extend(iterable int) -> None\n\
2578\n\
2579Append all the elements from the iterator or sequence to the\n\
2580end of B.");
2581static PyObject *
2582bytes_extend(PyBytesObject *self, PyObject *arg)
2583{
2584 PyObject *it, *item, *tmp, *res;
2585 Py_ssize_t buf_size = 0, len = 0;
2586 int value;
2587 char *buf;
2588
2589 /* bytes_setslice code only accepts something supporting PEP 3118. */
2590 if (PyObject_CheckBuffer(arg)) {
2591 if (bytes_setslice(self, Py_Size(self), Py_Size(self), arg) == -1)
2592 return NULL;
2593
2594 Py_RETURN_NONE;
2595 }
2596
2597 it = PyObject_GetIter(arg);
2598 if (it == NULL)
2599 return NULL;
2600
Christian Heimes255f53b2007-12-08 15:33:56 +00002601 /* Try to determine the length of the argument. 32 is abitrary. */
2602 buf_size = _PyObject_LengthHint(arg, 32);
Alexandre Vassalotti09121e82007-12-04 05:51:13 +00002603
2604 buf = (char *)PyMem_Malloc(buf_size * sizeof(char));
2605 if (buf == NULL)
2606 return PyErr_NoMemory();
2607
2608 while ((item = PyIter_Next(it)) != NULL) {
2609 if (! _getbytevalue(item, &value)) {
2610 Py_DECREF(item);
2611 Py_DECREF(it);
2612 return NULL;
2613 }
2614 buf[len++] = value;
2615 Py_DECREF(item);
2616 if (len >= buf_size) {
2617 buf_size = len + (len >> 1) + 1;
2618 buf = (char *)PyMem_Realloc(buf, buf_size * sizeof(char));
2619 if (buf == NULL) {
2620 Py_DECREF(it);
2621 return PyErr_NoMemory();
2622 }
2623 }
2624 }
2625 Py_DECREF(it);
2626
2627 /* XXX: Is possible to avoid a full copy of the buffer? */
2628 tmp = PyBytes_FromStringAndSize(buf, len);
2629 res = bytes_extend(self, tmp);
2630 Py_DECREF(tmp);
2631 PyMem_Free(buf);
2632
2633 return res;
2634}
2635
Neal Norwitz6968b052007-02-27 19:02:19 +00002636PyDoc_STRVAR(pop__doc__,
2637"B.pop([index]) -> int\n\
2638\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002639Remove and return a single item from B. If no index\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002640argument is give, will pop the last value.");
2641static PyObject *
2642bytes_pop(PyBytesObject *self, PyObject *args)
2643{
2644 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002645 Py_ssize_t where = -1, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002646
2647 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2648 return NULL;
2649
2650 if (n == 0) {
2651 PyErr_SetString(PyExc_OverflowError,
2652 "cannot pop an empty bytes");
2653 return NULL;
2654 }
2655 if (where < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002656 where += Py_Size(self);
2657 if (where < 0 || where >= Py_Size(self)) {
Neal Norwitz6968b052007-02-27 19:02:19 +00002658 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2659 return NULL;
2660 }
2661
2662 value = self->ob_bytes[where];
2663 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2664 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2665 return NULL;
2666
Christian Heimes217cfd12007-12-02 14:31:20 +00002667 return PyLong_FromLong(value);
Neal Norwitz6968b052007-02-27 19:02:19 +00002668}
2669
2670PyDoc_STRVAR(remove__doc__,
2671"B.remove(int) -> None\n\
2672\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002673Remove the first occurance of a value in B.");
Neal Norwitz6968b052007-02-27 19:02:19 +00002674static PyObject *
2675bytes_remove(PyBytesObject *self, PyObject *arg)
2676{
2677 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002678 Py_ssize_t where, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002679
2680 if (! _getbytevalue(arg, &value))
2681 return NULL;
2682
2683 for (where = 0; where < n; where++) {
2684 if (self->ob_bytes[where] == value)
2685 break;
2686 }
2687 if (where == n) {
2688 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2689 return NULL;
2690 }
2691
2692 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2693 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2694 return NULL;
2695
2696 Py_RETURN_NONE;
2697}
2698
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002699/* XXX These two helpers could be optimized if argsize == 1 */
2700
Neal Norwitz2bad9702007-08-27 06:19:22 +00002701static Py_ssize_t
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002702lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2703 void *argptr, Py_ssize_t argsize)
2704{
2705 Py_ssize_t i = 0;
2706 while (i < mysize && memchr(argptr, myptr[i], argsize))
2707 i++;
2708 return i;
2709}
2710
Neal Norwitz2bad9702007-08-27 06:19:22 +00002711static Py_ssize_t
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002712rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2713 void *argptr, Py_ssize_t argsize)
2714{
2715 Py_ssize_t i = mysize - 1;
2716 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2717 i--;
2718 return i + 1;
2719}
2720
2721PyDoc_STRVAR(strip__doc__,
Guido van Rossum254348e2007-11-21 19:29:53 +00002722"B.strip([bytes]) -> bytearray\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002723\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00002724Strip leading and trailing bytes contained in the argument.\n\
2725If the argument is omitted, strip ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002726static PyObject *
Guido van Rossum8f950672007-09-10 16:53:45 +00002727bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002728{
2729 Py_ssize_t left, right, mysize, argsize;
2730 void *myptr, *argptr;
Guido van Rossum8f950672007-09-10 16:53:45 +00002731 PyObject *arg = Py_None;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00002732 Py_buffer varg;
Guido van Rossum8f950672007-09-10 16:53:45 +00002733 if (!PyArg_ParseTuple(args, "|O:strip", &arg))
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002734 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002735 if (arg == Py_None) {
2736 argptr = "\t\n\r\f\v ";
2737 argsize = 6;
2738 }
2739 else {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002740 if (_getbuffer(arg, &varg) < 0)
2741 return NULL;
2742 argptr = varg.buf;
2743 argsize = varg.len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002744 }
2745 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002746 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002747 left = lstrip_helper(myptr, mysize, argptr, argsize);
Guido van Rossumeb29e9a2007-08-08 21:55:33 +00002748 if (left == mysize)
2749 right = left;
2750 else
2751 right = rstrip_helper(myptr, mysize, argptr, argsize);
Guido van Rossum8f950672007-09-10 16:53:45 +00002752 if (arg != Py_None)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002753 PyObject_ReleaseBuffer(arg, &varg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002754 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2755}
2756
2757PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum254348e2007-11-21 19:29:53 +00002758"B.lstrip([bytes]) -> bytearray\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002759\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00002760Strip leading bytes contained in the argument.\n\
2761If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002762static PyObject *
Guido van Rossum8f950672007-09-10 16:53:45 +00002763bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002764{
2765 Py_ssize_t left, right, mysize, argsize;
2766 void *myptr, *argptr;
Guido van Rossum8f950672007-09-10 16:53:45 +00002767 PyObject *arg = Py_None;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00002768 Py_buffer varg;
Guido van Rossum8f950672007-09-10 16:53:45 +00002769 if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002770 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002771 if (arg == Py_None) {
2772 argptr = "\t\n\r\f\v ";
2773 argsize = 6;
2774 }
2775 else {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002776 if (_getbuffer(arg, &varg) < 0)
2777 return NULL;
2778 argptr = varg.buf;
2779 argsize = varg.len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002780 }
2781 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002782 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002783 left = lstrip_helper(myptr, mysize, argptr, argsize);
2784 right = mysize;
Guido van Rossum8f950672007-09-10 16:53:45 +00002785 if (arg != Py_None)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002786 PyObject_ReleaseBuffer(arg, &varg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002787 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2788}
2789
2790PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum254348e2007-11-21 19:29:53 +00002791"B.rstrip([bytes]) -> bytearray\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002792\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00002793Strip trailing bytes contained in the argument.\n\
2794If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002795static PyObject *
Guido van Rossum8f950672007-09-10 16:53:45 +00002796bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002797{
2798 Py_ssize_t left, right, mysize, argsize;
2799 void *myptr, *argptr;
Guido van Rossum8f950672007-09-10 16:53:45 +00002800 PyObject *arg = Py_None;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00002801 Py_buffer varg;
Guido van Rossum8f950672007-09-10 16:53:45 +00002802 if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002803 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002804 if (arg == Py_None) {
2805 argptr = "\t\n\r\f\v ";
2806 argsize = 6;
2807 }
2808 else {
Guido van Rossum98297ee2007-11-06 21:34:58 +00002809 if (_getbuffer(arg, &varg) < 0)
2810 return NULL;
2811 argptr = varg.buf;
2812 argsize = varg.len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002813 }
2814 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002815 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002816 left = 0;
2817 right = rstrip_helper(myptr, mysize, argptr, argsize);
Guido van Rossum8f950672007-09-10 16:53:45 +00002818 if (arg != Py_None)
Guido van Rossum98297ee2007-11-06 21:34:58 +00002819 PyObject_ReleaseBuffer(arg, &varg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002820 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2821}
Neal Norwitz6968b052007-02-27 19:02:19 +00002822
Guido van Rossumd624f182006-04-24 13:47:05 +00002823PyDoc_STRVAR(decode_doc,
Guido van Rossum98297ee2007-11-06 21:34:58 +00002824"B.decode([encoding[, errors]]) -> unicode object.\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002825\n\
2826Decodes B using the codec registered for encoding. encoding defaults\n\
2827to the default encoding. errors may be given to set a different error\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002828handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2829a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Skip Montanaro11019402007-12-09 23:05:36 +00002830as well as any other name registered with codecs.register_error that is\n\
Guido van Rossumd624f182006-04-24 13:47:05 +00002831able to handle UnicodeDecodeErrors.");
2832
2833static PyObject *
2834bytes_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002835{
Guido van Rossumd624f182006-04-24 13:47:05 +00002836 const char *encoding = NULL;
2837 const char *errors = NULL;
2838
2839 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2840 return NULL;
2841 if (encoding == NULL)
2842 encoding = PyUnicode_GetDefaultEncoding();
2843 return PyCodec_Decode(self, encoding, errors);
2844}
2845
Guido van Rossuma0867f72006-05-05 04:34:18 +00002846PyDoc_STRVAR(alloc_doc,
2847"B.__alloc__() -> int\n\
2848\n\
2849Returns the number of bytes actually allocated.");
2850
2851static PyObject *
2852bytes_alloc(PyBytesObject *self)
2853{
Christian Heimes217cfd12007-12-02 14:31:20 +00002854 return PyLong_FromSsize_t(self->ob_alloc);
Guido van Rossuma0867f72006-05-05 04:34:18 +00002855}
2856
Guido van Rossum20188312006-05-05 15:15:40 +00002857PyDoc_STRVAR(join_doc,
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002858"B.join(iterable_of_bytes) -> bytes\n\
Guido van Rossum20188312006-05-05 15:15:40 +00002859\n\
Guido van Rossum254348e2007-11-21 19:29:53 +00002860Concatenates any number of bytearray objects, with B in between each pair.");
Guido van Rossum20188312006-05-05 15:15:40 +00002861
2862static PyObject *
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002863bytes_join(PyBytesObject *self, PyObject *it)
Guido van Rossum20188312006-05-05 15:15:40 +00002864{
2865 PyObject *seq;
Martin v. Löwis5d7428b2007-07-21 18:47:48 +00002866 Py_ssize_t mysize = Py_Size(self);
Guido van Rossum20188312006-05-05 15:15:40 +00002867 Py_ssize_t i;
2868 Py_ssize_t n;
2869 PyObject **items;
2870 Py_ssize_t totalsize = 0;
2871 PyObject *result;
2872 char *dest;
2873
2874 seq = PySequence_Fast(it, "can only join an iterable");
2875 if (seq == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002876 return NULL;
Guido van Rossum20188312006-05-05 15:15:40 +00002877 n = PySequence_Fast_GET_SIZE(seq);
2878 items = PySequence_Fast_ITEMS(seq);
2879
2880 /* Compute the total size, and check that they are all bytes */
Guido van Rossum98297ee2007-11-06 21:34:58 +00002881 /* XXX Shouldn't we use _getbuffer() on these items instead? */
Guido van Rossum20188312006-05-05 15:15:40 +00002882 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002883 PyObject *obj = items[i];
Guido van Rossum98297ee2007-11-06 21:34:58 +00002884 if (!PyBytes_Check(obj) && !PyString_Check(obj)) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002885 PyErr_Format(PyExc_TypeError,
2886 "can only join an iterable of bytes "
2887 "(item %ld has type '%.100s')",
Guido van Rossum3cf5b1e2006-07-27 21:53:35 +00002888 /* XXX %ld isn't right on Win64 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002889 (long)i, Py_Type(obj)->tp_name);
Georg Brandlb3f568f2007-02-27 08:49:18 +00002890 goto error;
2891 }
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002892 if (i > 0)
2893 totalsize += mysize;
Guido van Rossum98297ee2007-11-06 21:34:58 +00002894 totalsize += Py_Size(obj);
Georg Brandlb3f568f2007-02-27 08:49:18 +00002895 if (totalsize < 0) {
2896 PyErr_NoMemory();
2897 goto error;
2898 }
Guido van Rossum20188312006-05-05 15:15:40 +00002899 }
2900
2901 /* Allocate the result, and copy the bytes */
2902 result = PyBytes_FromStringAndSize(NULL, totalsize);
2903 if (result == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002904 goto error;
Guido van Rossum20188312006-05-05 15:15:40 +00002905 dest = PyBytes_AS_STRING(result);
2906 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002907 PyObject *obj = items[i];
Guido van Rossum98297ee2007-11-06 21:34:58 +00002908 Py_ssize_t size = Py_Size(obj);
2909 char *buf;
2910 if (PyBytes_Check(obj))
2911 buf = PyBytes_AS_STRING(obj);
2912 else
2913 buf = PyString_AS_STRING(obj);
2914 if (i) {
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002915 memcpy(dest, self->ob_bytes, mysize);
2916 dest += mysize;
2917 }
Guido van Rossum98297ee2007-11-06 21:34:58 +00002918 memcpy(dest, buf, size);
Georg Brandlb3f568f2007-02-27 08:49:18 +00002919 dest += size;
Guido van Rossum20188312006-05-05 15:15:40 +00002920 }
2921
2922 /* Done */
2923 Py_DECREF(seq);
2924 return result;
2925
2926 /* Error handling */
2927 error:
2928 Py_DECREF(seq);
2929 return NULL;
2930}
2931
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002932PyDoc_STRVAR(fromhex_doc,
Guido van Rossum254348e2007-11-21 19:29:53 +00002933"bytearray.fromhex(string) -> bytearray\n\
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002934\n\
Guido van Rossum254348e2007-11-21 19:29:53 +00002935Create a bytearray object from a string of hexadecimal numbers.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00002936Spaces between two numbers are accepted.\n\
Guido van Rossum254348e2007-11-21 19:29:53 +00002937Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002938
2939static int
Guido van Rossumae404e22007-10-26 21:46:44 +00002940hex_digit_to_int(Py_UNICODE c)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002941{
Guido van Rossumae404e22007-10-26 21:46:44 +00002942 if (c >= 128)
2943 return -1;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002944 if (ISDIGIT(c))
Georg Brandlb3f568f2007-02-27 08:49:18 +00002945 return c - '0';
2946 else {
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002947 if (ISUPPER(c))
2948 c = TOLOWER(c);
Georg Brandlb3f568f2007-02-27 08:49:18 +00002949 if (c >= 'a' && c <= 'f')
2950 return c - 'a' + 10;
2951 }
2952 return -1;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002953}
2954
2955static PyObject *
2956bytes_fromhex(PyObject *cls, PyObject *args)
2957{
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002958 PyObject *newbytes, *hexobj;
2959 char *buf;
Guido van Rossumae404e22007-10-26 21:46:44 +00002960 Py_UNICODE *hex;
2961 Py_ssize_t hexlen, byteslen, i, j;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002962 int top, bot;
2963
Guido van Rossumae404e22007-10-26 21:46:44 +00002964 if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002965 return NULL;
Guido van Rossumae404e22007-10-26 21:46:44 +00002966 assert(PyUnicode_Check(hexobj));
2967 hexlen = PyUnicode_GET_SIZE(hexobj);
2968 hex = PyUnicode_AS_UNICODE(hexobj);
2969 byteslen = hexlen/2; /* This overestimates if there are spaces */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002970 newbytes = PyBytes_FromStringAndSize(NULL, byteslen);
Guido van Rossumae404e22007-10-26 21:46:44 +00002971 if (!newbytes)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002972 return NULL;
2973 buf = PyBytes_AS_STRING(newbytes);
Guido van Rossumae404e22007-10-26 21:46:44 +00002974 for (i = j = 0; i < hexlen; i += 2) {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002975 /* skip over spaces in the input */
Guido van Rossumae404e22007-10-26 21:46:44 +00002976 while (hex[i] == ' ')
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002977 i++;
Guido van Rossumae404e22007-10-26 21:46:44 +00002978 if (i >= hexlen)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002979 break;
Guido van Rossumae404e22007-10-26 21:46:44 +00002980 top = hex_digit_to_int(hex[i]);
2981 bot = hex_digit_to_int(hex[i+1]);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002982 if (top == -1 || bot == -1) {
2983 PyErr_Format(PyExc_ValueError,
Guido van Rossumae404e22007-10-26 21:46:44 +00002984 "non-hexadecimal number found in "
2985 "fromhex() arg at position %zd", i);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002986 goto error;
2987 }
2988 buf[j++] = (top << 4) + bot;
2989 }
2990 if (PyBytes_Resize(newbytes, j) < 0)
2991 goto error;
2992 return newbytes;
2993
2994 error:
2995 Py_DECREF(newbytes);
2996 return NULL;
2997}
2998
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002999PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3000
3001static PyObject *
3002bytes_reduce(PyBytesObject *self)
3003{
Guido van Rossuma6c04be2007-11-03 00:24:24 +00003004 PyObject *latin1, *dict;
Martin v. Löwis9c121062007-08-05 20:26:11 +00003005 if (self->ob_bytes)
Guido van Rossuma74184e2007-08-29 04:05:57 +00003006 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
3007 Py_Size(self), NULL);
Martin v. Löwis9c121062007-08-05 20:26:11 +00003008 else
Guido van Rossuma74184e2007-08-29 04:05:57 +00003009 latin1 = PyUnicode_FromString("");
Guido van Rossuma6c04be2007-11-03 00:24:24 +00003010
3011 dict = PyObject_GetAttrString((PyObject *)self, "__dict__");
3012 if (dict == NULL) {
3013 PyErr_Clear();
3014 dict = Py_None;
3015 Py_INCREF(dict);
3016 }
3017
3018 return Py_BuildValue("(O(Ns)N)", Py_Type(self), latin1, "latin-1", dict);
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003019}
3020
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003021static PySequenceMethods bytes_as_sequence = {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003022 (lenfunc)bytes_length, /* sq_length */
Guido van Rossum98297ee2007-11-06 21:34:58 +00003023 (binaryfunc)PyBytes_Concat, /* sq_concat */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003024 (ssizeargfunc)bytes_repeat, /* sq_repeat */
3025 (ssizeargfunc)bytes_getitem, /* sq_item */
3026 0, /* sq_slice */
3027 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */
3028 0, /* sq_ass_slice */
Guido van Rossumd624f182006-04-24 13:47:05 +00003029 (objobjproc)bytes_contains, /* sq_contains */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003030 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */
3031 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003032};
3033
3034static PyMappingMethods bytes_as_mapping = {
Guido van Rossumd624f182006-04-24 13:47:05 +00003035 (lenfunc)bytes_length,
Thomas Wouters376446d2006-12-19 08:30:14 +00003036 (binaryfunc)bytes_subscript,
3037 (objobjargproc)bytes_ass_subscript,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003038};
3039
3040static PyBufferProcs bytes_as_buffer = {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00003041 (getbufferproc)bytes_getbuffer,
3042 (releasebufferproc)bytes_releasebuffer,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003043};
3044
3045static PyMethodDef
3046bytes_methods[] = {
Guido van Rossumae404e22007-10-26 21:46:44 +00003047 {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
3048 {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
3049 {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00003050 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
3051 _Py_capitalize__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00003052 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00003053 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
3054 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
3055 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00003056 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
3057 expandtabs__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00003058 {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
3059 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
3060 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
3061 fromhex_doc},
3062 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
3063 {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
3064 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
3065 _Py_isalnum__doc__},
3066 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
3067 _Py_isalpha__doc__},
3068 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
3069 _Py_isdigit__doc__},
3070 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
3071 _Py_islower__doc__},
3072 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
3073 _Py_isspace__doc__},
3074 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
3075 _Py_istitle__doc__},
3076 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
3077 _Py_isupper__doc__},
3078 {"join", (PyCFunction)bytes_join, METH_O, join_doc},
3079 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3080 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
3081 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
3082 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
3083 {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
3084 {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
3085 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
3086 {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
3087 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
3088 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
3089 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
3090 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
3091 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
3092 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
3093 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00003094 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
3095 splitlines__doc__},
Guido van Rossumae404e22007-10-26 21:46:44 +00003096 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS ,
3097 startswith__doc__},
3098 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
3099 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
3100 _Py_swapcase__doc__},
3101 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
3102 {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
3103 translate__doc__},
3104 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
3105 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
Guido van Rossuma0867f72006-05-05 04:34:18 +00003106 {NULL}
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003107};
3108
3109PyDoc_STRVAR(bytes_doc,
Guido van Rossum254348e2007-11-21 19:29:53 +00003110"bytearray(iterable_of_ints) -> bytearray.\n\
3111bytearray(string, encoding[, errors]) -> bytearray.\n\
3112bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
3113bytearray(memory_view) -> bytearray.\n\
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003114\n\
Guido van Rossum254348e2007-11-21 19:29:53 +00003115Construct an mutable bytearray object from:\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003116 - an iterable yielding integers in range(256)\n\
3117 - a text string encoded using the specified encoding\n\
Guido van Rossum254348e2007-11-21 19:29:53 +00003118 - a bytes or a bytearray object\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003119 - any object implementing the buffer API.\n\
3120\n\
Guido van Rossum254348e2007-11-21 19:29:53 +00003121bytearray(int) -> bytearray.\n\
Guido van Rossum98297ee2007-11-06 21:34:58 +00003122\n\
Guido van Rossum254348e2007-11-21 19:29:53 +00003123Construct a zero-initialized bytearray of the given length.");
Guido van Rossum98297ee2007-11-06 21:34:58 +00003124
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003125
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003126static PyObject *bytes_iter(PyObject *seq);
3127
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003128PyTypeObject PyBytes_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003129 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum254348e2007-11-21 19:29:53 +00003130 "bytearray",
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003131 sizeof(PyBytesObject),
3132 0,
Guido van Rossumd624f182006-04-24 13:47:05 +00003133 (destructor)bytes_dealloc, /* tp_dealloc */
3134 0, /* tp_print */
3135 0, /* tp_getattr */
3136 0, /* tp_setattr */
3137 0, /* tp_compare */
3138 (reprfunc)bytes_repr, /* tp_repr */
3139 0, /* tp_as_number */
3140 &bytes_as_sequence, /* tp_as_sequence */
3141 &bytes_as_mapping, /* tp_as_mapping */
Georg Brandlb3f568f2007-02-27 08:49:18 +00003142 0, /* tp_hash */
Guido van Rossumd624f182006-04-24 13:47:05 +00003143 0, /* tp_call */
Guido van Rossum98297ee2007-11-06 21:34:58 +00003144 bytes_str, /* tp_str */
Guido van Rossumd624f182006-04-24 13:47:05 +00003145 PyObject_GenericGetAttr, /* tp_getattro */
3146 0, /* tp_setattro */
3147 &bytes_as_buffer, /* tp_as_buffer */
Guido van Rossuma6c04be2007-11-03 00:24:24 +00003148 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Guido van Rossumd624f182006-04-24 13:47:05 +00003149 bytes_doc, /* tp_doc */
3150 0, /* tp_traverse */
3151 0, /* tp_clear */
3152 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3153 0, /* tp_weaklistoffset */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003154 bytes_iter, /* tp_iter */
Guido van Rossumd624f182006-04-24 13:47:05 +00003155 0, /* tp_iternext */
3156 bytes_methods, /* tp_methods */
3157 0, /* tp_members */
3158 0, /* tp_getset */
3159 0, /* tp_base */
3160 0, /* tp_dict */
3161 0, /* tp_descr_get */
3162 0, /* tp_descr_set */
3163 0, /* tp_dictoffset */
3164 (initproc)bytes_init, /* tp_init */
3165 PyType_GenericAlloc, /* tp_alloc */
3166 PyType_GenericNew, /* tp_new */
3167 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003168};
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003169
3170/*********************** Bytes Iterator ****************************/
3171
3172typedef struct {
3173 PyObject_HEAD
3174 Py_ssize_t it_index;
3175 PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3176} bytesiterobject;
3177
3178static void
3179bytesiter_dealloc(bytesiterobject *it)
3180{
3181 _PyObject_GC_UNTRACK(it);
3182 Py_XDECREF(it->it_seq);
3183 PyObject_GC_Del(it);
3184}
3185
3186static int
3187bytesiter_traverse(bytesiterobject *it, visitproc visit, void *arg)
3188{
3189 Py_VISIT(it->it_seq);
3190 return 0;
3191}
3192
3193static PyObject *
3194bytesiter_next(bytesiterobject *it)
3195{
3196 PyBytesObject *seq;
3197 PyObject *item;
3198
3199 assert(it != NULL);
3200 seq = it->it_seq;
3201 if (seq == NULL)
3202 return NULL;
3203 assert(PyBytes_Check(seq));
3204
3205 if (it->it_index < PyBytes_GET_SIZE(seq)) {
Christian Heimes217cfd12007-12-02 14:31:20 +00003206 item = PyLong_FromLong(
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003207 (unsigned char)seq->ob_bytes[it->it_index]);
3208 if (item != NULL)
3209 ++it->it_index;
3210 return item;
3211 }
3212
3213 Py_DECREF(seq);
3214 it->it_seq = NULL;
3215 return NULL;
3216}
3217
3218static PyObject *
3219bytesiter_length_hint(bytesiterobject *it)
3220{
3221 Py_ssize_t len = 0;
3222 if (it->it_seq)
3223 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
Christian Heimes217cfd12007-12-02 14:31:20 +00003224 return PyLong_FromSsize_t(len);
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003225}
3226
3227PyDoc_STRVAR(length_hint_doc,
3228 "Private method returning an estimate of len(list(it)).");
3229
3230static PyMethodDef bytesiter_methods[] = {
3231 {"__length_hint__", (PyCFunction)bytesiter_length_hint, METH_NOARGS,
3232 length_hint_doc},
3233 {NULL, NULL} /* sentinel */
3234};
3235
3236PyTypeObject PyBytesIter_Type = {
3237 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum254348e2007-11-21 19:29:53 +00003238 "bytearray_iterator", /* tp_name */
Guido van Rossuma5d2d552007-10-26 17:39:48 +00003239 sizeof(bytesiterobject), /* tp_basicsize */
3240 0, /* tp_itemsize */
3241 /* methods */
3242 (destructor)bytesiter_dealloc, /* tp_dealloc */
3243 0, /* tp_print */
3244 0, /* tp_getattr */
3245 0, /* tp_setattr */
3246 0, /* tp_compare */
3247 0, /* tp_repr */
3248 0, /* tp_as_number */
3249 0, /* tp_as_sequence */
3250 0, /* tp_as_mapping */
3251 0, /* tp_hash */
3252 0, /* tp_call */
3253 0, /* tp_str */
3254 PyObject_GenericGetAttr, /* tp_getattro */
3255 0, /* tp_setattro */
3256 0, /* tp_as_buffer */
3257 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
3258 0, /* tp_doc */
3259 (traverseproc)bytesiter_traverse, /* tp_traverse */
3260 0, /* tp_clear */
3261 0, /* tp_richcompare */
3262 0, /* tp_weaklistoffset */
3263 PyObject_SelfIter, /* tp_iter */
3264 (iternextfunc)bytesiter_next, /* tp_iternext */
3265 bytesiter_methods, /* tp_methods */
3266 0,
3267};
3268
3269static PyObject *
3270bytes_iter(PyObject *seq)
3271{
3272 bytesiterobject *it;
3273
3274 if (!PyBytes_Check(seq)) {
3275 PyErr_BadInternalCall();
3276 return NULL;
3277 }
3278 it = PyObject_GC_New(bytesiterobject, &PyBytesIter_Type);
3279 if (it == NULL)
3280 return NULL;
3281 it->it_index = 0;
3282 Py_INCREF(seq);
3283 it->it_seq = (PyBytesObject *)seq;
3284 _PyObject_GC_TRACK(it);
3285 return (PyObject *)it;
3286}