blob: 182cbfca61110cecc7f06746e17c56913ce9c225 [file] [log] [blame]
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001/* Bytes object implementation */
2
3/* XXX TO DO: optimizations */
4
5#define PY_SSIZE_T_CLEAN
6#include "Python.h"
Guido van Rossuma0867f72006-05-05 04:34:18 +00007#include "structmember.h"
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00008
/* The nullbytes object is used by stringlib during partition.
 * If partition is removed from bytes, nullbytes and its
 * Init/Fini helpers should be removed as well.
 */
13static PyBytesObject *nullbytes = NULL;
14
15void
16PyBytes_Fini(void)
17{
18 Py_CLEAR(nullbytes);
19}
20
21int
22PyBytes_Init(void)
23{
24 nullbytes = PyObject_New(PyBytesObject, &PyBytes_Type);
25 if (nullbytes == NULL)
26 return 0;
27 nullbytes->ob_bytes = NULL;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +000028 Py_Size(nullbytes) = nullbytes->ob_alloc = 0;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000029 nullbytes->ob_exports = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +000030 return 1;
31}
32
33/* end nullbytes support */
34
Guido van Rossumad7d8d12007-04-13 01:39:34 +000035/* Helpers */
36
37static int
38_getbytevalue(PyObject* arg, int *value)
Neal Norwitz6968b052007-02-27 19:02:19 +000039{
40 PyObject *intarg = PyNumber_Int(arg);
41 if (! intarg)
42 return 0;
43 *value = PyInt_AsLong(intarg);
44 Py_DECREF(intarg);
45 if (*value < 0 || *value >= 256) {
46 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
47 return 0;
48 }
49 return 1;
50}
51
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000052static int
53bytes_getbuffer(PyBytesObject *obj, PyBuffer *view, int flags)
Guido van Rossum75d38e92007-08-24 17:33:11 +000054{
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000055 int ret;
56 void *ptr;
57 if (view == NULL) {
58 obj->ob_exports++;
59 return 0;
60 }
Guido van Rossum75d38e92007-08-24 17:33:11 +000061 if (obj->ob_bytes == NULL)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000062 ptr = "";
63 else
64 ptr = obj->ob_bytes;
65 ret = PyBuffer_FillInfo(view, ptr, Py_Size(obj), 0, flags);
66 if (ret >= 0) {
67 obj->ob_exports++;
68 }
69 return ret;
70}
71
72static void
73bytes_releasebuffer(PyBytesObject *obj, PyBuffer *view)
74{
75 obj->ob_exports--;
76}
77
Neal Norwitz2bad9702007-08-27 06:19:22 +000078static Py_ssize_t
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000079_getbuffer(PyObject *obj, PyBuffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000080{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +000081 PyBufferProcs *buffer = Py_Type(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000082
83 if (buffer == NULL ||
84 PyUnicode_Check(obj) ||
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000085 buffer->bf_getbuffer == NULL) return -1;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000086
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000087 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
88 return -1;
89 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000090}
91
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000092/* Direct API functions */
93
94PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +000095PyBytes_FromObject(PyObject *input)
96{
97 return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
98 input, NULL);
99}
100
101PyObject *
102PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000103{
104 PyBytesObject *new;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000105 int alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000106
Guido van Rossumd624f182006-04-24 13:47:05 +0000107 assert(size >= 0);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000108
109 new = PyObject_New(PyBytesObject, &PyBytes_Type);
110 if (new == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000111 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000112
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000113 if (size == 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000114 new->ob_bytes = NULL;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000115 alloc = 0;
116 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000117 else {
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000118 alloc = size + 1;
119 new->ob_bytes = PyMem_Malloc(alloc);
Guido van Rossumd624f182006-04-24 13:47:05 +0000120 if (new->ob_bytes == NULL) {
121 Py_DECREF(new);
122 return NULL;
123 }
124 if (bytes != NULL)
125 memcpy(new->ob_bytes, bytes, size);
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000126 new->ob_bytes[size] = '\0'; /* Trailing null byte */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000127 }
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000128 Py_Size(new) = size;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000129 new->ob_alloc = alloc;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000130 new->ob_exports = 0;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000131
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000132 return (PyObject *)new;
133}
134
135Py_ssize_t
136PyBytes_Size(PyObject *self)
137{
138 assert(self != NULL);
139 assert(PyBytes_Check(self));
140
Guido van Rossum20188312006-05-05 15:15:40 +0000141 return PyBytes_GET_SIZE(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000142}
143
144char *
145PyBytes_AsString(PyObject *self)
146{
147 assert(self != NULL);
148 assert(PyBytes_Check(self));
149
Guido van Rossum20188312006-05-05 15:15:40 +0000150 return PyBytes_AS_STRING(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000151}
152
153int
154PyBytes_Resize(PyObject *self, Py_ssize_t size)
155{
156 void *sval;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000157 Py_ssize_t alloc = ((PyBytesObject *)self)->ob_alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000158
159 assert(self != NULL);
160 assert(PyBytes_Check(self));
161 assert(size >= 0);
162
Guido van Rossuma0867f72006-05-05 04:34:18 +0000163 if (size < alloc / 2) {
164 /* Major downsize; resize down to exact size */
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000165 alloc = size + 1;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000166 }
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000167 else if (size < alloc) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000168 /* Within allocated size; quick exit */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000169 Py_Size(self) = size;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000170 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
Guido van Rossuma0867f72006-05-05 04:34:18 +0000171 return 0;
172 }
173 else if (size <= alloc * 1.125) {
174 /* Moderate upsize; overallocate similar to list_resize() */
175 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
176 }
177 else {
178 /* Major upsize; resize up to exact size */
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000179 alloc = size + 1;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000180 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000181
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000182 if (((PyBytesObject *)self)->ob_exports > 0) {
183 /*
184 fprintf(stderr, "%d: %s", ((PyBytesObject *)self)->ob_exports, ((PyBytesObject *)self)->ob_bytes);
185 */
186 PyErr_SetString(PyExc_BufferError,
187 "Existing exports of data: object cannot be re-sized");
188 return -1;
189 }
190
Guido van Rossuma0867f72006-05-05 04:34:18 +0000191 sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000192 if (sval == NULL) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000193 PyErr_NoMemory();
194 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000195 }
196
Guido van Rossumd624f182006-04-24 13:47:05 +0000197 ((PyBytesObject *)self)->ob_bytes = sval;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000198 Py_Size(self) = size;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000199 ((PyBytesObject *)self)->ob_alloc = alloc;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000200 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
201
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000202 return 0;
203}
204
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000205PyObject *
206PyBytes_Concat(PyObject *a, PyObject *b)
207{
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000208 Py_ssize_t size;
209 PyBuffer va, vb;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000210 PyBytesObject *result;
211
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000212 va.len = -1;
213 vb.len = -1;
214 if (_getbuffer(a, &va) < 0 ||
215 _getbuffer(b, &vb) < 0) {
Guido van Rossum75d38e92007-08-24 17:33:11 +0000216 if (va.len != -1)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000217 PyObject_ReleaseBuffer(a, &va);
218 if (vb.len != -1)
219 PyObject_ReleaseBuffer(b, &vb);
220 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
221 Py_Type(a)->tp_name, Py_Type(b)->tp_name);
222 return NULL;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000223 }
224
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000225 size = va.len + vb.len;
226 if (size < 0) {
227 PyObject_ReleaseBuffer(a, &va);
228 PyObject_ReleaseBuffer(b, &vb);
229 return PyErr_NoMemory();
230 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000231
232 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
233 if (result != NULL) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000234 memcpy(result->ob_bytes, va.buf, va.len);
235 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000236 }
Guido van Rossum75d38e92007-08-24 17:33:11 +0000237
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000238 PyObject_ReleaseBuffer(a, &va);
239 PyObject_ReleaseBuffer(b, &vb);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000240 return (PyObject *)result;
241}
242
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000243/* Functions stuffed into the type object */
244
245static Py_ssize_t
246bytes_length(PyBytesObject *self)
247{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000248 return Py_Size(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000249}
250
251static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000252bytes_concat(PyBytesObject *self, PyObject *other)
253{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000254 return PyBytes_Concat((PyObject *)self, other);
Guido van Rossumd624f182006-04-24 13:47:05 +0000255}
256
257static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000258bytes_iconcat(PyBytesObject *self, PyObject *other)
259{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000260 Py_ssize_t mysize;
Guido van Rossum13e57212006-04-27 22:54:26 +0000261 Py_ssize_t size;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000262 PyBuffer vo;
Guido van Rossum13e57212006-04-27 22:54:26 +0000263
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000264 if (_getbuffer(other, &vo) < 0) {
265 PyErr_Format(PyExc_TypeError,
266 "can't concat bytes to %.100s", Py_Type(self)->tp_name);
267 return NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000268 }
269
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000270 mysize = Py_Size(self);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000271 size = mysize + vo.len;
272 if (size < 0) {
273 PyObject_ReleaseBuffer(other, &vo);
274 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000275 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000276 if (size < self->ob_alloc) {
277 Py_Size(self) = size;
278 self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
279 }
280 else if (PyBytes_Resize((PyObject *)self, size) < 0) {
281 PyObject_ReleaseBuffer(other, &vo);
282 return NULL;
283 }
284 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
285 PyObject_ReleaseBuffer(other, &vo);
Guido van Rossum13e57212006-04-27 22:54:26 +0000286 Py_INCREF(self);
287 return (PyObject *)self;
288}
289
290static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000291bytes_repeat(PyBytesObject *self, Py_ssize_t count)
292{
293 PyBytesObject *result;
294 Py_ssize_t mysize;
295 Py_ssize_t size;
296
297 if (count < 0)
298 count = 0;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000299 mysize = Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000300 size = mysize * count;
301 if (count != 0 && size / count != mysize)
302 return PyErr_NoMemory();
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000303 result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
Guido van Rossumd624f182006-04-24 13:47:05 +0000304 if (result != NULL && size != 0) {
305 if (mysize == 1)
306 memset(result->ob_bytes, self->ob_bytes[0], size);
307 else {
Guido van Rossum13e57212006-04-27 22:54:26 +0000308 Py_ssize_t i;
Guido van Rossumd624f182006-04-24 13:47:05 +0000309 for (i = 0; i < count; i++)
310 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
311 }
312 }
313 return (PyObject *)result;
314}
315
316static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000317bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
318{
319 Py_ssize_t mysize;
320 Py_ssize_t size;
321
322 if (count < 0)
323 count = 0;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000324 mysize = Py_Size(self);
Guido van Rossum13e57212006-04-27 22:54:26 +0000325 size = mysize * count;
326 if (count != 0 && size / count != mysize)
327 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000328 if (size < self->ob_alloc) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000329 Py_Size(self) = size;
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000330 self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000331 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000332 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000333 return NULL;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000334
Guido van Rossum13e57212006-04-27 22:54:26 +0000335 if (mysize == 1)
336 memset(self->ob_bytes, self->ob_bytes[0], size);
337 else {
338 Py_ssize_t i;
339 for (i = 1; i < count; i++)
340 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
341 }
342
343 Py_INCREF(self);
344 return (PyObject *)self;
345}
346
347static int
348bytes_substring(PyBytesObject *self, PyBytesObject *other)
349{
350 Py_ssize_t i;
351
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000352 if (Py_Size(other) == 1) {
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000353 return memchr(self->ob_bytes, other->ob_bytes[0],
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000354 Py_Size(self)) != NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000355 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000356 if (Py_Size(other) == 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000357 return 1; /* Edge case */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000358 for (i = 0; i + Py_Size(other) <= Py_Size(self); i++) {
Guido van Rossum13e57212006-04-27 22:54:26 +0000359 /* XXX Yeah, yeah, lots of optimizations possible... */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000360 if (memcmp(self->ob_bytes + i, other->ob_bytes, Py_Size(other)) == 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000361 return 1;
362 }
363 return 0;
364}
365
366static int
367bytes_contains(PyBytesObject *self, PyObject *value)
368{
369 Py_ssize_t ival;
370
371 if (PyBytes_Check(value))
372 return bytes_substring(self, (PyBytesObject *)value);
373
Thomas Woutersd204a712006-08-22 13:41:17 +0000374 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossum13e57212006-04-27 22:54:26 +0000375 if (ival == -1 && PyErr_Occurred())
376 return -1;
Guido van Rossum13e57212006-04-27 22:54:26 +0000377 if (ival < 0 || ival >= 256) {
378 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
379 return -1;
380 }
381
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000382 return memchr(self->ob_bytes, ival, Py_Size(self)) != NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000383}
384
385static PyObject *
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000386bytes_getitem(PyBytesObject *self, Py_ssize_t i)
387{
388 if (i < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000389 i += Py_Size(self);
390 if (i < 0 || i >= Py_Size(self)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000391 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
392 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000393 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000394 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
395}
396
397static PyObject *
Thomas Wouters376446d2006-12-19 08:30:14 +0000398bytes_subscript(PyBytesObject *self, PyObject *item)
Guido van Rossumd624f182006-04-24 13:47:05 +0000399{
Thomas Wouters376446d2006-12-19 08:30:14 +0000400 if (PyIndex_Check(item)) {
401 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000402
Thomas Wouters376446d2006-12-19 08:30:14 +0000403 if (i == -1 && PyErr_Occurred())
404 return NULL;
405
406 if (i < 0)
407 i += PyBytes_GET_SIZE(self);
408
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000409 if (i < 0 || i >= Py_Size(self)) {
Thomas Wouters376446d2006-12-19 08:30:14 +0000410 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
411 return NULL;
412 }
413 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
414 }
415 else if (PySlice_Check(item)) {
416 Py_ssize_t start, stop, step, slicelength, cur, i;
417 if (PySlice_GetIndicesEx((PySliceObject *)item,
418 PyBytes_GET_SIZE(self),
419 &start, &stop, &step, &slicelength) < 0) {
420 return NULL;
421 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000422
Thomas Wouters376446d2006-12-19 08:30:14 +0000423 if (slicelength <= 0)
424 return PyBytes_FromStringAndSize("", 0);
425 else if (step == 1) {
426 return PyBytes_FromStringAndSize(self->ob_bytes + start,
427 slicelength);
428 }
429 else {
430 char *source_buf = PyBytes_AS_STRING(self);
431 char *result_buf = (char *)PyMem_Malloc(slicelength);
432 PyObject *result;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000433
Thomas Wouters376446d2006-12-19 08:30:14 +0000434 if (result_buf == NULL)
435 return PyErr_NoMemory();
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000436
Thomas Wouters376446d2006-12-19 08:30:14 +0000437 for (cur = start, i = 0; i < slicelength;
438 cur += step, i++) {
439 result_buf[i] = source_buf[cur];
440 }
441 result = PyBytes_FromStringAndSize(result_buf, slicelength);
442 PyMem_Free(result_buf);
443 return result;
444 }
445 }
446 else {
447 PyErr_SetString(PyExc_TypeError, "bytes indices must be integers");
448 return NULL;
449 }
450}
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000451
Guido van Rossumd624f182006-04-24 13:47:05 +0000452static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000453bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
Guido van Rossumd624f182006-04-24 13:47:05 +0000454 PyObject *values)
455{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000456 Py_ssize_t avail, needed;
457 void *bytes;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000458 PyBuffer vbytes;
459 int res = 0;
Guido van Rossumd624f182006-04-24 13:47:05 +0000460
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000461 vbytes.len = -1;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000462 if (values == (PyObject *)self) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000463 /* Make a copy and call this function recursively */
Guido van Rossumd624f182006-04-24 13:47:05 +0000464 int err;
465 values = PyBytes_FromObject(values);
466 if (values == NULL)
467 return -1;
468 err = bytes_setslice(self, lo, hi, values);
469 Py_DECREF(values);
470 return err;
471 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000472 if (values == NULL) {
473 /* del b[lo:hi] */
474 bytes = NULL;
475 needed = 0;
476 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000477 else {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000478 if (_getbuffer(values, &vbytes) < 0) {
479 PyErr_Format(PyExc_TypeError,
480 "can't set bytes slice from %.100s",
481 Py_Type(values)->tp_name);
482 return -1;
483 }
484 needed = vbytes.len;
485 bytes = vbytes.buf;
Guido van Rossumd624f182006-04-24 13:47:05 +0000486 }
487
488 if (lo < 0)
489 lo = 0;
Thomas Wouters9a6e62b2006-08-23 23:20:29 +0000490 if (hi < lo)
491 hi = lo;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000492 if (hi > Py_Size(self))
493 hi = Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000494
495 avail = hi - lo;
496 if (avail < 0)
497 lo = hi = avail = 0;
498
499 if (avail != needed) {
500 if (avail > needed) {
501 /*
502 0 lo hi old_size
503 | |<----avail----->|<-----tomove------>|
504 | |<-needed->|<-----tomove------>|
505 0 lo new_hi new_size
506 */
507 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000508 Py_Size(self) - hi);
Guido van Rossumd624f182006-04-24 13:47:05 +0000509 }
Neal Norwitzfaa54a32007-08-19 04:23:20 +0000510 /* XXX(nnorwitz): need to verify this can't overflow! */
Thomas Wouters376446d2006-12-19 08:30:14 +0000511 if (PyBytes_Resize((PyObject *)self,
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000512 Py_Size(self) + needed - avail) < 0) {
513 res = -1;
514 goto finish;
515 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000516 if (avail < needed) {
517 /*
518 0 lo hi old_size
519 | |<-avail->|<-----tomove------>|
520 | |<----needed---->|<-----tomove------>|
521 0 lo new_hi new_size
522 */
523 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000524 Py_Size(self) - lo - needed);
Guido van Rossumd624f182006-04-24 13:47:05 +0000525 }
526 }
527
528 if (needed > 0)
529 memcpy(self->ob_bytes + lo, bytes, needed);
530
Guido van Rossum75d38e92007-08-24 17:33:11 +0000531
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000532 finish:
Guido van Rossum75d38e92007-08-24 17:33:11 +0000533 if (vbytes.len != -1)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000534 PyObject_ReleaseBuffer(values, &vbytes);
535 return res;
Guido van Rossumd624f182006-04-24 13:47:05 +0000536}
537
538static int
539bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
540{
541 Py_ssize_t ival;
542
543 if (i < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000544 i += Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000545
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000546 if (i < 0 || i >= Py_Size(self)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000547 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
548 return -1;
549 }
550
551 if (value == NULL)
552 return bytes_setslice(self, i, i+1, NULL);
553
Thomas Woutersd204a712006-08-22 13:41:17 +0000554 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000555 if (ival == -1 && PyErr_Occurred())
556 return -1;
557
558 if (ival < 0 || ival >= 256) {
559 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
560 return -1;
561 }
562
563 self->ob_bytes[i] = ival;
564 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000565}
566
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000567static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000568bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
569{
570 Py_ssize_t start, stop, step, slicelen, needed;
571 char *bytes;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000572
Thomas Wouters376446d2006-12-19 08:30:14 +0000573 if (PyIndex_Check(item)) {
574 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
575
576 if (i == -1 && PyErr_Occurred())
577 return -1;
578
579 if (i < 0)
580 i += PyBytes_GET_SIZE(self);
581
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000582 if (i < 0 || i >= Py_Size(self)) {
Thomas Wouters376446d2006-12-19 08:30:14 +0000583 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
584 return -1;
585 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000586
Thomas Wouters376446d2006-12-19 08:30:14 +0000587 if (values == NULL) {
588 /* Fall through to slice assignment */
589 start = i;
590 stop = i + 1;
591 step = 1;
592 slicelen = 1;
593 }
594 else {
595 Py_ssize_t ival = PyNumber_AsSsize_t(values, PyExc_ValueError);
596 if (ival == -1 && PyErr_Occurred())
597 return -1;
598 if (ival < 0 || ival >= 256) {
599 PyErr_SetString(PyExc_ValueError,
600 "byte must be in range(0, 256)");
601 return -1;
602 }
603 self->ob_bytes[i] = (char)ival;
604 return 0;
605 }
606 }
607 else if (PySlice_Check(item)) {
608 if (PySlice_GetIndicesEx((PySliceObject *)item,
609 PyBytes_GET_SIZE(self),
610 &start, &stop, &step, &slicelen) < 0) {
611 return -1;
612 }
613 }
614 else {
615 PyErr_SetString(PyExc_TypeError, "bytes indices must be integer");
616 return -1;
617 }
618
619 if (values == NULL) {
620 bytes = NULL;
621 needed = 0;
622 }
623 else if (values == (PyObject *)self || !PyBytes_Check(values)) {
624 /* Make a copy an call this function recursively */
625 int err;
626 values = PyBytes_FromObject(values);
627 if (values == NULL)
628 return -1;
629 err = bytes_ass_subscript(self, item, values);
630 Py_DECREF(values);
631 return err;
632 }
633 else {
634 assert(PyBytes_Check(values));
635 bytes = ((PyBytesObject *)values)->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000636 needed = Py_Size(values);
Thomas Wouters376446d2006-12-19 08:30:14 +0000637 }
638 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
639 if ((step < 0 && start < stop) ||
640 (step > 0 && start > stop))
641 stop = start;
642 if (step == 1) {
643 if (slicelen != needed) {
644 if (slicelen > needed) {
645 /*
646 0 start stop old_size
647 | |<---slicelen--->|<-----tomove------>|
648 | |<-needed->|<-----tomove------>|
649 0 lo new_hi new_size
650 */
651 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000652 Py_Size(self) - stop);
Thomas Wouters376446d2006-12-19 08:30:14 +0000653 }
654 if (PyBytes_Resize((PyObject *)self,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000655 Py_Size(self) + needed - slicelen) < 0)
Thomas Wouters376446d2006-12-19 08:30:14 +0000656 return -1;
657 if (slicelen < needed) {
658 /*
659 0 lo hi old_size
660 | |<-avail->|<-----tomove------>|
661 | |<----needed---->|<-----tomove------>|
662 0 lo new_hi new_size
663 */
664 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000665 Py_Size(self) - start - needed);
Thomas Wouters376446d2006-12-19 08:30:14 +0000666 }
667 }
668
669 if (needed > 0)
670 memcpy(self->ob_bytes + start, bytes, needed);
671
672 return 0;
673 }
674 else {
675 if (needed == 0) {
676 /* Delete slice */
677 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000678
Thomas Wouters376446d2006-12-19 08:30:14 +0000679 if (step < 0) {
680 stop = start + 1;
681 start = stop + step * (slicelen - 1) - 1;
682 step = -step;
683 }
684 for (cur = start, i = 0;
685 i < slicelen; cur += step, i++) {
686 Py_ssize_t lim = step - 1;
687
688 if (cur + step >= PyBytes_GET_SIZE(self))
689 lim = PyBytes_GET_SIZE(self) - cur - 1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000690
Thomas Wouters376446d2006-12-19 08:30:14 +0000691 memmove(self->ob_bytes + cur - i,
692 self->ob_bytes + cur + 1, lim);
693 }
694 /* Move the tail of the bytes, in one chunk */
695 cur = start + slicelen*step;
696 if (cur < PyBytes_GET_SIZE(self)) {
697 memmove(self->ob_bytes + cur - slicelen,
698 self->ob_bytes + cur,
699 PyBytes_GET_SIZE(self) - cur);
700 }
701 if (PyBytes_Resize((PyObject *)self,
702 PyBytes_GET_SIZE(self) - slicelen) < 0)
703 return -1;
704
705 return 0;
706 }
707 else {
708 /* Assign slice */
709 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000710
Thomas Wouters376446d2006-12-19 08:30:14 +0000711 if (needed != slicelen) {
712 PyErr_Format(PyExc_ValueError,
713 "attempt to assign bytes of size %zd "
714 "to extended slice of size %zd",
715 needed, slicelen);
716 return -1;
717 }
718 for (cur = start, i = 0; i < slicelen; cur += step, i++)
719 self->ob_bytes[cur] = bytes[i];
720 return 0;
721 }
722 }
723}
724
725static int
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000726bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
727{
Guido van Rossumd624f182006-04-24 13:47:05 +0000728 static char *kwlist[] = {"source", "encoding", "errors", 0};
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000729 PyObject *arg = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +0000730 const char *encoding = NULL;
731 const char *errors = NULL;
732 Py_ssize_t count;
733 PyObject *it;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000734 PyObject *(*iternext)(PyObject *);
735
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000736 if (Py_Size(self) != 0) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000737 /* Empty previous contents (yes, do this first of all!) */
738 if (PyBytes_Resize((PyObject *)self, 0) < 0)
739 return -1;
740 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000741
Guido van Rossumd624f182006-04-24 13:47:05 +0000742 /* Parse arguments */
743 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
744 &arg, &encoding, &errors))
745 return -1;
746
747 /* Make a quick exit if no first argument */
748 if (arg == NULL) {
749 if (encoding != NULL || errors != NULL) {
750 PyErr_SetString(PyExc_TypeError,
751 "encoding or errors without sequence argument");
752 return -1;
753 }
754 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000755 }
756
Guido van Rossumd624f182006-04-24 13:47:05 +0000757 if (PyUnicode_Check(arg)) {
758 /* Encode via the codec registry */
Guido van Rossum4355a472007-05-04 05:00:04 +0000759 PyObject *encoded, *new;
Guido van Rossumd624f182006-04-24 13:47:05 +0000760 if (encoding == NULL)
761 encoding = PyUnicode_GetDefaultEncoding();
762 encoded = PyCodec_Encode(arg, encoding, errors);
763 if (encoded == NULL)
764 return -1;
Guido van Rossum4355a472007-05-04 05:00:04 +0000765 if (!PyBytes_Check(encoded) && !PyString_Check(encoded)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000766 PyErr_Format(PyExc_TypeError,
Guido van Rossum4355a472007-05-04 05:00:04 +0000767 "encoder did not return a str8 or bytes object (type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000768 Py_Type(encoded)->tp_name);
Guido van Rossumd624f182006-04-24 13:47:05 +0000769 Py_DECREF(encoded);
770 return -1;
771 }
Guido van Rossum4355a472007-05-04 05:00:04 +0000772 new = bytes_iconcat(self, encoded);
773 Py_DECREF(encoded);
774 if (new == NULL)
775 return -1;
776 Py_DECREF(new);
777 return 0;
Guido van Rossumd624f182006-04-24 13:47:05 +0000778 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000779
Guido van Rossumd624f182006-04-24 13:47:05 +0000780 /* If it's not unicode, there can't be encoding or errors */
781 if (encoding != NULL || errors != NULL) {
782 PyErr_SetString(PyExc_TypeError,
783 "encoding or errors without a string argument");
784 return -1;
785 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000786
Guido van Rossumd624f182006-04-24 13:47:05 +0000787 /* Is it an int? */
Thomas Woutersd204a712006-08-22 13:41:17 +0000788 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000789 if (count == -1 && PyErr_Occurred())
790 PyErr_Clear();
791 else {
792 if (count < 0) {
793 PyErr_SetString(PyExc_ValueError, "negative count");
794 return -1;
795 }
796 if (count > 0) {
797 if (PyBytes_Resize((PyObject *)self, count))
798 return -1;
799 memset(self->ob_bytes, 0, count);
800 }
801 return 0;
802 }
Guido van Rossum75d38e92007-08-24 17:33:11 +0000803
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000804 /* Use the modern buffer interface */
805 if (PyObject_CheckBuffer(arg)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000806 Py_ssize_t size;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000807 PyBuffer view;
808 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000809 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000810 size = view.len;
811 if (PyBytes_Resize((PyObject *)self, size) < 0) goto fail;
812 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
813 goto fail;
814 PyObject_ReleaseBuffer(arg, &view);
Guido van Rossumd624f182006-04-24 13:47:05 +0000815 return 0;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000816 fail:
817 PyObject_ReleaseBuffer(arg, &view);
818 return -1;
Guido van Rossumd624f182006-04-24 13:47:05 +0000819 }
820
821 /* XXX Optimize this if the arguments is a list, tuple */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000822
823 /* Get the iterator */
824 it = PyObject_GetIter(arg);
825 if (it == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000826 return -1;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000827 iternext = *Py_Type(it)->tp_iternext;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000828
829 /* Run the iterator to exhaustion */
830 for (;;) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000831 PyObject *item;
832 Py_ssize_t value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000833
Guido van Rossumd624f182006-04-24 13:47:05 +0000834 /* Get the next item */
835 item = iternext(it);
836 if (item == NULL) {
837 if (PyErr_Occurred()) {
838 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
839 goto error;
840 PyErr_Clear();
841 }
842 break;
843 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000844
Guido van Rossumd624f182006-04-24 13:47:05 +0000845 /* Interpret it as an int (__index__) */
Thomas Woutersd204a712006-08-22 13:41:17 +0000846 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000847 Py_DECREF(item);
848 if (value == -1 && PyErr_Occurred())
849 goto error;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000850
Guido van Rossumd624f182006-04-24 13:47:05 +0000851 /* Range check */
852 if (value < 0 || value >= 256) {
853 PyErr_SetString(PyExc_ValueError,
854 "bytes must be in range(0, 256)");
855 goto error;
856 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000857
Guido van Rossumd624f182006-04-24 13:47:05 +0000858 /* Append the byte */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000859 if (Py_Size(self) < self->ob_alloc)
860 Py_Size(self)++;
861 else if (PyBytes_Resize((PyObject *)self, Py_Size(self)+1) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000862 goto error;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000863 self->ob_bytes[Py_Size(self)-1] = value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000864 }
865
866 /* Clean up and return success */
867 Py_DECREF(it);
868 return 0;
869
870 error:
871 /* Error handling when it != NULL */
872 Py_DECREF(it);
873 return -1;
874}
875
Georg Brandlee91be42007-02-24 19:41:35 +0000876/* Mostly copied from string_repr, but without the
877 "smart quote" functionality. */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000878static PyObject *
879bytes_repr(PyBytesObject *self)
880{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000881 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000882 size_t newsize = 3 + 4 * Py_Size(self);
Georg Brandlee91be42007-02-24 19:41:35 +0000883 PyObject *v;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000884 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(self)) {
Georg Brandlee91be42007-02-24 19:41:35 +0000885 PyErr_SetString(PyExc_OverflowError,
886 "bytes object is too large to make repr");
Guido van Rossumd624f182006-04-24 13:47:05 +0000887 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000888 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000889 v = PyUnicode_FromUnicode(NULL, newsize);
Georg Brandlee91be42007-02-24 19:41:35 +0000890 if (v == NULL) {
891 return NULL;
892 }
893 else {
894 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000895 register Py_UNICODE c;
896 register Py_UNICODE *p;
Georg Brandlee91be42007-02-24 19:41:35 +0000897 int quote = '\'';
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000898
Walter Dörwald1ab83302007-05-18 17:15:44 +0000899 p = PyUnicode_AS_UNICODE(v);
Georg Brandlee91be42007-02-24 19:41:35 +0000900 *p++ = 'b';
901 *p++ = quote;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000902 for (i = 0; i < Py_Size(self); i++) {
Georg Brandlee91be42007-02-24 19:41:35 +0000903 /* There's at least enough room for a hex escape
904 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000905 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Georg Brandlee91be42007-02-24 19:41:35 +0000906 c = self->ob_bytes[i];
907 if (c == quote || c == '\\')
908 *p++ = '\\', *p++ = c;
909 else if (c == '\t')
910 *p++ = '\\', *p++ = 't';
911 else if (c == '\n')
912 *p++ = '\\', *p++ = 'n';
913 else if (c == '\r')
914 *p++ = '\\', *p++ = 'r';
915 else if (c == 0)
Guido van Rossum57b93ad2007-05-08 19:09:34 +0000916 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
Georg Brandlee91be42007-02-24 19:41:35 +0000917 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000918 *p++ = '\\';
919 *p++ = 'x';
920 *p++ = hexdigits[(c & 0xf0) >> 4];
921 *p++ = hexdigits[c & 0xf];
Georg Brandlee91be42007-02-24 19:41:35 +0000922 }
923 else
924 *p++ = c;
925 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000926 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Georg Brandlee91be42007-02-24 19:41:35 +0000927 *p++ = quote;
928 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000929 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
930 Py_DECREF(v);
931 return NULL;
932 }
Georg Brandlee91be42007-02-24 19:41:35 +0000933 return v;
934 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000935}
936
937static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000938bytes_str(PyBytesObject *self)
939{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000940 return PyString_FromStringAndSize(self->ob_bytes, Py_Size(self));
Guido van Rossumd624f182006-04-24 13:47:05 +0000941}
942
943static PyObject *
Guido van Rossum343e97f2007-04-09 00:43:24 +0000944bytes_richcompare(PyObject *self, PyObject *other, int op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000945{
Guido van Rossum343e97f2007-04-09 00:43:24 +0000946 Py_ssize_t self_size, other_size;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000947 PyBuffer self_bytes, other_bytes;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000948 PyObject *res;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000949 Py_ssize_t minsize;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000950 int cmp;
951
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000952 /* Bytes can be compared to anything that supports the (binary) buffer
953 API. Except Unicode. */
Guido van Rossumebea9be2007-04-09 00:49:13 +0000954
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000955 self_size = _getbuffer(self, &self_bytes);
956 if (self_size < 0) {
Guido van Rossumebea9be2007-04-09 00:49:13 +0000957 Py_INCREF(Py_NotImplemented);
958 return Py_NotImplemented;
959 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000960
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000961 other_size = _getbuffer(other, &other_bytes);
962 if (other_size < 0) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000963 PyObject_ReleaseBuffer(self, &self_bytes);
Guido van Rossumd624f182006-04-24 13:47:05 +0000964 Py_INCREF(Py_NotImplemented);
965 return Py_NotImplemented;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000966 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000967
968 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000969 /* Shortcut: if the lengths differ, the objects differ */
970 cmp = (op == Py_NE);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000971 }
972 else {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000973 minsize = self_size;
974 if (other_size < minsize)
975 minsize = other_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000976
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000977 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
Guido van Rossumd624f182006-04-24 13:47:05 +0000978 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000979
Guido van Rossumd624f182006-04-24 13:47:05 +0000980 if (cmp == 0) {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000981 if (self_size < other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +0000982 cmp = -1;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000983 else if (self_size > other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +0000984 cmp = 1;
985 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000986
Guido van Rossumd624f182006-04-24 13:47:05 +0000987 switch (op) {
988 case Py_LT: cmp = cmp < 0; break;
989 case Py_LE: cmp = cmp <= 0; break;
990 case Py_EQ: cmp = cmp == 0; break;
991 case Py_NE: cmp = cmp != 0; break;
992 case Py_GT: cmp = cmp > 0; break;
993 case Py_GE: cmp = cmp >= 0; break;
994 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000995 }
996
997 res = cmp ? Py_True : Py_False;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000998 PyObject_ReleaseBuffer(self, &self_bytes);
Guido van Rossum75d38e92007-08-24 17:33:11 +0000999 PyObject_ReleaseBuffer(other, &other_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001000 Py_INCREF(res);
1001 return res;
1002}
1003
1004static void
1005bytes_dealloc(PyBytesObject *self)
1006{
Guido van Rossumd624f182006-04-24 13:47:05 +00001007 if (self->ob_bytes != 0) {
1008 PyMem_Free(self->ob_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001009 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001010 Py_Type(self)->tp_free((PyObject *)self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001011}
1012
Neal Norwitz6968b052007-02-27 19:02:19 +00001013
1014/* -------------------------------------------------------------------- */
1015/* Methods */
1016
/* Configuration for the stringlib headers included just below: these
   macros name the element type and the bytes-specific primitives.
   (Presumably the headers consume them at inclusion time, so they must
   stay ahead of the #include lines -- confirm against stringlib.) */
#define STRINGLIB_CHAR char  /* element type of the buffers */
#define STRINGLIB_CMP memcmp  /* buffer comparison */
#define STRINGLIB_LEN PyBytes_GET_SIZE  /* length of a bytes object */
#define STRINGLIB_NEW PyBytes_FromStringAndSize  /* constructor */
#define STRINGLIB_EMPTY nullbytes  /* shared empty object (used by partition) */

#include "stringlib/fastsearch.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/partition.h"
1027
1028
1029/* The following Py_LOCAL_INLINE and Py_LOCAL functions
1030were copied from the old char* style string object. */
1031
1032Py_LOCAL_INLINE(void)
1033_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1034{
1035 if (*end > len)
1036 *end = len;
1037 else if (*end < 0)
1038 *end += len;
1039 if (*end < 0)
1040 *end = 0;
1041 if (*start < 0)
1042 *start += len;
1043 if (*start < 0)
1044 *start = 0;
1045}
1046
1047
1048Py_LOCAL_INLINE(Py_ssize_t)
1049bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
1050{
1051 PyObject *subobj;
1052 const char *sub;
1053 Py_ssize_t sub_len;
1054 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1055
1056 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1057 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1058 return -2;
1059 if (PyBytes_Check(subobj)) {
1060 sub = PyBytes_AS_STRING(subobj);
1061 sub_len = PyBytes_GET_SIZE(subobj);
1062 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001063 /* XXX --> use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001064 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1065 /* XXX - the "expected a character buffer object" is pretty
1066 confusing for a non-expert. remap to something else ? */
1067 return -2;
1068
1069 if (dir > 0)
1070 return stringlib_find_slice(
1071 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1072 sub, sub_len, start, end);
1073 else
1074 return stringlib_rfind_slice(
1075 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1076 sub, sub_len, start, end);
1077}
1078
1079
1080PyDoc_STRVAR(find__doc__,
1081"B.find(sub [,start [,end]]) -> int\n\
1082\n\
1083Return the lowest index in B where subsection sub is found,\n\
1084such that sub is contained within s[start,end]. Optional\n\
1085arguments start and end are interpreted as in slice notation.\n\
1086\n\
1087Return -1 on failure.");
1088
1089static PyObject *
1090bytes_find(PyBytesObject *self, PyObject *args)
1091{
1092 Py_ssize_t result = bytes_find_internal(self, args, +1);
1093 if (result == -2)
1094 return NULL;
1095 return PyInt_FromSsize_t(result);
1096}
1097
1098PyDoc_STRVAR(count__doc__,
1099"B.count(sub[, start[, end]]) -> int\n\
1100\n\
1101Return the number of non-overlapping occurrences of subsection sub in\n\
1102bytes B[start:end]. Optional arguments start and end are interpreted\n\
1103as in slice notation.");
1104
1105static PyObject *
1106bytes_count(PyBytesObject *self, PyObject *args)
1107{
1108 PyObject *sub_obj;
1109 const char *str = PyBytes_AS_STRING(self), *sub;
1110 Py_ssize_t sub_len;
1111 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1112
1113 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1114 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1115 return NULL;
1116
1117 if (PyBytes_Check(sub_obj)) {
1118 sub = PyBytes_AS_STRING(sub_obj);
1119 sub_len = PyBytes_GET_SIZE(sub_obj);
1120 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001121 /* XXX --> use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001122 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1123 return NULL;
1124
Martin v. Löwis5b222132007-06-10 09:51:05 +00001125 _adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
Neal Norwitz6968b052007-02-27 19:02:19 +00001126
1127 return PyInt_FromSsize_t(
1128 stringlib_count(str + start, end - start, sub, sub_len)
1129 );
1130}
1131
1132
1133PyDoc_STRVAR(index__doc__,
1134"B.index(sub [,start [,end]]) -> int\n\
1135\n\
1136Like B.find() but raise ValueError when the subsection is not found.");
1137
1138static PyObject *
1139bytes_index(PyBytesObject *self, PyObject *args)
1140{
1141 Py_ssize_t result = bytes_find_internal(self, args, +1);
1142 if (result == -2)
1143 return NULL;
1144 if (result == -1) {
1145 PyErr_SetString(PyExc_ValueError,
1146 "subsection not found");
1147 return NULL;
1148 }
1149 return PyInt_FromSsize_t(result);
1150}
1151
1152
1153PyDoc_STRVAR(rfind__doc__,
1154"B.rfind(sub [,start [,end]]) -> int\n\
1155\n\
1156Return the highest index in B where subsection sub is found,\n\
1157such that sub is contained within s[start,end]. Optional\n\
1158arguments start and end are interpreted as in slice notation.\n\
1159\n\
1160Return -1 on failure.");
1161
1162static PyObject *
1163bytes_rfind(PyBytesObject *self, PyObject *args)
1164{
1165 Py_ssize_t result = bytes_find_internal(self, args, -1);
1166 if (result == -2)
1167 return NULL;
1168 return PyInt_FromSsize_t(result);
1169}
1170
1171
1172PyDoc_STRVAR(rindex__doc__,
1173"B.rindex(sub [,start [,end]]) -> int\n\
1174\n\
1175Like B.rfind() but raise ValueError when the subsection is not found.");
1176
1177static PyObject *
1178bytes_rindex(PyBytesObject *self, PyObject *args)
1179{
1180 Py_ssize_t result = bytes_find_internal(self, args, -1);
1181 if (result == -2)
1182 return NULL;
1183 if (result == -1) {
1184 PyErr_SetString(PyExc_ValueError,
1185 "subsection not found");
1186 return NULL;
1187 }
1188 return PyInt_FromSsize_t(result);
1189}
1190
1191
1192/* Matches the end (direction >= 0) or start (direction < 0) of self
1193 * against substr, using the start and end arguments. Returns
1194 * -1 on error, 0 if not found and 1 if found.
1195 */
1196Py_LOCAL(int)
1197_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
1198 Py_ssize_t end, int direction)
1199{
1200 Py_ssize_t len = PyBytes_GET_SIZE(self);
1201 Py_ssize_t slen;
1202 const char* sub;
1203 const char* str;
1204
1205 if (PyBytes_Check(substr)) {
1206 sub = PyBytes_AS_STRING(substr);
1207 slen = PyBytes_GET_SIZE(substr);
1208 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001209 /* XXX --> Use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001210 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
1211 return -1;
1212 str = PyBytes_AS_STRING(self);
1213
1214 _adjust_indices(&start, &end, len);
1215
1216 if (direction < 0) {
1217 /* startswith */
1218 if (start+slen > len)
1219 return 0;
1220 } else {
1221 /* endswith */
1222 if (end-start < slen || start > len)
1223 return 0;
1224
1225 if (end-slen > start)
1226 start = end - slen;
1227 }
1228 if (end-start >= slen)
1229 return ! memcmp(str+start, sub, slen);
1230 return 0;
1231}
1232
1233
1234PyDoc_STRVAR(startswith__doc__,
1235"B.startswith(prefix[, start[, end]]) -> bool\n\
1236\n\
1237Return True if B starts with the specified prefix, False otherwise.\n\
1238With optional start, test B beginning at that position.\n\
1239With optional end, stop comparing B at that position.\n\
1240prefix can also be a tuple of strings to try.");
1241
1242static PyObject *
1243bytes_startswith(PyBytesObject *self, PyObject *args)
1244{
1245 Py_ssize_t start = 0;
1246 Py_ssize_t end = PY_SSIZE_T_MAX;
1247 PyObject *subobj;
1248 int result;
1249
1250 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1251 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1252 return NULL;
1253 if (PyTuple_Check(subobj)) {
1254 Py_ssize_t i;
1255 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1256 result = _bytes_tailmatch(self,
1257 PyTuple_GET_ITEM(subobj, i),
1258 start, end, -1);
1259 if (result == -1)
1260 return NULL;
1261 else if (result) {
1262 Py_RETURN_TRUE;
1263 }
1264 }
1265 Py_RETURN_FALSE;
1266 }
1267 result = _bytes_tailmatch(self, subobj, start, end, -1);
1268 if (result == -1)
1269 return NULL;
1270 else
1271 return PyBool_FromLong(result);
1272}
1273
1274PyDoc_STRVAR(endswith__doc__,
1275"B.endswith(suffix[, start[, end]]) -> bool\n\
1276\n\
1277Return True if B ends with the specified suffix, False otherwise.\n\
1278With optional start, test B beginning at that position.\n\
1279With optional end, stop comparing B at that position.\n\
1280suffix can also be a tuple of strings to try.");
1281
1282static PyObject *
1283bytes_endswith(PyBytesObject *self, PyObject *args)
1284{
1285 Py_ssize_t start = 0;
1286 Py_ssize_t end = PY_SSIZE_T_MAX;
1287 PyObject *subobj;
1288 int result;
1289
1290 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1291 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1292 return NULL;
1293 if (PyTuple_Check(subobj)) {
1294 Py_ssize_t i;
1295 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1296 result = _bytes_tailmatch(self,
1297 PyTuple_GET_ITEM(subobj, i),
1298 start, end, +1);
1299 if (result == -1)
1300 return NULL;
1301 else if (result) {
1302 Py_RETURN_TRUE;
1303 }
1304 }
1305 Py_RETURN_FALSE;
1306 }
1307 result = _bytes_tailmatch(self, subobj, start, end, +1);
1308 if (result == -1)
1309 return NULL;
1310 else
1311 return PyBool_FromLong(result);
1312}
1313
1314
1315
1316PyDoc_STRVAR(translate__doc__,
1317"B.translate(table [,deletechars]) -> bytes\n\
1318\n\
1319Return a copy of the bytes B, where all characters occurring\n\
1320in the optional argument deletechars are removed, and the\n\
1321remaining characters have been mapped through the given\n\
1322translation table, which must be a bytes of length 256.");
1323
1324static PyObject *
1325bytes_translate(PyBytesObject *self, PyObject *args)
1326{
1327 register char *input, *output;
1328 register const char *table;
1329 register Py_ssize_t i, c, changed = 0;
1330 PyObject *input_obj = (PyObject*)self;
1331 const char *table1, *output_start, *del_table=NULL;
1332 Py_ssize_t inlen, tablen, dellen = 0;
1333 PyObject *result;
1334 int trans_table[256];
1335 PyObject *tableobj, *delobj = NULL;
1336
1337 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1338 &tableobj, &delobj))
1339 return NULL;
1340
1341 if (PyBytes_Check(tableobj)) {
1342 table1 = PyBytes_AS_STRING(tableobj);
1343 tablen = PyBytes_GET_SIZE(tableobj);
1344 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001345 /* XXX -> Use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001346 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
1347 return NULL;
1348
1349 if (tablen != 256) {
1350 PyErr_SetString(PyExc_ValueError,
1351 "translation table must be 256 characters long");
1352 return NULL;
1353 }
1354
1355 if (delobj != NULL) {
1356 if (PyBytes_Check(delobj)) {
1357 del_table = PyBytes_AS_STRING(delobj);
1358 dellen = PyBytes_GET_SIZE(delobj);
1359 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001360 /* XXX -> use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001361 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1362 return NULL;
1363 }
1364 else {
1365 del_table = NULL;
1366 dellen = 0;
1367 }
1368
1369 table = table1;
1370 inlen = PyBytes_GET_SIZE(input_obj);
1371 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1372 if (result == NULL)
1373 return NULL;
1374 output_start = output = PyBytes_AsString(result);
1375 input = PyBytes_AS_STRING(input_obj);
1376
1377 if (dellen == 0) {
1378 /* If no deletions are required, use faster code */
1379 for (i = inlen; --i >= 0; ) {
1380 c = Py_CHARMASK(*input++);
1381 if (Py_CHARMASK((*output++ = table[c])) != c)
1382 changed = 1;
1383 }
1384 if (changed || !PyBytes_CheckExact(input_obj))
1385 return result;
1386 Py_DECREF(result);
1387 Py_INCREF(input_obj);
1388 return input_obj;
1389 }
1390
1391 for (i = 0; i < 256; i++)
1392 trans_table[i] = Py_CHARMASK(table[i]);
1393
1394 for (i = 0; i < dellen; i++)
1395 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1396
1397 for (i = inlen; --i >= 0; ) {
1398 c = Py_CHARMASK(*input++);
1399 if (trans_table[c] != -1)
1400 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1401 continue;
1402 changed = 1;
1403 }
1404 if (!changed && PyBytes_CheckExact(input_obj)) {
1405 Py_DECREF(result);
1406 Py_INCREF(input_obj);
1407 return input_obj;
1408 }
1409 /* Fix the size of the resulting string */
1410 if (inlen > 0)
1411 PyBytes_Resize(result, output - output_start);
1412 return result;
1413}
1414
1415
/* Search directions.  The negative value is parenthesized so it stays a
   single operand wherever the macro is expanded. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate the first occurrence of byte `c` in the first `target_len`
   bytes of `target`; evaluates to a char* or NULL. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))

/* True when `pattern` (of `length` bytes) occurs in `target` at
   `offset`.  The first and last bytes are compared before memcmp() is
   called on the bytes after the first.  Arguments are fully
   parenthesized, but each is still evaluated more than once, so avoid
   arguments with side effects.

   Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length)                  \
  ((target)[(offset)] == (pattern)[0] &&                                  \
   (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&              \
   !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1429
1430
1431/* Bytes ops must return a string. */
1432/* If the object is subclass of bytes, create a copy */
1433Py_LOCAL(PyBytesObject *)
1434return_self(PyBytesObject *self)
1435{
1436 if (PyBytes_CheckExact(self)) {
1437 Py_INCREF(self);
1438 return (PyBytesObject *)self;
1439 }
1440 return (PyBytesObject *)PyBytes_FromStringAndSize(
1441 PyBytes_AS_STRING(self),
1442 PyBytes_GET_SIZE(self));
1443}
1444
1445Py_LOCAL_INLINE(Py_ssize_t)
1446countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1447{
1448 Py_ssize_t count=0;
1449 const char *start=target;
1450 const char *end=target+target_len;
1451
1452 while ( (start=findchar(start, end-start, c)) != NULL ) {
1453 count++;
1454 if (count >= maxcount)
1455 break;
1456 start += 1;
1457 }
1458 return count;
1459}
1460
1461Py_LOCAL(Py_ssize_t)
1462findstring(const char *target, Py_ssize_t target_len,
1463 const char *pattern, Py_ssize_t pattern_len,
1464 Py_ssize_t start,
1465 Py_ssize_t end,
1466 int direction)
1467{
1468 if (start < 0) {
1469 start += target_len;
1470 if (start < 0)
1471 start = 0;
1472 }
1473 if (end > target_len) {
1474 end = target_len;
1475 } else if (end < 0) {
1476 end += target_len;
1477 if (end < 0)
1478 end = 0;
1479 }
1480
1481 /* zero-length substrings always match at the first attempt */
1482 if (pattern_len == 0)
1483 return (direction > 0) ? start : end;
1484
1485 end -= pattern_len;
1486
1487 if (direction < 0) {
1488 for (; end >= start; end--)
1489 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1490 return end;
1491 } else {
1492 for (; start <= end; start++)
1493 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1494 return start;
1495 }
1496 return -1;
1497}
1498
1499Py_LOCAL_INLINE(Py_ssize_t)
1500countstring(const char *target, Py_ssize_t target_len,
1501 const char *pattern, Py_ssize_t pattern_len,
1502 Py_ssize_t start,
1503 Py_ssize_t end,
1504 int direction, Py_ssize_t maxcount)
1505{
1506 Py_ssize_t count=0;
1507
1508 if (start < 0) {
1509 start += target_len;
1510 if (start < 0)
1511 start = 0;
1512 }
1513 if (end > target_len) {
1514 end = target_len;
1515 } else if (end < 0) {
1516 end += target_len;
1517 if (end < 0)
1518 end = 0;
1519 }
1520
1521 /* zero-length substrings match everywhere */
1522 if (pattern_len == 0 || maxcount == 0) {
1523 if (target_len+1 < maxcount)
1524 return target_len+1;
1525 return maxcount;
1526 }
1527
1528 end -= pattern_len;
1529 if (direction < 0) {
1530 for (; (end >= start); end--)
1531 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1532 count++;
1533 if (--maxcount <= 0) break;
1534 end -= pattern_len-1;
1535 }
1536 } else {
1537 for (; (start <= end); start++)
1538 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1539 count++;
1540 if (--maxcount <= 0)
1541 break;
1542 start += pattern_len-1;
1543 }
1544 }
1545 return count;
1546}
1547
1548
1549/* Algorithms for different cases of string replacement */
1550
1551/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1552Py_LOCAL(PyBytesObject *)
1553replace_interleave(PyBytesObject *self,
1554 const char *to_s, Py_ssize_t to_len,
1555 Py_ssize_t maxcount)
1556{
1557 char *self_s, *result_s;
1558 Py_ssize_t self_len, result_len;
1559 Py_ssize_t count, i, product;
1560 PyBytesObject *result;
1561
1562 self_len = PyBytes_GET_SIZE(self);
1563
1564 /* 1 at the end plus 1 after every character */
1565 count = self_len+1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001566 if (maxcount < count)
Neal Norwitz6968b052007-02-27 19:02:19 +00001567 count = maxcount;
1568
1569 /* Check for overflow */
1570 /* result_len = count * to_len + self_len; */
1571 product = count * to_len;
1572 if (product / to_len != count) {
1573 PyErr_SetString(PyExc_OverflowError,
1574 "replace string is too long");
1575 return NULL;
1576 }
1577 result_len = product + self_len;
1578 if (result_len < 0) {
1579 PyErr_SetString(PyExc_OverflowError,
1580 "replace string is too long");
1581 return NULL;
1582 }
1583
1584 if (! (result = (PyBytesObject *)
1585 PyBytes_FromStringAndSize(NULL, result_len)) )
1586 return NULL;
1587
1588 self_s = PyBytes_AS_STRING(self);
1589 result_s = PyBytes_AS_STRING(result);
1590
1591 /* TODO: special case single character, which doesn't need memcpy */
1592
1593 /* Lay the first one down (guaranteed this will occur) */
1594 Py_MEMCPY(result_s, to_s, to_len);
1595 result_s += to_len;
1596 count -= 1;
1597
1598 for (i=0; i<count; i++) {
1599 *result_s++ = *self_s++;
1600 Py_MEMCPY(result_s, to_s, to_len);
1601 result_s += to_len;
1602 }
1603
1604 /* Copy the rest of the original string */
1605 Py_MEMCPY(result_s, self_s, self_len-i);
1606
1607 return result;
1608}
1609
1610/* Special case for deleting a single character */
1611/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1612Py_LOCAL(PyBytesObject *)
1613replace_delete_single_character(PyBytesObject *self,
1614 char from_c, Py_ssize_t maxcount)
1615{
1616 char *self_s, *result_s;
1617 char *start, *next, *end;
1618 Py_ssize_t self_len, result_len;
1619 Py_ssize_t count;
1620 PyBytesObject *result;
1621
1622 self_len = PyBytes_GET_SIZE(self);
1623 self_s = PyBytes_AS_STRING(self);
1624
1625 count = countchar(self_s, self_len, from_c, maxcount);
1626 if (count == 0) {
1627 return return_self(self);
1628 }
1629
1630 result_len = self_len - count; /* from_len == 1 */
1631 assert(result_len>=0);
1632
1633 if ( (result = (PyBytesObject *)
1634 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1635 return NULL;
1636 result_s = PyBytes_AS_STRING(result);
1637
1638 start = self_s;
1639 end = self_s + self_len;
1640 while (count-- > 0) {
1641 next = findchar(start, end-start, from_c);
1642 if (next == NULL)
1643 break;
1644 Py_MEMCPY(result_s, start, next-start);
1645 result_s += (next-start);
1646 start = next+1;
1647 }
1648 Py_MEMCPY(result_s, start, end-start);
1649
1650 return result;
1651}
1652
1653/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1654
1655Py_LOCAL(PyBytesObject *)
1656replace_delete_substring(PyBytesObject *self,
1657 const char *from_s, Py_ssize_t from_len,
1658 Py_ssize_t maxcount)
1659{
1660 char *self_s, *result_s;
1661 char *start, *next, *end;
1662 Py_ssize_t self_len, result_len;
1663 Py_ssize_t count, offset;
1664 PyBytesObject *result;
1665
1666 self_len = PyBytes_GET_SIZE(self);
1667 self_s = PyBytes_AS_STRING(self);
1668
1669 count = countstring(self_s, self_len,
1670 from_s, from_len,
1671 0, self_len, 1,
1672 maxcount);
1673
1674 if (count == 0) {
1675 /* no matches */
1676 return return_self(self);
1677 }
1678
1679 result_len = self_len - (count * from_len);
1680 assert (result_len>=0);
1681
1682 if ( (result = (PyBytesObject *)
1683 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1684 return NULL;
1685
1686 result_s = PyBytes_AS_STRING(result);
1687
1688 start = self_s;
1689 end = self_s + self_len;
1690 while (count-- > 0) {
1691 offset = findstring(start, end-start,
1692 from_s, from_len,
1693 0, end-start, FORWARD);
1694 if (offset == -1)
1695 break;
1696 next = start + offset;
1697
1698 Py_MEMCPY(result_s, start, next-start);
1699
1700 result_s += (next-start);
1701 start = next+from_len;
1702 }
1703 Py_MEMCPY(result_s, start, end-start);
1704 return result;
1705}
1706
1707/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1708Py_LOCAL(PyBytesObject *)
1709replace_single_character_in_place(PyBytesObject *self,
1710 char from_c, char to_c,
1711 Py_ssize_t maxcount)
1712{
1713 char *self_s, *result_s, *start, *end, *next;
1714 Py_ssize_t self_len;
1715 PyBytesObject *result;
1716
1717 /* The result string will be the same size */
1718 self_s = PyBytes_AS_STRING(self);
1719 self_len = PyBytes_GET_SIZE(self);
1720
1721 next = findchar(self_s, self_len, from_c);
1722
1723 if (next == NULL) {
1724 /* No matches; return the original bytes */
1725 return return_self(self);
1726 }
1727
1728 /* Need to make a new bytes */
1729 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1730 if (result == NULL)
1731 return NULL;
1732 result_s = PyBytes_AS_STRING(result);
1733 Py_MEMCPY(result_s, self_s, self_len);
1734
1735 /* change everything in-place, starting with this one */
1736 start = result_s + (next-self_s);
1737 *start = to_c;
1738 start++;
1739 end = result_s + self_len;
1740
1741 while (--maxcount > 0) {
1742 next = findchar(start, end-start, from_c);
1743 if (next == NULL)
1744 break;
1745 *next = to_c;
1746 start = next+1;
1747 }
1748
1749 return result;
1750}
1751
1752/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1753Py_LOCAL(PyBytesObject *)
1754replace_substring_in_place(PyBytesObject *self,
1755 const char *from_s, Py_ssize_t from_len,
1756 const char *to_s, Py_ssize_t to_len,
1757 Py_ssize_t maxcount)
1758{
1759 char *result_s, *start, *end;
1760 char *self_s;
1761 Py_ssize_t self_len, offset;
1762 PyBytesObject *result;
1763
1764 /* The result bytes will be the same size */
1765
1766 self_s = PyBytes_AS_STRING(self);
1767 self_len = PyBytes_GET_SIZE(self);
1768
1769 offset = findstring(self_s, self_len,
1770 from_s, from_len,
1771 0, self_len, FORWARD);
1772 if (offset == -1) {
1773 /* No matches; return the original bytes */
1774 return return_self(self);
1775 }
1776
1777 /* Need to make a new bytes */
1778 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1779 if (result == NULL)
1780 return NULL;
1781 result_s = PyBytes_AS_STRING(result);
1782 Py_MEMCPY(result_s, self_s, self_len);
1783
1784 /* change everything in-place, starting with this one */
1785 start = result_s + offset;
1786 Py_MEMCPY(start, to_s, from_len);
1787 start += from_len;
1788 end = result_s + self_len;
1789
1790 while ( --maxcount > 0) {
1791 offset = findstring(start, end-start,
1792 from_s, from_len,
1793 0, end-start, FORWARD);
1794 if (offset==-1)
1795 break;
1796 Py_MEMCPY(start+offset, to_s, from_len);
1797 start += offset+from_len;
1798 }
1799
1800 return result;
1801}
1802
1803/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1804Py_LOCAL(PyBytesObject *)
1805replace_single_character(PyBytesObject *self,
1806 char from_c,
1807 const char *to_s, Py_ssize_t to_len,
1808 Py_ssize_t maxcount)
1809{
1810 char *self_s, *result_s;
1811 char *start, *next, *end;
1812 Py_ssize_t self_len, result_len;
1813 Py_ssize_t count, product;
1814 PyBytesObject *result;
1815
1816 self_s = PyBytes_AS_STRING(self);
1817 self_len = PyBytes_GET_SIZE(self);
1818
1819 count = countchar(self_s, self_len, from_c, maxcount);
1820 if (count == 0) {
1821 /* no matches, return unchanged */
1822 return return_self(self);
1823 }
1824
1825 /* use the difference between current and new, hence the "-1" */
1826 /* result_len = self_len + count * (to_len-1) */
1827 product = count * (to_len-1);
1828 if (product / (to_len-1) != count) {
1829 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1830 return NULL;
1831 }
1832 result_len = self_len + product;
1833 if (result_len < 0) {
1834 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1835 return NULL;
1836 }
1837
1838 if ( (result = (PyBytesObject *)
1839 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1840 return NULL;
1841 result_s = PyBytes_AS_STRING(result);
1842
1843 start = self_s;
1844 end = self_s + self_len;
1845 while (count-- > 0) {
1846 next = findchar(start, end-start, from_c);
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001847 if (next == NULL)
Neal Norwitz6968b052007-02-27 19:02:19 +00001848 break;
1849
1850 if (next == start) {
1851 /* replace with the 'to' */
1852 Py_MEMCPY(result_s, to_s, to_len);
1853 result_s += to_len;
1854 start += 1;
1855 } else {
1856 /* copy the unchanged old then the 'to' */
1857 Py_MEMCPY(result_s, start, next-start);
1858 result_s += (next-start);
1859 Py_MEMCPY(result_s, to_s, to_len);
1860 result_s += to_len;
1861 start = next+1;
1862 }
1863 }
1864 /* Copy the remainder of the remaining bytes */
1865 Py_MEMCPY(result_s, start, end-start);
1866
1867 return result;
1868}
1869
1870/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1871Py_LOCAL(PyBytesObject *)
1872replace_substring(PyBytesObject *self,
1873 const char *from_s, Py_ssize_t from_len,
1874 const char *to_s, Py_ssize_t to_len,
1875 Py_ssize_t maxcount)
1876{
1877 char *self_s, *result_s;
1878 char *start, *next, *end;
1879 Py_ssize_t self_len, result_len;
1880 Py_ssize_t count, offset, product;
1881 PyBytesObject *result;
1882
1883 self_s = PyBytes_AS_STRING(self);
1884 self_len = PyBytes_GET_SIZE(self);
1885
1886 count = countstring(self_s, self_len,
1887 from_s, from_len,
1888 0, self_len, FORWARD, maxcount);
1889 if (count == 0) {
1890 /* no matches, return unchanged */
1891 return return_self(self);
1892 }
1893
1894 /* Check for overflow */
1895 /* result_len = self_len + count * (to_len-from_len) */
1896 product = count * (to_len-from_len);
1897 if (product / (to_len-from_len) != count) {
1898 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1899 return NULL;
1900 }
1901 result_len = self_len + product;
1902 if (result_len < 0) {
1903 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1904 return NULL;
1905 }
1906
1907 if ( (result = (PyBytesObject *)
1908 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1909 return NULL;
1910 result_s = PyBytes_AS_STRING(result);
1911
1912 start = self_s;
1913 end = self_s + self_len;
1914 while (count-- > 0) {
1915 offset = findstring(start, end-start,
1916 from_s, from_len,
1917 0, end-start, FORWARD);
1918 if (offset == -1)
1919 break;
1920 next = start+offset;
1921 if (next == start) {
1922 /* replace with the 'to' */
1923 Py_MEMCPY(result_s, to_s, to_len);
1924 result_s += to_len;
1925 start += from_len;
1926 } else {
1927 /* copy the unchanged old then the 'to' */
1928 Py_MEMCPY(result_s, start, next-start);
1929 result_s += (next-start);
1930 Py_MEMCPY(result_s, to_s, to_len);
1931 result_s += to_len;
1932 start = next+from_len;
1933 }
1934 }
1935 /* Copy the remainder of the remaining bytes */
1936 Py_MEMCPY(result_s, start, end-start);
1937
1938 return result;
1939}
1940
1941
1942Py_LOCAL(PyBytesObject *)
1943replace(PyBytesObject *self,
1944 const char *from_s, Py_ssize_t from_len,
1945 const char *to_s, Py_ssize_t to_len,
1946 Py_ssize_t maxcount)
1947{
1948 if (maxcount < 0) {
1949 maxcount = PY_SSIZE_T_MAX;
1950 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
1951 /* nothing to do; return the original bytes */
1952 return return_self(self);
1953 }
1954
1955 if (maxcount == 0 ||
1956 (from_len == 0 && to_len == 0)) {
1957 /* nothing to do; return the original bytes */
1958 return return_self(self);
1959 }
1960
1961 /* Handle zero-length special cases */
1962
1963 if (from_len == 0) {
1964 /* insert the 'to' bytes everywhere. */
1965 /* >>> "Python".replace("", ".") */
1966 /* '.P.y.t.h.o.n.' */
1967 return replace_interleave(self, to_s, to_len, maxcount);
1968 }
1969
1970 /* Except for "".replace("", "A") == "A" there is no way beyond this */
1971 /* point for an empty self bytes to generate a non-empty bytes */
1972 /* Special case so the remaining code always gets a non-empty bytes */
1973 if (PyBytes_GET_SIZE(self) == 0) {
1974 return return_self(self);
1975 }
1976
1977 if (to_len == 0) {
1978 /* delete all occurances of 'from' bytes */
1979 if (from_len == 1) {
1980 return replace_delete_single_character(
1981 self, from_s[0], maxcount);
1982 } else {
1983 return replace_delete_substring(self, from_s, from_len, maxcount);
1984 }
1985 }
1986
1987 /* Handle special case where both bytes have the same length */
1988
1989 if (from_len == to_len) {
1990 if (from_len == 1) {
1991 return replace_single_character_in_place(
1992 self,
1993 from_s[0],
1994 to_s[0],
1995 maxcount);
1996 } else {
1997 return replace_substring_in_place(
1998 self, from_s, from_len, to_s, to_len, maxcount);
1999 }
2000 }
2001
2002 /* Otherwise use the more generic algorithms */
2003 if (from_len == 1) {
2004 return replace_single_character(self, from_s[0],
2005 to_s, to_len, maxcount);
2006 } else {
2007 /* len('from')>=2, len('to')>=1 */
2008 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2009 }
2010}
2011
2012PyDoc_STRVAR(replace__doc__,
2013"B.replace (old, new[, count]) -> bytes\n\
2014\n\
2015Return a copy of bytes B with all occurrences of subsection\n\
2016old replaced by new. If the optional argument count is\n\
2017given, only the first count occurrences are replaced.");
2018
2019static PyObject *
2020bytes_replace(PyBytesObject *self, PyObject *args)
2021{
2022 Py_ssize_t count = -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002023 PyObject *from, *to, *res;
Neal Norwitz6968b052007-02-27 19:02:19 +00002024 const char *from_s, *to_s;
2025 Py_ssize_t from_len, to_len;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002026 int relfrom=0, relto=0;
2027 PyBuffer vfrom, vto;
Neal Norwitz6968b052007-02-27 19:02:19 +00002028
2029 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2030 return NULL;
2031
2032 if (PyBytes_Check(from)) {
2033 from_s = PyBytes_AS_STRING(from);
2034 from_len = PyBytes_GET_SIZE(from);
2035 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002036 else {
Guido van Rossum75d38e92007-08-24 17:33:11 +00002037 if (PyObject_GetBuffer(from, &vfrom, PyBUF_CHARACTER) < 0)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002038 return NULL;
2039 from_s = vfrom.buf;
2040 from_len = vfrom.len;
2041 relfrom = 1;
2042 }
Neal Norwitz6968b052007-02-27 19:02:19 +00002043
2044 if (PyBytes_Check(to)) {
2045 to_s = PyBytes_AS_STRING(to);
2046 to_len = PyBytes_GET_SIZE(to);
2047 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002048 else {
2049 if (PyObject_GetBuffer(to, &vto, PyBUF_CHARACTER) < 0) {
2050 if (relfrom)
2051 PyObject_ReleaseBuffer(from, &vfrom);
2052 return NULL;
2053 }
2054 to_s = vto.buf;
2055 to_len = vto.len;
2056 relto = 1;
2057 }
Neal Norwitz6968b052007-02-27 19:02:19 +00002058
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002059 res = (PyObject *)replace((PyBytesObject *) self,
2060 from_s, from_len,
2061 to_s, to_len, count);
2062
2063 if (relfrom)
2064 PyObject_ReleaseBuffer(from, &vfrom);
2065 if (relto)
2066 PyObject_ReleaseBuffer(to, &vto);
2067 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00002068}
2069
2070
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
/* The argument is parenthesized so that an expression argument (for
   example "x & 3") keeps its meaning after expansion. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Create a bytes object from data[left:right] and append it to `list`.
   Expands to several statements and relies on the expanding function
   providing the variables `str` and `list` and the label `onError`,
   which is jumped to on failure. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Like SPLIT_APPEND, but the first MAX_PREALLOC items are stored directly
   into the preallocated slots (no DECREF on that path); later items are
   appended.  Additionally relies on a `count` variable, which is
   incremented once per item. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count
Neal Norwitz6968b052007-02-27 19:02:19 +00002115
2116
2117Py_LOCAL_INLINE(PyObject *)
2118split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2119{
2120 register Py_ssize_t i, j, count=0;
2121 PyObject *str;
2122 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2123
2124 if (list == NULL)
2125 return NULL;
2126
2127 i = j = 0;
2128 while ((j < len) && (maxcount-- > 0)) {
2129 for(; j<len; j++) {
2130 /* I found that using memchr makes no difference */
2131 if (s[j] == ch) {
2132 SPLIT_ADD(s, i, j);
2133 i = j = j + 1;
2134 break;
2135 }
2136 }
2137 }
2138 if (i <= len) {
2139 SPLIT_ADD(s, i, len);
2140 }
2141 FIX_PREALLOC_SIZE(list);
2142 return list;
2143
2144 onError:
2145 Py_DECREF(list);
2146 return NULL;
2147}
2148
2149PyDoc_STRVAR(split__doc__,
2150"B.split(sep [,maxsplit]) -> list of bytes\n\
2151\n\
2152Return a list of the bytes in the string B, using sep as the\n\
2153delimiter. If maxsplit is given, at most maxsplit\n\
2154splits are done.");
2155
2156static PyObject *
2157bytes_split(PyBytesObject *self, PyObject *args)
2158{
2159 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2160 Py_ssize_t maxsplit = -1, count=0;
2161 const char *s = PyBytes_AS_STRING(self), *sub;
2162 PyObject *list, *str, *subobj;
2163#ifdef USE_FAST
2164 Py_ssize_t pos;
2165#endif
2166
2167 if (!PyArg_ParseTuple(args, "O|n:split", &subobj, &maxsplit))
2168 return NULL;
2169 if (maxsplit < 0)
2170 maxsplit = PY_SSIZE_T_MAX;
2171 if (PyBytes_Check(subobj)) {
2172 sub = PyBytes_AS_STRING(subobj);
2173 n = PyBytes_GET_SIZE(subobj);
2174 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002175 /* XXX -> use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00002176 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2177 return NULL;
2178
2179 if (n == 0) {
2180 PyErr_SetString(PyExc_ValueError, "empty separator");
2181 return NULL;
2182 }
2183 else if (n == 1)
2184 return split_char(s, len, sub[0], maxsplit);
2185
2186 list = PyList_New(PREALLOC_SIZE(maxsplit));
2187 if (list == NULL)
2188 return NULL;
2189
2190#ifdef USE_FAST
2191 i = j = 0;
2192 while (maxsplit-- > 0) {
2193 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2194 if (pos < 0)
2195 break;
2196 j = i+pos;
2197 SPLIT_ADD(s, i, j);
2198 i = j + n;
2199 }
2200#else
2201 i = j = 0;
2202 while ((j+n <= len) && (maxsplit-- > 0)) {
2203 for (; j+n <= len; j++) {
2204 if (Py_STRING_MATCH(s, j, sub, n)) {
2205 SPLIT_ADD(s, i, j);
2206 i = j = j + n;
2207 break;
2208 }
2209 }
2210 }
2211#endif
2212 SPLIT_ADD(s, i, len);
2213 FIX_PREALLOC_SIZE(list);
2214 return list;
2215
2216 onError:
2217 Py_DECREF(list);
2218 return NULL;
2219}
2220
2221PyDoc_STRVAR(partition__doc__,
2222"B.partition(sep) -> (head, sep, tail)\n\
2223\n\
2224Searches for the separator sep in B, and returns the part before it,\n\
2225the separator itself, and the part after it. If the separator is not\n\
2226found, returns B and two empty bytes.");
2227
2228static PyObject *
2229bytes_partition(PyBytesObject *self, PyObject *sep_obj)
2230{
2231 PyObject *bytesep, *result;
2232
2233 bytesep = PyBytes_FromObject(sep_obj);
2234 if (! bytesep)
2235 return NULL;
2236
2237 result = stringlib_partition(
2238 (PyObject*) self,
2239 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002240 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002241 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2242 );
2243
2244 Py_DECREF(bytesep);
2245 return result;
2246}
2247
2248PyDoc_STRVAR(rpartition__doc__,
2249"B.rpartition(sep) -> (tail, sep, head)\n\
2250\n\
2251Searches for the separator sep in B, starting at the end of B, and returns\n\
2252the part before it, the separator itself, and the part after it. If the\n\
2253separator is not found, returns two empty bytes and B.");
2254
2255static PyObject *
2256bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
2257{
2258 PyObject *bytesep, *result;
2259
2260 bytesep = PyBytes_FromObject(sep_obj);
2261 if (! bytesep)
2262 return NULL;
2263
2264 result = stringlib_rpartition(
2265 (PyObject*) self,
2266 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002267 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002268 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2269 );
2270
2271 Py_DECREF(bytesep);
2272 return result;
2273}
2274
2275Py_LOCAL_INLINE(PyObject *)
2276rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2277{
2278 register Py_ssize_t i, j, count=0;
2279 PyObject *str;
2280 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2281
2282 if (list == NULL)
2283 return NULL;
2284
2285 i = j = len - 1;
2286 while ((i >= 0) && (maxcount-- > 0)) {
2287 for (; i >= 0; i--) {
2288 if (s[i] == ch) {
2289 SPLIT_ADD(s, i + 1, j + 1);
2290 j = i = i - 1;
2291 break;
2292 }
2293 }
2294 }
2295 if (j >= -1) {
2296 SPLIT_ADD(s, 0, j + 1);
2297 }
2298 FIX_PREALLOC_SIZE(list);
2299 if (PyList_Reverse(list) < 0)
2300 goto onError;
2301
2302 return list;
2303
2304 onError:
2305 Py_DECREF(list);
2306 return NULL;
2307}
2308
2309PyDoc_STRVAR(rsplit__doc__,
2310"B.rsplit(sep [,maxsplit]) -> list of bytes\n\
2311\n\
2312Return a list of the sections in the byte B, using sep as the\n\
2313delimiter, starting at the end of the bytes and working\n\
2314to the front. If maxsplit is given, at most maxsplit splits are\n\
2315done.");
2316
2317static PyObject *
2318bytes_rsplit(PyBytesObject *self, PyObject *args)
2319{
2320 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2321 Py_ssize_t maxsplit = -1, count=0;
2322 const char *s = PyBytes_AS_STRING(self), *sub;
2323 PyObject *list, *str, *subobj;
2324
2325 if (!PyArg_ParseTuple(args, "O|n:rsplit", &subobj, &maxsplit))
2326 return NULL;
2327 if (maxsplit < 0)
2328 maxsplit = PY_SSIZE_T_MAX;
2329 if (PyBytes_Check(subobj)) {
2330 sub = PyBytes_AS_STRING(subobj);
2331 n = PyBytes_GET_SIZE(subobj);
2332 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002333 /* XXX -> Use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00002334 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2335 return NULL;
2336
2337 if (n == 0) {
2338 PyErr_SetString(PyExc_ValueError, "empty separator");
2339 return NULL;
2340 }
2341 else if (n == 1)
2342 return rsplit_char(s, len, sub[0], maxsplit);
2343
2344 list = PyList_New(PREALLOC_SIZE(maxsplit));
2345 if (list == NULL)
2346 return NULL;
2347
2348 j = len;
2349 i = j - n;
2350
2351 while ( (i >= 0) && (maxsplit-- > 0) ) {
2352 for (; i>=0; i--) {
2353 if (Py_STRING_MATCH(s, i, sub, n)) {
2354 SPLIT_ADD(s, i + n, j);
2355 j = i;
2356 i -= n;
2357 break;
2358 }
2359 }
2360 }
2361 SPLIT_ADD(s, 0, j);
2362 FIX_PREALLOC_SIZE(list);
2363 if (PyList_Reverse(list) < 0)
2364 goto onError;
2365 return list;
2366
2367onError:
2368 Py_DECREF(list);
2369 return NULL;
2370}
2371
2372PyDoc_STRVAR(extend__doc__,
2373"B.extend(iterable int) -> None\n\
2374\n\
2375Append all the elements from the iterator or sequence to the\n\
2376end of the bytes.");
2377static PyObject *
2378bytes_extend(PyBytesObject *self, PyObject *arg)
2379{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002380 if (bytes_setslice(self, Py_Size(self), Py_Size(self), arg) == -1)
Neal Norwitz6968b052007-02-27 19:02:19 +00002381 return NULL;
2382 Py_RETURN_NONE;
2383}
2384
2385
2386PyDoc_STRVAR(reverse__doc__,
2387"B.reverse() -> None\n\
2388\n\
2389Reverse the order of the values in bytes in place.");
2390static PyObject *
2391bytes_reverse(PyBytesObject *self, PyObject *unused)
2392{
2393 char swap, *head, *tail;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002394 Py_ssize_t i, j, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002395
2396 j = n / 2;
2397 head = self->ob_bytes;
2398 tail = head + n - 1;
2399 for (i = 0; i < j; i++) {
2400 swap = *head;
2401 *head++ = *tail;
2402 *tail-- = swap;
2403 }
2404
2405 Py_RETURN_NONE;
2406}
2407
2408PyDoc_STRVAR(insert__doc__,
2409"B.insert(index, int) -> None\n\
2410\n\
2411Insert a single item into the bytes before the given index.");
2412static PyObject *
2413bytes_insert(PyBytesObject *self, PyObject *args)
2414{
2415 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002416 Py_ssize_t where, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002417
2418 if (!PyArg_ParseTuple(args, "ni:insert", &where, &value))
2419 return NULL;
2420
2421 if (n == PY_SSIZE_T_MAX) {
2422 PyErr_SetString(PyExc_OverflowError,
2423 "cannot add more objects to bytes");
2424 return NULL;
2425 }
2426 if (value < 0 || value >= 256) {
2427 PyErr_SetString(PyExc_ValueError,
2428 "byte must be in range(0, 256)");
2429 return NULL;
2430 }
2431 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2432 return NULL;
2433
2434 if (where < 0) {
2435 where += n;
2436 if (where < 0)
2437 where = 0;
2438 }
2439 if (where > n)
2440 where = n;
Guido van Rossum4fc8ae42007-02-27 20:57:45 +00002441 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
Neal Norwitz6968b052007-02-27 19:02:19 +00002442 self->ob_bytes[where] = value;
2443
2444 Py_RETURN_NONE;
2445}
2446
2447PyDoc_STRVAR(append__doc__,
2448"B.append(int) -> None\n\
2449\n\
2450Append a single item to the end of the bytes.");
2451static PyObject *
2452bytes_append(PyBytesObject *self, PyObject *arg)
2453{
2454 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002455 Py_ssize_t n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002456
2457 if (! _getbytevalue(arg, &value))
2458 return NULL;
2459 if (n == PY_SSIZE_T_MAX) {
2460 PyErr_SetString(PyExc_OverflowError,
2461 "cannot add more objects to bytes");
2462 return NULL;
2463 }
2464 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2465 return NULL;
2466
2467 self->ob_bytes[n] = value;
2468
2469 Py_RETURN_NONE;
2470}
2471
2472PyDoc_STRVAR(pop__doc__,
2473"B.pop([index]) -> int\n\
2474\n\
2475Remove and return a single item from the bytes. If no index\n\
2476argument is give, will pop the last value.");
2477static PyObject *
2478bytes_pop(PyBytesObject *self, PyObject *args)
2479{
2480 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002481 Py_ssize_t where = -1, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002482
2483 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2484 return NULL;
2485
2486 if (n == 0) {
2487 PyErr_SetString(PyExc_OverflowError,
2488 "cannot pop an empty bytes");
2489 return NULL;
2490 }
2491 if (where < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002492 where += Py_Size(self);
2493 if (where < 0 || where >= Py_Size(self)) {
Neal Norwitz6968b052007-02-27 19:02:19 +00002494 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2495 return NULL;
2496 }
2497
2498 value = self->ob_bytes[where];
2499 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2500 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2501 return NULL;
2502
2503 return PyInt_FromLong(value);
2504}
2505
2506PyDoc_STRVAR(remove__doc__,
2507"B.remove(int) -> None\n\
2508\n\
2509Remove the first occurance of a value in bytes");
2510static PyObject *
2511bytes_remove(PyBytesObject *self, PyObject *arg)
2512{
2513 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002514 Py_ssize_t where, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002515
2516 if (! _getbytevalue(arg, &value))
2517 return NULL;
2518
2519 for (where = 0; where < n; where++) {
2520 if (self->ob_bytes[where] == value)
2521 break;
2522 }
2523 if (where == n) {
2524 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2525 return NULL;
2526 }
2527
2528 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2529 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2530 return NULL;
2531
2532 Py_RETURN_NONE;
2533}
2534
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002535/* XXX These two helpers could be optimized if argsize == 1 */
2536
Neal Norwitz2bad9702007-08-27 06:19:22 +00002537static Py_ssize_t
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002538lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2539 void *argptr, Py_ssize_t argsize)
2540{
2541 Py_ssize_t i = 0;
2542 while (i < mysize && memchr(argptr, myptr[i], argsize))
2543 i++;
2544 return i;
2545}
2546
Neal Norwitz2bad9702007-08-27 06:19:22 +00002547static Py_ssize_t
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002548rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2549 void *argptr, Py_ssize_t argsize)
2550{
2551 Py_ssize_t i = mysize - 1;
2552 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2553 i--;
2554 return i + 1;
2555}
2556
2557PyDoc_STRVAR(strip__doc__,
2558"B.strip(bytes) -> bytes\n\
2559\n\
2560Strip leading and trailing bytes contained in the argument.");
2561static PyObject *
2562bytes_strip(PyBytesObject *self, PyObject *arg)
2563{
2564 Py_ssize_t left, right, mysize, argsize;
2565 void *myptr, *argptr;
2566 if (arg == NULL || !PyBytes_Check(arg)) {
2567 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2568 return NULL;
2569 }
2570 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002571 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002572 argptr = ((PyBytesObject *)arg)->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002573 argsize = Py_Size(arg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002574 left = lstrip_helper(myptr, mysize, argptr, argsize);
Guido van Rossumeb29e9a2007-08-08 21:55:33 +00002575 if (left == mysize)
2576 right = left;
2577 else
2578 right = rstrip_helper(myptr, mysize, argptr, argsize);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002579 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2580}
2581
2582PyDoc_STRVAR(lstrip__doc__,
2583"B.lstrip(bytes) -> bytes\n\
2584\n\
2585Strip leading bytes contained in the argument.");
2586static PyObject *
2587bytes_lstrip(PyBytesObject *self, PyObject *arg)
2588{
2589 Py_ssize_t left, right, mysize, argsize;
2590 void *myptr, *argptr;
2591 if (arg == NULL || !PyBytes_Check(arg)) {
2592 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2593 return NULL;
2594 }
2595 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002596 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002597 argptr = ((PyBytesObject *)arg)->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002598 argsize = Py_Size(arg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002599 left = lstrip_helper(myptr, mysize, argptr, argsize);
2600 right = mysize;
2601 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2602}
2603
2604PyDoc_STRVAR(rstrip__doc__,
2605"B.rstrip(bytes) -> bytes\n\
2606\n\
2607Strip trailing bytes contained in the argument.");
2608static PyObject *
2609bytes_rstrip(PyBytesObject *self, PyObject *arg)
2610{
2611 Py_ssize_t left, right, mysize, argsize;
2612 void *myptr, *argptr;
2613 if (arg == NULL || !PyBytes_Check(arg)) {
2614 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2615 return NULL;
2616 }
2617 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002618 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002619 argptr = ((PyBytesObject *)arg)->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002620 argsize = Py_Size(arg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002621 left = 0;
2622 right = rstrip_helper(myptr, mysize, argptr, argsize);
2623 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2624}
Neal Norwitz6968b052007-02-27 19:02:19 +00002625
Guido van Rossumd624f182006-04-24 13:47:05 +00002626PyDoc_STRVAR(decode_doc,
2627"B.decode([encoding[,errors]]) -> unicode obect.\n\
2628\n\
2629Decodes B using the codec registered for encoding. encoding defaults\n\
2630to the default encoding. errors may be given to set a different error\n\
2631handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2632a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2633as well as any other name registerd with codecs.register_error that is\n\
2634able to handle UnicodeDecodeErrors.");
2635
2636static PyObject *
2637bytes_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002638{
Guido van Rossumd624f182006-04-24 13:47:05 +00002639 const char *encoding = NULL;
2640 const char *errors = NULL;
2641
2642 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2643 return NULL;
2644 if (encoding == NULL)
2645 encoding = PyUnicode_GetDefaultEncoding();
2646 return PyCodec_Decode(self, encoding, errors);
2647}
2648
Guido van Rossuma0867f72006-05-05 04:34:18 +00002649PyDoc_STRVAR(alloc_doc,
2650"B.__alloc__() -> int\n\
2651\n\
2652Returns the number of bytes actually allocated.");
2653
2654static PyObject *
2655bytes_alloc(PyBytesObject *self)
2656{
2657 return PyInt_FromSsize_t(self->ob_alloc);
2658}
2659
Guido van Rossum20188312006-05-05 15:15:40 +00002660PyDoc_STRVAR(join_doc,
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002661"B.join(iterable_of_bytes) -> bytes\n\
Guido van Rossum20188312006-05-05 15:15:40 +00002662\n\
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002663Concatenates any number of bytes objects, with B in between each pair.\n\
2664Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
Guido van Rossum20188312006-05-05 15:15:40 +00002665
2666static PyObject *
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002667bytes_join(PyBytesObject *self, PyObject *it)
Guido van Rossum20188312006-05-05 15:15:40 +00002668{
2669 PyObject *seq;
Martin v. Löwis5d7428b2007-07-21 18:47:48 +00002670 Py_ssize_t mysize = Py_Size(self);
Guido van Rossum20188312006-05-05 15:15:40 +00002671 Py_ssize_t i;
2672 Py_ssize_t n;
2673 PyObject **items;
2674 Py_ssize_t totalsize = 0;
2675 PyObject *result;
2676 char *dest;
2677
2678 seq = PySequence_Fast(it, "can only join an iterable");
2679 if (seq == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002680 return NULL;
Guido van Rossum20188312006-05-05 15:15:40 +00002681 n = PySequence_Fast_GET_SIZE(seq);
2682 items = PySequence_Fast_ITEMS(seq);
2683
2684 /* Compute the total size, and check that they are all bytes */
2685 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002686 PyObject *obj = items[i];
2687 if (!PyBytes_Check(obj)) {
2688 PyErr_Format(PyExc_TypeError,
2689 "can only join an iterable of bytes "
2690 "(item %ld has type '%.100s')",
Guido van Rossum3cf5b1e2006-07-27 21:53:35 +00002691 /* XXX %ld isn't right on Win64 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002692 (long)i, Py_Type(obj)->tp_name);
Georg Brandlb3f568f2007-02-27 08:49:18 +00002693 goto error;
2694 }
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002695 if (i > 0)
2696 totalsize += mysize;
Georg Brandlb3f568f2007-02-27 08:49:18 +00002697 totalsize += PyBytes_GET_SIZE(obj);
2698 if (totalsize < 0) {
2699 PyErr_NoMemory();
2700 goto error;
2701 }
Guido van Rossum20188312006-05-05 15:15:40 +00002702 }
2703
2704 /* Allocate the result, and copy the bytes */
2705 result = PyBytes_FromStringAndSize(NULL, totalsize);
2706 if (result == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002707 goto error;
Guido van Rossum20188312006-05-05 15:15:40 +00002708 dest = PyBytes_AS_STRING(result);
2709 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002710 PyObject *obj = items[i];
2711 Py_ssize_t size = PyBytes_GET_SIZE(obj);
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002712 if (i > 0) {
2713 memcpy(dest, self->ob_bytes, mysize);
2714 dest += mysize;
2715 }
Georg Brandlb3f568f2007-02-27 08:49:18 +00002716 memcpy(dest, PyBytes_AS_STRING(obj), size);
2717 dest += size;
Guido van Rossum20188312006-05-05 15:15:40 +00002718 }
2719
2720 /* Done */
2721 Py_DECREF(seq);
2722 return result;
2723
2724 /* Error handling */
2725 error:
2726 Py_DECREF(seq);
2727 return NULL;
2728}
2729
/* Docstring text for fromhex; bytes_fromhex() below implements the
   parsing it describes. */
PyDoc_STRVAR(fromhex_doc,
"bytes.fromhex(string) -> bytes\n\
\n\
Create a bytes object from a string of hexadecimal numbers.\n\
Spaces between two numbers are accepted. Example:\n\
bytes.fromhex('10 2030') -> bytes([0x10, 0x20, 0x30]).");
2736
/* Return the value (0..15) of the hexadecimal digit c, accepting both
   upper and lower case letters, or -1 if c is not a hexadecimal digit. */
static int
hex_digit_to_int(int c)
{
    if (isdigit(c))
        return c - '0';

    /* fold upper case so that only 'a'..'f' needs to be tested */
    if (isupper(c))
        c = tolower(c);
    return (c >= 'a' && c <= 'f') ? c - 'a' + 10 : -1;
}
2750
2751static PyObject *
2752bytes_fromhex(PyObject *cls, PyObject *args)
2753{
2754 PyObject *newbytes;
2755 char *hex, *buf;
2756 Py_ssize_t len, byteslen, i, j;
2757 int top, bot;
2758
2759 if (!PyArg_ParseTuple(args, "s#:fromhex", &hex, &len))
2760 return NULL;
2761
2762 byteslen = len / 2; /* max length if there are no spaces */
2763
2764 newbytes = PyBytes_FromStringAndSize(NULL, byteslen);
2765 if (!newbytes)
2766 return NULL;
2767 buf = PyBytes_AS_STRING(newbytes);
2768
Guido van Rossum4355a472007-05-04 05:00:04 +00002769 for (i = j = 0; i < len; i += 2) {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002770 /* skip over spaces in the input */
2771 while (Py_CHARMASK(hex[i]) == ' ')
2772 i++;
2773 if (i >= len)
2774 break;
2775 top = hex_digit_to_int(Py_CHARMASK(hex[i]));
2776 bot = hex_digit_to_int(Py_CHARMASK(hex[i+1]));
2777 if (top == -1 || bot == -1) {
2778 PyErr_Format(PyExc_ValueError,
2779 "non-hexadecimal number string '%c%c' found in "
2780 "fromhex() arg at position %zd",
2781 hex[i], hex[i+1], i);
2782 goto error;
2783 }
2784 buf[j++] = (top << 4) + bot;
2785 }
2786 if (PyBytes_Resize(newbytes, j) < 0)
2787 goto error;
2788 return newbytes;
2789
2790 error:
2791 Py_DECREF(newbytes);
2792 return NULL;
2793}
2794
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002795PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2796
2797static PyObject *
2798bytes_reduce(PyBytesObject *self)
2799{
Martin v. Löwis9c121062007-08-05 20:26:11 +00002800 PyObject *latin1;
2801 if (self->ob_bytes)
Guido van Rossum75d38e92007-08-24 17:33:11 +00002802 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
Martin v. Löwis9c121062007-08-05 20:26:11 +00002803 Py_Size(self), NULL);
2804 else
2805 latin1 = PyUnicode_FromString("");
2806 return Py_BuildValue("(O(Ns))", Py_Type(self), latin1, "latin-1");
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002807}
2808
/* Sequence protocol slots for the bytes type (referenced from
   PyBytes_Type.tp_as_sequence).  The slice slots are left empty. */
static PySequenceMethods bytes_as_sequence = {
    (lenfunc)bytes_length,              /* sq_length */
    (binaryfunc)bytes_concat,           /* sq_concat */
    (ssizeargfunc)bytes_repeat,         /* sq_repeat */
    (ssizeargfunc)bytes_getitem,        /* sq_item */
    0,                                  /* sq_slice */
    (ssizeobjargproc)bytes_setitem,     /* sq_ass_item */
    0,                                  /* sq_ass_slice */
    (objobjproc)bytes_contains,         /* sq_contains */
    (binaryfunc)bytes_iconcat,          /* sq_inplace_concat */
    (ssizeargfunc)bytes_irepeat,        /* sq_inplace_repeat */
};
2821
/* Mapping protocol slots for the bytes type (referenced from
   PyBytes_Type.tp_as_mapping). */
static PyMappingMethods bytes_as_mapping = {
    (lenfunc)bytes_length,              /* mp_length */
    (binaryfunc)bytes_subscript,        /* mp_subscript */
    (objobjargproc)bytes_ass_subscript, /* mp_ass_subscript */
};
2827
/* Buffer protocol slots for the bytes type (referenced from
   PyBytes_Type.tp_as_buffer). */
static PyBufferProcs bytes_as_buffer = {
    (getbufferproc)bytes_getbuffer,         /* bf_getbuffer */
    (releasebufferproc)bytes_releasebuffer, /* bf_releasebuffer */
};
2832
/* Method table for the bytes type (installed as PyBytes_Type.tp_methods).
   Each entry is {name, C implementation, calling-convention flags, docstring};
   the table ends with a {NULL} sentinel. */
static PyMethodDef
bytes_methods[] = {
    {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
    {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
    {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
     startswith__doc__},
    {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
    {"translate", (PyCFunction)bytes_translate, METH_VARARGS, translate__doc__},
    {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
    {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
    {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
    {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
    {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
    {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
    {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
    {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
    {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
    {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
    {"strip", (PyCFunction)bytes_strip, METH_O, strip__doc__},
    {"lstrip", (PyCFunction)bytes_lstrip, METH_O, lstrip__doc__},
    {"rstrip", (PyCFunction)bytes_rstrip, METH_O, rstrip__doc__},
    {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
    {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
    /* fromhex is the only class method in the table (METH_CLASS). */
    {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
     fromhex_doc},
    {"join", (PyCFunction)bytes_join, METH_O, join_doc},
    {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
    {NULL}  /* sentinel */
};
2866
/* Docstring of the bytes type itself; used as tp_doc in PyBytes_Type. */
PyDoc_STRVAR(bytes_doc,
"bytes([iterable]) -> new array of bytes.\n\
\n\
If an argument is given it must be an iterable yielding ints in range(256).");
2871
/* Type object for bytes.  It wires together the sequence, mapping and buffer
   slot tables, the method table and the docstring defined in this file. */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                            /* tp_name */
    sizeof(PyBytesObject),              /* tp_basicsize */
    0,                                  /* tp_itemsize */
    (destructor)bytes_dealloc,          /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    (reprfunc)bytes_repr,               /* tp_repr */
    0,                                  /* tp_as_number */
    &bytes_as_sequence,                 /* tp_as_sequence */
    &bytes_as_mapping,                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    (reprfunc)bytes_str,                /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    &bytes_as_buffer,                   /* tp_as_buffer */
    /* bytes is 'final' or 'sealed': Py_TPFLAGS_BASETYPE is deliberately
       not included in the flags, so the type cannot be subclassed */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    bytes_doc,                          /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    (richcmpfunc)bytes_richcompare,     /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    bytes_methods,                      /* tp_methods */
    0,                                  /* tp_members */
    0,                                  /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    (initproc)bytes_init,               /* tp_init */
    PyType_GenericAlloc,                /* tp_alloc */
    PyType_GenericNew,                  /* tp_new */
    PyObject_Del,                       /* tp_free */
};