blob: 5a03beb234a8342f74c1196c5a257de347890b05 [file] [log] [blame]
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001/* Bytes object implementation */
2
3/* XXX TO DO: optimizations */
4
5#define PY_SSIZE_T_CLEAN
6#include "Python.h"
Guido van Rossuma0867f72006-05-05 04:34:18 +00007#include "structmember.h"
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00008
Neal Norwitz6968b052007-02-27 19:02:19 +00009/* The nullbytes are used by the stringlib during partition.
10 * If partition is removed from bytes, nullbytes and its helper
11 * Init/Fini should also be removed.
12 */
13static PyBytesObject *nullbytes = NULL;
14
15void
16PyBytes_Fini(void)
17{
18 Py_CLEAR(nullbytes);
19}
20
21int
22PyBytes_Init(void)
23{
24 nullbytes = PyObject_New(PyBytesObject, &PyBytes_Type);
25 if (nullbytes == NULL)
26 return 0;
27 nullbytes->ob_bytes = NULL;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +000028 Py_Size(nullbytes) = nullbytes->ob_alloc = 0;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000029 nullbytes->ob_exports = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +000030 return 1;
31}
32
33/* end nullbytes support */
34
Guido van Rossumad7d8d12007-04-13 01:39:34 +000035/* Helpers */
36
37static int
38_getbytevalue(PyObject* arg, int *value)
Neal Norwitz6968b052007-02-27 19:02:19 +000039{
40 PyObject *intarg = PyNumber_Int(arg);
41 if (! intarg)
42 return 0;
43 *value = PyInt_AsLong(intarg);
44 Py_DECREF(intarg);
45 if (*value < 0 || *value >= 256) {
46 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
47 return 0;
48 }
49 return 1;
50}
51
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000052static int
53bytes_getbuffer(PyBytesObject *obj, PyBuffer *view, int flags)
54{
55 int ret;
56 void *ptr;
57 if (view == NULL) {
58 obj->ob_exports++;
59 return 0;
60 }
61 if (obj->ob_bytes == NULL)
62 ptr = "";
63 else
64 ptr = obj->ob_bytes;
65 ret = PyBuffer_FillInfo(view, ptr, Py_Size(obj), 0, flags);
66 if (ret >= 0) {
67 obj->ob_exports++;
68 }
69 return ret;
70}
71
72static void
73bytes_releasebuffer(PyBytesObject *obj, PyBuffer *view)
74{
75 obj->ob_exports--;
76}
77
Guido van Rossumad7d8d12007-04-13 01:39:34 +000078Py_ssize_t
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000079_getbuffer(PyObject *obj, PyBuffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000080{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +000081 PyBufferProcs *buffer = Py_Type(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000082
83 if (buffer == NULL ||
84 PyUnicode_Check(obj) ||
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000085 buffer->bf_getbuffer == NULL) return -1;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000086
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000087 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
88 return -1;
89 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000090}
91
Guido van Rossum4dfe8a12006-04-22 23:28:04 +000092/* Direct API functions */
93
94PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +000095PyBytes_FromObject(PyObject *input)
96{
97 return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
98 input, NULL);
99}
100
101PyObject *
102PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000103{
104 PyBytesObject *new;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000105 int alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000106
Guido van Rossumd624f182006-04-24 13:47:05 +0000107 assert(size >= 0);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000108
109 new = PyObject_New(PyBytesObject, &PyBytes_Type);
110 if (new == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000111 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000112
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000113 if (size == 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000114 new->ob_bytes = NULL;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000115 alloc = 0;
116 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000117 else {
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000118 alloc = size + 1;
119 new->ob_bytes = PyMem_Malloc(alloc);
Guido van Rossumd624f182006-04-24 13:47:05 +0000120 if (new->ob_bytes == NULL) {
121 Py_DECREF(new);
122 return NULL;
123 }
124 if (bytes != NULL)
125 memcpy(new->ob_bytes, bytes, size);
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000126 new->ob_bytes[size] = '\0'; /* Trailing null byte */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000127 }
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000128 Py_Size(new) = size;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000129 new->ob_alloc = alloc;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000130 new->ob_exports = 0;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000131
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000132 return (PyObject *)new;
133}
134
135Py_ssize_t
136PyBytes_Size(PyObject *self)
137{
138 assert(self != NULL);
139 assert(PyBytes_Check(self));
140
Guido van Rossum20188312006-05-05 15:15:40 +0000141 return PyBytes_GET_SIZE(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000142}
143
144char *
145PyBytes_AsString(PyObject *self)
146{
147 assert(self != NULL);
148 assert(PyBytes_Check(self));
149
Guido van Rossum20188312006-05-05 15:15:40 +0000150 return PyBytes_AS_STRING(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000151}
152
153int
154PyBytes_Resize(PyObject *self, Py_ssize_t size)
155{
156 void *sval;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000157 Py_ssize_t alloc = ((PyBytesObject *)self)->ob_alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000158
159 assert(self != NULL);
160 assert(PyBytes_Check(self));
161 assert(size >= 0);
162
Guido van Rossuma0867f72006-05-05 04:34:18 +0000163 if (size < alloc / 2) {
164 /* Major downsize; resize down to exact size */
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000165 alloc = size + 1;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000166 }
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000167 else if (size < alloc) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000168 /* Within allocated size; quick exit */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000169 Py_Size(self) = size;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000170 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
Guido van Rossuma0867f72006-05-05 04:34:18 +0000171 return 0;
172 }
173 else if (size <= alloc * 1.125) {
174 /* Moderate upsize; overallocate similar to list_resize() */
175 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
176 }
177 else {
178 /* Major upsize; resize up to exact size */
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000179 alloc = size + 1;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000180 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000181
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000182 if (((PyBytesObject *)self)->ob_exports > 0) {
183 /*
184 fprintf(stderr, "%d: %s", ((PyBytesObject *)self)->ob_exports, ((PyBytesObject *)self)->ob_bytes);
185 */
186 PyErr_SetString(PyExc_BufferError,
187 "Existing exports of data: object cannot be re-sized");
188 return -1;
189 }
190
Guido van Rossuma0867f72006-05-05 04:34:18 +0000191 sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000192 if (sval == NULL) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000193 PyErr_NoMemory();
194 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000195 }
196
Guido van Rossumd624f182006-04-24 13:47:05 +0000197 ((PyBytesObject *)self)->ob_bytes = sval;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000198 Py_Size(self) = size;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000199 ((PyBytesObject *)self)->ob_alloc = alloc;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000200 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
201
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000202 return 0;
203}
204
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000205PyObject *
206PyBytes_Concat(PyObject *a, PyObject *b)
207{
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000208 Py_ssize_t size;
209 PyBuffer va, vb;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000210 PyBytesObject *result;
211
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000212 va.len = -1;
213 vb.len = -1;
214 if (_getbuffer(a, &va) < 0 ||
215 _getbuffer(b, &vb) < 0) {
216 if (va.len != -1)
217 PyObject_ReleaseBuffer(a, &va);
218 if (vb.len != -1)
219 PyObject_ReleaseBuffer(b, &vb);
220 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
221 Py_Type(a)->tp_name, Py_Type(b)->tp_name);
222 return NULL;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000223 }
224
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000225 size = va.len + vb.len;
226 if (size < 0) {
227 PyObject_ReleaseBuffer(a, &va);
228 PyObject_ReleaseBuffer(b, &vb);
229 return PyErr_NoMemory();
230 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000231
232 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
233 if (result != NULL) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000234 memcpy(result->ob_bytes, va.buf, va.len);
235 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000236 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000237
238 PyObject_ReleaseBuffer(a, &va);
239 PyObject_ReleaseBuffer(b, &vb);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000240 return (PyObject *)result;
241}
242
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000243/* Functions stuffed into the type object */
244
245static Py_ssize_t
246bytes_length(PyBytesObject *self)
247{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000248 return Py_Size(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000249}
250
251static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000252bytes_concat(PyBytesObject *self, PyObject *other)
253{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000254 return PyBytes_Concat((PyObject *)self, other);
Guido van Rossumd624f182006-04-24 13:47:05 +0000255}
256
257static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000258bytes_iconcat(PyBytesObject *self, PyObject *other)
259{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000260 Py_ssize_t mysize;
Guido van Rossum13e57212006-04-27 22:54:26 +0000261 Py_ssize_t size;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000262 PyBuffer vo;
Guido van Rossum13e57212006-04-27 22:54:26 +0000263
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000264 if (_getbuffer(other, &vo) < 0) {
265 PyErr_Format(PyExc_TypeError,
266 "can't concat bytes to %.100s", Py_Type(self)->tp_name);
267 return NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000268 }
269
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000270 mysize = Py_Size(self);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000271 size = mysize + vo.len;
272 if (size < 0) {
273 PyObject_ReleaseBuffer(other, &vo);
274 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000275 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000276 if (size < self->ob_alloc) {
277 Py_Size(self) = size;
278 self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
279 }
280 else if (PyBytes_Resize((PyObject *)self, size) < 0) {
281 PyObject_ReleaseBuffer(other, &vo);
282 return NULL;
283 }
284 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
285 PyObject_ReleaseBuffer(other, &vo);
Guido van Rossum13e57212006-04-27 22:54:26 +0000286 Py_INCREF(self);
287 return (PyObject *)self;
288}
289
290static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000291bytes_repeat(PyBytesObject *self, Py_ssize_t count)
292{
293 PyBytesObject *result;
294 Py_ssize_t mysize;
295 Py_ssize_t size;
296
297 if (count < 0)
298 count = 0;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000299 mysize = Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000300 size = mysize * count;
301 if (count != 0 && size / count != mysize)
302 return PyErr_NoMemory();
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000303 result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
Guido van Rossumd624f182006-04-24 13:47:05 +0000304 if (result != NULL && size != 0) {
305 if (mysize == 1)
306 memset(result->ob_bytes, self->ob_bytes[0], size);
307 else {
Guido van Rossum13e57212006-04-27 22:54:26 +0000308 Py_ssize_t i;
Guido van Rossumd624f182006-04-24 13:47:05 +0000309 for (i = 0; i < count; i++)
310 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
311 }
312 }
313 return (PyObject *)result;
314}
315
316static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000317bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
318{
319 Py_ssize_t mysize;
320 Py_ssize_t size;
321
322 if (count < 0)
323 count = 0;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000324 mysize = Py_Size(self);
Guido van Rossum13e57212006-04-27 22:54:26 +0000325 size = mysize * count;
326 if (count != 0 && size / count != mysize)
327 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000328 if (size < self->ob_alloc) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000329 Py_Size(self) = size;
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000330 self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000331 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000332 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000333 return NULL;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000334
Guido van Rossum13e57212006-04-27 22:54:26 +0000335 if (mysize == 1)
336 memset(self->ob_bytes, self->ob_bytes[0], size);
337 else {
338 Py_ssize_t i;
339 for (i = 1; i < count; i++)
340 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
341 }
342
343 Py_INCREF(self);
344 return (PyObject *)self;
345}
346
347static int
348bytes_substring(PyBytesObject *self, PyBytesObject *other)
349{
350 Py_ssize_t i;
351
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000352 if (Py_Size(other) == 1) {
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000353 return memchr(self->ob_bytes, other->ob_bytes[0],
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000354 Py_Size(self)) != NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000355 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000356 if (Py_Size(other) == 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000357 return 1; /* Edge case */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000358 for (i = 0; i + Py_Size(other) <= Py_Size(self); i++) {
Guido van Rossum13e57212006-04-27 22:54:26 +0000359 /* XXX Yeah, yeah, lots of optimizations possible... */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000360 if (memcmp(self->ob_bytes + i, other->ob_bytes, Py_Size(other)) == 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000361 return 1;
362 }
363 return 0;
364}
365
366static int
367bytes_contains(PyBytesObject *self, PyObject *value)
368{
369 Py_ssize_t ival;
370
371 if (PyBytes_Check(value))
372 return bytes_substring(self, (PyBytesObject *)value);
373
Thomas Woutersd204a712006-08-22 13:41:17 +0000374 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossum13e57212006-04-27 22:54:26 +0000375 if (ival == -1 && PyErr_Occurred())
376 return -1;
Guido van Rossum13e57212006-04-27 22:54:26 +0000377 if (ival < 0 || ival >= 256) {
378 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
379 return -1;
380 }
381
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000382 return memchr(self->ob_bytes, ival, Py_Size(self)) != NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000383}
384
385static PyObject *
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000386bytes_getitem(PyBytesObject *self, Py_ssize_t i)
387{
388 if (i < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000389 i += Py_Size(self);
390 if (i < 0 || i >= Py_Size(self)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000391 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
392 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000393 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000394 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
395}
396
397static PyObject *
Thomas Wouters376446d2006-12-19 08:30:14 +0000398bytes_subscript(PyBytesObject *self, PyObject *item)
Guido van Rossumd624f182006-04-24 13:47:05 +0000399{
Thomas Wouters376446d2006-12-19 08:30:14 +0000400 if (PyIndex_Check(item)) {
401 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000402
Thomas Wouters376446d2006-12-19 08:30:14 +0000403 if (i == -1 && PyErr_Occurred())
404 return NULL;
405
406 if (i < 0)
407 i += PyBytes_GET_SIZE(self);
408
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000409 if (i < 0 || i >= Py_Size(self)) {
Thomas Wouters376446d2006-12-19 08:30:14 +0000410 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
411 return NULL;
412 }
413 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
414 }
415 else if (PySlice_Check(item)) {
416 Py_ssize_t start, stop, step, slicelength, cur, i;
417 if (PySlice_GetIndicesEx((PySliceObject *)item,
418 PyBytes_GET_SIZE(self),
419 &start, &stop, &step, &slicelength) < 0) {
420 return NULL;
421 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000422
Thomas Wouters376446d2006-12-19 08:30:14 +0000423 if (slicelength <= 0)
424 return PyBytes_FromStringAndSize("", 0);
425 else if (step == 1) {
426 return PyBytes_FromStringAndSize(self->ob_bytes + start,
427 slicelength);
428 }
429 else {
430 char *source_buf = PyBytes_AS_STRING(self);
431 char *result_buf = (char *)PyMem_Malloc(slicelength);
432 PyObject *result;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000433
Thomas Wouters376446d2006-12-19 08:30:14 +0000434 if (result_buf == NULL)
435 return PyErr_NoMemory();
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000436
Thomas Wouters376446d2006-12-19 08:30:14 +0000437 for (cur = start, i = 0; i < slicelength;
438 cur += step, i++) {
439 result_buf[i] = source_buf[cur];
440 }
441 result = PyBytes_FromStringAndSize(result_buf, slicelength);
442 PyMem_Free(result_buf);
443 return result;
444 }
445 }
446 else {
447 PyErr_SetString(PyExc_TypeError, "bytes indices must be integers");
448 return NULL;
449 }
450}
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000451
Guido van Rossumd624f182006-04-24 13:47:05 +0000452static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000453bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
Guido van Rossumd624f182006-04-24 13:47:05 +0000454 PyObject *values)
455{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000456 Py_ssize_t avail, needed;
457 void *bytes;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000458 PyBuffer vbytes;
459 int res = 0;
Guido van Rossumd624f182006-04-24 13:47:05 +0000460
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000461 vbytes.len = -1;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000462 if (values == (PyObject *)self) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000463 /* Make a copy and call this function recursively */
Guido van Rossumd624f182006-04-24 13:47:05 +0000464 int err;
465 values = PyBytes_FromObject(values);
466 if (values == NULL)
467 return -1;
468 err = bytes_setslice(self, lo, hi, values);
469 Py_DECREF(values);
470 return err;
471 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000472 if (values == NULL) {
473 /* del b[lo:hi] */
474 bytes = NULL;
475 needed = 0;
476 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000477 else {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000478 if (_getbuffer(values, &vbytes) < 0) {
479 PyErr_Format(PyExc_TypeError,
480 "can't set bytes slice from %.100s",
481 Py_Type(values)->tp_name);
482 return -1;
483 }
484 needed = vbytes.len;
485 bytes = vbytes.buf;
Guido van Rossumd624f182006-04-24 13:47:05 +0000486 }
487
488 if (lo < 0)
489 lo = 0;
Thomas Wouters9a6e62b2006-08-23 23:20:29 +0000490 if (hi < lo)
491 hi = lo;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000492 if (hi > Py_Size(self))
493 hi = Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000494
495 avail = hi - lo;
496 if (avail < 0)
497 lo = hi = avail = 0;
498
499 if (avail != needed) {
500 if (avail > needed) {
501 /*
502 0 lo hi old_size
503 | |<----avail----->|<-----tomove------>|
504 | |<-needed->|<-----tomove------>|
505 0 lo new_hi new_size
506 */
507 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000508 Py_Size(self) - hi);
Guido van Rossumd624f182006-04-24 13:47:05 +0000509 }
Thomas Wouters376446d2006-12-19 08:30:14 +0000510 if (PyBytes_Resize((PyObject *)self,
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000511 Py_Size(self) + needed - avail) < 0) {
512 res = -1;
513 goto finish;
514 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000515 if (avail < needed) {
516 /*
517 0 lo hi old_size
518 | |<-avail->|<-----tomove------>|
519 | |<----needed---->|<-----tomove------>|
520 0 lo new_hi new_size
521 */
522 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000523 Py_Size(self) - lo - needed);
Guido van Rossumd624f182006-04-24 13:47:05 +0000524 }
525 }
526
527 if (needed > 0)
528 memcpy(self->ob_bytes + lo, bytes, needed);
529
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000530
531 finish:
532 if (vbytes.len != -1)
533 PyObject_ReleaseBuffer(values, &vbytes);
534 return res;
Guido van Rossumd624f182006-04-24 13:47:05 +0000535}
536
537static int
538bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
539{
540 Py_ssize_t ival;
541
542 if (i < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000543 i += Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000544
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000545 if (i < 0 || i >= Py_Size(self)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000546 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
547 return -1;
548 }
549
550 if (value == NULL)
551 return bytes_setslice(self, i, i+1, NULL);
552
Thomas Woutersd204a712006-08-22 13:41:17 +0000553 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000554 if (ival == -1 && PyErr_Occurred())
555 return -1;
556
557 if (ival < 0 || ival >= 256) {
558 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
559 return -1;
560 }
561
562 self->ob_bytes[i] = ival;
563 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000564}
565
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000566static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000567bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
568{
569 Py_ssize_t start, stop, step, slicelen, needed;
570 char *bytes;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000571
Thomas Wouters376446d2006-12-19 08:30:14 +0000572 if (PyIndex_Check(item)) {
573 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
574
575 if (i == -1 && PyErr_Occurred())
576 return -1;
577
578 if (i < 0)
579 i += PyBytes_GET_SIZE(self);
580
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000581 if (i < 0 || i >= Py_Size(self)) {
Thomas Wouters376446d2006-12-19 08:30:14 +0000582 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
583 return -1;
584 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000585
Thomas Wouters376446d2006-12-19 08:30:14 +0000586 if (values == NULL) {
587 /* Fall through to slice assignment */
588 start = i;
589 stop = i + 1;
590 step = 1;
591 slicelen = 1;
592 }
593 else {
594 Py_ssize_t ival = PyNumber_AsSsize_t(values, PyExc_ValueError);
595 if (ival == -1 && PyErr_Occurred())
596 return -1;
597 if (ival < 0 || ival >= 256) {
598 PyErr_SetString(PyExc_ValueError,
599 "byte must be in range(0, 256)");
600 return -1;
601 }
602 self->ob_bytes[i] = (char)ival;
603 return 0;
604 }
605 }
606 else if (PySlice_Check(item)) {
607 if (PySlice_GetIndicesEx((PySliceObject *)item,
608 PyBytes_GET_SIZE(self),
609 &start, &stop, &step, &slicelen) < 0) {
610 return -1;
611 }
612 }
613 else {
614 PyErr_SetString(PyExc_TypeError, "bytes indices must be integer");
615 return -1;
616 }
617
618 if (values == NULL) {
619 bytes = NULL;
620 needed = 0;
621 }
622 else if (values == (PyObject *)self || !PyBytes_Check(values)) {
623 /* Make a copy an call this function recursively */
624 int err;
625 values = PyBytes_FromObject(values);
626 if (values == NULL)
627 return -1;
628 err = bytes_ass_subscript(self, item, values);
629 Py_DECREF(values);
630 return err;
631 }
632 else {
633 assert(PyBytes_Check(values));
634 bytes = ((PyBytesObject *)values)->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000635 needed = Py_Size(values);
Thomas Wouters376446d2006-12-19 08:30:14 +0000636 }
637 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
638 if ((step < 0 && start < stop) ||
639 (step > 0 && start > stop))
640 stop = start;
641 if (step == 1) {
642 if (slicelen != needed) {
643 if (slicelen > needed) {
644 /*
645 0 start stop old_size
646 | |<---slicelen--->|<-----tomove------>|
647 | |<-needed->|<-----tomove------>|
648 0 lo new_hi new_size
649 */
650 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000651 Py_Size(self) - stop);
Thomas Wouters376446d2006-12-19 08:30:14 +0000652 }
653 if (PyBytes_Resize((PyObject *)self,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000654 Py_Size(self) + needed - slicelen) < 0)
Thomas Wouters376446d2006-12-19 08:30:14 +0000655 return -1;
656 if (slicelen < needed) {
657 /*
658 0 lo hi old_size
659 | |<-avail->|<-----tomove------>|
660 | |<----needed---->|<-----tomove------>|
661 0 lo new_hi new_size
662 */
663 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000664 Py_Size(self) - start - needed);
Thomas Wouters376446d2006-12-19 08:30:14 +0000665 }
666 }
667
668 if (needed > 0)
669 memcpy(self->ob_bytes + start, bytes, needed);
670
671 return 0;
672 }
673 else {
674 if (needed == 0) {
675 /* Delete slice */
676 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000677
Thomas Wouters376446d2006-12-19 08:30:14 +0000678 if (step < 0) {
679 stop = start + 1;
680 start = stop + step * (slicelen - 1) - 1;
681 step = -step;
682 }
683 for (cur = start, i = 0;
684 i < slicelen; cur += step, i++) {
685 Py_ssize_t lim = step - 1;
686
687 if (cur + step >= PyBytes_GET_SIZE(self))
688 lim = PyBytes_GET_SIZE(self) - cur - 1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000689
Thomas Wouters376446d2006-12-19 08:30:14 +0000690 memmove(self->ob_bytes + cur - i,
691 self->ob_bytes + cur + 1, lim);
692 }
693 /* Move the tail of the bytes, in one chunk */
694 cur = start + slicelen*step;
695 if (cur < PyBytes_GET_SIZE(self)) {
696 memmove(self->ob_bytes + cur - slicelen,
697 self->ob_bytes + cur,
698 PyBytes_GET_SIZE(self) - cur);
699 }
700 if (PyBytes_Resize((PyObject *)self,
701 PyBytes_GET_SIZE(self) - slicelen) < 0)
702 return -1;
703
704 return 0;
705 }
706 else {
707 /* Assign slice */
708 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000709
Thomas Wouters376446d2006-12-19 08:30:14 +0000710 if (needed != slicelen) {
711 PyErr_Format(PyExc_ValueError,
712 "attempt to assign bytes of size %zd "
713 "to extended slice of size %zd",
714 needed, slicelen);
715 return -1;
716 }
717 for (cur = start, i = 0; i < slicelen; cur += step, i++)
718 self->ob_bytes[cur] = bytes[i];
719 return 0;
720 }
721 }
722}
723
724static int
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000725bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
726{
Guido van Rossumd624f182006-04-24 13:47:05 +0000727 static char *kwlist[] = {"source", "encoding", "errors", 0};
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000728 PyObject *arg = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +0000729 const char *encoding = NULL;
730 const char *errors = NULL;
731 Py_ssize_t count;
732 PyObject *it;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000733 PyObject *(*iternext)(PyObject *);
734
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000735 if (Py_Size(self) != 0) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000736 /* Empty previous contents (yes, do this first of all!) */
737 if (PyBytes_Resize((PyObject *)self, 0) < 0)
738 return -1;
739 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000740
Guido van Rossumd624f182006-04-24 13:47:05 +0000741 /* Parse arguments */
742 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
743 &arg, &encoding, &errors))
744 return -1;
745
746 /* Make a quick exit if no first argument */
747 if (arg == NULL) {
748 if (encoding != NULL || errors != NULL) {
749 PyErr_SetString(PyExc_TypeError,
750 "encoding or errors without sequence argument");
751 return -1;
752 }
753 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000754 }
755
Guido van Rossumd624f182006-04-24 13:47:05 +0000756 if (PyUnicode_Check(arg)) {
757 /* Encode via the codec registry */
Guido van Rossum4355a472007-05-04 05:00:04 +0000758 PyObject *encoded, *new;
Guido van Rossumd624f182006-04-24 13:47:05 +0000759 if (encoding == NULL)
760 encoding = PyUnicode_GetDefaultEncoding();
761 encoded = PyCodec_Encode(arg, encoding, errors);
762 if (encoded == NULL)
763 return -1;
Guido van Rossum4355a472007-05-04 05:00:04 +0000764 if (!PyBytes_Check(encoded) && !PyString_Check(encoded)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000765 PyErr_Format(PyExc_TypeError,
Guido van Rossum4355a472007-05-04 05:00:04 +0000766 "encoder did not return a str8 or bytes object (type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000767 Py_Type(encoded)->tp_name);
Guido van Rossumd624f182006-04-24 13:47:05 +0000768 Py_DECREF(encoded);
769 return -1;
770 }
Guido van Rossum4355a472007-05-04 05:00:04 +0000771 new = bytes_iconcat(self, encoded);
772 Py_DECREF(encoded);
773 if (new == NULL)
774 return -1;
775 Py_DECREF(new);
776 return 0;
Guido van Rossumd624f182006-04-24 13:47:05 +0000777 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000778
Guido van Rossumd624f182006-04-24 13:47:05 +0000779 /* If it's not unicode, there can't be encoding or errors */
780 if (encoding != NULL || errors != NULL) {
781 PyErr_SetString(PyExc_TypeError,
782 "encoding or errors without a string argument");
783 return -1;
784 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000785
Guido van Rossumd624f182006-04-24 13:47:05 +0000786 /* Is it an int? */
Thomas Woutersd204a712006-08-22 13:41:17 +0000787 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000788 if (count == -1 && PyErr_Occurred())
789 PyErr_Clear();
790 else {
791 if (count < 0) {
792 PyErr_SetString(PyExc_ValueError, "negative count");
793 return -1;
794 }
795 if (count > 0) {
796 if (PyBytes_Resize((PyObject *)self, count))
797 return -1;
798 memset(self->ob_bytes, 0, count);
799 }
800 return 0;
801 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000802
803 /* Use the modern buffer interface */
804 if (PyObject_CheckBuffer(arg)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000805 Py_ssize_t size;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000806 PyBuffer view;
807 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000808 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000809 size = view.len;
810 if (PyBytes_Resize((PyObject *)self, size) < 0) goto fail;
811 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
812 goto fail;
813 PyObject_ReleaseBuffer(arg, &view);
Guido van Rossumd624f182006-04-24 13:47:05 +0000814 return 0;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000815 fail:
816 PyObject_ReleaseBuffer(arg, &view);
817 return -1;
Guido van Rossumd624f182006-04-24 13:47:05 +0000818 }
819
820 /* XXX Optimize this if the arguments is a list, tuple */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000821
822 /* Get the iterator */
823 it = PyObject_GetIter(arg);
824 if (it == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000825 return -1;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000826 iternext = *Py_Type(it)->tp_iternext;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000827
828 /* Run the iterator to exhaustion */
829 for (;;) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000830 PyObject *item;
831 Py_ssize_t value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000832
Guido van Rossumd624f182006-04-24 13:47:05 +0000833 /* Get the next item */
834 item = iternext(it);
835 if (item == NULL) {
836 if (PyErr_Occurred()) {
837 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
838 goto error;
839 PyErr_Clear();
840 }
841 break;
842 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000843
Guido van Rossumd624f182006-04-24 13:47:05 +0000844 /* Interpret it as an int (__index__) */
Thomas Woutersd204a712006-08-22 13:41:17 +0000845 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000846 Py_DECREF(item);
847 if (value == -1 && PyErr_Occurred())
848 goto error;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000849
Guido van Rossumd624f182006-04-24 13:47:05 +0000850 /* Range check */
851 if (value < 0 || value >= 256) {
852 PyErr_SetString(PyExc_ValueError,
853 "bytes must be in range(0, 256)");
854 goto error;
855 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000856
Guido van Rossumd624f182006-04-24 13:47:05 +0000857 /* Append the byte */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000858 if (Py_Size(self) < self->ob_alloc)
859 Py_Size(self)++;
860 else if (PyBytes_Resize((PyObject *)self, Py_Size(self)+1) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000861 goto error;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000862 self->ob_bytes[Py_Size(self)-1] = value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000863 }
864
865 /* Clean up and return success */
866 Py_DECREF(it);
867 return 0;
868
869 error:
870 /* Error handling when it != NULL */
871 Py_DECREF(it);
872 return -1;
873}
874
Georg Brandlee91be42007-02-24 19:41:35 +0000875/* Mostly copied from string_repr, but without the
876 "smart quote" functionality. */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000877static PyObject *
878bytes_repr(PyBytesObject *self)
879{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000880 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000881 size_t newsize = 3 + 4 * Py_Size(self);
Georg Brandlee91be42007-02-24 19:41:35 +0000882 PyObject *v;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000883 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(self)) {
Georg Brandlee91be42007-02-24 19:41:35 +0000884 PyErr_SetString(PyExc_OverflowError,
885 "bytes object is too large to make repr");
Guido van Rossumd624f182006-04-24 13:47:05 +0000886 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000887 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000888 v = PyUnicode_FromUnicode(NULL, newsize);
Georg Brandlee91be42007-02-24 19:41:35 +0000889 if (v == NULL) {
890 return NULL;
891 }
892 else {
893 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000894 register Py_UNICODE c;
895 register Py_UNICODE *p;
Georg Brandlee91be42007-02-24 19:41:35 +0000896 int quote = '\'';
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000897
Walter Dörwald1ab83302007-05-18 17:15:44 +0000898 p = PyUnicode_AS_UNICODE(v);
Georg Brandlee91be42007-02-24 19:41:35 +0000899 *p++ = 'b';
900 *p++ = quote;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000901 for (i = 0; i < Py_Size(self); i++) {
Georg Brandlee91be42007-02-24 19:41:35 +0000902 /* There's at least enough room for a hex escape
903 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000904 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Georg Brandlee91be42007-02-24 19:41:35 +0000905 c = self->ob_bytes[i];
906 if (c == quote || c == '\\')
907 *p++ = '\\', *p++ = c;
908 else if (c == '\t')
909 *p++ = '\\', *p++ = 't';
910 else if (c == '\n')
911 *p++ = '\\', *p++ = 'n';
912 else if (c == '\r')
913 *p++ = '\\', *p++ = 'r';
914 else if (c == 0)
Guido van Rossum57b93ad2007-05-08 19:09:34 +0000915 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
Georg Brandlee91be42007-02-24 19:41:35 +0000916 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000917 *p++ = '\\';
918 *p++ = 'x';
919 *p++ = hexdigits[(c & 0xf0) >> 4];
920 *p++ = hexdigits[c & 0xf];
Georg Brandlee91be42007-02-24 19:41:35 +0000921 }
922 else
923 *p++ = c;
924 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000925 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Georg Brandlee91be42007-02-24 19:41:35 +0000926 *p++ = quote;
927 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000928 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
929 Py_DECREF(v);
930 return NULL;
931 }
Georg Brandlee91be42007-02-24 19:41:35 +0000932 return v;
933 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000934}
935
936static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000937bytes_str(PyBytesObject *self)
938{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000939 return PyString_FromStringAndSize(self->ob_bytes, Py_Size(self));
Guido van Rossumd624f182006-04-24 13:47:05 +0000940}
941
942static PyObject *
Guido van Rossum343e97f2007-04-09 00:43:24 +0000943bytes_richcompare(PyObject *self, PyObject *other, int op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000944{
Guido van Rossum343e97f2007-04-09 00:43:24 +0000945 Py_ssize_t self_size, other_size;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000946 PyBuffer self_bytes, other_bytes;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000947 PyObject *res;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000948 Py_ssize_t minsize;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000949 int cmp;
950
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000951 /* Bytes can be compared to anything that supports the (binary) buffer
952 API. Except Unicode. */
Guido van Rossumebea9be2007-04-09 00:49:13 +0000953
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000954 self_size = _getbuffer(self, &self_bytes);
955 if (self_size < 0) {
Guido van Rossumebea9be2007-04-09 00:49:13 +0000956 Py_INCREF(Py_NotImplemented);
957 return Py_NotImplemented;
958 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000959
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000960 other_size = _getbuffer(other, &other_bytes);
961 if (other_size < 0) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000962 PyObject_ReleaseBuffer(self, &self_bytes);
Guido van Rossumd624f182006-04-24 13:47:05 +0000963 Py_INCREF(Py_NotImplemented);
964 return Py_NotImplemented;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000965 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000966
967 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000968 /* Shortcut: if the lengths differ, the objects differ */
969 cmp = (op == Py_NE);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000970 }
971 else {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000972 minsize = self_size;
973 if (other_size < minsize)
974 minsize = other_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000975
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000976 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
Guido van Rossumd624f182006-04-24 13:47:05 +0000977 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000978
Guido van Rossumd624f182006-04-24 13:47:05 +0000979 if (cmp == 0) {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000980 if (self_size < other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +0000981 cmp = -1;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000982 else if (self_size > other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +0000983 cmp = 1;
984 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000985
Guido van Rossumd624f182006-04-24 13:47:05 +0000986 switch (op) {
987 case Py_LT: cmp = cmp < 0; break;
988 case Py_LE: cmp = cmp <= 0; break;
989 case Py_EQ: cmp = cmp == 0; break;
990 case Py_NE: cmp = cmp != 0; break;
991 case Py_GT: cmp = cmp > 0; break;
992 case Py_GE: cmp = cmp >= 0; break;
993 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000994 }
995
996 res = cmp ? Py_True : Py_False;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000997 PyObject_ReleaseBuffer(self, &self_bytes);
998 PyObject_ReleaseBuffer(other, &other_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000999 Py_INCREF(res);
1000 return res;
1001}
1002
1003static void
1004bytes_dealloc(PyBytesObject *self)
1005{
Guido van Rossumd624f182006-04-24 13:47:05 +00001006 if (self->ob_bytes != 0) {
1007 PyMem_Free(self->ob_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001008 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001009 Py_Type(self)->tp_free((PyObject *)self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001010}
1011
Neal Norwitz6968b052007-02-27 19:02:19 +00001012
1013/* -------------------------------------------------------------------- */
1014/* Methods */
1015
/* Settings for the stringlib headers included below (presumably read
   by those headers as template parameters -- confirm against
   stringlib/): plain char elements compared with memcmp, with length
   and construction taken from the PyBytes API. */
#define STRINGLIB_CHAR char
#define STRINGLIB_CMP memcmp
#define STRINGLIB_LEN PyBytes_GET_SIZE
#define STRINGLIB_NEW PyBytes_FromStringAndSize
/* nullbytes is the shared empty bytes object created by PyBytes_Init();
   per the note at the top of this file it is needed by partition. */
#define STRINGLIB_EMPTY nullbytes

#include "stringlib/fastsearch.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/partition.h"
1026
1027
1028/* The following Py_LOCAL_INLINE and Py_LOCAL functions
1029were copied from the old char* style string object. */
1030
1031Py_LOCAL_INLINE(void)
1032_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1033{
1034 if (*end > len)
1035 *end = len;
1036 else if (*end < 0)
1037 *end += len;
1038 if (*end < 0)
1039 *end = 0;
1040 if (*start < 0)
1041 *start += len;
1042 if (*start < 0)
1043 *start = 0;
1044}
1045
1046
1047Py_LOCAL_INLINE(Py_ssize_t)
1048bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
1049{
1050 PyObject *subobj;
1051 const char *sub;
1052 Py_ssize_t sub_len;
1053 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1054
1055 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1056 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1057 return -2;
1058 if (PyBytes_Check(subobj)) {
1059 sub = PyBytes_AS_STRING(subobj);
1060 sub_len = PyBytes_GET_SIZE(subobj);
1061 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001062 /* XXX --> use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001063 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1064 /* XXX - the "expected a character buffer object" is pretty
1065 confusing for a non-expert. remap to something else ? */
1066 return -2;
1067
1068 if (dir > 0)
1069 return stringlib_find_slice(
1070 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1071 sub, sub_len, start, end);
1072 else
1073 return stringlib_rfind_slice(
1074 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1075 sub, sub_len, start, end);
1076}
1077
1078
1079PyDoc_STRVAR(find__doc__,
1080"B.find(sub [,start [,end]]) -> int\n\
1081\n\
1082Return the lowest index in B where subsection sub is found,\n\
1083such that sub is contained within s[start,end]. Optional\n\
1084arguments start and end are interpreted as in slice notation.\n\
1085\n\
1086Return -1 on failure.");
1087
1088static PyObject *
1089bytes_find(PyBytesObject *self, PyObject *args)
1090{
1091 Py_ssize_t result = bytes_find_internal(self, args, +1);
1092 if (result == -2)
1093 return NULL;
1094 return PyInt_FromSsize_t(result);
1095}
1096
1097PyDoc_STRVAR(count__doc__,
1098"B.count(sub[, start[, end]]) -> int\n\
1099\n\
1100Return the number of non-overlapping occurrences of subsection sub in\n\
1101bytes B[start:end]. Optional arguments start and end are interpreted\n\
1102as in slice notation.");
1103
1104static PyObject *
1105bytes_count(PyBytesObject *self, PyObject *args)
1106{
1107 PyObject *sub_obj;
1108 const char *str = PyBytes_AS_STRING(self), *sub;
1109 Py_ssize_t sub_len;
1110 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1111
1112 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1113 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1114 return NULL;
1115
1116 if (PyBytes_Check(sub_obj)) {
1117 sub = PyBytes_AS_STRING(sub_obj);
1118 sub_len = PyBytes_GET_SIZE(sub_obj);
1119 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001120 /* XXX --> use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001121 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
1122 return NULL;
1123
Martin v. Löwis5b222132007-06-10 09:51:05 +00001124 _adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
Neal Norwitz6968b052007-02-27 19:02:19 +00001125
1126 return PyInt_FromSsize_t(
1127 stringlib_count(str + start, end - start, sub, sub_len)
1128 );
1129}
1130
1131
1132PyDoc_STRVAR(index__doc__,
1133"B.index(sub [,start [,end]]) -> int\n\
1134\n\
1135Like B.find() but raise ValueError when the subsection is not found.");
1136
1137static PyObject *
1138bytes_index(PyBytesObject *self, PyObject *args)
1139{
1140 Py_ssize_t result = bytes_find_internal(self, args, +1);
1141 if (result == -2)
1142 return NULL;
1143 if (result == -1) {
1144 PyErr_SetString(PyExc_ValueError,
1145 "subsection not found");
1146 return NULL;
1147 }
1148 return PyInt_FromSsize_t(result);
1149}
1150
1151
1152PyDoc_STRVAR(rfind__doc__,
1153"B.rfind(sub [,start [,end]]) -> int\n\
1154\n\
1155Return the highest index in B where subsection sub is found,\n\
1156such that sub is contained within s[start,end]. Optional\n\
1157arguments start and end are interpreted as in slice notation.\n\
1158\n\
1159Return -1 on failure.");
1160
1161static PyObject *
1162bytes_rfind(PyBytesObject *self, PyObject *args)
1163{
1164 Py_ssize_t result = bytes_find_internal(self, args, -1);
1165 if (result == -2)
1166 return NULL;
1167 return PyInt_FromSsize_t(result);
1168}
1169
1170
1171PyDoc_STRVAR(rindex__doc__,
1172"B.rindex(sub [,start [,end]]) -> int\n\
1173\n\
1174Like B.rfind() but raise ValueError when the subsection is not found.");
1175
1176static PyObject *
1177bytes_rindex(PyBytesObject *self, PyObject *args)
1178{
1179 Py_ssize_t result = bytes_find_internal(self, args, -1);
1180 if (result == -2)
1181 return NULL;
1182 if (result == -1) {
1183 PyErr_SetString(PyExc_ValueError,
1184 "subsection not found");
1185 return NULL;
1186 }
1187 return PyInt_FromSsize_t(result);
1188}
1189
1190
1191/* Matches the end (direction >= 0) or start (direction < 0) of self
1192 * against substr, using the start and end arguments. Returns
1193 * -1 on error, 0 if not found and 1 if found.
1194 */
1195Py_LOCAL(int)
1196_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
1197 Py_ssize_t end, int direction)
1198{
1199 Py_ssize_t len = PyBytes_GET_SIZE(self);
1200 Py_ssize_t slen;
1201 const char* sub;
1202 const char* str;
1203
1204 if (PyBytes_Check(substr)) {
1205 sub = PyBytes_AS_STRING(substr);
1206 slen = PyBytes_GET_SIZE(substr);
1207 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001208 /* XXX --> Use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001209 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
1210 return -1;
1211 str = PyBytes_AS_STRING(self);
1212
1213 _adjust_indices(&start, &end, len);
1214
1215 if (direction < 0) {
1216 /* startswith */
1217 if (start+slen > len)
1218 return 0;
1219 } else {
1220 /* endswith */
1221 if (end-start < slen || start > len)
1222 return 0;
1223
1224 if (end-slen > start)
1225 start = end - slen;
1226 }
1227 if (end-start >= slen)
1228 return ! memcmp(str+start, sub, slen);
1229 return 0;
1230}
1231
1232
1233PyDoc_STRVAR(startswith__doc__,
1234"B.startswith(prefix[, start[, end]]) -> bool\n\
1235\n\
1236Return True if B starts with the specified prefix, False otherwise.\n\
1237With optional start, test B beginning at that position.\n\
1238With optional end, stop comparing B at that position.\n\
1239prefix can also be a tuple of strings to try.");
1240
1241static PyObject *
1242bytes_startswith(PyBytesObject *self, PyObject *args)
1243{
1244 Py_ssize_t start = 0;
1245 Py_ssize_t end = PY_SSIZE_T_MAX;
1246 PyObject *subobj;
1247 int result;
1248
1249 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1250 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1251 return NULL;
1252 if (PyTuple_Check(subobj)) {
1253 Py_ssize_t i;
1254 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1255 result = _bytes_tailmatch(self,
1256 PyTuple_GET_ITEM(subobj, i),
1257 start, end, -1);
1258 if (result == -1)
1259 return NULL;
1260 else if (result) {
1261 Py_RETURN_TRUE;
1262 }
1263 }
1264 Py_RETURN_FALSE;
1265 }
1266 result = _bytes_tailmatch(self, subobj, start, end, -1);
1267 if (result == -1)
1268 return NULL;
1269 else
1270 return PyBool_FromLong(result);
1271}
1272
1273PyDoc_STRVAR(endswith__doc__,
1274"B.endswith(suffix[, start[, end]]) -> bool\n\
1275\n\
1276Return True if B ends with the specified suffix, False otherwise.\n\
1277With optional start, test B beginning at that position.\n\
1278With optional end, stop comparing B at that position.\n\
1279suffix can also be a tuple of strings to try.");
1280
1281static PyObject *
1282bytes_endswith(PyBytesObject *self, PyObject *args)
1283{
1284 Py_ssize_t start = 0;
1285 Py_ssize_t end = PY_SSIZE_T_MAX;
1286 PyObject *subobj;
1287 int result;
1288
1289 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1290 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1291 return NULL;
1292 if (PyTuple_Check(subobj)) {
1293 Py_ssize_t i;
1294 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1295 result = _bytes_tailmatch(self,
1296 PyTuple_GET_ITEM(subobj, i),
1297 start, end, +1);
1298 if (result == -1)
1299 return NULL;
1300 else if (result) {
1301 Py_RETURN_TRUE;
1302 }
1303 }
1304 Py_RETURN_FALSE;
1305 }
1306 result = _bytes_tailmatch(self, subobj, start, end, +1);
1307 if (result == -1)
1308 return NULL;
1309 else
1310 return PyBool_FromLong(result);
1311}
1312
1313
1314
1315PyDoc_STRVAR(translate__doc__,
1316"B.translate(table [,deletechars]) -> bytes\n\
1317\n\
1318Return a copy of the bytes B, where all characters occurring\n\
1319in the optional argument deletechars are removed, and the\n\
1320remaining characters have been mapped through the given\n\
1321translation table, which must be a bytes of length 256.");
1322
1323static PyObject *
1324bytes_translate(PyBytesObject *self, PyObject *args)
1325{
1326 register char *input, *output;
1327 register const char *table;
1328 register Py_ssize_t i, c, changed = 0;
1329 PyObject *input_obj = (PyObject*)self;
1330 const char *table1, *output_start, *del_table=NULL;
1331 Py_ssize_t inlen, tablen, dellen = 0;
1332 PyObject *result;
1333 int trans_table[256];
1334 PyObject *tableobj, *delobj = NULL;
1335
1336 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1337 &tableobj, &delobj))
1338 return NULL;
1339
1340 if (PyBytes_Check(tableobj)) {
1341 table1 = PyBytes_AS_STRING(tableobj);
1342 tablen = PyBytes_GET_SIZE(tableobj);
1343 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001344 /* XXX -> Use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001345 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
1346 return NULL;
1347
1348 if (tablen != 256) {
1349 PyErr_SetString(PyExc_ValueError,
1350 "translation table must be 256 characters long");
1351 return NULL;
1352 }
1353
1354 if (delobj != NULL) {
1355 if (PyBytes_Check(delobj)) {
1356 del_table = PyBytes_AS_STRING(delobj);
1357 dellen = PyBytes_GET_SIZE(delobj);
1358 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001359 /* XXX -> use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00001360 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1361 return NULL;
1362 }
1363 else {
1364 del_table = NULL;
1365 dellen = 0;
1366 }
1367
1368 table = table1;
1369 inlen = PyBytes_GET_SIZE(input_obj);
1370 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1371 if (result == NULL)
1372 return NULL;
1373 output_start = output = PyBytes_AsString(result);
1374 input = PyBytes_AS_STRING(input_obj);
1375
1376 if (dellen == 0) {
1377 /* If no deletions are required, use faster code */
1378 for (i = inlen; --i >= 0; ) {
1379 c = Py_CHARMASK(*input++);
1380 if (Py_CHARMASK((*output++ = table[c])) != c)
1381 changed = 1;
1382 }
1383 if (changed || !PyBytes_CheckExact(input_obj))
1384 return result;
1385 Py_DECREF(result);
1386 Py_INCREF(input_obj);
1387 return input_obj;
1388 }
1389
1390 for (i = 0; i < 256; i++)
1391 trans_table[i] = Py_CHARMASK(table[i]);
1392
1393 for (i = 0; i < dellen; i++)
1394 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1395
1396 for (i = inlen; --i >= 0; ) {
1397 c = Py_CHARMASK(*input++);
1398 if (trans_table[c] != -1)
1399 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1400 continue;
1401 changed = 1;
1402 }
1403 if (!changed && PyBytes_CheckExact(input_obj)) {
1404 Py_DECREF(result);
1405 Py_INCREF(input_obj);
1406 return input_obj;
1407 }
1408 /* Fix the size of the resulting string */
1409 if (inlen > 0)
1410 PyBytes_Resize(result, output - output_start);
1411 return result;
1412}
1413
1414
/* Search directions.  REVERSE is parenthesised so that it stays a
   single operand wherever it is expanded. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first byte equal to c within target[0:target_len],
   or NULL when there is none (a thin wrapper around memchr). */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))

/* True when pattern[0:length] occurs in target at offset.  The first
   and last bytes are compared before memcmp() checks the interior.
   Don't call if length < 2 (length-2 would go negative).  Arguments
   are evaluated more than once, so they must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1428
1429
1430/* Bytes ops must return a string. */
1431/* If the object is subclass of bytes, create a copy */
1432Py_LOCAL(PyBytesObject *)
1433return_self(PyBytesObject *self)
1434{
1435 if (PyBytes_CheckExact(self)) {
1436 Py_INCREF(self);
1437 return (PyBytesObject *)self;
1438 }
1439 return (PyBytesObject *)PyBytes_FromStringAndSize(
1440 PyBytes_AS_STRING(self),
1441 PyBytes_GET_SIZE(self));
1442}
1443
1444Py_LOCAL_INLINE(Py_ssize_t)
1445countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1446{
1447 Py_ssize_t count=0;
1448 const char *start=target;
1449 const char *end=target+target_len;
1450
1451 while ( (start=findchar(start, end-start, c)) != NULL ) {
1452 count++;
1453 if (count >= maxcount)
1454 break;
1455 start += 1;
1456 }
1457 return count;
1458}
1459
1460Py_LOCAL(Py_ssize_t)
1461findstring(const char *target, Py_ssize_t target_len,
1462 const char *pattern, Py_ssize_t pattern_len,
1463 Py_ssize_t start,
1464 Py_ssize_t end,
1465 int direction)
1466{
1467 if (start < 0) {
1468 start += target_len;
1469 if (start < 0)
1470 start = 0;
1471 }
1472 if (end > target_len) {
1473 end = target_len;
1474 } else if (end < 0) {
1475 end += target_len;
1476 if (end < 0)
1477 end = 0;
1478 }
1479
1480 /* zero-length substrings always match at the first attempt */
1481 if (pattern_len == 0)
1482 return (direction > 0) ? start : end;
1483
1484 end -= pattern_len;
1485
1486 if (direction < 0) {
1487 for (; end >= start; end--)
1488 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1489 return end;
1490 } else {
1491 for (; start <= end; start++)
1492 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1493 return start;
1494 }
1495 return -1;
1496}
1497
1498Py_LOCAL_INLINE(Py_ssize_t)
1499countstring(const char *target, Py_ssize_t target_len,
1500 const char *pattern, Py_ssize_t pattern_len,
1501 Py_ssize_t start,
1502 Py_ssize_t end,
1503 int direction, Py_ssize_t maxcount)
1504{
1505 Py_ssize_t count=0;
1506
1507 if (start < 0) {
1508 start += target_len;
1509 if (start < 0)
1510 start = 0;
1511 }
1512 if (end > target_len) {
1513 end = target_len;
1514 } else if (end < 0) {
1515 end += target_len;
1516 if (end < 0)
1517 end = 0;
1518 }
1519
1520 /* zero-length substrings match everywhere */
1521 if (pattern_len == 0 || maxcount == 0) {
1522 if (target_len+1 < maxcount)
1523 return target_len+1;
1524 return maxcount;
1525 }
1526
1527 end -= pattern_len;
1528 if (direction < 0) {
1529 for (; (end >= start); end--)
1530 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1531 count++;
1532 if (--maxcount <= 0) break;
1533 end -= pattern_len-1;
1534 }
1535 } else {
1536 for (; (start <= end); start++)
1537 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1538 count++;
1539 if (--maxcount <= 0)
1540 break;
1541 start += pattern_len-1;
1542 }
1543 }
1544 return count;
1545}
1546
1547
1548/* Algorithms for different cases of string replacement */
1549
1550/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1551Py_LOCAL(PyBytesObject *)
1552replace_interleave(PyBytesObject *self,
1553 const char *to_s, Py_ssize_t to_len,
1554 Py_ssize_t maxcount)
1555{
1556 char *self_s, *result_s;
1557 Py_ssize_t self_len, result_len;
1558 Py_ssize_t count, i, product;
1559 PyBytesObject *result;
1560
1561 self_len = PyBytes_GET_SIZE(self);
1562
1563 /* 1 at the end plus 1 after every character */
1564 count = self_len+1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001565 if (maxcount < count)
Neal Norwitz6968b052007-02-27 19:02:19 +00001566 count = maxcount;
1567
1568 /* Check for overflow */
1569 /* result_len = count * to_len + self_len; */
1570 product = count * to_len;
1571 if (product / to_len != count) {
1572 PyErr_SetString(PyExc_OverflowError,
1573 "replace string is too long");
1574 return NULL;
1575 }
1576 result_len = product + self_len;
1577 if (result_len < 0) {
1578 PyErr_SetString(PyExc_OverflowError,
1579 "replace string is too long");
1580 return NULL;
1581 }
1582
1583 if (! (result = (PyBytesObject *)
1584 PyBytes_FromStringAndSize(NULL, result_len)) )
1585 return NULL;
1586
1587 self_s = PyBytes_AS_STRING(self);
1588 result_s = PyBytes_AS_STRING(result);
1589
1590 /* TODO: special case single character, which doesn't need memcpy */
1591
1592 /* Lay the first one down (guaranteed this will occur) */
1593 Py_MEMCPY(result_s, to_s, to_len);
1594 result_s += to_len;
1595 count -= 1;
1596
1597 for (i=0; i<count; i++) {
1598 *result_s++ = *self_s++;
1599 Py_MEMCPY(result_s, to_s, to_len);
1600 result_s += to_len;
1601 }
1602
1603 /* Copy the rest of the original string */
1604 Py_MEMCPY(result_s, self_s, self_len-i);
1605
1606 return result;
1607}
1608
1609/* Special case for deleting a single character */
1610/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1611Py_LOCAL(PyBytesObject *)
1612replace_delete_single_character(PyBytesObject *self,
1613 char from_c, Py_ssize_t maxcount)
1614{
1615 char *self_s, *result_s;
1616 char *start, *next, *end;
1617 Py_ssize_t self_len, result_len;
1618 Py_ssize_t count;
1619 PyBytesObject *result;
1620
1621 self_len = PyBytes_GET_SIZE(self);
1622 self_s = PyBytes_AS_STRING(self);
1623
1624 count = countchar(self_s, self_len, from_c, maxcount);
1625 if (count == 0) {
1626 return return_self(self);
1627 }
1628
1629 result_len = self_len - count; /* from_len == 1 */
1630 assert(result_len>=0);
1631
1632 if ( (result = (PyBytesObject *)
1633 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1634 return NULL;
1635 result_s = PyBytes_AS_STRING(result);
1636
1637 start = self_s;
1638 end = self_s + self_len;
1639 while (count-- > 0) {
1640 next = findchar(start, end-start, from_c);
1641 if (next == NULL)
1642 break;
1643 Py_MEMCPY(result_s, start, next-start);
1644 result_s += (next-start);
1645 start = next+1;
1646 }
1647 Py_MEMCPY(result_s, start, end-start);
1648
1649 return result;
1650}
1651
1652/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1653
1654Py_LOCAL(PyBytesObject *)
1655replace_delete_substring(PyBytesObject *self,
1656 const char *from_s, Py_ssize_t from_len,
1657 Py_ssize_t maxcount)
1658{
1659 char *self_s, *result_s;
1660 char *start, *next, *end;
1661 Py_ssize_t self_len, result_len;
1662 Py_ssize_t count, offset;
1663 PyBytesObject *result;
1664
1665 self_len = PyBytes_GET_SIZE(self);
1666 self_s = PyBytes_AS_STRING(self);
1667
1668 count = countstring(self_s, self_len,
1669 from_s, from_len,
1670 0, self_len, 1,
1671 maxcount);
1672
1673 if (count == 0) {
1674 /* no matches */
1675 return return_self(self);
1676 }
1677
1678 result_len = self_len - (count * from_len);
1679 assert (result_len>=0);
1680
1681 if ( (result = (PyBytesObject *)
1682 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1683 return NULL;
1684
1685 result_s = PyBytes_AS_STRING(result);
1686
1687 start = self_s;
1688 end = self_s + self_len;
1689 while (count-- > 0) {
1690 offset = findstring(start, end-start,
1691 from_s, from_len,
1692 0, end-start, FORWARD);
1693 if (offset == -1)
1694 break;
1695 next = start + offset;
1696
1697 Py_MEMCPY(result_s, start, next-start);
1698
1699 result_s += (next-start);
1700 start = next+from_len;
1701 }
1702 Py_MEMCPY(result_s, start, end-start);
1703 return result;
1704}
1705
1706/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1707Py_LOCAL(PyBytesObject *)
1708replace_single_character_in_place(PyBytesObject *self,
1709 char from_c, char to_c,
1710 Py_ssize_t maxcount)
1711{
1712 char *self_s, *result_s, *start, *end, *next;
1713 Py_ssize_t self_len;
1714 PyBytesObject *result;
1715
1716 /* The result string will be the same size */
1717 self_s = PyBytes_AS_STRING(self);
1718 self_len = PyBytes_GET_SIZE(self);
1719
1720 next = findchar(self_s, self_len, from_c);
1721
1722 if (next == NULL) {
1723 /* No matches; return the original bytes */
1724 return return_self(self);
1725 }
1726
1727 /* Need to make a new bytes */
1728 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1729 if (result == NULL)
1730 return NULL;
1731 result_s = PyBytes_AS_STRING(result);
1732 Py_MEMCPY(result_s, self_s, self_len);
1733
1734 /* change everything in-place, starting with this one */
1735 start = result_s + (next-self_s);
1736 *start = to_c;
1737 start++;
1738 end = result_s + self_len;
1739
1740 while (--maxcount > 0) {
1741 next = findchar(start, end-start, from_c);
1742 if (next == NULL)
1743 break;
1744 *next = to_c;
1745 start = next+1;
1746 }
1747
1748 return result;
1749}
1750
1751/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1752Py_LOCAL(PyBytesObject *)
1753replace_substring_in_place(PyBytesObject *self,
1754 const char *from_s, Py_ssize_t from_len,
1755 const char *to_s, Py_ssize_t to_len,
1756 Py_ssize_t maxcount)
1757{
1758 char *result_s, *start, *end;
1759 char *self_s;
1760 Py_ssize_t self_len, offset;
1761 PyBytesObject *result;
1762
1763 /* The result bytes will be the same size */
1764
1765 self_s = PyBytes_AS_STRING(self);
1766 self_len = PyBytes_GET_SIZE(self);
1767
1768 offset = findstring(self_s, self_len,
1769 from_s, from_len,
1770 0, self_len, FORWARD);
1771 if (offset == -1) {
1772 /* No matches; return the original bytes */
1773 return return_self(self);
1774 }
1775
1776 /* Need to make a new bytes */
1777 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1778 if (result == NULL)
1779 return NULL;
1780 result_s = PyBytes_AS_STRING(result);
1781 Py_MEMCPY(result_s, self_s, self_len);
1782
1783 /* change everything in-place, starting with this one */
1784 start = result_s + offset;
1785 Py_MEMCPY(start, to_s, from_len);
1786 start += from_len;
1787 end = result_s + self_len;
1788
1789 while ( --maxcount > 0) {
1790 offset = findstring(start, end-start,
1791 from_s, from_len,
1792 0, end-start, FORWARD);
1793 if (offset==-1)
1794 break;
1795 Py_MEMCPY(start+offset, to_s, from_len);
1796 start += offset+from_len;
1797 }
1798
1799 return result;
1800}
1801
1802/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1803Py_LOCAL(PyBytesObject *)
1804replace_single_character(PyBytesObject *self,
1805 char from_c,
1806 const char *to_s, Py_ssize_t to_len,
1807 Py_ssize_t maxcount)
1808{
1809 char *self_s, *result_s;
1810 char *start, *next, *end;
1811 Py_ssize_t self_len, result_len;
1812 Py_ssize_t count, product;
1813 PyBytesObject *result;
1814
1815 self_s = PyBytes_AS_STRING(self);
1816 self_len = PyBytes_GET_SIZE(self);
1817
1818 count = countchar(self_s, self_len, from_c, maxcount);
1819 if (count == 0) {
1820 /* no matches, return unchanged */
1821 return return_self(self);
1822 }
1823
1824 /* use the difference between current and new, hence the "-1" */
1825 /* result_len = self_len + count * (to_len-1) */
1826 product = count * (to_len-1);
1827 if (product / (to_len-1) != count) {
1828 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1829 return NULL;
1830 }
1831 result_len = self_len + product;
1832 if (result_len < 0) {
1833 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1834 return NULL;
1835 }
1836
1837 if ( (result = (PyBytesObject *)
1838 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1839 return NULL;
1840 result_s = PyBytes_AS_STRING(result);
1841
1842 start = self_s;
1843 end = self_s + self_len;
1844 while (count-- > 0) {
1845 next = findchar(start, end-start, from_c);
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001846 if (next == NULL)
Neal Norwitz6968b052007-02-27 19:02:19 +00001847 break;
1848
1849 if (next == start) {
1850 /* replace with the 'to' */
1851 Py_MEMCPY(result_s, to_s, to_len);
1852 result_s += to_len;
1853 start += 1;
1854 } else {
1855 /* copy the unchanged old then the 'to' */
1856 Py_MEMCPY(result_s, start, next-start);
1857 result_s += (next-start);
1858 Py_MEMCPY(result_s, to_s, to_len);
1859 result_s += to_len;
1860 start = next+1;
1861 }
1862 }
1863 /* Copy the remainder of the remaining bytes */
1864 Py_MEMCPY(result_s, start, end-start);
1865
1866 return result;
1867}
1868
1869/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1870Py_LOCAL(PyBytesObject *)
1871replace_substring(PyBytesObject *self,
1872 const char *from_s, Py_ssize_t from_len,
1873 const char *to_s, Py_ssize_t to_len,
1874 Py_ssize_t maxcount)
1875{
1876 char *self_s, *result_s;
1877 char *start, *next, *end;
1878 Py_ssize_t self_len, result_len;
1879 Py_ssize_t count, offset, product;
1880 PyBytesObject *result;
1881
1882 self_s = PyBytes_AS_STRING(self);
1883 self_len = PyBytes_GET_SIZE(self);
1884
1885 count = countstring(self_s, self_len,
1886 from_s, from_len,
1887 0, self_len, FORWARD, maxcount);
1888 if (count == 0) {
1889 /* no matches, return unchanged */
1890 return return_self(self);
1891 }
1892
1893 /* Check for overflow */
1894 /* result_len = self_len + count * (to_len-from_len) */
1895 product = count * (to_len-from_len);
1896 if (product / (to_len-from_len) != count) {
1897 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1898 return NULL;
1899 }
1900 result_len = self_len + product;
1901 if (result_len < 0) {
1902 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1903 return NULL;
1904 }
1905
1906 if ( (result = (PyBytesObject *)
1907 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1908 return NULL;
1909 result_s = PyBytes_AS_STRING(result);
1910
1911 start = self_s;
1912 end = self_s + self_len;
1913 while (count-- > 0) {
1914 offset = findstring(start, end-start,
1915 from_s, from_len,
1916 0, end-start, FORWARD);
1917 if (offset == -1)
1918 break;
1919 next = start+offset;
1920 if (next == start) {
1921 /* replace with the 'to' */
1922 Py_MEMCPY(result_s, to_s, to_len);
1923 result_s += to_len;
1924 start += from_len;
1925 } else {
1926 /* copy the unchanged old then the 'to' */
1927 Py_MEMCPY(result_s, start, next-start);
1928 result_s += (next-start);
1929 Py_MEMCPY(result_s, to_s, to_len);
1930 result_s += to_len;
1931 start = next+from_len;
1932 }
1933 }
1934 /* Copy the remainder of the remaining bytes */
1935 Py_MEMCPY(result_s, start, end-start);
1936
1937 return result;
1938}
1939
1940
1941Py_LOCAL(PyBytesObject *)
1942replace(PyBytesObject *self,
1943 const char *from_s, Py_ssize_t from_len,
1944 const char *to_s, Py_ssize_t to_len,
1945 Py_ssize_t maxcount)
1946{
1947 if (maxcount < 0) {
1948 maxcount = PY_SSIZE_T_MAX;
1949 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
1950 /* nothing to do; return the original bytes */
1951 return return_self(self);
1952 }
1953
1954 if (maxcount == 0 ||
1955 (from_len == 0 && to_len == 0)) {
1956 /* nothing to do; return the original bytes */
1957 return return_self(self);
1958 }
1959
1960 /* Handle zero-length special cases */
1961
1962 if (from_len == 0) {
1963 /* insert the 'to' bytes everywhere. */
1964 /* >>> "Python".replace("", ".") */
1965 /* '.P.y.t.h.o.n.' */
1966 return replace_interleave(self, to_s, to_len, maxcount);
1967 }
1968
1969 /* Except for "".replace("", "A") == "A" there is no way beyond this */
1970 /* point for an empty self bytes to generate a non-empty bytes */
1971 /* Special case so the remaining code always gets a non-empty bytes */
1972 if (PyBytes_GET_SIZE(self) == 0) {
1973 return return_self(self);
1974 }
1975
1976 if (to_len == 0) {
1977 /* delete all occurances of 'from' bytes */
1978 if (from_len == 1) {
1979 return replace_delete_single_character(
1980 self, from_s[0], maxcount);
1981 } else {
1982 return replace_delete_substring(self, from_s, from_len, maxcount);
1983 }
1984 }
1985
1986 /* Handle special case where both bytes have the same length */
1987
1988 if (from_len == to_len) {
1989 if (from_len == 1) {
1990 return replace_single_character_in_place(
1991 self,
1992 from_s[0],
1993 to_s[0],
1994 maxcount);
1995 } else {
1996 return replace_substring_in_place(
1997 self, from_s, from_len, to_s, to_len, maxcount);
1998 }
1999 }
2000
2001 /* Otherwise use the more generic algorithms */
2002 if (from_len == 1) {
2003 return replace_single_character(self, from_s[0],
2004 to_s, to_len, maxcount);
2005 } else {
2006 /* len('from')>=2, len('to')>=1 */
2007 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2008 }
2009}
2010
2011PyDoc_STRVAR(replace__doc__,
2012"B.replace (old, new[, count]) -> bytes\n\
2013\n\
2014Return a copy of bytes B with all occurrences of subsection\n\
2015old replaced by new. If the optional argument count is\n\
2016given, only the first count occurrences are replaced.");
2017
2018static PyObject *
2019bytes_replace(PyBytesObject *self, PyObject *args)
2020{
2021 Py_ssize_t count = -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002022 PyObject *from, *to, *res;
Neal Norwitz6968b052007-02-27 19:02:19 +00002023 const char *from_s, *to_s;
2024 Py_ssize_t from_len, to_len;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002025 int relfrom=0, relto=0;
2026 PyBuffer vfrom, vto;
Neal Norwitz6968b052007-02-27 19:02:19 +00002027
2028 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2029 return NULL;
2030
2031 if (PyBytes_Check(from)) {
2032 from_s = PyBytes_AS_STRING(from);
2033 from_len = PyBytes_GET_SIZE(from);
2034 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002035 else {
2036 if (PyObject_GetBuffer(from, &vfrom, PyBUF_CHARACTER) < 0)
2037 return NULL;
2038 from_s = vfrom.buf;
2039 from_len = vfrom.len;
2040 relfrom = 1;
2041 }
Neal Norwitz6968b052007-02-27 19:02:19 +00002042
2043 if (PyBytes_Check(to)) {
2044 to_s = PyBytes_AS_STRING(to);
2045 to_len = PyBytes_GET_SIZE(to);
2046 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002047 else {
2048 if (PyObject_GetBuffer(to, &vto, PyBUF_CHARACTER) < 0) {
2049 if (relfrom)
2050 PyObject_ReleaseBuffer(from, &vfrom);
2051 return NULL;
2052 }
2053 to_s = vto.buf;
2054 to_len = vto.len;
2055 relto = 1;
2056 }
Neal Norwitz6968b052007-02-27 19:02:19 +00002057
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002058 res = (PyObject *)replace((PyBytesObject *) self,
2059 from_s, from_len,
2060 to_s, to_len, count);
2061
2062 if (relfrom)
2063 PyObject_ReleaseBuffer(from, &vfrom);
2064 if (relto)
2065 PyObject_ReleaseBuffer(to, &vto);
2066 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00002067}
2068
2069
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* The two macros below expect the caller to provide the locals `str` and
   `list` and an `onError` label; SPLIT_ADD additionally uses `count`.
   They are wrapped in do { } while (0) so that each use behaves as a
   single statement and must be terminated with a semicolon. */

/* Append the slice data[left:right] to `list` as a new bytes object. */
#define SPLIT_APPEND(data, left, right)                         \
    do {                                                        \
        str = PyBytes_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
        if (str == NULL)                                        \
            goto onError;                                       \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        Py_DECREF(str);                                         \
    } while (0)

/* Store the slice data[left:right] in `list`: directly into slot `count`
   while count < MAX_PREALLOC, by appending afterwards.  Increments
   `count`. */
#define SPLIT_ADD(data, left, right)                            \
    do {                                                        \
        str = PyBytes_FromStringAndSize((data) + (left),        \
                                        (right) - (left));      \
        if (str == NULL)                                        \
            goto onError;                                       \
        if (count < MAX_PREALLOC) {                             \
            PyList_SET_ITEM(list, count, str);                  \
        } else {                                                \
            if (PyList_Append(list, str)) {                     \
                Py_DECREF(str);                                 \
                goto onError;                                   \
            }                                                   \
            Py_DECREF(str);                                     \
        }                                                       \
        count++;                                                \
    } while (0)

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count
Neal Norwitz6968b052007-02-27 19:02:19 +00002114
2115
2116Py_LOCAL_INLINE(PyObject *)
2117split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2118{
2119 register Py_ssize_t i, j, count=0;
2120 PyObject *str;
2121 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2122
2123 if (list == NULL)
2124 return NULL;
2125
2126 i = j = 0;
2127 while ((j < len) && (maxcount-- > 0)) {
2128 for(; j<len; j++) {
2129 /* I found that using memchr makes no difference */
2130 if (s[j] == ch) {
2131 SPLIT_ADD(s, i, j);
2132 i = j = j + 1;
2133 break;
2134 }
2135 }
2136 }
2137 if (i <= len) {
2138 SPLIT_ADD(s, i, len);
2139 }
2140 FIX_PREALLOC_SIZE(list);
2141 return list;
2142
2143 onError:
2144 Py_DECREF(list);
2145 return NULL;
2146}
2147
2148PyDoc_STRVAR(split__doc__,
2149"B.split(sep [,maxsplit]) -> list of bytes\n\
2150\n\
2151Return a list of the bytes in the string B, using sep as the\n\
2152delimiter. If maxsplit is given, at most maxsplit\n\
2153splits are done.");
2154
2155static PyObject *
2156bytes_split(PyBytesObject *self, PyObject *args)
2157{
2158 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2159 Py_ssize_t maxsplit = -1, count=0;
2160 const char *s = PyBytes_AS_STRING(self), *sub;
2161 PyObject *list, *str, *subobj;
2162#ifdef USE_FAST
2163 Py_ssize_t pos;
2164#endif
2165
2166 if (!PyArg_ParseTuple(args, "O|n:split", &subobj, &maxsplit))
2167 return NULL;
2168 if (maxsplit < 0)
2169 maxsplit = PY_SSIZE_T_MAX;
2170 if (PyBytes_Check(subobj)) {
2171 sub = PyBytes_AS_STRING(subobj);
2172 n = PyBytes_GET_SIZE(subobj);
2173 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002174 /* XXX -> use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00002175 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2176 return NULL;
2177
2178 if (n == 0) {
2179 PyErr_SetString(PyExc_ValueError, "empty separator");
2180 return NULL;
2181 }
2182 else if (n == 1)
2183 return split_char(s, len, sub[0], maxsplit);
2184
2185 list = PyList_New(PREALLOC_SIZE(maxsplit));
2186 if (list == NULL)
2187 return NULL;
2188
2189#ifdef USE_FAST
2190 i = j = 0;
2191 while (maxsplit-- > 0) {
2192 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2193 if (pos < 0)
2194 break;
2195 j = i+pos;
2196 SPLIT_ADD(s, i, j);
2197 i = j + n;
2198 }
2199#else
2200 i = j = 0;
2201 while ((j+n <= len) && (maxsplit-- > 0)) {
2202 for (; j+n <= len; j++) {
2203 if (Py_STRING_MATCH(s, j, sub, n)) {
2204 SPLIT_ADD(s, i, j);
2205 i = j = j + n;
2206 break;
2207 }
2208 }
2209 }
2210#endif
2211 SPLIT_ADD(s, i, len);
2212 FIX_PREALLOC_SIZE(list);
2213 return list;
2214
2215 onError:
2216 Py_DECREF(list);
2217 return NULL;
2218}
2219
2220PyDoc_STRVAR(partition__doc__,
2221"B.partition(sep) -> (head, sep, tail)\n\
2222\n\
2223Searches for the separator sep in B, and returns the part before it,\n\
2224the separator itself, and the part after it. If the separator is not\n\
2225found, returns B and two empty bytes.");
2226
2227static PyObject *
2228bytes_partition(PyBytesObject *self, PyObject *sep_obj)
2229{
2230 PyObject *bytesep, *result;
2231
2232 bytesep = PyBytes_FromObject(sep_obj);
2233 if (! bytesep)
2234 return NULL;
2235
2236 result = stringlib_partition(
2237 (PyObject*) self,
2238 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002239 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002240 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2241 );
2242
2243 Py_DECREF(bytesep);
2244 return result;
2245}
2246
2247PyDoc_STRVAR(rpartition__doc__,
2248"B.rpartition(sep) -> (tail, sep, head)\n\
2249\n\
2250Searches for the separator sep in B, starting at the end of B, and returns\n\
2251the part before it, the separator itself, and the part after it. If the\n\
2252separator is not found, returns two empty bytes and B.");
2253
2254static PyObject *
2255bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
2256{
2257 PyObject *bytesep, *result;
2258
2259 bytesep = PyBytes_FromObject(sep_obj);
2260 if (! bytesep)
2261 return NULL;
2262
2263 result = stringlib_rpartition(
2264 (PyObject*) self,
2265 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002266 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002267 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2268 );
2269
2270 Py_DECREF(bytesep);
2271 return result;
2272}
2273
2274Py_LOCAL_INLINE(PyObject *)
2275rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2276{
2277 register Py_ssize_t i, j, count=0;
2278 PyObject *str;
2279 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2280
2281 if (list == NULL)
2282 return NULL;
2283
2284 i = j = len - 1;
2285 while ((i >= 0) && (maxcount-- > 0)) {
2286 for (; i >= 0; i--) {
2287 if (s[i] == ch) {
2288 SPLIT_ADD(s, i + 1, j + 1);
2289 j = i = i - 1;
2290 break;
2291 }
2292 }
2293 }
2294 if (j >= -1) {
2295 SPLIT_ADD(s, 0, j + 1);
2296 }
2297 FIX_PREALLOC_SIZE(list);
2298 if (PyList_Reverse(list) < 0)
2299 goto onError;
2300
2301 return list;
2302
2303 onError:
2304 Py_DECREF(list);
2305 return NULL;
2306}
2307
2308PyDoc_STRVAR(rsplit__doc__,
2309"B.rsplit(sep [,maxsplit]) -> list of bytes\n\
2310\n\
2311Return a list of the sections in the byte B, using sep as the\n\
2312delimiter, starting at the end of the bytes and working\n\
2313to the front. If maxsplit is given, at most maxsplit splits are\n\
2314done.");
2315
2316static PyObject *
2317bytes_rsplit(PyBytesObject *self, PyObject *args)
2318{
2319 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
2320 Py_ssize_t maxsplit = -1, count=0;
2321 const char *s = PyBytes_AS_STRING(self), *sub;
2322 PyObject *list, *str, *subobj;
2323
2324 if (!PyArg_ParseTuple(args, "O|n:rsplit", &subobj, &maxsplit))
2325 return NULL;
2326 if (maxsplit < 0)
2327 maxsplit = PY_SSIZE_T_MAX;
2328 if (PyBytes_Check(subobj)) {
2329 sub = PyBytes_AS_STRING(subobj);
2330 n = PyBytes_GET_SIZE(subobj);
2331 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002332 /* XXX -> Use the modern buffer interface */
Neal Norwitz6968b052007-02-27 19:02:19 +00002333 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2334 return NULL;
2335
2336 if (n == 0) {
2337 PyErr_SetString(PyExc_ValueError, "empty separator");
2338 return NULL;
2339 }
2340 else if (n == 1)
2341 return rsplit_char(s, len, sub[0], maxsplit);
2342
2343 list = PyList_New(PREALLOC_SIZE(maxsplit));
2344 if (list == NULL)
2345 return NULL;
2346
2347 j = len;
2348 i = j - n;
2349
2350 while ( (i >= 0) && (maxsplit-- > 0) ) {
2351 for (; i>=0; i--) {
2352 if (Py_STRING_MATCH(s, i, sub, n)) {
2353 SPLIT_ADD(s, i + n, j);
2354 j = i;
2355 i -= n;
2356 break;
2357 }
2358 }
2359 }
2360 SPLIT_ADD(s, 0, j);
2361 FIX_PREALLOC_SIZE(list);
2362 if (PyList_Reverse(list) < 0)
2363 goto onError;
2364 return list;
2365
2366onError:
2367 Py_DECREF(list);
2368 return NULL;
2369}
2370
2371PyDoc_STRVAR(extend__doc__,
2372"B.extend(iterable int) -> None\n\
2373\n\
2374Append all the elements from the iterator or sequence to the\n\
2375end of the bytes.");
2376static PyObject *
2377bytes_extend(PyBytesObject *self, PyObject *arg)
2378{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002379 if (bytes_setslice(self, Py_Size(self), Py_Size(self), arg) == -1)
Neal Norwitz6968b052007-02-27 19:02:19 +00002380 return NULL;
2381 Py_RETURN_NONE;
2382}
2383
2384
2385PyDoc_STRVAR(reverse__doc__,
2386"B.reverse() -> None\n\
2387\n\
2388Reverse the order of the values in bytes in place.");
2389static PyObject *
2390bytes_reverse(PyBytesObject *self, PyObject *unused)
2391{
2392 char swap, *head, *tail;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002393 Py_ssize_t i, j, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002394
2395 j = n / 2;
2396 head = self->ob_bytes;
2397 tail = head + n - 1;
2398 for (i = 0; i < j; i++) {
2399 swap = *head;
2400 *head++ = *tail;
2401 *tail-- = swap;
2402 }
2403
2404 Py_RETURN_NONE;
2405}
2406
2407PyDoc_STRVAR(insert__doc__,
2408"B.insert(index, int) -> None\n\
2409\n\
2410Insert a single item into the bytes before the given index.");
2411static PyObject *
2412bytes_insert(PyBytesObject *self, PyObject *args)
2413{
2414 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002415 Py_ssize_t where, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002416
2417 if (!PyArg_ParseTuple(args, "ni:insert", &where, &value))
2418 return NULL;
2419
2420 if (n == PY_SSIZE_T_MAX) {
2421 PyErr_SetString(PyExc_OverflowError,
2422 "cannot add more objects to bytes");
2423 return NULL;
2424 }
2425 if (value < 0 || value >= 256) {
2426 PyErr_SetString(PyExc_ValueError,
2427 "byte must be in range(0, 256)");
2428 return NULL;
2429 }
2430 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2431 return NULL;
2432
2433 if (where < 0) {
2434 where += n;
2435 if (where < 0)
2436 where = 0;
2437 }
2438 if (where > n)
2439 where = n;
Guido van Rossum4fc8ae42007-02-27 20:57:45 +00002440 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
Neal Norwitz6968b052007-02-27 19:02:19 +00002441 self->ob_bytes[where] = value;
2442
2443 Py_RETURN_NONE;
2444}
2445
2446PyDoc_STRVAR(append__doc__,
2447"B.append(int) -> None\n\
2448\n\
2449Append a single item to the end of the bytes.");
2450static PyObject *
2451bytes_append(PyBytesObject *self, PyObject *arg)
2452{
2453 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002454 Py_ssize_t n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002455
2456 if (! _getbytevalue(arg, &value))
2457 return NULL;
2458 if (n == PY_SSIZE_T_MAX) {
2459 PyErr_SetString(PyExc_OverflowError,
2460 "cannot add more objects to bytes");
2461 return NULL;
2462 }
2463 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2464 return NULL;
2465
2466 self->ob_bytes[n] = value;
2467
2468 Py_RETURN_NONE;
2469}
2470
2471PyDoc_STRVAR(pop__doc__,
2472"B.pop([index]) -> int\n\
2473\n\
2474Remove and return a single item from the bytes. If no index\n\
2475argument is give, will pop the last value.");
2476static PyObject *
2477bytes_pop(PyBytesObject *self, PyObject *args)
2478{
2479 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002480 Py_ssize_t where = -1, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002481
2482 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2483 return NULL;
2484
2485 if (n == 0) {
2486 PyErr_SetString(PyExc_OverflowError,
2487 "cannot pop an empty bytes");
2488 return NULL;
2489 }
2490 if (where < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002491 where += Py_Size(self);
2492 if (where < 0 || where >= Py_Size(self)) {
Neal Norwitz6968b052007-02-27 19:02:19 +00002493 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2494 return NULL;
2495 }
2496
2497 value = self->ob_bytes[where];
2498 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2499 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2500 return NULL;
2501
2502 return PyInt_FromLong(value);
2503}
2504
2505PyDoc_STRVAR(remove__doc__,
2506"B.remove(int) -> None\n\
2507\n\
2508Remove the first occurance of a value in bytes");
2509static PyObject *
2510bytes_remove(PyBytesObject *self, PyObject *arg)
2511{
2512 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002513 Py_ssize_t where, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002514
2515 if (! _getbytevalue(arg, &value))
2516 return NULL;
2517
2518 for (where = 0; where < n; where++) {
2519 if (self->ob_bytes[where] == value)
2520 break;
2521 }
2522 if (where == n) {
2523 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2524 return NULL;
2525 }
2526
2527 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2528 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2529 return NULL;
2530
2531 Py_RETURN_NONE;
2532}
2533
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002534/* XXX These two helpers could be optimized if argsize == 1 */
2535
2536Py_ssize_t
2537lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2538 void *argptr, Py_ssize_t argsize)
2539{
2540 Py_ssize_t i = 0;
2541 while (i < mysize && memchr(argptr, myptr[i], argsize))
2542 i++;
2543 return i;
2544}
2545
2546Py_ssize_t
2547rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2548 void *argptr, Py_ssize_t argsize)
2549{
2550 Py_ssize_t i = mysize - 1;
2551 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2552 i--;
2553 return i + 1;
2554}
2555
2556PyDoc_STRVAR(strip__doc__,
2557"B.strip(bytes) -> bytes\n\
2558\n\
2559Strip leading and trailing bytes contained in the argument.");
2560static PyObject *
2561bytes_strip(PyBytesObject *self, PyObject *arg)
2562{
2563 Py_ssize_t left, right, mysize, argsize;
2564 void *myptr, *argptr;
2565 if (arg == NULL || !PyBytes_Check(arg)) {
2566 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2567 return NULL;
2568 }
2569 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002570 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002571 argptr = ((PyBytesObject *)arg)->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002572 argsize = Py_Size(arg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002573 left = lstrip_helper(myptr, mysize, argptr, argsize);
Guido van Rossumeb29e9a2007-08-08 21:55:33 +00002574 if (left == mysize)
2575 right = left;
2576 else
2577 right = rstrip_helper(myptr, mysize, argptr, argsize);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002578 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2579}
2580
2581PyDoc_STRVAR(lstrip__doc__,
2582"B.lstrip(bytes) -> bytes\n\
2583\n\
2584Strip leading bytes contained in the argument.");
2585static PyObject *
2586bytes_lstrip(PyBytesObject *self, PyObject *arg)
2587{
2588 Py_ssize_t left, right, mysize, argsize;
2589 void *myptr, *argptr;
2590 if (arg == NULL || !PyBytes_Check(arg)) {
2591 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2592 return NULL;
2593 }
2594 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002595 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002596 argptr = ((PyBytesObject *)arg)->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002597 argsize = Py_Size(arg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002598 left = lstrip_helper(myptr, mysize, argptr, argsize);
2599 right = mysize;
2600 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2601}
2602
2603PyDoc_STRVAR(rstrip__doc__,
2604"B.rstrip(bytes) -> bytes\n\
2605\n\
2606Strip trailing bytes contained in the argument.");
2607static PyObject *
2608bytes_rstrip(PyBytesObject *self, PyObject *arg)
2609{
2610 Py_ssize_t left, right, mysize, argsize;
2611 void *myptr, *argptr;
2612 if (arg == NULL || !PyBytes_Check(arg)) {
2613 PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
2614 return NULL;
2615 }
2616 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002617 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002618 argptr = ((PyBytesObject *)arg)->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002619 argsize = Py_Size(arg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002620 left = 0;
2621 right = rstrip_helper(myptr, mysize, argptr, argsize);
2622 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2623}
Neal Norwitz6968b052007-02-27 19:02:19 +00002624
Guido van Rossumd624f182006-04-24 13:47:05 +00002625PyDoc_STRVAR(decode_doc,
2626"B.decode([encoding[,errors]]) -> unicode obect.\n\
2627\n\
2628Decodes B using the codec registered for encoding. encoding defaults\n\
2629to the default encoding. errors may be given to set a different error\n\
2630handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2631a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2632as well as any other name registerd with codecs.register_error that is\n\
2633able to handle UnicodeDecodeErrors.");
2634
2635static PyObject *
2636bytes_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002637{
Guido van Rossumd624f182006-04-24 13:47:05 +00002638 const char *encoding = NULL;
2639 const char *errors = NULL;
2640
2641 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2642 return NULL;
2643 if (encoding == NULL)
2644 encoding = PyUnicode_GetDefaultEncoding();
2645 return PyCodec_Decode(self, encoding, errors);
2646}
2647
Guido van Rossuma0867f72006-05-05 04:34:18 +00002648PyDoc_STRVAR(alloc_doc,
2649"B.__alloc__() -> int\n\
2650\n\
2651Returns the number of bytes actually allocated.");
2652
2653static PyObject *
2654bytes_alloc(PyBytesObject *self)
2655{
2656 return PyInt_FromSsize_t(self->ob_alloc);
2657}
2658
Guido van Rossum20188312006-05-05 15:15:40 +00002659PyDoc_STRVAR(join_doc,
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002660"B.join(iterable_of_bytes) -> bytes\n\
Guido van Rossum20188312006-05-05 15:15:40 +00002661\n\
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002662Concatenates any number of bytes objects, with B in between each pair.\n\
2663Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
Guido van Rossum20188312006-05-05 15:15:40 +00002664
2665static PyObject *
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002666bytes_join(PyBytesObject *self, PyObject *it)
Guido van Rossum20188312006-05-05 15:15:40 +00002667{
2668 PyObject *seq;
Martin v. Löwis5d7428b2007-07-21 18:47:48 +00002669 Py_ssize_t mysize = Py_Size(self);
Guido van Rossum20188312006-05-05 15:15:40 +00002670 Py_ssize_t i;
2671 Py_ssize_t n;
2672 PyObject **items;
2673 Py_ssize_t totalsize = 0;
2674 PyObject *result;
2675 char *dest;
2676
2677 seq = PySequence_Fast(it, "can only join an iterable");
2678 if (seq == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002679 return NULL;
Guido van Rossum20188312006-05-05 15:15:40 +00002680 n = PySequence_Fast_GET_SIZE(seq);
2681 items = PySequence_Fast_ITEMS(seq);
2682
2683 /* Compute the total size, and check that they are all bytes */
2684 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002685 PyObject *obj = items[i];
2686 if (!PyBytes_Check(obj)) {
2687 PyErr_Format(PyExc_TypeError,
2688 "can only join an iterable of bytes "
2689 "(item %ld has type '%.100s')",
Guido van Rossum3cf5b1e2006-07-27 21:53:35 +00002690 /* XXX %ld isn't right on Win64 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002691 (long)i, Py_Type(obj)->tp_name);
Georg Brandlb3f568f2007-02-27 08:49:18 +00002692 goto error;
2693 }
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002694 if (i > 0)
2695 totalsize += mysize;
Georg Brandlb3f568f2007-02-27 08:49:18 +00002696 totalsize += PyBytes_GET_SIZE(obj);
2697 if (totalsize < 0) {
2698 PyErr_NoMemory();
2699 goto error;
2700 }
Guido van Rossum20188312006-05-05 15:15:40 +00002701 }
2702
2703 /* Allocate the result, and copy the bytes */
2704 result = PyBytes_FromStringAndSize(NULL, totalsize);
2705 if (result == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002706 goto error;
Guido van Rossum20188312006-05-05 15:15:40 +00002707 dest = PyBytes_AS_STRING(result);
2708 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002709 PyObject *obj = items[i];
2710 Py_ssize_t size = PyBytes_GET_SIZE(obj);
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002711 if (i > 0) {
2712 memcpy(dest, self->ob_bytes, mysize);
2713 dest += mysize;
2714 }
Georg Brandlb3f568f2007-02-27 08:49:18 +00002715 memcpy(dest, PyBytes_AS_STRING(obj), size);
2716 dest += size;
Guido van Rossum20188312006-05-05 15:15:40 +00002717 }
2718
2719 /* Done */
2720 Py_DECREF(seq);
2721 return result;
2722
2723 /* Error handling */
2724 error:
2725 Py_DECREF(seq);
2726 return NULL;
2727}
2728
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002729PyDoc_STRVAR(fromhex_doc,
2730"bytes.fromhex(string) -> bytes\n\
2731\n\
2732Create a bytes object from a string of hexadecimal numbers.\n\
2733Spaces between two numbers are accepted. Example:\n\
2734bytes.fromhex('10 2030') -> bytes([0x10, 0x20, 0x30]).");
2735
/* Map one hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F') to its
   numeric value 0..15.  Returns -1 for any other character. */
static int
hex_digit_to_int(int c)
{
    int lowered;

    if (isdigit(c))
        return c - '0';

    /* fold upper case onto lower case, then range-check the letter */
    lowered = isupper(c) ? tolower(c) : c;
    if (lowered < 'a' || lowered > 'f')
        return -1;
    return lowered - 'a' + 10;
}
2749
2750static PyObject *
2751bytes_fromhex(PyObject *cls, PyObject *args)
2752{
2753 PyObject *newbytes;
2754 char *hex, *buf;
2755 Py_ssize_t len, byteslen, i, j;
2756 int top, bot;
2757
2758 if (!PyArg_ParseTuple(args, "s#:fromhex", &hex, &len))
2759 return NULL;
2760
2761 byteslen = len / 2; /* max length if there are no spaces */
2762
2763 newbytes = PyBytes_FromStringAndSize(NULL, byteslen);
2764 if (!newbytes)
2765 return NULL;
2766 buf = PyBytes_AS_STRING(newbytes);
2767
Guido van Rossum4355a472007-05-04 05:00:04 +00002768 for (i = j = 0; i < len; i += 2) {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002769 /* skip over spaces in the input */
2770 while (Py_CHARMASK(hex[i]) == ' ')
2771 i++;
2772 if (i >= len)
2773 break;
2774 top = hex_digit_to_int(Py_CHARMASK(hex[i]));
2775 bot = hex_digit_to_int(Py_CHARMASK(hex[i+1]));
2776 if (top == -1 || bot == -1) {
2777 PyErr_Format(PyExc_ValueError,
2778 "non-hexadecimal number string '%c%c' found in "
2779 "fromhex() arg at position %zd",
2780 hex[i], hex[i+1], i);
2781 goto error;
2782 }
2783 buf[j++] = (top << 4) + bot;
2784 }
2785 if (PyBytes_Resize(newbytes, j) < 0)
2786 goto error;
2787 return newbytes;
2788
2789 error:
2790 Py_DECREF(newbytes);
2791 return NULL;
2792}
2793
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002794PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2795
2796static PyObject *
2797bytes_reduce(PyBytesObject *self)
2798{
Martin v. Löwis9c121062007-08-05 20:26:11 +00002799 PyObject *latin1;
2800 if (self->ob_bytes)
2801 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
2802 Py_Size(self), NULL);
2803 else
2804 latin1 = PyUnicode_FromString("");
2805 return Py_BuildValue("(O(Ns))", Py_Type(self), latin1, "latin-1");
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002806}
2807
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002808static PySequenceMethods bytes_as_sequence = {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002809 (lenfunc)bytes_length, /* sq_length */
2810 (binaryfunc)bytes_concat, /* sq_concat */
2811 (ssizeargfunc)bytes_repeat, /* sq_repeat */
2812 (ssizeargfunc)bytes_getitem, /* sq_item */
2813 0, /* sq_slice */
2814 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */
2815 0, /* sq_ass_slice */
Guido van Rossumd624f182006-04-24 13:47:05 +00002816 (objobjproc)bytes_contains, /* sq_contains */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002817 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */
2818 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002819};
2820
/* Mapping-protocol slot table for the bytes type.
 *
 * Positional initializer: the three entries fill PyMappingMethods in
 * declaration order.  The length slot reuses the same bytes_length function
 * that the sequence table installs.
 */
static PyMappingMethods bytes_as_mapping = {
    (lenfunc)bytes_length,              /* mp_length */
    (binaryfunc)bytes_subscript,        /* mp_subscript */
    (objobjargproc)bytes_ass_subscript, /* mp_ass_subscript */
};
2826
/* Buffer-protocol slot table for the bytes type.
 *
 * Positional initializer: the acquire function comes first, the release
 * function second.  bytes_getbuffer (defined near the top of this file)
 * increments ob_exports each time it succeeds.
 */
static PyBufferProcs bytes_as_buffer = {
    (getbufferproc)bytes_getbuffer,         /* bf_getbuffer */
    (releasebufferproc)bytes_releasebuffer, /* bf_releasebuffer */
};
2831
/* Method table for the bytes type, installed through tp_methods.
 *
 * Each entry is {python name, C function, calling-convention flags,
 * docstring}.  The flags must match the C function's signature:
 * METH_VARARGS functions receive an argument tuple, METH_O functions receive
 * exactly one object, and METH_NOARGS functions receive no arguments.
 * "fromhex" is the only entry that also carries METH_CLASS, making it a
 * class method.
 */
static PyMethodDef
bytes_methods[] = {
    {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
    {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
    {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
     startswith__doc__},
    {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
    {"translate", (PyCFunction)bytes_translate, METH_VARARGS, translate__doc__},
    {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
    {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
    {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
    {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
    {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
    {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
    {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
    {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
    {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
    {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
    {"strip", (PyCFunction)bytes_strip, METH_O, strip__doc__},
    {"lstrip", (PyCFunction)bytes_lstrip, METH_O, lstrip__doc__},
    {"rstrip", (PyCFunction)bytes_rstrip, METH_O, rstrip__doc__},
    {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
    {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
    {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
     fromhex_doc},
    {"join", (PyCFunction)bytes_join, METH_O, join_doc},
    {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
    {NULL}  /* sentinel: marks the end of the table */
};
2865
2866PyDoc_STRVAR(bytes_doc,
2867"bytes([iterable]) -> new array of bytes.\n\
2868\n\
2869If an argument is given it must be an iterable yielding ints in range(256).");
2870
/* Type object for bytes.
 *
 * The initializer is positional, so every entry must stay in the order in
 * which PyTypeObject declares its fields; a 0 leaves the slot unset.  The
 * protocol tables and the method table wired in below are the static
 * definitions that precede this one in the file.
 */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                            /* tp_name */
    sizeof(PyBytesObject),              /* tp_basicsize */
    0,                                  /* tp_itemsize */
    (destructor)bytes_dealloc,          /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    (reprfunc)bytes_repr,               /* tp_repr */
    0,                                  /* tp_as_number */
    &bytes_as_sequence,                 /* tp_as_sequence */
    &bytes_as_mapping,                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    (reprfunc)bytes_str,                /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    &bytes_as_buffer,                   /* tp_as_buffer */
    /* bytes is 'final' or 'sealed': only Py_TPFLAGS_DEFAULT is set, with
       no Py_TPFLAGS_BASETYPE alongside it */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    bytes_doc,                          /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    (richcmpfunc)bytes_richcompare,     /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    bytes_methods,                      /* tp_methods */
    0,                                  /* tp_members */
    0,                                  /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    (initproc)bytes_init,               /* tp_init */
    PyType_GenericAlloc,                /* tp_alloc */
    PyType_GenericNew,                  /* tp_new */
    PyObject_Del,                       /* tp_free */
};