blob: 18d0f57f8809872967d17cfe0592bf52ca2d2692 [file] [log] [blame]
/* Bytes object implementation */
2
/* XXX TODO: optimizations */
4
5#define PY_SSIZE_T_CLEAN
6#include "Python.h"
Guido van Rossuma0867f72006-05-05 04:34:18 +00007#include "structmember.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00008#include "bytes_methods.h"
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00009
Neal Norwitz6968b052007-02-27 19:02:19 +000010static PyBytesObject *nullbytes = NULL;
11
12void
13PyBytes_Fini(void)
14{
15 Py_CLEAR(nullbytes);
16}
17
18int
19PyBytes_Init(void)
20{
21 nullbytes = PyObject_New(PyBytesObject, &PyBytes_Type);
22 if (nullbytes == NULL)
23 return 0;
24 nullbytes->ob_bytes = NULL;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +000025 Py_Size(nullbytes) = nullbytes->ob_alloc = 0;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000026 nullbytes->ob_exports = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +000027 return 1;
28}
29
/* end nullbytes support */
31
/* Helpers */
33
34static int
35_getbytevalue(PyObject* arg, int *value)
Neal Norwitz6968b052007-02-27 19:02:19 +000036{
Gregory P. Smith60d241f2007-10-16 06:31:30 +000037 long face_value;
38
39 if (PyInt_Check(arg)) {
40 face_value = PyInt_AsLong(arg);
41 if (face_value < 0 || face_value >= 256) {
42 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
43 return 0;
44 }
45 } else {
46 PyErr_Format(PyExc_TypeError, "an integer is required");
Neal Norwitz6968b052007-02-27 19:02:19 +000047 return 0;
48 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +000049
50 *value = face_value;
Neal Norwitz6968b052007-02-27 19:02:19 +000051 return 1;
52}
53
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000054static int
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000055bytes_getbuffer(PyBytesObject *obj, Py_buffer *view, int flags)
Guido van Rossum75d38e92007-08-24 17:33:11 +000056{
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000057 int ret;
58 void *ptr;
59 if (view == NULL) {
60 obj->ob_exports++;
61 return 0;
62 }
Guido van Rossum75d38e92007-08-24 17:33:11 +000063 if (obj->ob_bytes == NULL)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000064 ptr = "";
65 else
66 ptr = obj->ob_bytes;
67 ret = PyBuffer_FillInfo(view, ptr, Py_Size(obj), 0, flags);
68 if (ret >= 0) {
69 obj->ob_exports++;
70 }
71 return ret;
72}
73
74static void
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000075bytes_releasebuffer(PyBytesObject *obj, Py_buffer *view)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000076{
77 obj->ob_exports--;
78}
79
Neal Norwitz2bad9702007-08-27 06:19:22 +000080static Py_ssize_t
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +000081_getbuffer(PyObject *obj, Py_buffer *view)
Guido van Rossumad7d8d12007-04-13 01:39:34 +000082{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +000083 PyBufferProcs *buffer = Py_Type(obj)->tp_as_buffer;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000084
Gregory P. Smith60d241f2007-10-16 06:31:30 +000085 if (buffer == NULL || buffer->bf_getbuffer == NULL)
Guido van Rossuma74184e2007-08-29 04:05:57 +000086 {
87 PyErr_Format(PyExc_TypeError,
88 "Type %.100s doesn't support the buffer API",
89 Py_Type(obj)->tp_name);
90 return -1;
91 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +000092
Travis E. Oliphantb99f7622007-08-18 11:21:56 +000093 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
94 return -1;
95 return view->len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +000096}
97
/* Direct API functions */
99
100PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000101PyBytes_FromObject(PyObject *input)
102{
103 return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
104 input, NULL);
105}
106
107PyObject *
108PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000109{
110 PyBytesObject *new;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000111 int alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000112
Guido van Rossumd624f182006-04-24 13:47:05 +0000113 assert(size >= 0);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000114
115 new = PyObject_New(PyBytesObject, &PyBytes_Type);
116 if (new == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000117 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000118
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000119 if (size == 0) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000120 new->ob_bytes = NULL;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000121 alloc = 0;
122 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000123 else {
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000124 alloc = size + 1;
125 new->ob_bytes = PyMem_Malloc(alloc);
Guido van Rossumd624f182006-04-24 13:47:05 +0000126 if (new->ob_bytes == NULL) {
127 Py_DECREF(new);
Neal Norwitz16596dd2007-08-30 05:44:54 +0000128 return PyErr_NoMemory();
Guido van Rossumd624f182006-04-24 13:47:05 +0000129 }
130 if (bytes != NULL)
131 memcpy(new->ob_bytes, bytes, size);
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000132 new->ob_bytes[size] = '\0'; /* Trailing null byte */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000133 }
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000134 Py_Size(new) = size;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000135 new->ob_alloc = alloc;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000136 new->ob_exports = 0;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000137
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000138 return (PyObject *)new;
139}
140
141Py_ssize_t
142PyBytes_Size(PyObject *self)
143{
144 assert(self != NULL);
145 assert(PyBytes_Check(self));
146
Guido van Rossum20188312006-05-05 15:15:40 +0000147 return PyBytes_GET_SIZE(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000148}
149
150char *
151PyBytes_AsString(PyObject *self)
152{
153 assert(self != NULL);
154 assert(PyBytes_Check(self));
155
Guido van Rossum20188312006-05-05 15:15:40 +0000156 return PyBytes_AS_STRING(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000157}
158
159int
160PyBytes_Resize(PyObject *self, Py_ssize_t size)
161{
162 void *sval;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000163 Py_ssize_t alloc = ((PyBytesObject *)self)->ob_alloc;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000164
165 assert(self != NULL);
166 assert(PyBytes_Check(self));
167 assert(size >= 0);
168
Guido van Rossuma0867f72006-05-05 04:34:18 +0000169 if (size < alloc / 2) {
170 /* Major downsize; resize down to exact size */
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000171 alloc = size + 1;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000172 }
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000173 else if (size < alloc) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000174 /* Within allocated size; quick exit */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000175 Py_Size(self) = size;
Guido van Rossuma74184e2007-08-29 04:05:57 +0000176 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
Guido van Rossuma0867f72006-05-05 04:34:18 +0000177 return 0;
178 }
179 else if (size <= alloc * 1.125) {
180 /* Moderate upsize; overallocate similar to list_resize() */
181 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
182 }
183 else {
184 /* Major upsize; resize up to exact size */
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000185 alloc = size + 1;
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000186 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000187
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000188 if (((PyBytesObject *)self)->ob_exports > 0) {
189 /*
Guido van Rossuma74184e2007-08-29 04:05:57 +0000190 fprintf(stderr, "%d: %s", ((PyBytesObject *)self)->ob_exports,
191 ((PyBytesObject *)self)->ob_bytes);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000192 */
193 PyErr_SetString(PyExc_BufferError,
Guido van Rossuma74184e2007-08-29 04:05:57 +0000194 "Existing exports of data: object cannot be re-sized");
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000195 return -1;
196 }
197
Guido van Rossuma0867f72006-05-05 04:34:18 +0000198 sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000199 if (sval == NULL) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000200 PyErr_NoMemory();
201 return -1;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000202 }
203
Guido van Rossumd624f182006-04-24 13:47:05 +0000204 ((PyBytesObject *)self)->ob_bytes = sval;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000205 Py_Size(self) = size;
Guido van Rossuma0867f72006-05-05 04:34:18 +0000206 ((PyBytesObject *)self)->ob_alloc = alloc;
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000207 ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
208
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000209 return 0;
210}
211
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000212PyObject *
213PyBytes_Concat(PyObject *a, PyObject *b)
214{
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000215 Py_ssize_t size;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000216 Py_buffer va, vb;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000217 PyBytesObject *result;
218
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000219 va.len = -1;
220 vb.len = -1;
221 if (_getbuffer(a, &va) < 0 ||
222 _getbuffer(b, &vb) < 0) {
Guido van Rossum75d38e92007-08-24 17:33:11 +0000223 if (va.len != -1)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000224 PyObject_ReleaseBuffer(a, &va);
225 if (vb.len != -1)
226 PyObject_ReleaseBuffer(b, &vb);
227 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
228 Py_Type(a)->tp_name, Py_Type(b)->tp_name);
229 return NULL;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000230 }
231
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000232 size = va.len + vb.len;
233 if (size < 0) {
234 PyObject_ReleaseBuffer(a, &va);
235 PyObject_ReleaseBuffer(b, &vb);
236 return PyErr_NoMemory();
237 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000238
239 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
240 if (result != NULL) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000241 memcpy(result->ob_bytes, va.buf, va.len);
242 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000243 }
Guido van Rossum75d38e92007-08-24 17:33:11 +0000244
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000245 PyObject_ReleaseBuffer(a, &va);
246 PyObject_ReleaseBuffer(b, &vb);
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000247 return (PyObject *)result;
248}
249
/* Functions stuffed into the type object */
251
252static Py_ssize_t
253bytes_length(PyBytesObject *self)
254{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000255 return Py_Size(self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000256}
257
258static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000259bytes_concat(PyBytesObject *self, PyObject *other)
260{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000261 return PyBytes_Concat((PyObject *)self, other);
Guido van Rossumd624f182006-04-24 13:47:05 +0000262}
263
264static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000265bytes_iconcat(PyBytesObject *self, PyObject *other)
266{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000267 Py_ssize_t mysize;
Guido van Rossum13e57212006-04-27 22:54:26 +0000268 Py_ssize_t size;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000269 Py_buffer vo;
Guido van Rossum13e57212006-04-27 22:54:26 +0000270
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000271 if (_getbuffer(other, &vo) < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000272 PyErr_Format(PyExc_TypeError, "can't concat bytes to %.100s",
273 Py_Type(self)->tp_name);
274 return NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000275 }
276
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000277 mysize = Py_Size(self);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000278 size = mysize + vo.len;
279 if (size < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000280 PyObject_ReleaseBuffer(other, &vo);
281 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000282 }
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000283 if (size < self->ob_alloc) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000284 Py_Size(self) = size;
285 self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000286 }
287 else if (PyBytes_Resize((PyObject *)self, size) < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000288 PyObject_ReleaseBuffer(other, &vo);
289 return NULL;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000290 }
291 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
292 PyObject_ReleaseBuffer(other, &vo);
Guido van Rossum13e57212006-04-27 22:54:26 +0000293 Py_INCREF(self);
294 return (PyObject *)self;
295}
296
297static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000298bytes_repeat(PyBytesObject *self, Py_ssize_t count)
299{
300 PyBytesObject *result;
301 Py_ssize_t mysize;
302 Py_ssize_t size;
303
304 if (count < 0)
305 count = 0;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000306 mysize = Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000307 size = mysize * count;
308 if (count != 0 && size / count != mysize)
309 return PyErr_NoMemory();
Guido van Rossumf15a29f2007-05-04 00:41:39 +0000310 result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
Guido van Rossumd624f182006-04-24 13:47:05 +0000311 if (result != NULL && size != 0) {
312 if (mysize == 1)
313 memset(result->ob_bytes, self->ob_bytes[0], size);
314 else {
Guido van Rossum13e57212006-04-27 22:54:26 +0000315 Py_ssize_t i;
Guido van Rossumd624f182006-04-24 13:47:05 +0000316 for (i = 0; i < count; i++)
317 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
318 }
319 }
320 return (PyObject *)result;
321}
322
323static PyObject *
Guido van Rossum13e57212006-04-27 22:54:26 +0000324bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
325{
326 Py_ssize_t mysize;
327 Py_ssize_t size;
328
329 if (count < 0)
330 count = 0;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000331 mysize = Py_Size(self);
Guido van Rossum13e57212006-04-27 22:54:26 +0000332 size = mysize * count;
333 if (count != 0 && size / count != mysize)
334 return PyErr_NoMemory();
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000335 if (size < self->ob_alloc) {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000336 Py_Size(self) = size;
Guido van Rossuma74184e2007-08-29 04:05:57 +0000337 self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
Guido van Rossum6c1e6742007-05-04 04:27:16 +0000338 }
Guido van Rossuma0867f72006-05-05 04:34:18 +0000339 else if (PyBytes_Resize((PyObject *)self, size) < 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000340 return NULL;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000341
Guido van Rossum13e57212006-04-27 22:54:26 +0000342 if (mysize == 1)
343 memset(self->ob_bytes, self->ob_bytes[0], size);
344 else {
345 Py_ssize_t i;
346 for (i = 1; i < count; i++)
347 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
348 }
349
350 Py_INCREF(self);
351 return (PyObject *)self;
352}
353
354static int
355bytes_substring(PyBytesObject *self, PyBytesObject *other)
356{
357 Py_ssize_t i;
358
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000359 if (Py_Size(other) == 1) {
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000360 return memchr(self->ob_bytes, other->ob_bytes[0],
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000361 Py_Size(self)) != NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000362 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000363 if (Py_Size(other) == 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000364 return 1; /* Edge case */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000365 for (i = 0; i + Py_Size(other) <= Py_Size(self); i++) {
Guido van Rossum13e57212006-04-27 22:54:26 +0000366 /* XXX Yeah, yeah, lots of optimizations possible... */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000367 if (memcmp(self->ob_bytes + i, other->ob_bytes, Py_Size(other)) == 0)
Guido van Rossum13e57212006-04-27 22:54:26 +0000368 return 1;
369 }
370 return 0;
371}
372
373static int
374bytes_contains(PyBytesObject *self, PyObject *value)
375{
376 Py_ssize_t ival;
377
378 if (PyBytes_Check(value))
379 return bytes_substring(self, (PyBytesObject *)value);
380
Thomas Woutersd204a712006-08-22 13:41:17 +0000381 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossum13e57212006-04-27 22:54:26 +0000382 if (ival == -1 && PyErr_Occurred())
383 return -1;
Guido van Rossum13e57212006-04-27 22:54:26 +0000384 if (ival < 0 || ival >= 256) {
385 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
386 return -1;
387 }
388
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000389 return memchr(self->ob_bytes, ival, Py_Size(self)) != NULL;
Guido van Rossum13e57212006-04-27 22:54:26 +0000390}
391
392static PyObject *
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000393bytes_getitem(PyBytesObject *self, Py_ssize_t i)
394{
395 if (i < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000396 i += Py_Size(self);
397 if (i < 0 || i >= Py_Size(self)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000398 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
399 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000400 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000401 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
402}
403
404static PyObject *
Thomas Wouters376446d2006-12-19 08:30:14 +0000405bytes_subscript(PyBytesObject *self, PyObject *item)
Guido van Rossumd624f182006-04-24 13:47:05 +0000406{
Thomas Wouters376446d2006-12-19 08:30:14 +0000407 if (PyIndex_Check(item)) {
408 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000409
Thomas Wouters376446d2006-12-19 08:30:14 +0000410 if (i == -1 && PyErr_Occurred())
411 return NULL;
412
413 if (i < 0)
414 i += PyBytes_GET_SIZE(self);
415
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000416 if (i < 0 || i >= Py_Size(self)) {
Thomas Wouters376446d2006-12-19 08:30:14 +0000417 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
418 return NULL;
419 }
420 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
421 }
422 else if (PySlice_Check(item)) {
423 Py_ssize_t start, stop, step, slicelength, cur, i;
424 if (PySlice_GetIndicesEx((PySliceObject *)item,
425 PyBytes_GET_SIZE(self),
426 &start, &stop, &step, &slicelength) < 0) {
427 return NULL;
428 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000429
Thomas Wouters376446d2006-12-19 08:30:14 +0000430 if (slicelength <= 0)
431 return PyBytes_FromStringAndSize("", 0);
432 else if (step == 1) {
433 return PyBytes_FromStringAndSize(self->ob_bytes + start,
434 slicelength);
435 }
436 else {
437 char *source_buf = PyBytes_AS_STRING(self);
438 char *result_buf = (char *)PyMem_Malloc(slicelength);
439 PyObject *result;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000440
Thomas Wouters376446d2006-12-19 08:30:14 +0000441 if (result_buf == NULL)
442 return PyErr_NoMemory();
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000443
Thomas Wouters376446d2006-12-19 08:30:14 +0000444 for (cur = start, i = 0; i < slicelength;
445 cur += step, i++) {
446 result_buf[i] = source_buf[cur];
447 }
448 result = PyBytes_FromStringAndSize(result_buf, slicelength);
449 PyMem_Free(result_buf);
450 return result;
451 }
452 }
453 else {
454 PyErr_SetString(PyExc_TypeError, "bytes indices must be integers");
455 return NULL;
456 }
457}
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000458
Guido van Rossumd624f182006-04-24 13:47:05 +0000459static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000460bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
Guido van Rossumd624f182006-04-24 13:47:05 +0000461 PyObject *values)
462{
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000463 Py_ssize_t avail, needed;
464 void *bytes;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000465 Py_buffer vbytes;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000466 int res = 0;
Guido van Rossumd624f182006-04-24 13:47:05 +0000467
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000468 vbytes.len = -1;
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000469 if (values == (PyObject *)self) {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000470 /* Make a copy and call this function recursively */
Guido van Rossumd624f182006-04-24 13:47:05 +0000471 int err;
472 values = PyBytes_FromObject(values);
473 if (values == NULL)
474 return -1;
475 err = bytes_setslice(self, lo, hi, values);
476 Py_DECREF(values);
477 return err;
478 }
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000479 if (values == NULL) {
480 /* del b[lo:hi] */
481 bytes = NULL;
482 needed = 0;
483 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000484 else {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000485 if (_getbuffer(values, &vbytes) < 0) {
486 PyErr_Format(PyExc_TypeError,
487 "can't set bytes slice from %.100s",
488 Py_Type(values)->tp_name);
489 return -1;
490 }
491 needed = vbytes.len;
492 bytes = vbytes.buf;
Guido van Rossumd624f182006-04-24 13:47:05 +0000493 }
494
495 if (lo < 0)
496 lo = 0;
Thomas Wouters9a6e62b2006-08-23 23:20:29 +0000497 if (hi < lo)
498 hi = lo;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000499 if (hi > Py_Size(self))
500 hi = Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000501
502 avail = hi - lo;
503 if (avail < 0)
504 lo = hi = avail = 0;
505
506 if (avail != needed) {
507 if (avail > needed) {
508 /*
509 0 lo hi old_size
510 | |<----avail----->|<-----tomove------>|
511 | |<-needed->|<-----tomove------>|
512 0 lo new_hi new_size
513 */
514 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000515 Py_Size(self) - hi);
Guido van Rossumd624f182006-04-24 13:47:05 +0000516 }
Guido van Rossuma74184e2007-08-29 04:05:57 +0000517 /* XXX(nnorwitz): need to verify this can't overflow! */
Thomas Wouters376446d2006-12-19 08:30:14 +0000518 if (PyBytes_Resize((PyObject *)self,
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000519 Py_Size(self) + needed - avail) < 0) {
520 res = -1;
521 goto finish;
522 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000523 if (avail < needed) {
524 /*
525 0 lo hi old_size
526 | |<-avail->|<-----tomove------>|
527 | |<----needed---->|<-----tomove------>|
528 0 lo new_hi new_size
529 */
530 memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000531 Py_Size(self) - lo - needed);
Guido van Rossumd624f182006-04-24 13:47:05 +0000532 }
533 }
534
535 if (needed > 0)
536 memcpy(self->ob_bytes + lo, bytes, needed);
537
Guido van Rossum75d38e92007-08-24 17:33:11 +0000538
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000539 finish:
Guido van Rossum75d38e92007-08-24 17:33:11 +0000540 if (vbytes.len != -1)
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000541 PyObject_ReleaseBuffer(values, &vbytes);
542 return res;
Guido van Rossumd624f182006-04-24 13:47:05 +0000543}
544
545static int
546bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
547{
548 Py_ssize_t ival;
549
550 if (i < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000551 i += Py_Size(self);
Guido van Rossumd624f182006-04-24 13:47:05 +0000552
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000553 if (i < 0 || i >= Py_Size(self)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000554 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
555 return -1;
556 }
557
558 if (value == NULL)
559 return bytes_setslice(self, i, i+1, NULL);
560
Thomas Woutersd204a712006-08-22 13:41:17 +0000561 ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000562 if (ival == -1 && PyErr_Occurred())
563 return -1;
564
565 if (ival < 0 || ival >= 256) {
566 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
567 return -1;
568 }
569
570 self->ob_bytes[i] = ival;
571 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000572}
573
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000574static int
Thomas Wouters376446d2006-12-19 08:30:14 +0000575bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
576{
577 Py_ssize_t start, stop, step, slicelen, needed;
578 char *bytes;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000579
Thomas Wouters376446d2006-12-19 08:30:14 +0000580 if (PyIndex_Check(item)) {
581 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
582
583 if (i == -1 && PyErr_Occurred())
584 return -1;
585
586 if (i < 0)
587 i += PyBytes_GET_SIZE(self);
588
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000589 if (i < 0 || i >= Py_Size(self)) {
Thomas Wouters376446d2006-12-19 08:30:14 +0000590 PyErr_SetString(PyExc_IndexError, "bytes index out of range");
591 return -1;
592 }
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000593
Thomas Wouters376446d2006-12-19 08:30:14 +0000594 if (values == NULL) {
595 /* Fall through to slice assignment */
596 start = i;
597 stop = i + 1;
598 step = 1;
599 slicelen = 1;
600 }
601 else {
602 Py_ssize_t ival = PyNumber_AsSsize_t(values, PyExc_ValueError);
603 if (ival == -1 && PyErr_Occurred())
604 return -1;
605 if (ival < 0 || ival >= 256) {
606 PyErr_SetString(PyExc_ValueError,
607 "byte must be in range(0, 256)");
608 return -1;
609 }
610 self->ob_bytes[i] = (char)ival;
611 return 0;
612 }
613 }
614 else if (PySlice_Check(item)) {
615 if (PySlice_GetIndicesEx((PySliceObject *)item,
616 PyBytes_GET_SIZE(self),
617 &start, &stop, &step, &slicelen) < 0) {
618 return -1;
619 }
620 }
621 else {
622 PyErr_SetString(PyExc_TypeError, "bytes indices must be integer");
623 return -1;
624 }
625
626 if (values == NULL) {
627 bytes = NULL;
628 needed = 0;
629 }
630 else if (values == (PyObject *)self || !PyBytes_Check(values)) {
631 /* Make a copy an call this function recursively */
632 int err;
633 values = PyBytes_FromObject(values);
634 if (values == NULL)
635 return -1;
636 err = bytes_ass_subscript(self, item, values);
637 Py_DECREF(values);
638 return err;
639 }
640 else {
641 assert(PyBytes_Check(values));
642 bytes = ((PyBytesObject *)values)->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000643 needed = Py_Size(values);
Thomas Wouters376446d2006-12-19 08:30:14 +0000644 }
645 /* Make sure b[5:2] = ... inserts before 5, not before 2. */
646 if ((step < 0 && start < stop) ||
647 (step > 0 && start > stop))
648 stop = start;
649 if (step == 1) {
650 if (slicelen != needed) {
651 if (slicelen > needed) {
652 /*
653 0 start stop old_size
654 | |<---slicelen--->|<-----tomove------>|
655 | |<-needed->|<-----tomove------>|
656 0 lo new_hi new_size
657 */
658 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000659 Py_Size(self) - stop);
Thomas Wouters376446d2006-12-19 08:30:14 +0000660 }
661 if (PyBytes_Resize((PyObject *)self,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000662 Py_Size(self) + needed - slicelen) < 0)
Thomas Wouters376446d2006-12-19 08:30:14 +0000663 return -1;
664 if (slicelen < needed) {
665 /*
666 0 lo hi old_size
667 | |<-avail->|<-----tomove------>|
668 | |<----needed---->|<-----tomove------>|
669 0 lo new_hi new_size
670 */
671 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000672 Py_Size(self) - start - needed);
Thomas Wouters376446d2006-12-19 08:30:14 +0000673 }
674 }
675
676 if (needed > 0)
677 memcpy(self->ob_bytes + start, bytes, needed);
678
679 return 0;
680 }
681 else {
682 if (needed == 0) {
683 /* Delete slice */
684 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000685
Thomas Wouters376446d2006-12-19 08:30:14 +0000686 if (step < 0) {
687 stop = start + 1;
688 start = stop + step * (slicelen - 1) - 1;
689 step = -step;
690 }
691 for (cur = start, i = 0;
692 i < slicelen; cur += step, i++) {
693 Py_ssize_t lim = step - 1;
694
695 if (cur + step >= PyBytes_GET_SIZE(self))
696 lim = PyBytes_GET_SIZE(self) - cur - 1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000697
Thomas Wouters376446d2006-12-19 08:30:14 +0000698 memmove(self->ob_bytes + cur - i,
699 self->ob_bytes + cur + 1, lim);
700 }
701 /* Move the tail of the bytes, in one chunk */
702 cur = start + slicelen*step;
703 if (cur < PyBytes_GET_SIZE(self)) {
704 memmove(self->ob_bytes + cur - slicelen,
705 self->ob_bytes + cur,
706 PyBytes_GET_SIZE(self) - cur);
707 }
708 if (PyBytes_Resize((PyObject *)self,
709 PyBytes_GET_SIZE(self) - slicelen) < 0)
710 return -1;
711
712 return 0;
713 }
714 else {
715 /* Assign slice */
716 Py_ssize_t cur, i;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +0000717
Thomas Wouters376446d2006-12-19 08:30:14 +0000718 if (needed != slicelen) {
719 PyErr_Format(PyExc_ValueError,
720 "attempt to assign bytes of size %zd "
721 "to extended slice of size %zd",
722 needed, slicelen);
723 return -1;
724 }
725 for (cur = start, i = 0; i < slicelen; cur += step, i++)
726 self->ob_bytes[cur] = bytes[i];
727 return 0;
728 }
729 }
730}
731
732static int
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000733bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
734{
Guido van Rossumd624f182006-04-24 13:47:05 +0000735 static char *kwlist[] = {"source", "encoding", "errors", 0};
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000736 PyObject *arg = NULL;
Guido van Rossumd624f182006-04-24 13:47:05 +0000737 const char *encoding = NULL;
738 const char *errors = NULL;
739 Py_ssize_t count;
740 PyObject *it;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000741 PyObject *(*iternext)(PyObject *);
742
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000743 if (Py_Size(self) != 0) {
Guido van Rossuma0867f72006-05-05 04:34:18 +0000744 /* Empty previous contents (yes, do this first of all!) */
745 if (PyBytes_Resize((PyObject *)self, 0) < 0)
746 return -1;
747 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000748
Guido van Rossumd624f182006-04-24 13:47:05 +0000749 /* Parse arguments */
750 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
751 &arg, &encoding, &errors))
752 return -1;
753
754 /* Make a quick exit if no first argument */
755 if (arg == NULL) {
756 if (encoding != NULL || errors != NULL) {
757 PyErr_SetString(PyExc_TypeError,
758 "encoding or errors without sequence argument");
759 return -1;
760 }
761 return 0;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000762 }
763
Guido van Rossumd624f182006-04-24 13:47:05 +0000764 if (PyUnicode_Check(arg)) {
765 /* Encode via the codec registry */
Guido van Rossum4355a472007-05-04 05:00:04 +0000766 PyObject *encoded, *new;
Guido van Rossuma74184e2007-08-29 04:05:57 +0000767 if (encoding == NULL) {
768 PyErr_SetString(PyExc_TypeError,
769 "string argument without an encoding");
770 return -1;
771 }
Guido van Rossumd624f182006-04-24 13:47:05 +0000772 encoded = PyCodec_Encode(arg, encoding, errors);
773 if (encoded == NULL)
774 return -1;
Guido van Rossum4355a472007-05-04 05:00:04 +0000775 if (!PyBytes_Check(encoded) && !PyString_Check(encoded)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000776 PyErr_Format(PyExc_TypeError,
Guido van Rossum4355a472007-05-04 05:00:04 +0000777 "encoder did not return a str8 or bytes object (type=%.400s)",
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000778 Py_Type(encoded)->tp_name);
Guido van Rossumd624f182006-04-24 13:47:05 +0000779 Py_DECREF(encoded);
780 return -1;
781 }
Guido van Rossuma74184e2007-08-29 04:05:57 +0000782 new = bytes_iconcat(self, encoded);
783 Py_DECREF(encoded);
784 if (new == NULL)
785 return -1;
786 Py_DECREF(new);
787 return 0;
Guido van Rossumd624f182006-04-24 13:47:05 +0000788 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000789
Guido van Rossumd624f182006-04-24 13:47:05 +0000790 /* If it's not unicode, there can't be encoding or errors */
791 if (encoding != NULL || errors != NULL) {
792 PyErr_SetString(PyExc_TypeError,
793 "encoding or errors without a string argument");
794 return -1;
795 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000796
Guido van Rossumd624f182006-04-24 13:47:05 +0000797 /* Is it an int? */
Thomas Woutersd204a712006-08-22 13:41:17 +0000798 count = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000799 if (count == -1 && PyErr_Occurred())
800 PyErr_Clear();
801 else {
802 if (count < 0) {
803 PyErr_SetString(PyExc_ValueError, "negative count");
804 return -1;
805 }
806 if (count > 0) {
807 if (PyBytes_Resize((PyObject *)self, count))
808 return -1;
809 memset(self->ob_bytes, 0, count);
810 }
811 return 0;
812 }
Guido van Rossum75d38e92007-08-24 17:33:11 +0000813
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000814 /* Use the modern buffer interface */
815 if (PyObject_CheckBuffer(arg)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000816 Py_ssize_t size;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000817 Py_buffer view;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000818 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000819 return -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000820 size = view.len;
821 if (PyBytes_Resize((PyObject *)self, size) < 0) goto fail;
822 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
823 goto fail;
824 PyObject_ReleaseBuffer(arg, &view);
Guido van Rossumd624f182006-04-24 13:47:05 +0000825 return 0;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000826 fail:
827 PyObject_ReleaseBuffer(arg, &view);
828 return -1;
Guido van Rossumd624f182006-04-24 13:47:05 +0000829 }
830
831 /* XXX Optimize this if the arguments is a list, tuple */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000832
833 /* Get the iterator */
834 it = PyObject_GetIter(arg);
835 if (it == NULL)
Guido van Rossumd624f182006-04-24 13:47:05 +0000836 return -1;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000837 iternext = *Py_Type(it)->tp_iternext;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000838
839 /* Run the iterator to exhaustion */
840 for (;;) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000841 PyObject *item;
842 Py_ssize_t value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000843
Guido van Rossumd624f182006-04-24 13:47:05 +0000844 /* Get the next item */
845 item = iternext(it);
846 if (item == NULL) {
847 if (PyErr_Occurred()) {
848 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
849 goto error;
850 PyErr_Clear();
851 }
852 break;
853 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000854
Guido van Rossumd624f182006-04-24 13:47:05 +0000855 /* Interpret it as an int (__index__) */
Thomas Woutersd204a712006-08-22 13:41:17 +0000856 value = PyNumber_AsSsize_t(item, PyExc_ValueError);
Guido van Rossumd624f182006-04-24 13:47:05 +0000857 Py_DECREF(item);
858 if (value == -1 && PyErr_Occurred())
859 goto error;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000860
Guido van Rossumd624f182006-04-24 13:47:05 +0000861 /* Range check */
862 if (value < 0 || value >= 256) {
863 PyErr_SetString(PyExc_ValueError,
864 "bytes must be in range(0, 256)");
865 goto error;
866 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000867
Guido van Rossumd624f182006-04-24 13:47:05 +0000868 /* Append the byte */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000869 if (Py_Size(self) < self->ob_alloc)
870 Py_Size(self)++;
871 else if (PyBytes_Resize((PyObject *)self, Py_Size(self)+1) < 0)
Guido van Rossumd624f182006-04-24 13:47:05 +0000872 goto error;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000873 self->ob_bytes[Py_Size(self)-1] = value;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000874 }
875
876 /* Clean up and return success */
877 Py_DECREF(it);
878 return 0;
879
880 error:
881 /* Error handling when it != NULL */
882 Py_DECREF(it);
883 return -1;
884}
885
Georg Brandlee91be42007-02-24 19:41:35 +0000886/* Mostly copied from string_repr, but without the
887 "smart quote" functionality. */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000888static PyObject *
889bytes_repr(PyBytesObject *self)
890{
Walter Dörwald1ab83302007-05-18 17:15:44 +0000891 static const char *hexdigits = "0123456789abcdef";
Martin v. Löwis5d7428b2007-07-21 18:47:48 +0000892 size_t newsize = 3 + 4 * Py_Size(self);
Georg Brandlee91be42007-02-24 19:41:35 +0000893 PyObject *v;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000894 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_Size(self)) {
Georg Brandlee91be42007-02-24 19:41:35 +0000895 PyErr_SetString(PyExc_OverflowError,
896 "bytes object is too large to make repr");
Guido van Rossumd624f182006-04-24 13:47:05 +0000897 return NULL;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000898 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000899 v = PyUnicode_FromUnicode(NULL, newsize);
Georg Brandlee91be42007-02-24 19:41:35 +0000900 if (v == NULL) {
901 return NULL;
902 }
903 else {
904 register Py_ssize_t i;
Walter Dörwald1ab83302007-05-18 17:15:44 +0000905 register Py_UNICODE c;
906 register Py_UNICODE *p;
Georg Brandlee91be42007-02-24 19:41:35 +0000907 int quote = '\'';
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000908
Walter Dörwald1ab83302007-05-18 17:15:44 +0000909 p = PyUnicode_AS_UNICODE(v);
Georg Brandlee91be42007-02-24 19:41:35 +0000910 *p++ = 'b';
911 *p++ = quote;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000912 for (i = 0; i < Py_Size(self); i++) {
Georg Brandlee91be42007-02-24 19:41:35 +0000913 /* There's at least enough room for a hex escape
914 and a closing quote. */
Walter Dörwald1ab83302007-05-18 17:15:44 +0000915 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
Georg Brandlee91be42007-02-24 19:41:35 +0000916 c = self->ob_bytes[i];
917 if (c == quote || c == '\\')
918 *p++ = '\\', *p++ = c;
919 else if (c == '\t')
920 *p++ = '\\', *p++ = 't';
921 else if (c == '\n')
922 *p++ = '\\', *p++ = 'n';
923 else if (c == '\r')
924 *p++ = '\\', *p++ = 'r';
925 else if (c == 0)
Guido van Rossum57b93ad2007-05-08 19:09:34 +0000926 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
Georg Brandlee91be42007-02-24 19:41:35 +0000927 else if (c < ' ' || c >= 0x7f) {
Walter Dörwald1ab83302007-05-18 17:15:44 +0000928 *p++ = '\\';
929 *p++ = 'x';
930 *p++ = hexdigits[(c & 0xf0) >> 4];
931 *p++ = hexdigits[c & 0xf];
Georg Brandlee91be42007-02-24 19:41:35 +0000932 }
933 else
934 *p++ = c;
935 }
Walter Dörwald1ab83302007-05-18 17:15:44 +0000936 assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
Georg Brandlee91be42007-02-24 19:41:35 +0000937 *p++ = quote;
938 *p = '\0';
Walter Dörwald1ab83302007-05-18 17:15:44 +0000939 if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
940 Py_DECREF(v);
941 return NULL;
942 }
Georg Brandlee91be42007-02-24 19:41:35 +0000943 return v;
944 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000945}
946
947static PyObject *
Guido van Rossumd624f182006-04-24 13:47:05 +0000948bytes_str(PyBytesObject *self)
949{
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000950 return PyString_FromStringAndSize(self->ob_bytes, Py_Size(self));
Guido van Rossumd624f182006-04-24 13:47:05 +0000951}
952
953static PyObject *
Guido van Rossum343e97f2007-04-09 00:43:24 +0000954bytes_richcompare(PyObject *self, PyObject *other, int op)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000955{
Guido van Rossum343e97f2007-04-09 00:43:24 +0000956 Py_ssize_t self_size, other_size;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +0000957 Py_buffer self_bytes, other_bytes;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000958 PyObject *res;
Guido van Rossum343e97f2007-04-09 00:43:24 +0000959 Py_ssize_t minsize;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000960 int cmp;
961
Jeremy Hylton18c3ff82007-08-29 18:47:16 +0000962 /* Bytes can be compared to anything that supports the (binary)
963 buffer API. Except that a comparison with Unicode is always an
964 error, even if the comparison is for equality. */
965 if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
966 PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
Guido van Rossum1e35e762007-10-09 17:21:10 +0000967 Py_INCREF(Py_NotImplemented);
968 return Py_NotImplemented;
Jeremy Hylton18c3ff82007-08-29 18:47:16 +0000969 }
Guido van Rossumebea9be2007-04-09 00:49:13 +0000970
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000971 self_size = _getbuffer(self, &self_bytes);
972 if (self_size < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000973 PyErr_Clear();
Guido van Rossumebea9be2007-04-09 00:49:13 +0000974 Py_INCREF(Py_NotImplemented);
975 return Py_NotImplemented;
976 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000977
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000978 other_size = _getbuffer(other, &other_bytes);
979 if (other_size < 0) {
Guido van Rossuma74184e2007-08-29 04:05:57 +0000980 PyErr_Clear();
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000981 PyObject_ReleaseBuffer(self, &self_bytes);
Guido van Rossumd624f182006-04-24 13:47:05 +0000982 Py_INCREF(Py_NotImplemented);
983 return Py_NotImplemented;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000984 }
Guido van Rossum343e97f2007-04-09 00:43:24 +0000985
986 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
Guido van Rossumd624f182006-04-24 13:47:05 +0000987 /* Shortcut: if the lengths differ, the objects differ */
988 cmp = (op == Py_NE);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000989 }
990 else {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000991 minsize = self_size;
992 if (other_size < minsize)
993 minsize = other_size;
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000994
Travis E. Oliphantb99f7622007-08-18 11:21:56 +0000995 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
Guido van Rossumd624f182006-04-24 13:47:05 +0000996 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +0000997
Guido van Rossumd624f182006-04-24 13:47:05 +0000998 if (cmp == 0) {
Guido van Rossum343e97f2007-04-09 00:43:24 +0000999 if (self_size < other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +00001000 cmp = -1;
Guido van Rossum343e97f2007-04-09 00:43:24 +00001001 else if (self_size > other_size)
Guido van Rossumd624f182006-04-24 13:47:05 +00001002 cmp = 1;
1003 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001004
Guido van Rossumd624f182006-04-24 13:47:05 +00001005 switch (op) {
1006 case Py_LT: cmp = cmp < 0; break;
1007 case Py_LE: cmp = cmp <= 0; break;
1008 case Py_EQ: cmp = cmp == 0; break;
1009 case Py_NE: cmp = cmp != 0; break;
1010 case Py_GT: cmp = cmp > 0; break;
1011 case Py_GE: cmp = cmp >= 0; break;
1012 }
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001013 }
1014
1015 res = cmp ? Py_True : Py_False;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00001016 PyObject_ReleaseBuffer(self, &self_bytes);
Guido van Rossum75d38e92007-08-24 17:33:11 +00001017 PyObject_ReleaseBuffer(other, &other_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001018 Py_INCREF(res);
1019 return res;
1020}
1021
1022static void
1023bytes_dealloc(PyBytesObject *self)
1024{
Guido van Rossumd624f182006-04-24 13:47:05 +00001025 if (self->ob_bytes != 0) {
1026 PyMem_Free(self->ob_bytes);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001027 }
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001028 Py_Type(self)->tp_free((PyObject *)self);
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00001029}
1030
Neal Norwitz6968b052007-02-27 19:02:19 +00001031
1032/* -------------------------------------------------------------------- */
1033/* Methods */
1034
1035#define STRINGLIB_CHAR char
1036#define STRINGLIB_CMP memcmp
1037#define STRINGLIB_LEN PyBytes_GET_SIZE
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001038#define STRINGLIB_STR PyBytes_AS_STRING
Neal Norwitz6968b052007-02-27 19:02:19 +00001039#define STRINGLIB_NEW PyBytes_FromStringAndSize
1040#define STRINGLIB_EMPTY nullbytes
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001041#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
1042#define STRINGLIB_MUTABLE 1
Neal Norwitz6968b052007-02-27 19:02:19 +00001043
1044#include "stringlib/fastsearch.h"
1045#include "stringlib/count.h"
1046#include "stringlib/find.h"
1047#include "stringlib/partition.h"
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001048#include "stringlib/ctype.h"
1049#include "stringlib/transmogrify.h"
Neal Norwitz6968b052007-02-27 19:02:19 +00001050
1051
1052/* The following Py_LOCAL_INLINE and Py_LOCAL functions
1053were copied from the old char* style string object. */
1054
1055Py_LOCAL_INLINE(void)
1056_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1057{
1058 if (*end > len)
1059 *end = len;
1060 else if (*end < 0)
1061 *end += len;
1062 if (*end < 0)
1063 *end = 0;
1064 if (*start < 0)
1065 *start += len;
1066 if (*start < 0)
1067 *start = 0;
1068}
1069
1070
1071Py_LOCAL_INLINE(Py_ssize_t)
1072bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
1073{
1074 PyObject *subobj;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00001075 Py_buffer subbuf;
Neal Norwitz6968b052007-02-27 19:02:19 +00001076 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Guido van Rossum06b8b022007-08-31 13:48:41 +00001077 Py_ssize_t res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001078
1079 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1080 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1081 return -2;
Guido van Rossum06b8b022007-08-31 13:48:41 +00001082 if (_getbuffer(subobj, &subbuf) < 0)
Neal Norwitz6968b052007-02-27 19:02:19 +00001083 return -2;
Neal Norwitz6968b052007-02-27 19:02:19 +00001084 if (dir > 0)
Guido van Rossum06b8b022007-08-31 13:48:41 +00001085 res = stringlib_find_slice(
Neal Norwitz6968b052007-02-27 19:02:19 +00001086 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossum06b8b022007-08-31 13:48:41 +00001087 subbuf.buf, subbuf.len, start, end);
Neal Norwitz6968b052007-02-27 19:02:19 +00001088 else
Guido van Rossum06b8b022007-08-31 13:48:41 +00001089 res = stringlib_rfind_slice(
Neal Norwitz6968b052007-02-27 19:02:19 +00001090 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossum06b8b022007-08-31 13:48:41 +00001091 subbuf.buf, subbuf.len, start, end);
1092 PyObject_ReleaseBuffer(subobj, &subbuf);
1093 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00001094}
1095
Neal Norwitz6968b052007-02-27 19:02:19 +00001096PyDoc_STRVAR(find__doc__,
1097"B.find(sub [,start [,end]]) -> int\n\
1098\n\
1099Return the lowest index in B where subsection sub is found,\n\
1100such that sub is contained within s[start,end]. Optional\n\
1101arguments start and end are interpreted as in slice notation.\n\
1102\n\
1103Return -1 on failure.");
1104
1105static PyObject *
1106bytes_find(PyBytesObject *self, PyObject *args)
1107{
1108 Py_ssize_t result = bytes_find_internal(self, args, +1);
1109 if (result == -2)
1110 return NULL;
1111 return PyInt_FromSsize_t(result);
1112}
1113
1114PyDoc_STRVAR(count__doc__,
1115"B.count(sub[, start[, end]]) -> int\n\
1116\n\
1117Return the number of non-overlapping occurrences of subsection sub in\n\
1118bytes B[start:end]. Optional arguments start and end are interpreted\n\
1119as in slice notation.");
1120
1121static PyObject *
1122bytes_count(PyBytesObject *self, PyObject *args)
1123{
1124 PyObject *sub_obj;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001125 const char *str = PyBytes_AS_STRING(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00001126 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001127 Py_buffer vsub;
1128 PyObject *count_obj;
Neal Norwitz6968b052007-02-27 19:02:19 +00001129
1130 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
1131 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1132 return NULL;
1133
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001134 if (_getbuffer(sub_obj, &vsub) < 0)
Neal Norwitz6968b052007-02-27 19:02:19 +00001135 return NULL;
1136
Martin v. Löwis5b222132007-06-10 09:51:05 +00001137 _adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
Neal Norwitz6968b052007-02-27 19:02:19 +00001138
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001139 count_obj = PyInt_FromSsize_t(
1140 stringlib_count(str + start, end - start, vsub.buf, vsub.len)
Neal Norwitz6968b052007-02-27 19:02:19 +00001141 );
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001142 PyObject_ReleaseBuffer(sub_obj, &vsub);
1143 return count_obj;
Neal Norwitz6968b052007-02-27 19:02:19 +00001144}
1145
1146
1147PyDoc_STRVAR(index__doc__,
1148"B.index(sub [,start [,end]]) -> int\n\
1149\n\
1150Like B.find() but raise ValueError when the subsection is not found.");
1151
1152static PyObject *
1153bytes_index(PyBytesObject *self, PyObject *args)
1154{
1155 Py_ssize_t result = bytes_find_internal(self, args, +1);
1156 if (result == -2)
1157 return NULL;
1158 if (result == -1) {
1159 PyErr_SetString(PyExc_ValueError,
1160 "subsection not found");
1161 return NULL;
1162 }
1163 return PyInt_FromSsize_t(result);
1164}
1165
1166
1167PyDoc_STRVAR(rfind__doc__,
1168"B.rfind(sub [,start [,end]]) -> int\n\
1169\n\
1170Return the highest index in B where subsection sub is found,\n\
1171such that sub is contained within s[start,end]. Optional\n\
1172arguments start and end are interpreted as in slice notation.\n\
1173\n\
1174Return -1 on failure.");
1175
1176static PyObject *
1177bytes_rfind(PyBytesObject *self, PyObject *args)
1178{
1179 Py_ssize_t result = bytes_find_internal(self, args, -1);
1180 if (result == -2)
1181 return NULL;
1182 return PyInt_FromSsize_t(result);
1183}
1184
1185
1186PyDoc_STRVAR(rindex__doc__,
1187"B.rindex(sub [,start [,end]]) -> int\n\
1188\n\
1189Like B.rfind() but raise ValueError when the subsection is not found.");
1190
1191static PyObject *
1192bytes_rindex(PyBytesObject *self, PyObject *args)
1193{
1194 Py_ssize_t result = bytes_find_internal(self, args, -1);
1195 if (result == -2)
1196 return NULL;
1197 if (result == -1) {
1198 PyErr_SetString(PyExc_ValueError,
1199 "subsection not found");
1200 return NULL;
1201 }
1202 return PyInt_FromSsize_t(result);
1203}
1204
1205
1206/* Matches the end (direction >= 0) or start (direction < 0) of self
1207 * against substr, using the start and end arguments. Returns
1208 * -1 on error, 0 if not found and 1 if found.
1209 */
1210Py_LOCAL(int)
1211_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
1212 Py_ssize_t end, int direction)
1213{
1214 Py_ssize_t len = PyBytes_GET_SIZE(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00001215 const char* str;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001216 Py_buffer vsubstr;
1217 int rv;
Neal Norwitz6968b052007-02-27 19:02:19 +00001218
Neal Norwitz6968b052007-02-27 19:02:19 +00001219 str = PyBytes_AS_STRING(self);
1220
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001221 if (_getbuffer(substr, &vsubstr) < 0)
1222 return -1;
1223
Neal Norwitz6968b052007-02-27 19:02:19 +00001224 _adjust_indices(&start, &end, len);
1225
1226 if (direction < 0) {
1227 /* startswith */
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001228 if (start+vsubstr.len > len) {
1229 rv = 0;
1230 goto done;
1231 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001232 } else {
1233 /* endswith */
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001234 if (end-start < vsubstr.len || start > len) {
1235 rv = 0;
1236 goto done;
1237 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001238
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001239 if (end-vsubstr.len > start)
1240 start = end - vsubstr.len;
Neal Norwitz6968b052007-02-27 19:02:19 +00001241 }
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001242 if (end-start >= vsubstr.len)
1243 rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
1244
1245done:
1246 PyObject_ReleaseBuffer(substr, &vsubstr);
1247 return rv;
Neal Norwitz6968b052007-02-27 19:02:19 +00001248}
1249
1250
1251PyDoc_STRVAR(startswith__doc__,
1252"B.startswith(prefix[, start[, end]]) -> bool\n\
1253\n\
1254Return True if B starts with the specified prefix, False otherwise.\n\
1255With optional start, test B beginning at that position.\n\
1256With optional end, stop comparing B at that position.\n\
1257prefix can also be a tuple of strings to try.");
1258
1259static PyObject *
1260bytes_startswith(PyBytesObject *self, PyObject *args)
1261{
1262 Py_ssize_t start = 0;
1263 Py_ssize_t end = PY_SSIZE_T_MAX;
1264 PyObject *subobj;
1265 int result;
1266
1267 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1268 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1269 return NULL;
1270 if (PyTuple_Check(subobj)) {
1271 Py_ssize_t i;
1272 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1273 result = _bytes_tailmatch(self,
1274 PyTuple_GET_ITEM(subobj, i),
1275 start, end, -1);
1276 if (result == -1)
1277 return NULL;
1278 else if (result) {
1279 Py_RETURN_TRUE;
1280 }
1281 }
1282 Py_RETURN_FALSE;
1283 }
1284 result = _bytes_tailmatch(self, subobj, start, end, -1);
1285 if (result == -1)
1286 return NULL;
1287 else
1288 return PyBool_FromLong(result);
1289}
1290
1291PyDoc_STRVAR(endswith__doc__,
1292"B.endswith(suffix[, start[, end]]) -> bool\n\
1293\n\
1294Return True if B ends with the specified suffix, False otherwise.\n\
1295With optional start, test B beginning at that position.\n\
1296With optional end, stop comparing B at that position.\n\
1297suffix can also be a tuple of strings to try.");
1298
1299static PyObject *
1300bytes_endswith(PyBytesObject *self, PyObject *args)
1301{
1302 Py_ssize_t start = 0;
1303 Py_ssize_t end = PY_SSIZE_T_MAX;
1304 PyObject *subobj;
1305 int result;
1306
1307 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1308 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1309 return NULL;
1310 if (PyTuple_Check(subobj)) {
1311 Py_ssize_t i;
1312 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1313 result = _bytes_tailmatch(self,
1314 PyTuple_GET_ITEM(subobj, i),
1315 start, end, +1);
1316 if (result == -1)
1317 return NULL;
1318 else if (result) {
1319 Py_RETURN_TRUE;
1320 }
1321 }
1322 Py_RETURN_FALSE;
1323 }
1324 result = _bytes_tailmatch(self, subobj, start, end, +1);
1325 if (result == -1)
1326 return NULL;
1327 else
1328 return PyBool_FromLong(result);
1329}
1330
1331
Neal Norwitz6968b052007-02-27 19:02:19 +00001332PyDoc_STRVAR(translate__doc__,
1333"B.translate(table [,deletechars]) -> bytes\n\
1334\n\
1335Return a copy of the bytes B, where all characters occurring\n\
1336in the optional argument deletechars are removed, and the\n\
1337remaining characters have been mapped through the given\n\
1338translation table, which must be a bytes of length 256.");
1339
1340static PyObject *
1341bytes_translate(PyBytesObject *self, PyObject *args)
1342{
1343 register char *input, *output;
1344 register const char *table;
1345 register Py_ssize_t i, c, changed = 0;
1346 PyObject *input_obj = (PyObject*)self;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001347 const char *output_start;
1348 Py_ssize_t inlen;
Neal Norwitz6968b052007-02-27 19:02:19 +00001349 PyObject *result;
1350 int trans_table[256];
1351 PyObject *tableobj, *delobj = NULL;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001352 Py_buffer vtable, vdel;
Neal Norwitz6968b052007-02-27 19:02:19 +00001353
1354 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1355 &tableobj, &delobj))
1356 return NULL;
1357
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001358 if (_getbuffer(tableobj, &vtable) < 0)
Neal Norwitz6968b052007-02-27 19:02:19 +00001359 return NULL;
1360
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001361 if (vtable.len != 256) {
Neal Norwitz6968b052007-02-27 19:02:19 +00001362 PyErr_SetString(PyExc_ValueError,
1363 "translation table must be 256 characters long");
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001364 result = NULL;
1365 goto done;
Neal Norwitz6968b052007-02-27 19:02:19 +00001366 }
1367
1368 if (delobj != NULL) {
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001369 if (_getbuffer(delobj, &vdel) < 0) {
1370 result = NULL;
1371 goto done;
Neal Norwitz6968b052007-02-27 19:02:19 +00001372 }
Neal Norwitz6968b052007-02-27 19:02:19 +00001373 }
1374 else {
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001375 vdel.buf = NULL;
1376 vdel.len = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00001377 }
1378
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001379 table = (const char *)vtable.buf;
Neal Norwitz6968b052007-02-27 19:02:19 +00001380 inlen = PyBytes_GET_SIZE(input_obj);
1381 result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1382 if (result == NULL)
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001383 goto done;
Neal Norwitz6968b052007-02-27 19:02:19 +00001384 output_start = output = PyBytes_AsString(result);
1385 input = PyBytes_AS_STRING(input_obj);
1386
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001387 if (vdel.len == 0) {
Neal Norwitz6968b052007-02-27 19:02:19 +00001388 /* If no deletions are required, use faster code */
1389 for (i = inlen; --i >= 0; ) {
1390 c = Py_CHARMASK(*input++);
1391 if (Py_CHARMASK((*output++ = table[c])) != c)
1392 changed = 1;
1393 }
1394 if (changed || !PyBytes_CheckExact(input_obj))
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001395 goto done;
Neal Norwitz6968b052007-02-27 19:02:19 +00001396 Py_DECREF(result);
1397 Py_INCREF(input_obj);
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001398 result = input_obj;
1399 goto done;
Neal Norwitz6968b052007-02-27 19:02:19 +00001400 }
1401
1402 for (i = 0; i < 256; i++)
1403 trans_table[i] = Py_CHARMASK(table[i]);
1404
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001405 for (i = 0; i < vdel.len; i++)
1406 trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
Neal Norwitz6968b052007-02-27 19:02:19 +00001407
1408 for (i = inlen; --i >= 0; ) {
1409 c = Py_CHARMASK(*input++);
1410 if (trans_table[c] != -1)
1411 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1412 continue;
1413 changed = 1;
1414 }
1415 if (!changed && PyBytes_CheckExact(input_obj)) {
1416 Py_DECREF(result);
1417 Py_INCREF(input_obj);
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001418 result = input_obj;
1419 goto done;
Neal Norwitz6968b052007-02-27 19:02:19 +00001420 }
1421 /* Fix the size of the resulting string */
1422 if (inlen > 0)
1423 PyBytes_Resize(result, output - output_start);
Gregory P. Smith60d241f2007-10-16 06:31:30 +00001424
1425done:
1426 PyObject_ReleaseBuffer(tableobj, &vtable);
1427 if (delobj != NULL)
1428 PyObject_ReleaseBuffer(delobj, &vdel);
Neal Norwitz6968b052007-02-27 19:02:19 +00001429 return result;
1430}
1431
1432
/* Search directions understood by findstring()/countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Return a pointer to the first occurrence of byte c in the first
   target_len bytes of target, or NULL if there is none. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))

/* Don't call if length < 2 */
/* True when the length bytes of target starting at offset equal pattern.
   The first and last bytes are compared before memcmp() is tried on the
   rest.  Arguments are evaluated more than once, so they must be free of
   side effects; each one is parenthesized so that expression arguments
   expand correctly. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
  ((target)[(offset)] == (pattern)[0] &&                                \
   (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&            \
   !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1446
1447
1448/* Bytes ops must return a string. */
1449/* If the object is subclass of bytes, create a copy */
1450Py_LOCAL(PyBytesObject *)
1451return_self(PyBytesObject *self)
1452{
1453 if (PyBytes_CheckExact(self)) {
1454 Py_INCREF(self);
1455 return (PyBytesObject *)self;
1456 }
1457 return (PyBytesObject *)PyBytes_FromStringAndSize(
1458 PyBytes_AS_STRING(self),
1459 PyBytes_GET_SIZE(self));
1460}
1461
1462Py_LOCAL_INLINE(Py_ssize_t)
1463countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
1464{
1465 Py_ssize_t count=0;
1466 const char *start=target;
1467 const char *end=target+target_len;
1468
1469 while ( (start=findchar(start, end-start, c)) != NULL ) {
1470 count++;
1471 if (count >= maxcount)
1472 break;
1473 start += 1;
1474 }
1475 return count;
1476}
1477
1478Py_LOCAL(Py_ssize_t)
1479findstring(const char *target, Py_ssize_t target_len,
1480 const char *pattern, Py_ssize_t pattern_len,
1481 Py_ssize_t start,
1482 Py_ssize_t end,
1483 int direction)
1484{
1485 if (start < 0) {
1486 start += target_len;
1487 if (start < 0)
1488 start = 0;
1489 }
1490 if (end > target_len) {
1491 end = target_len;
1492 } else if (end < 0) {
1493 end += target_len;
1494 if (end < 0)
1495 end = 0;
1496 }
1497
1498 /* zero-length substrings always match at the first attempt */
1499 if (pattern_len == 0)
1500 return (direction > 0) ? start : end;
1501
1502 end -= pattern_len;
1503
1504 if (direction < 0) {
1505 for (; end >= start; end--)
1506 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
1507 return end;
1508 } else {
1509 for (; start <= end; start++)
1510 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
1511 return start;
1512 }
1513 return -1;
1514}
1515
1516Py_LOCAL_INLINE(Py_ssize_t)
1517countstring(const char *target, Py_ssize_t target_len,
1518 const char *pattern, Py_ssize_t pattern_len,
1519 Py_ssize_t start,
1520 Py_ssize_t end,
1521 int direction, Py_ssize_t maxcount)
1522{
1523 Py_ssize_t count=0;
1524
1525 if (start < 0) {
1526 start += target_len;
1527 if (start < 0)
1528 start = 0;
1529 }
1530 if (end > target_len) {
1531 end = target_len;
1532 } else if (end < 0) {
1533 end += target_len;
1534 if (end < 0)
1535 end = 0;
1536 }
1537
1538 /* zero-length substrings match everywhere */
1539 if (pattern_len == 0 || maxcount == 0) {
1540 if (target_len+1 < maxcount)
1541 return target_len+1;
1542 return maxcount;
1543 }
1544
1545 end -= pattern_len;
1546 if (direction < 0) {
1547 for (; (end >= start); end--)
1548 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
1549 count++;
1550 if (--maxcount <= 0) break;
1551 end -= pattern_len-1;
1552 }
1553 } else {
1554 for (; (start <= end); start++)
1555 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
1556 count++;
1557 if (--maxcount <= 0)
1558 break;
1559 start += pattern_len-1;
1560 }
1561 }
1562 return count;
1563}
1564
1565
1566/* Algorithms for different cases of string replacement */
1567
1568/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1569Py_LOCAL(PyBytesObject *)
1570replace_interleave(PyBytesObject *self,
1571 const char *to_s, Py_ssize_t to_len,
1572 Py_ssize_t maxcount)
1573{
1574 char *self_s, *result_s;
1575 Py_ssize_t self_len, result_len;
1576 Py_ssize_t count, i, product;
1577 PyBytesObject *result;
1578
1579 self_len = PyBytes_GET_SIZE(self);
1580
1581 /* 1 at the end plus 1 after every character */
1582 count = self_len+1;
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001583 if (maxcount < count)
Neal Norwitz6968b052007-02-27 19:02:19 +00001584 count = maxcount;
1585
1586 /* Check for overflow */
1587 /* result_len = count * to_len + self_len; */
1588 product = count * to_len;
1589 if (product / to_len != count) {
1590 PyErr_SetString(PyExc_OverflowError,
1591 "replace string is too long");
1592 return NULL;
1593 }
1594 result_len = product + self_len;
1595 if (result_len < 0) {
1596 PyErr_SetString(PyExc_OverflowError,
1597 "replace string is too long");
1598 return NULL;
1599 }
1600
1601 if (! (result = (PyBytesObject *)
1602 PyBytes_FromStringAndSize(NULL, result_len)) )
1603 return NULL;
1604
1605 self_s = PyBytes_AS_STRING(self);
1606 result_s = PyBytes_AS_STRING(result);
1607
1608 /* TODO: special case single character, which doesn't need memcpy */
1609
1610 /* Lay the first one down (guaranteed this will occur) */
1611 Py_MEMCPY(result_s, to_s, to_len);
1612 result_s += to_len;
1613 count -= 1;
1614
1615 for (i=0; i<count; i++) {
1616 *result_s++ = *self_s++;
1617 Py_MEMCPY(result_s, to_s, to_len);
1618 result_s += to_len;
1619 }
1620
1621 /* Copy the rest of the original string */
1622 Py_MEMCPY(result_s, self_s, self_len-i);
1623
1624 return result;
1625}
1626
1627/* Special case for deleting a single character */
1628/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1629Py_LOCAL(PyBytesObject *)
1630replace_delete_single_character(PyBytesObject *self,
1631 char from_c, Py_ssize_t maxcount)
1632{
1633 char *self_s, *result_s;
1634 char *start, *next, *end;
1635 Py_ssize_t self_len, result_len;
1636 Py_ssize_t count;
1637 PyBytesObject *result;
1638
1639 self_len = PyBytes_GET_SIZE(self);
1640 self_s = PyBytes_AS_STRING(self);
1641
1642 count = countchar(self_s, self_len, from_c, maxcount);
1643 if (count == 0) {
1644 return return_self(self);
1645 }
1646
1647 result_len = self_len - count; /* from_len == 1 */
1648 assert(result_len>=0);
1649
1650 if ( (result = (PyBytesObject *)
1651 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1652 return NULL;
1653 result_s = PyBytes_AS_STRING(result);
1654
1655 start = self_s;
1656 end = self_s + self_len;
1657 while (count-- > 0) {
1658 next = findchar(start, end-start, from_c);
1659 if (next == NULL)
1660 break;
1661 Py_MEMCPY(result_s, start, next-start);
1662 result_s += (next-start);
1663 start = next+1;
1664 }
1665 Py_MEMCPY(result_s, start, end-start);
1666
1667 return result;
1668}
1669
1670/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1671
1672Py_LOCAL(PyBytesObject *)
1673replace_delete_substring(PyBytesObject *self,
1674 const char *from_s, Py_ssize_t from_len,
1675 Py_ssize_t maxcount)
1676{
1677 char *self_s, *result_s;
1678 char *start, *next, *end;
1679 Py_ssize_t self_len, result_len;
1680 Py_ssize_t count, offset;
1681 PyBytesObject *result;
1682
1683 self_len = PyBytes_GET_SIZE(self);
1684 self_s = PyBytes_AS_STRING(self);
1685
1686 count = countstring(self_s, self_len,
1687 from_s, from_len,
1688 0, self_len, 1,
1689 maxcount);
1690
1691 if (count == 0) {
1692 /* no matches */
1693 return return_self(self);
1694 }
1695
1696 result_len = self_len - (count * from_len);
1697 assert (result_len>=0);
1698
1699 if ( (result = (PyBytesObject *)
1700 PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1701 return NULL;
1702
1703 result_s = PyBytes_AS_STRING(result);
1704
1705 start = self_s;
1706 end = self_s + self_len;
1707 while (count-- > 0) {
1708 offset = findstring(start, end-start,
1709 from_s, from_len,
1710 0, end-start, FORWARD);
1711 if (offset == -1)
1712 break;
1713 next = start + offset;
1714
1715 Py_MEMCPY(result_s, start, next-start);
1716
1717 result_s += (next-start);
1718 start = next+from_len;
1719 }
1720 Py_MEMCPY(result_s, start, end-start);
1721 return result;
1722}
1723
1724/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1725Py_LOCAL(PyBytesObject *)
1726replace_single_character_in_place(PyBytesObject *self,
1727 char from_c, char to_c,
1728 Py_ssize_t maxcount)
1729{
1730 char *self_s, *result_s, *start, *end, *next;
1731 Py_ssize_t self_len;
1732 PyBytesObject *result;
1733
1734 /* The result string will be the same size */
1735 self_s = PyBytes_AS_STRING(self);
1736 self_len = PyBytes_GET_SIZE(self);
1737
1738 next = findchar(self_s, self_len, from_c);
1739
1740 if (next == NULL) {
1741 /* No matches; return the original bytes */
1742 return return_self(self);
1743 }
1744
1745 /* Need to make a new bytes */
1746 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1747 if (result == NULL)
1748 return NULL;
1749 result_s = PyBytes_AS_STRING(result);
1750 Py_MEMCPY(result_s, self_s, self_len);
1751
1752 /* change everything in-place, starting with this one */
1753 start = result_s + (next-self_s);
1754 *start = to_c;
1755 start++;
1756 end = result_s + self_len;
1757
1758 while (--maxcount > 0) {
1759 next = findchar(start, end-start, from_c);
1760 if (next == NULL)
1761 break;
1762 *next = to_c;
1763 start = next+1;
1764 }
1765
1766 return result;
1767}
1768
1769/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1770Py_LOCAL(PyBytesObject *)
1771replace_substring_in_place(PyBytesObject *self,
1772 const char *from_s, Py_ssize_t from_len,
1773 const char *to_s, Py_ssize_t to_len,
1774 Py_ssize_t maxcount)
1775{
1776 char *result_s, *start, *end;
1777 char *self_s;
1778 Py_ssize_t self_len, offset;
1779 PyBytesObject *result;
1780
1781 /* The result bytes will be the same size */
1782
1783 self_s = PyBytes_AS_STRING(self);
1784 self_len = PyBytes_GET_SIZE(self);
1785
1786 offset = findstring(self_s, self_len,
1787 from_s, from_len,
1788 0, self_len, FORWARD);
1789 if (offset == -1) {
1790 /* No matches; return the original bytes */
1791 return return_self(self);
1792 }
1793
1794 /* Need to make a new bytes */
1795 result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1796 if (result == NULL)
1797 return NULL;
1798 result_s = PyBytes_AS_STRING(result);
1799 Py_MEMCPY(result_s, self_s, self_len);
1800
1801 /* change everything in-place, starting with this one */
1802 start = result_s + offset;
1803 Py_MEMCPY(start, to_s, from_len);
1804 start += from_len;
1805 end = result_s + self_len;
1806
1807 while ( --maxcount > 0) {
1808 offset = findstring(start, end-start,
1809 from_s, from_len,
1810 0, end-start, FORWARD);
1811 if (offset==-1)
1812 break;
1813 Py_MEMCPY(start+offset, to_s, from_len);
1814 start += offset+from_len;
1815 }
1816
1817 return result;
1818}
1819
1820/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1821Py_LOCAL(PyBytesObject *)
1822replace_single_character(PyBytesObject *self,
1823 char from_c,
1824 const char *to_s, Py_ssize_t to_len,
1825 Py_ssize_t maxcount)
1826{
1827 char *self_s, *result_s;
1828 char *start, *next, *end;
1829 Py_ssize_t self_len, result_len;
1830 Py_ssize_t count, product;
1831 PyBytesObject *result;
1832
1833 self_s = PyBytes_AS_STRING(self);
1834 self_len = PyBytes_GET_SIZE(self);
1835
1836 count = countchar(self_s, self_len, from_c, maxcount);
1837 if (count == 0) {
1838 /* no matches, return unchanged */
1839 return return_self(self);
1840 }
1841
1842 /* use the difference between current and new, hence the "-1" */
1843 /* result_len = self_len + count * (to_len-1) */
1844 product = count * (to_len-1);
1845 if (product / (to_len-1) != count) {
1846 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1847 return NULL;
1848 }
1849 result_len = self_len + product;
1850 if (result_len < 0) {
1851 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1852 return NULL;
1853 }
1854
1855 if ( (result = (PyBytesObject *)
1856 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1857 return NULL;
1858 result_s = PyBytes_AS_STRING(result);
1859
1860 start = self_s;
1861 end = self_s + self_len;
1862 while (count-- > 0) {
1863 next = findchar(start, end-start, from_c);
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00001864 if (next == NULL)
Neal Norwitz6968b052007-02-27 19:02:19 +00001865 break;
1866
1867 if (next == start) {
1868 /* replace with the 'to' */
1869 Py_MEMCPY(result_s, to_s, to_len);
1870 result_s += to_len;
1871 start += 1;
1872 } else {
1873 /* copy the unchanged old then the 'to' */
1874 Py_MEMCPY(result_s, start, next-start);
1875 result_s += (next-start);
1876 Py_MEMCPY(result_s, to_s, to_len);
1877 result_s += to_len;
1878 start = next+1;
1879 }
1880 }
1881 /* Copy the remainder of the remaining bytes */
1882 Py_MEMCPY(result_s, start, end-start);
1883
1884 return result;
1885}
1886
1887/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1888Py_LOCAL(PyBytesObject *)
1889replace_substring(PyBytesObject *self,
1890 const char *from_s, Py_ssize_t from_len,
1891 const char *to_s, Py_ssize_t to_len,
1892 Py_ssize_t maxcount)
1893{
1894 char *self_s, *result_s;
1895 char *start, *next, *end;
1896 Py_ssize_t self_len, result_len;
1897 Py_ssize_t count, offset, product;
1898 PyBytesObject *result;
1899
1900 self_s = PyBytes_AS_STRING(self);
1901 self_len = PyBytes_GET_SIZE(self);
1902
1903 count = countstring(self_s, self_len,
1904 from_s, from_len,
1905 0, self_len, FORWARD, maxcount);
1906 if (count == 0) {
1907 /* no matches, return unchanged */
1908 return return_self(self);
1909 }
1910
1911 /* Check for overflow */
1912 /* result_len = self_len + count * (to_len-from_len) */
1913 product = count * (to_len-from_len);
1914 if (product / (to_len-from_len) != count) {
1915 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1916 return NULL;
1917 }
1918 result_len = self_len + product;
1919 if (result_len < 0) {
1920 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1921 return NULL;
1922 }
1923
1924 if ( (result = (PyBytesObject *)
1925 PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1926 return NULL;
1927 result_s = PyBytes_AS_STRING(result);
1928
1929 start = self_s;
1930 end = self_s + self_len;
1931 while (count-- > 0) {
1932 offset = findstring(start, end-start,
1933 from_s, from_len,
1934 0, end-start, FORWARD);
1935 if (offset == -1)
1936 break;
1937 next = start+offset;
1938 if (next == start) {
1939 /* replace with the 'to' */
1940 Py_MEMCPY(result_s, to_s, to_len);
1941 result_s += to_len;
1942 start += from_len;
1943 } else {
1944 /* copy the unchanged old then the 'to' */
1945 Py_MEMCPY(result_s, start, next-start);
1946 result_s += (next-start);
1947 Py_MEMCPY(result_s, to_s, to_len);
1948 result_s += to_len;
1949 start = next+from_len;
1950 }
1951 }
1952 /* Copy the remainder of the remaining bytes */
1953 Py_MEMCPY(result_s, start, end-start);
1954
1955 return result;
1956}
1957
1958
1959Py_LOCAL(PyBytesObject *)
1960replace(PyBytesObject *self,
1961 const char *from_s, Py_ssize_t from_len,
1962 const char *to_s, Py_ssize_t to_len,
1963 Py_ssize_t maxcount)
1964{
1965 if (maxcount < 0) {
1966 maxcount = PY_SSIZE_T_MAX;
1967 } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
1968 /* nothing to do; return the original bytes */
1969 return return_self(self);
1970 }
1971
1972 if (maxcount == 0 ||
1973 (from_len == 0 && to_len == 0)) {
1974 /* nothing to do; return the original bytes */
1975 return return_self(self);
1976 }
1977
1978 /* Handle zero-length special cases */
1979
1980 if (from_len == 0) {
1981 /* insert the 'to' bytes everywhere. */
1982 /* >>> "Python".replace("", ".") */
1983 /* '.P.y.t.h.o.n.' */
1984 return replace_interleave(self, to_s, to_len, maxcount);
1985 }
1986
1987 /* Except for "".replace("", "A") == "A" there is no way beyond this */
1988 /* point for an empty self bytes to generate a non-empty bytes */
1989 /* Special case so the remaining code always gets a non-empty bytes */
1990 if (PyBytes_GET_SIZE(self) == 0) {
1991 return return_self(self);
1992 }
1993
1994 if (to_len == 0) {
1995 /* delete all occurances of 'from' bytes */
1996 if (from_len == 1) {
1997 return replace_delete_single_character(
1998 self, from_s[0], maxcount);
1999 } else {
2000 return replace_delete_substring(self, from_s, from_len, maxcount);
2001 }
2002 }
2003
2004 /* Handle special case where both bytes have the same length */
2005
2006 if (from_len == to_len) {
2007 if (from_len == 1) {
2008 return replace_single_character_in_place(
2009 self,
2010 from_s[0],
2011 to_s[0],
2012 maxcount);
2013 } else {
2014 return replace_substring_in_place(
2015 self, from_s, from_len, to_s, to_len, maxcount);
2016 }
2017 }
2018
2019 /* Otherwise use the more generic algorithms */
2020 if (from_len == 1) {
2021 return replace_single_character(self, from_s[0],
2022 to_s, to_len, maxcount);
2023 } else {
2024 /* len('from')>=2, len('to')>=1 */
2025 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2026 }
2027}
2028
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002029
Neal Norwitz6968b052007-02-27 19:02:19 +00002030PyDoc_STRVAR(replace__doc__,
2031"B.replace (old, new[, count]) -> bytes\n\
2032\n\
2033Return a copy of bytes B with all occurrences of subsection\n\
2034old replaced by new. If the optional argument count is\n\
2035given, only the first count occurrences are replaced.");
2036
2037static PyObject *
2038bytes_replace(PyBytesObject *self, PyObject *args)
2039{
2040 Py_ssize_t count = -1;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002041 PyObject *from, *to, *res;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00002042 Py_buffer vfrom, vto;
Neal Norwitz6968b052007-02-27 19:02:19 +00002043
2044 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2045 return NULL;
2046
Guido van Rossuma74184e2007-08-29 04:05:57 +00002047 if (_getbuffer(from, &vfrom) < 0)
2048 return NULL;
2049 if (_getbuffer(to, &vto) < 0) {
2050 PyObject_ReleaseBuffer(from, &vfrom);
2051 return NULL;
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002052 }
Neal Norwitz6968b052007-02-27 19:02:19 +00002053
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002054 res = (PyObject *)replace((PyBytesObject *) self,
Guido van Rossuma74184e2007-08-29 04:05:57 +00002055 vfrom.buf, vfrom.len,
2056 vto.buf, vto.len, count);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002057
Guido van Rossuma74184e2007-08-29 04:05:57 +00002058 PyObject_ReleaseBuffer(from, &vfrom);
2059 PyObject_ReleaseBuffer(to, &vto);
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002060 return res;
Neal Norwitz6968b052007-02-27 19:02:19 +00002061}
2062
2063
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list length for a split limited to maxsplit splits.
   5 splits gives 6 elements.  The argument is parenthesized so that an
   arbitrary expression can be passed safely. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append the slice data[left:right] to `list` as a new bytes object.
   Relies on names from the expanding function: locals `str` and `list`
   and a label `onError` that is jumped to on failure.  This expands to
   several statements (not a single one), so it must not be used as the
   unbraced body of an if/else/loop. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Store the slice data[left:right] as item number `count` of `list` and
   increment `count`.  The first MAX_PREALLOC items are written straight
   into the preallocated slots; later ones are appended.  Relies on locals
   `str`, `list`, `count` and the label `onError` of the expanding
   function. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyBytes_FromStringAndSize((data) + (left),            \
                                    (right) - (left));          \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size (uses the local `count`). */
#define FIX_PREALLOC_SIZE(list) Py_Size(list) = count
Neal Norwitz6968b052007-02-27 19:02:19 +00002108
2109
2110Py_LOCAL_INLINE(PyObject *)
2111split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2112{
Guido van Rossum8f950672007-09-10 16:53:45 +00002113 register Py_ssize_t i, j, count = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00002114 PyObject *str;
2115 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2116
2117 if (list == NULL)
2118 return NULL;
2119
2120 i = j = 0;
2121 while ((j < len) && (maxcount-- > 0)) {
Guido van Rossum8f950672007-09-10 16:53:45 +00002122 for(; j < len; j++) {
Neal Norwitz6968b052007-02-27 19:02:19 +00002123 /* I found that using memchr makes no difference */
2124 if (s[j] == ch) {
2125 SPLIT_ADD(s, i, j);
2126 i = j = j + 1;
2127 break;
2128 }
2129 }
2130 }
2131 if (i <= len) {
2132 SPLIT_ADD(s, i, len);
2133 }
2134 FIX_PREALLOC_SIZE(list);
2135 return list;
2136
2137 onError:
2138 Py_DECREF(list);
2139 return NULL;
2140}
2141
Guido van Rossum8f950672007-09-10 16:53:45 +00002142
2143Py_LOCAL_INLINE(PyObject *)
2144split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2145{
2146 register Py_ssize_t i, j, count = 0;
2147 PyObject *str;
2148 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2149
2150 if (list == NULL)
2151 return NULL;
2152
2153 for (i = j = 0; i < len; ) {
2154 /* find a token */
2155 while (i < len && ISSPACE(s[i]))
2156 i++;
2157 j = i;
2158 while (i < len && !ISSPACE(s[i]))
2159 i++;
2160 if (j < i) {
2161 if (maxcount-- <= 0)
2162 break;
2163 SPLIT_ADD(s, j, i);
2164 while (i < len && ISSPACE(s[i]))
2165 i++;
2166 j = i;
2167 }
2168 }
2169 if (j < len) {
2170 SPLIT_ADD(s, j, len);
2171 }
2172 FIX_PREALLOC_SIZE(list);
2173 return list;
2174
2175 onError:
2176 Py_DECREF(list);
2177 return NULL;
2178}
2179
Neal Norwitz6968b052007-02-27 19:02:19 +00002180PyDoc_STRVAR(split__doc__,
Guido van Rossum8f950672007-09-10 16:53:45 +00002181"B.split([sep [, maxsplit]]) -> list of bytes\n\
Neal Norwitz6968b052007-02-27 19:02:19 +00002182\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00002183Return a list of the bytes in the string B, using sep as the delimiter.\n\
2184If sep is not given, B is split on ASCII whitespace charcters\n\
2185(space, tab, return, newline, formfeed, vertical tab).\n\
2186If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00002187
2188static PyObject *
2189bytes_split(PyBytesObject *self, PyObject *args)
2190{
2191 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
Guido van Rossum8f950672007-09-10 16:53:45 +00002192 Py_ssize_t maxsplit = -1, count = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00002193 const char *s = PyBytes_AS_STRING(self), *sub;
Guido van Rossum8f950672007-09-10 16:53:45 +00002194 PyObject *list, *str, *subobj = Py_None;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00002195 Py_buffer vsub;
Neal Norwitz6968b052007-02-27 19:02:19 +00002196#ifdef USE_FAST
2197 Py_ssize_t pos;
2198#endif
2199
Guido van Rossum8f950672007-09-10 16:53:45 +00002200 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Neal Norwitz6968b052007-02-27 19:02:19 +00002201 return NULL;
2202 if (maxsplit < 0)
2203 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum8f950672007-09-10 16:53:45 +00002204
2205 if (subobj == Py_None)
2206 return split_whitespace(s, len, maxsplit);
2207
2208 if (_getbuffer(subobj, &vsub) < 0)
Neal Norwitz6968b052007-02-27 19:02:19 +00002209 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002210 sub = vsub.buf;
2211 n = vsub.len;
Neal Norwitz6968b052007-02-27 19:02:19 +00002212
2213 if (n == 0) {
2214 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum8f950672007-09-10 16:53:45 +00002215 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002216 return NULL;
2217 }
Guido van Rossum8f950672007-09-10 16:53:45 +00002218 if (n == 1)
Neal Norwitz6968b052007-02-27 19:02:19 +00002219 return split_char(s, len, sub[0], maxsplit);
2220
2221 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum8f950672007-09-10 16:53:45 +00002222 if (list == NULL) {
2223 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002224 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002225 }
Neal Norwitz6968b052007-02-27 19:02:19 +00002226
2227#ifdef USE_FAST
2228 i = j = 0;
2229 while (maxsplit-- > 0) {
2230 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
2231 if (pos < 0)
2232 break;
2233 j = i+pos;
2234 SPLIT_ADD(s, i, j);
2235 i = j + n;
2236 }
2237#else
2238 i = j = 0;
2239 while ((j+n <= len) && (maxsplit-- > 0)) {
2240 for (; j+n <= len; j++) {
2241 if (Py_STRING_MATCH(s, j, sub, n)) {
2242 SPLIT_ADD(s, i, j);
2243 i = j = j + n;
2244 break;
2245 }
2246 }
2247 }
2248#endif
2249 SPLIT_ADD(s, i, len);
2250 FIX_PREALLOC_SIZE(list);
Guido van Rossum8f950672007-09-10 16:53:45 +00002251 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002252 return list;
2253
2254 onError:
2255 Py_DECREF(list);
Guido van Rossum8f950672007-09-10 16:53:45 +00002256 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002257 return NULL;
2258}
2259
2260PyDoc_STRVAR(partition__doc__,
2261"B.partition(sep) -> (head, sep, tail)\n\
2262\n\
2263Searches for the separator sep in B, and returns the part before it,\n\
2264the separator itself, and the part after it. If the separator is not\n\
2265found, returns B and two empty bytes.");
2266
2267static PyObject *
2268bytes_partition(PyBytesObject *self, PyObject *sep_obj)
2269{
2270 PyObject *bytesep, *result;
2271
2272 bytesep = PyBytes_FromObject(sep_obj);
2273 if (! bytesep)
2274 return NULL;
2275
2276 result = stringlib_partition(
2277 (PyObject*) self,
2278 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002279 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002280 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2281 );
2282
2283 Py_DECREF(bytesep);
2284 return result;
2285}
2286
2287PyDoc_STRVAR(rpartition__doc__,
2288"B.rpartition(sep) -> (tail, sep, head)\n\
2289\n\
2290Searches for the separator sep in B, starting at the end of B, and returns\n\
2291the part before it, the separator itself, and the part after it. If the\n\
2292separator is not found, returns two empty bytes and B.");
2293
2294static PyObject *
2295bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
2296{
2297 PyObject *bytesep, *result;
2298
2299 bytesep = PyBytes_FromObject(sep_obj);
2300 if (! bytesep)
2301 return NULL;
2302
2303 result = stringlib_rpartition(
2304 (PyObject*) self,
2305 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002306 bytesep,
Neal Norwitz6968b052007-02-27 19:02:19 +00002307 PyBytes_AS_STRING(bytesep), PyBytes_GET_SIZE(bytesep)
2308 );
2309
2310 Py_DECREF(bytesep);
2311 return result;
2312}
2313
2314Py_LOCAL_INLINE(PyObject *)
2315rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
2316{
2317 register Py_ssize_t i, j, count=0;
2318 PyObject *str;
2319 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2320
2321 if (list == NULL)
2322 return NULL;
2323
2324 i = j = len - 1;
2325 while ((i >= 0) && (maxcount-- > 0)) {
2326 for (; i >= 0; i--) {
2327 if (s[i] == ch) {
2328 SPLIT_ADD(s, i + 1, j + 1);
2329 j = i = i - 1;
2330 break;
2331 }
2332 }
2333 }
2334 if (j >= -1) {
2335 SPLIT_ADD(s, 0, j + 1);
2336 }
2337 FIX_PREALLOC_SIZE(list);
2338 if (PyList_Reverse(list) < 0)
2339 goto onError;
2340
2341 return list;
2342
2343 onError:
2344 Py_DECREF(list);
2345 return NULL;
2346}
2347
Guido van Rossum8f950672007-09-10 16:53:45 +00002348Py_LOCAL_INLINE(PyObject *)
2349rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
2350{
2351 register Py_ssize_t i, j, count = 0;
2352 PyObject *str;
2353 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2354
2355 if (list == NULL)
2356 return NULL;
2357
2358 for (i = j = len - 1; i >= 0; ) {
2359 /* find a token */
2360 while (i >= 0 && Py_UNICODE_ISSPACE(s[i]))
2361 i--;
2362 j = i;
2363 while (i >= 0 && !Py_UNICODE_ISSPACE(s[i]))
2364 i--;
2365 if (j > i) {
2366 if (maxcount-- <= 0)
2367 break;
2368 SPLIT_ADD(s, i + 1, j + 1);
2369 while (i >= 0 && Py_UNICODE_ISSPACE(s[i]))
2370 i--;
2371 j = i;
2372 }
2373 }
2374 if (j >= 0) {
2375 SPLIT_ADD(s, 0, j + 1);
2376 }
2377 FIX_PREALLOC_SIZE(list);
2378 if (PyList_Reverse(list) < 0)
2379 goto onError;
2380
2381 return list;
2382
2383 onError:
2384 Py_DECREF(list);
2385 return NULL;
2386}
2387
Neal Norwitz6968b052007-02-27 19:02:19 +00002388PyDoc_STRVAR(rsplit__doc__,
2389"B.rsplit(sep [,maxsplit]) -> list of bytes\n\
2390\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00002391Return a list of the sections in the byte B, using sep as the delimiter,\n\
2392starting at the end of the bytes and working to the front.\n\
2393If sep is not given, B is split on ASCII whitespace characters\n\
2394(space, tab, return, newline, formfeed, vertical tab).\n\
2395If maxsplit is given, at most maxsplit splits are done.");
Neal Norwitz6968b052007-02-27 19:02:19 +00002396
2397static PyObject *
2398bytes_rsplit(PyBytesObject *self, PyObject *args)
2399{
2400 Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
Guido van Rossum8f950672007-09-10 16:53:45 +00002401 Py_ssize_t maxsplit = -1, count = 0;
Neal Norwitz6968b052007-02-27 19:02:19 +00002402 const char *s = PyBytes_AS_STRING(self), *sub;
Guido van Rossum8f950672007-09-10 16:53:45 +00002403 PyObject *list, *str, *subobj = Py_None;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00002404 Py_buffer vsub;
Neal Norwitz6968b052007-02-27 19:02:19 +00002405
Guido van Rossum8f950672007-09-10 16:53:45 +00002406 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Neal Norwitz6968b052007-02-27 19:02:19 +00002407 return NULL;
2408 if (maxsplit < 0)
2409 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum8f950672007-09-10 16:53:45 +00002410
2411 if (subobj == Py_None)
2412 return rsplit_whitespace(s, len, maxsplit);
2413
2414 if (_getbuffer(subobj, &vsub) < 0)
Neal Norwitz6968b052007-02-27 19:02:19 +00002415 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002416 sub = vsub.buf;
2417 n = vsub.len;
Neal Norwitz6968b052007-02-27 19:02:19 +00002418
2419 if (n == 0) {
2420 PyErr_SetString(PyExc_ValueError, "empty separator");
Guido van Rossum8f950672007-09-10 16:53:45 +00002421 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002422 return NULL;
2423 }
2424 else if (n == 1)
2425 return rsplit_char(s, len, sub[0], maxsplit);
2426
2427 list = PyList_New(PREALLOC_SIZE(maxsplit));
Guido van Rossum8f950672007-09-10 16:53:45 +00002428 if (list == NULL) {
2429 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002430 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002431 }
Neal Norwitz6968b052007-02-27 19:02:19 +00002432
2433 j = len;
2434 i = j - n;
2435
2436 while ( (i >= 0) && (maxsplit-- > 0) ) {
2437 for (; i>=0; i--) {
2438 if (Py_STRING_MATCH(s, i, sub, n)) {
2439 SPLIT_ADD(s, i + n, j);
2440 j = i;
2441 i -= n;
2442 break;
2443 }
2444 }
2445 }
2446 SPLIT_ADD(s, 0, j);
2447 FIX_PREALLOC_SIZE(list);
2448 if (PyList_Reverse(list) < 0)
2449 goto onError;
Guido van Rossum8f950672007-09-10 16:53:45 +00002450 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002451 return list;
2452
2453onError:
2454 Py_DECREF(list);
Guido van Rossum8f950672007-09-10 16:53:45 +00002455 PyObject_ReleaseBuffer(subobj, &vsub);
Neal Norwitz6968b052007-02-27 19:02:19 +00002456 return NULL;
2457}
2458
2459PyDoc_STRVAR(extend__doc__,
2460"B.extend(iterable int) -> None\n\
2461\n\
2462Append all the elements from the iterator or sequence to the\n\
2463end of the bytes.");
2464static PyObject *
2465bytes_extend(PyBytesObject *self, PyObject *arg)
2466{
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002467 /* XXX(gps): The docstring says any iterable int will do but the
2468 * bytes_setslice code only accepts something supporting PEP 3118.
2469 * A list or tuple of 0 <= int <= 255 is supposed to work. */
2470 /* bug being tracked on: http://bugs.python.org/issue1283 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002471 if (bytes_setslice(self, Py_Size(self), Py_Size(self), arg) == -1)
Neal Norwitz6968b052007-02-27 19:02:19 +00002472 return NULL;
2473 Py_RETURN_NONE;
2474}
2475
2476
2477PyDoc_STRVAR(reverse__doc__,
2478"B.reverse() -> None\n\
2479\n\
2480Reverse the order of the values in bytes in place.");
2481static PyObject *
2482bytes_reverse(PyBytesObject *self, PyObject *unused)
2483{
2484 char swap, *head, *tail;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002485 Py_ssize_t i, j, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002486
2487 j = n / 2;
2488 head = self->ob_bytes;
2489 tail = head + n - 1;
2490 for (i = 0; i < j; i++) {
2491 swap = *head;
2492 *head++ = *tail;
2493 *tail-- = swap;
2494 }
2495
2496 Py_RETURN_NONE;
2497}
2498
2499PyDoc_STRVAR(insert__doc__,
2500"B.insert(index, int) -> None\n\
2501\n\
2502Insert a single item into the bytes before the given index.");
2503static PyObject *
2504bytes_insert(PyBytesObject *self, PyObject *args)
2505{
2506 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002507 Py_ssize_t where, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002508
2509 if (!PyArg_ParseTuple(args, "ni:insert", &where, &value))
2510 return NULL;
2511
2512 if (n == PY_SSIZE_T_MAX) {
2513 PyErr_SetString(PyExc_OverflowError,
2514 "cannot add more objects to bytes");
2515 return NULL;
2516 }
2517 if (value < 0 || value >= 256) {
2518 PyErr_SetString(PyExc_ValueError,
2519 "byte must be in range(0, 256)");
2520 return NULL;
2521 }
2522 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2523 return NULL;
2524
2525 if (where < 0) {
2526 where += n;
2527 if (where < 0)
2528 where = 0;
2529 }
2530 if (where > n)
2531 where = n;
Guido van Rossum4fc8ae42007-02-27 20:57:45 +00002532 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
Neal Norwitz6968b052007-02-27 19:02:19 +00002533 self->ob_bytes[where] = value;
2534
2535 Py_RETURN_NONE;
2536}
2537
2538PyDoc_STRVAR(append__doc__,
2539"B.append(int) -> None\n\
2540\n\
2541Append a single item to the end of the bytes.");
2542static PyObject *
2543bytes_append(PyBytesObject *self, PyObject *arg)
2544{
2545 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002546 Py_ssize_t n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002547
2548 if (! _getbytevalue(arg, &value))
2549 return NULL;
2550 if (n == PY_SSIZE_T_MAX) {
2551 PyErr_SetString(PyExc_OverflowError,
2552 "cannot add more objects to bytes");
2553 return NULL;
2554 }
2555 if (PyBytes_Resize((PyObject *)self, n + 1) < 0)
2556 return NULL;
2557
2558 self->ob_bytes[n] = value;
2559
2560 Py_RETURN_NONE;
2561}
2562
2563PyDoc_STRVAR(pop__doc__,
2564"B.pop([index]) -> int\n\
2565\n\
2566Remove and return a single item from the bytes. If no index\n\
2567argument is give, will pop the last value.");
2568static PyObject *
2569bytes_pop(PyBytesObject *self, PyObject *args)
2570{
2571 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002572 Py_ssize_t where = -1, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002573
2574 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2575 return NULL;
2576
2577 if (n == 0) {
2578 PyErr_SetString(PyExc_OverflowError,
2579 "cannot pop an empty bytes");
2580 return NULL;
2581 }
2582 if (where < 0)
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002583 where += Py_Size(self);
2584 if (where < 0 || where >= Py_Size(self)) {
Neal Norwitz6968b052007-02-27 19:02:19 +00002585 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2586 return NULL;
2587 }
2588
2589 value = self->ob_bytes[where];
2590 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2591 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2592 return NULL;
2593
2594 return PyInt_FromLong(value);
2595}
2596
2597PyDoc_STRVAR(remove__doc__,
2598"B.remove(int) -> None\n\
2599\n\
2600Remove the first occurance of a value in bytes");
2601static PyObject *
2602bytes_remove(PyBytesObject *self, PyObject *arg)
2603{
2604 int value;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002605 Py_ssize_t where, n = Py_Size(self);
Neal Norwitz6968b052007-02-27 19:02:19 +00002606
2607 if (! _getbytevalue(arg, &value))
2608 return NULL;
2609
2610 for (where = 0; where < n; where++) {
2611 if (self->ob_bytes[where] == value)
2612 break;
2613 }
2614 if (where == n) {
2615 PyErr_SetString(PyExc_ValueError, "value not found in bytes");
2616 return NULL;
2617 }
2618
2619 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2620 if (PyBytes_Resize((PyObject *)self, n - 1) < 0)
2621 return NULL;
2622
2623 Py_RETURN_NONE;
2624}
2625
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002626/* XXX These two helpers could be optimized if argsize == 1 */
2627
Neal Norwitz2bad9702007-08-27 06:19:22 +00002628static Py_ssize_t
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002629lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2630 void *argptr, Py_ssize_t argsize)
2631{
2632 Py_ssize_t i = 0;
2633 while (i < mysize && memchr(argptr, myptr[i], argsize))
2634 i++;
2635 return i;
2636}
2637
Neal Norwitz2bad9702007-08-27 06:19:22 +00002638static Py_ssize_t
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002639rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2640 void *argptr, Py_ssize_t argsize)
2641{
2642 Py_ssize_t i = mysize - 1;
2643 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2644 i--;
2645 return i + 1;
2646}
2647
2648PyDoc_STRVAR(strip__doc__,
Guido van Rossum8f950672007-09-10 16:53:45 +00002649"B.strip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002650\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00002651Strip leading and trailing bytes contained in the argument.\n\
2652If the argument is omitted, strip ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002653static PyObject *
Guido van Rossum8f950672007-09-10 16:53:45 +00002654bytes_strip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002655{
2656 Py_ssize_t left, right, mysize, argsize;
2657 void *myptr, *argptr;
Guido van Rossum8f950672007-09-10 16:53:45 +00002658 PyObject *arg = Py_None;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00002659 Py_buffer varg;
Guido van Rossum8f950672007-09-10 16:53:45 +00002660 if (!PyArg_ParseTuple(args, "|O:strip", &arg))
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002661 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002662 if (arg == Py_None) {
2663 argptr = "\t\n\r\f\v ";
2664 argsize = 6;
2665 }
2666 else {
2667 if (_getbuffer(arg, &varg) < 0)
2668 return NULL;
2669 argptr = varg.buf;
2670 argsize = varg.len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002671 }
2672 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002673 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002674 left = lstrip_helper(myptr, mysize, argptr, argsize);
Guido van Rossumeb29e9a2007-08-08 21:55:33 +00002675 if (left == mysize)
2676 right = left;
2677 else
2678 right = rstrip_helper(myptr, mysize, argptr, argsize);
Guido van Rossum8f950672007-09-10 16:53:45 +00002679 if (arg != Py_None)
2680 PyObject_ReleaseBuffer(arg, &varg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002681 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2682}
2683
2684PyDoc_STRVAR(lstrip__doc__,
Guido van Rossum8f950672007-09-10 16:53:45 +00002685"B.lstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002686\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00002687Strip leading bytes contained in the argument.\n\
2688If the argument is omitted, strip leading ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002689static PyObject *
Guido van Rossum8f950672007-09-10 16:53:45 +00002690bytes_lstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002691{
2692 Py_ssize_t left, right, mysize, argsize;
2693 void *myptr, *argptr;
Guido van Rossum8f950672007-09-10 16:53:45 +00002694 PyObject *arg = Py_None;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00002695 Py_buffer varg;
Guido van Rossum8f950672007-09-10 16:53:45 +00002696 if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002697 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002698 if (arg == Py_None) {
2699 argptr = "\t\n\r\f\v ";
2700 argsize = 6;
2701 }
2702 else {
2703 if (_getbuffer(arg, &varg) < 0)
2704 return NULL;
2705 argptr = varg.buf;
2706 argsize = varg.len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002707 }
2708 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002709 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002710 left = lstrip_helper(myptr, mysize, argptr, argsize);
2711 right = mysize;
Guido van Rossum8f950672007-09-10 16:53:45 +00002712 if (arg != Py_None)
2713 PyObject_ReleaseBuffer(arg, &varg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002714 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2715}
2716
2717PyDoc_STRVAR(rstrip__doc__,
Guido van Rossum8f950672007-09-10 16:53:45 +00002718"B.rstrip([bytes]) -> bytes\n\
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002719\n\
Guido van Rossum8f950672007-09-10 16:53:45 +00002720Strip trailing bytes contained in the argument.\n\
2721If the argument is omitted, strip trailing ASCII whitespace.");
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002722static PyObject *
Guido van Rossum8f950672007-09-10 16:53:45 +00002723bytes_rstrip(PyBytesObject *self, PyObject *args)
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002724{
2725 Py_ssize_t left, right, mysize, argsize;
2726 void *myptr, *argptr;
Guido van Rossum8f950672007-09-10 16:53:45 +00002727 PyObject *arg = Py_None;
Travis E. Oliphant8ae62b62007-09-23 02:00:13 +00002728 Py_buffer varg;
Guido van Rossum8f950672007-09-10 16:53:45 +00002729 if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002730 return NULL;
Guido van Rossum8f950672007-09-10 16:53:45 +00002731 if (arg == Py_None) {
2732 argptr = "\t\n\r\f\v ";
2733 argsize = 6;
2734 }
2735 else {
2736 if (_getbuffer(arg, &varg) < 0)
2737 return NULL;
2738 argptr = varg.buf;
2739 argsize = varg.len;
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002740 }
2741 myptr = self->ob_bytes;
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002742 mysize = Py_Size(self);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002743 left = 0;
2744 right = rstrip_helper(myptr, mysize, argptr, argsize);
Guido van Rossum8f950672007-09-10 16:53:45 +00002745 if (arg != Py_None)
2746 PyObject_ReleaseBuffer(arg, &varg);
Guido van Rossumad7d8d12007-04-13 01:39:34 +00002747 return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
2748}
Neal Norwitz6968b052007-02-27 19:02:19 +00002749
Guido van Rossumd624f182006-04-24 13:47:05 +00002750PyDoc_STRVAR(decode_doc,
2751"B.decode([encoding[,errors]]) -> unicode obect.\n\
2752\n\
2753Decodes B using the codec registered for encoding. encoding defaults\n\
2754to the default encoding. errors may be given to set a different error\n\
2755handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2756a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2757as well as any other name registerd with codecs.register_error that is\n\
2758able to handle UnicodeDecodeErrors.");
2759
2760static PyObject *
2761bytes_decode(PyObject *self, PyObject *args)
Guido van Rossumb6f1fdc2007-04-12 22:49:52 +00002762{
Guido van Rossumd624f182006-04-24 13:47:05 +00002763 const char *encoding = NULL;
2764 const char *errors = NULL;
2765
2766 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2767 return NULL;
2768 if (encoding == NULL)
2769 encoding = PyUnicode_GetDefaultEncoding();
2770 return PyCodec_Decode(self, encoding, errors);
2771}
2772
Guido van Rossuma0867f72006-05-05 04:34:18 +00002773PyDoc_STRVAR(alloc_doc,
2774"B.__alloc__() -> int\n\
2775\n\
2776Returns the number of bytes actually allocated.");
2777
2778static PyObject *
2779bytes_alloc(PyBytesObject *self)
2780{
2781 return PyInt_FromSsize_t(self->ob_alloc);
2782}
2783
Guido van Rossum20188312006-05-05 15:15:40 +00002784PyDoc_STRVAR(join_doc,
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002785"B.join(iterable_of_bytes) -> bytes\n\
Guido van Rossum20188312006-05-05 15:15:40 +00002786\n\
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002787Concatenates any number of bytes objects, with B in between each pair.\n\
2788Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
Guido van Rossum20188312006-05-05 15:15:40 +00002789
2790static PyObject *
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002791bytes_join(PyBytesObject *self, PyObject *it)
Guido van Rossum20188312006-05-05 15:15:40 +00002792{
2793 PyObject *seq;
Martin v. Löwis5d7428b2007-07-21 18:47:48 +00002794 Py_ssize_t mysize = Py_Size(self);
Guido van Rossum20188312006-05-05 15:15:40 +00002795 Py_ssize_t i;
2796 Py_ssize_t n;
2797 PyObject **items;
2798 Py_ssize_t totalsize = 0;
2799 PyObject *result;
2800 char *dest;
2801
2802 seq = PySequence_Fast(it, "can only join an iterable");
2803 if (seq == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002804 return NULL;
Guido van Rossum20188312006-05-05 15:15:40 +00002805 n = PySequence_Fast_GET_SIZE(seq);
2806 items = PySequence_Fast_ITEMS(seq);
2807
2808 /* Compute the total size, and check that they are all bytes */
2809 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002810 PyObject *obj = items[i];
2811 if (!PyBytes_Check(obj)) {
2812 PyErr_Format(PyExc_TypeError,
2813 "can only join an iterable of bytes "
2814 "(item %ld has type '%.100s')",
Guido van Rossum3cf5b1e2006-07-27 21:53:35 +00002815 /* XXX %ld isn't right on Win64 */
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00002816 (long)i, Py_Type(obj)->tp_name);
Georg Brandlb3f568f2007-02-27 08:49:18 +00002817 goto error;
2818 }
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002819 if (i > 0)
2820 totalsize += mysize;
Georg Brandlb3f568f2007-02-27 08:49:18 +00002821 totalsize += PyBytes_GET_SIZE(obj);
2822 if (totalsize < 0) {
2823 PyErr_NoMemory();
2824 goto error;
2825 }
Guido van Rossum20188312006-05-05 15:15:40 +00002826 }
2827
2828 /* Allocate the result, and copy the bytes */
2829 result = PyBytes_FromStringAndSize(NULL, totalsize);
2830 if (result == NULL)
Georg Brandlb3f568f2007-02-27 08:49:18 +00002831 goto error;
Guido van Rossum20188312006-05-05 15:15:40 +00002832 dest = PyBytes_AS_STRING(result);
2833 for (i = 0; i < n; i++) {
Georg Brandlb3f568f2007-02-27 08:49:18 +00002834 PyObject *obj = items[i];
2835 Py_ssize_t size = PyBytes_GET_SIZE(obj);
Guido van Rossumcd6ae682007-05-09 19:52:16 +00002836 if (i > 0) {
2837 memcpy(dest, self->ob_bytes, mysize);
2838 dest += mysize;
2839 }
Georg Brandlb3f568f2007-02-27 08:49:18 +00002840 memcpy(dest, PyBytes_AS_STRING(obj), size);
2841 dest += size;
Guido van Rossum20188312006-05-05 15:15:40 +00002842 }
2843
2844 /* Done */
2845 Py_DECREF(seq);
2846 return result;
2847
2848 /* Error handling */
2849 error:
2850 Py_DECREF(seq);
2851 return NULL;
2852}
2853
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002854PyDoc_STRVAR(fromhex_doc,
2855"bytes.fromhex(string) -> bytes\n\
2856\n\
2857Create a bytes object from a string of hexadecimal numbers.\n\
2858Spaces between two numbers are accepted. Example:\n\
2859bytes.fromhex('10 2030') -> bytes([0x10, 0x20, 0x30]).");
2860
/* Convert one hexadecimal digit character to its numeric value.
 *
 * c is the character code to convert.  Returns 0..15 for '0'-'9',
 * 'a'-'f' and 'A'-'F', and -1 for every other value, including values
 * outside the range of a single byte.  Only plain comparisons are used,
 * so the result does not depend on the locale. */
static int
hex_digit_to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
2874
2875static PyObject *
2876bytes_fromhex(PyObject *cls, PyObject *args)
2877{
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002878 PyObject *newbytes, *hexobj;
2879 char *buf;
2880 unsigned char *hex;
2881 Py_ssize_t byteslen, i, j;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002882 int top, bot;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002883 Py_buffer vhex;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002884
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002885 if (!PyArg_ParseTuple(args, "O:fromhex", &hexobj))
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002886 return NULL;
2887
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002888 if (_getbuffer(hexobj, &vhex) < 0)
2889 return NULL;
2890
2891 byteslen = vhex.len / 2; /* max length if there are no spaces */
2892 hex = vhex.buf;
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002893
2894 newbytes = PyBytes_FromStringAndSize(NULL, byteslen);
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002895 if (!newbytes) {
2896 PyObject_ReleaseBuffer(hexobj, &vhex);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002897 return NULL;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002898 }
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002899 buf = PyBytes_AS_STRING(newbytes);
2900
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002901 for (i = j = 0; i < vhex.len; i += 2) {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002902 /* skip over spaces in the input */
2903 while (Py_CHARMASK(hex[i]) == ' ')
2904 i++;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002905 if (i >= vhex.len)
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002906 break;
2907 top = hex_digit_to_int(Py_CHARMASK(hex[i]));
2908 bot = hex_digit_to_int(Py_CHARMASK(hex[i+1]));
2909 if (top == -1 || bot == -1) {
2910 PyErr_Format(PyExc_ValueError,
2911 "non-hexadecimal number string '%c%c' found in "
2912 "fromhex() arg at position %zd",
2913 hex[i], hex[i+1], i);
2914 goto error;
2915 }
2916 buf[j++] = (top << 4) + bot;
2917 }
2918 if (PyBytes_Resize(newbytes, j) < 0)
2919 goto error;
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002920 PyObject_ReleaseBuffer(hexobj, &vhex);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002921 return newbytes;
2922
2923 error:
2924 Py_DECREF(newbytes);
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002925 PyObject_ReleaseBuffer(hexobj, &vhex);
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002926 return NULL;
2927}
2928
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002929PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2930
2931static PyObject *
2932bytes_reduce(PyBytesObject *self)
2933{
Martin v. Löwis9c121062007-08-05 20:26:11 +00002934 PyObject *latin1;
2935 if (self->ob_bytes)
Guido van Rossuma74184e2007-08-29 04:05:57 +00002936 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
2937 Py_Size(self), NULL);
Martin v. Löwis9c121062007-08-05 20:26:11 +00002938 else
Guido van Rossuma74184e2007-08-29 04:05:57 +00002939 latin1 = PyUnicode_FromString("");
Martin v. Löwis9c121062007-08-05 20:26:11 +00002940 return Py_BuildValue("(O(Ns))", Py_Type(self), latin1, "latin-1");
Guido van Rossum0dd32e22007-04-11 05:40:58 +00002941}
2942
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002943static PySequenceMethods bytes_as_sequence = {
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002944 (lenfunc)bytes_length, /* sq_length */
2945 (binaryfunc)bytes_concat, /* sq_concat */
2946 (ssizeargfunc)bytes_repeat, /* sq_repeat */
2947 (ssizeargfunc)bytes_getitem, /* sq_item */
2948 0, /* sq_slice */
2949 (ssizeobjargproc)bytes_setitem, /* sq_ass_item */
2950 0, /* sq_ass_slice */
Guido van Rossumd624f182006-04-24 13:47:05 +00002951 (objobjproc)bytes_contains, /* sq_contains */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00002952 (binaryfunc)bytes_iconcat, /* sq_inplace_concat */
2953 (ssizeargfunc)bytes_irepeat, /* sq_inplace_repeat */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002954};
2955
2956static PyMappingMethods bytes_as_mapping = {
Guido van Rossumd624f182006-04-24 13:47:05 +00002957 (lenfunc)bytes_length,
Thomas Wouters376446d2006-12-19 08:30:14 +00002958 (binaryfunc)bytes_subscript,
2959 (objobjargproc)bytes_ass_subscript,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002960};
2961
2962static PyBufferProcs bytes_as_buffer = {
Travis E. Oliphantb99f7622007-08-18 11:21:56 +00002963 (getbufferproc)bytes_getbuffer,
2964 (releasebufferproc)bytes_releasebuffer,
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00002965};
2966
2967static PyMethodDef
2968bytes_methods[] = {
Neal Norwitz6968b052007-02-27 19:02:19 +00002969 {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
2970 {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
2971 {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
2972 {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
2973 {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
2974 {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, endswith__doc__},
2975 {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2976 startswith__doc__},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00002977 {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2978 {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2979 {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2980 _Py_capitalize__doc__},
2981 {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2982 _Py_swapcase__doc__},
2983 {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,_Py_islower__doc__},
2984 {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,_Py_isupper__doc__},
2985 {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,_Py_isspace__doc__},
2986 {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,_Py_isdigit__doc__},
2987 {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,_Py_istitle__doc__},
2988 {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,_Py_isalpha__doc__},
2989 {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,_Py_isalnum__doc__},
Neal Norwitz6968b052007-02-27 19:02:19 +00002990 {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
2991 {"translate", (PyCFunction)bytes_translate, METH_VARARGS, translate__doc__},
2992 {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
2993 {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__},
2994 {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
2995 {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
2996 {"extend", (PyCFunction)bytes_extend, METH_O, extend__doc__},
2997 {"insert", (PyCFunction)bytes_insert, METH_VARARGS, insert__doc__},
2998 {"append", (PyCFunction)bytes_append, METH_O, append__doc__},
2999 {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
3000 {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
3001 {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
Guido van Rossum8f950672007-09-10 16:53:45 +00003002 {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
3003 {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
3004 {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
Guido van Rossumd624f182006-04-24 13:47:05 +00003005 {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00003006 {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003007 {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
3008 fromhex_doc},
Guido van Rossumcd6ae682007-05-09 19:52:16 +00003009 {"join", (PyCFunction)bytes_join, METH_O, join_doc},
Gregory P. Smith60d241f2007-10-16 06:31:30 +00003010 {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
3011 {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
3012 {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
3013 {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
3014 {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
3015 {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
3016 expandtabs__doc__},
3017 {"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
3018 splitlines__doc__},
Guido van Rossum0dd32e22007-04-11 05:40:58 +00003019 {"__reduce__", (PyCFunction)bytes_reduce, METH_NOARGS, reduce_doc},
Guido van Rossuma0867f72006-05-05 04:34:18 +00003020 {NULL}
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003021};
3022
3023PyDoc_STRVAR(bytes_doc,
3024"bytes([iterable]) -> new array of bytes.\n\
3025\n\
3026If an argument is given it must be an iterable yielding ints in range(256).");
3027
3028PyTypeObject PyBytes_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00003029 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003030 "bytes",
3031 sizeof(PyBytesObject),
3032 0,
Guido van Rossumd624f182006-04-24 13:47:05 +00003033 (destructor)bytes_dealloc, /* tp_dealloc */
3034 0, /* tp_print */
3035 0, /* tp_getattr */
3036 0, /* tp_setattr */
3037 0, /* tp_compare */
3038 (reprfunc)bytes_repr, /* tp_repr */
3039 0, /* tp_as_number */
3040 &bytes_as_sequence, /* tp_as_sequence */
3041 &bytes_as_mapping, /* tp_as_mapping */
Georg Brandlb3f568f2007-02-27 08:49:18 +00003042 0, /* tp_hash */
Guido van Rossumd624f182006-04-24 13:47:05 +00003043 0, /* tp_call */
3044 (reprfunc)bytes_str, /* tp_str */
3045 PyObject_GenericGetAttr, /* tp_getattro */
3046 0, /* tp_setattro */
3047 &bytes_as_buffer, /* tp_as_buffer */
Georg Brandlb3f568f2007-02-27 08:49:18 +00003048 /* bytes is 'final' or 'sealed' */
Georg Brandl0b9b9e02007-02-27 08:40:54 +00003049 Py_TPFLAGS_DEFAULT, /* tp_flags */
Guido van Rossumd624f182006-04-24 13:47:05 +00003050 bytes_doc, /* tp_doc */
3051 0, /* tp_traverse */
3052 0, /* tp_clear */
3053 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
3054 0, /* tp_weaklistoffset */
3055 0, /* tp_iter */
3056 0, /* tp_iternext */
3057 bytes_methods, /* tp_methods */
3058 0, /* tp_members */
3059 0, /* tp_getset */
3060 0, /* tp_base */
3061 0, /* tp_dict */
3062 0, /* tp_descr_get */
3063 0, /* tp_descr_set */
3064 0, /* tp_dictoffset */
3065 (initproc)bytes_init, /* tp_init */
3066 PyType_GenericAlloc, /* tp_alloc */
3067 PyType_GenericNew, /* tp_new */
3068 PyObject_Del, /* tp_free */
Guido van Rossum4dfe8a12006-04-22 23:28:04 +00003069};